fix: per-node max_tokens + validation 502 guard + correct_jrxml output validity

- backend/llm.py: per-node max_tokens via get_llm(max_tokens=N), LLM_MAX_TOKENS env var (default 8192)
- agent/nodes.py: the 5 generation nodes use max_tokens=32768; generate_skeleton retries with max_tokens=65536
- agent/nodes.py: fix ns:field regex (<field → <[\w:]*field) to handle namespace prefixes
- agent/nodes.py: fix correct_jrxml never writing back to state["current_jrxml"]
- agent/nodes.py: correct_jrxml rejects non-JRXML output (output that contains no <jasperReport tag)
- agent/nodes.py: _strip_continuation_wrapper strips markdown/prefixes from continuation rounds
- agent/nodes.py: _extract_jrxml iterates multiple markdown code blocks, skips fragments
- agent/graph.py: route_after_validate skips correction loop when service_unavailable
- agent/graph.py: route_after_save skips validation for empty JRXML
- backend/validation.py: returns service_unavailable: True for ConnectError and HTTP 5xx
- Docs: CLAUDE.md v14 changelog, README.md LLM_MAX_TOKENS, .env.example LLM_MAX_TOKENS
This commit is contained in:
2026-05-24 15:20:25 +08:00
parent e362f530ea
commit 4e14334030
8 changed files with 388 additions and 32 deletions
+18 -8
View File
@@ -156,8 +156,14 @@ class _LLMLoggingWrapper(_BaseLLM):
raise
def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
"""构造原始 LLM 实例,返回 (实例, model名, backend名)。"""
DEFAULT_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "8192"))
def _build_raw_llm(caller: str = "", max_tokens: int | None = None) -> tuple[_BaseLLM, str, str]:
"""构造原始 LLM 实例,返回 (实例, model名, backend名)。
max_tokens: 覆盖默认输出 token 数。None 使用 LLM_MAX_TOKENS 环境变量或 8192。
"""
backend = os.getenv("LLM_BACKEND", "cloud")
if backend == "local":
from langchain_ollama import ChatOllama
@@ -183,18 +189,19 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
base_url = os.getenv("ANTHROPIC_BASE_URL") or os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic")
model = os.getenv("LLM_MODEL", "MiniMax-M2.7")
temperature = 0.1
max_tokens = 8192
_default_max_tokens = max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS
client = Anthropic(api_key=api_key, base_url=base_url, timeout=120)
class MiniMaxLLM(_BaseLLM):
def __init__(self):
self._last_stop_reason = None
self._max_tokens = _default_max_tokens
def invoke(self, prompt: str) -> Any:
resp = client.messages.create(
model=model,
max_tokens=max_tokens,
max_tokens=self._max_tokens,
temperature=temperature,
messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
)
@@ -208,7 +215,7 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
self._last_stop_reason = None
with client.messages.stream(
model=model,
max_tokens=max_tokens,
max_tokens=self._max_tokens,
temperature=temperature,
messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
) as s:
@@ -250,9 +257,12 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
return OpenAIWrapper(), model, f"cloud/openai/{model}"
def get_llm(caller: str = "") -> _BaseLLM:
"""返回带日志的 LLM 实例。caller 用于标识调用来源(如 generate、classify_intent)。"""
inner, model, backend = _build_raw_llm(caller)
def get_llm(caller: str = "", max_tokens: int | None = None) -> _BaseLLM:
"""Return an LLM instance wrapped with logging. caller identifies the call site (e.g. generate, classify_intent).
max_tokens: overrides the default output token count. Intended for nodes that need large outputs, such as skeleton generation.
"""
inner, model, backend = _build_raw_llm(caller, max_tokens=max_tokens)
return _LLMLoggingWrapper(inner, model=model, backend=backend, caller=caller)