fix: per-node max_tokens + validation 502 guard + correct_jrxml output validity

- backend/llm.py: per-node max_tokens via get_llm(max_tokens=N), LLM_MAX_TOKENS env var (default 8192)
- agent/nodes.py: 5 generation nodes use max_tokens=32768, generate_skeleton retries at 65536
- agent/nodes.py: fix ns:field regex (<field → <[\w:]*field) to handle namespace prefixes
- agent/nodes.py: fix correct_jrxml never writing back to state["current_jrxml"]
- agent/nodes.py: correct_jrxml rejects non-JRXML output (no <jasperReport tag)
- agent/nodes.py: _strip_continuation_wrapper strips markdown/prefixes from continuation rounds
- agent/nodes.py: _extract_jrxml iterates multiple markdown code blocks, skips fragments
- agent/graph.py: route_after_validate skips correction loop when service_unavailable
- agent/graph.py: route_after_save skips validation for empty JRXML
- backend/validation.py: returns service_unavailable: True for ConnectError and HTTP 5xx
- Docs: CLAUDE.md v14 changelog, README.md LLM_MAX_TOKENS, .env.example LLM_MAX_TOKENS
2026-05-24 15:20:25 +08:00
parent e362f530ea
commit 4e14334030
8 changed files with 388 additions and 32 deletions
@@ -156,8 +156,14 @@ class _LLMLoggingWrapper(_BaseLLM):
            raise


-def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
-    """构造原始 LLM 实例，返回 (实例, model名, backend名)。"""
+DEFAULT_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "8192"))
+
+
+def _build_raw_llm(caller: str = "", max_tokens: int | None = None) -> tuple[_BaseLLM, str, str]:
+    """构造原始 LLM 实例，返回 (实例, model名, backend名)。
+
+    max_tokens: 覆盖默认输出 token 数。None 使用 LLM_MAX_TOKENS 环境变量或 8192。
+    """
    backend = os.getenv("LLM_BACKEND", "cloud")
    if backend == "local":
        from langchain_ollama import ChatOllama
@@ -183,18 +189,19 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
        base_url = os.getenv("ANTHROPIC_BASE_URL") or os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic")
        model = os.getenv("LLM_MODEL", "MiniMax-M2.7")
        temperature = 0.1
-        max_tokens = 8192
+        _default_max_tokens = max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS

        client = Anthropic(api_key=api_key, base_url=base_url, timeout=120)

        class MiniMaxLLM(_BaseLLM):
            def __init__(self):
                self._last_stop_reason = None
+                self._max_tokens = _default_max_tokens

            def invoke(self, prompt: str) -> Any:
                resp = client.messages.create(
                    model=model,
-                    max_tokens=max_tokens,
+                    max_tokens=self._max_tokens,
                    temperature=temperature,
                    messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
                )
@@ -208,7 +215,7 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
                self._last_stop_reason = None
                with client.messages.stream(
                    model=model,
-                    max_tokens=max_tokens,
+                    max_tokens=self._max_tokens,
                    temperature=temperature,
                    messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
                ) as s:
@@ -250,9 +257,12 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
        return OpenAIWrapper(), model, f"cloud/openai/{model}"


-def get_llm(caller: str = "") -> _BaseLLM:
-    """返回带日志的 LLM 实例。caller 用于标识调用来源（如 generate、classify_intent）。"""
-    inner, model, backend = _build_raw_llm(caller)
+def get_llm(caller: str = "", max_tokens: int | None = None) -> _BaseLLM:
+    """返回带日志的 LLM 实例。caller 用于标识调用来源（如 generate、classify_intent）。
+
+    max_tokens: 覆盖默认输出 token 数。用于骨架生成等需要大量输出的节点。
+    """
+    inner, model, backend = _build_raw_llm(caller, max_tokens=max_tokens)
    return _LLMLoggingWrapper(inner, model=model, backend=backend, caller=caller)