fix: per-node max_tokens + validation 502 guard + correct_jrxml output validity
- backend/llm.py: per-node max_tokens via get_llm(max_tokens=N), LLM_MAX_TOKENS env var (default 8192)
- agent/nodes.py: 5 generation nodes use max_tokens=32768, generate_skeleton retries at 65536
- agent/nodes.py: fix ns:field regex (<field → <[\w:]*field) to handle namespace prefixes
- agent/nodes.py: fix correct_jrxml never writing back to state["current_jrxml"]
- agent/nodes.py: correct_jrxml rejects non-JRXML output (no <jasperReport tag)
- agent/nodes.py: _strip_continuation_wrapper strips markdown/prefixes from continuation rounds
- agent/nodes.py: _extract_jrxml iterates multiple markdown code blocks, skips fragments
- agent/graph.py: route_after_validate skips correction loop when service_unavailable
- agent/graph.py: route_after_save skips validation for empty JRXML
- backend/validation.py: returns service_unavailable: True for ConnectError and HTTP 5xx
- Docs: CLAUDE.md v14 changelog, README.md LLM_MAX_TOKENS, .env.example LLM_MAX_TOKENS
This commit is contained in:
+18
-8
@@ -156,8 +156,14 @@ class _LLMLoggingWrapper(_BaseLLM):
|
||||
raise
|
||||
|
||||
|
||||
def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
"""构造原始 LLM 实例,返回 (实例, model名, backend名)。"""
|
||||
DEFAULT_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "8192"))
|
||||
|
||||
|
||||
def _build_raw_llm(caller: str = "", max_tokens: int | None = None) -> tuple[_BaseLLM, str, str]:
|
||||
"""构造原始 LLM 实例,返回 (实例, model名, backend名)。
|
||||
|
||||
max_tokens: 覆盖默认输出 token 数。None 使用 LLM_MAX_TOKENS 环境变量或 8192。
|
||||
"""
|
||||
backend = os.getenv("LLM_BACKEND", "cloud")
|
||||
if backend == "local":
|
||||
from langchain_ollama import ChatOllama
|
||||
@@ -183,18 +189,19 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
base_url = os.getenv("ANTHROPIC_BASE_URL") or os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic")
|
||||
model = os.getenv("LLM_MODEL", "MiniMax-M2.7")
|
||||
temperature = 0.1
|
||||
max_tokens = 8192
|
||||
_default_max_tokens = max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS
|
||||
|
||||
client = Anthropic(api_key=api_key, base_url=base_url, timeout=120)
|
||||
|
||||
class MiniMaxLLM(_BaseLLM):
|
||||
def __init__(self):
|
||||
self._last_stop_reason = None
|
||||
self._max_tokens = _default_max_tokens
|
||||
|
||||
def invoke(self, prompt: str) -> Any:
|
||||
resp = client.messages.create(
|
||||
model=model,
|
||||
max_tokens=max_tokens,
|
||||
max_tokens=self._max_tokens,
|
||||
temperature=temperature,
|
||||
messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
|
||||
)
|
||||
@@ -208,7 +215,7 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
self._last_stop_reason = None
|
||||
with client.messages.stream(
|
||||
model=model,
|
||||
max_tokens=max_tokens,
|
||||
max_tokens=self._max_tokens,
|
||||
temperature=temperature,
|
||||
messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
|
||||
) as s:
|
||||
@@ -250,9 +257,12 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
return OpenAIWrapper(), model, f"cloud/openai/{model}"
|
||||
|
||||
|
||||
def get_llm(caller: str = "") -> _BaseLLM:
|
||||
"""返回带日志的 LLM 实例。caller 用于标识调用来源(如 generate、classify_intent)。"""
|
||||
inner, model, backend = _build_raw_llm(caller)
|
||||
def get_llm(caller: str = "", max_tokens: int | None = None) -> _BaseLLM:
|
||||
"""返回带日志的 LLM 实例。caller 用于标识调用来源(如 generate、classify_intent)。
|
||||
|
||||
max_tokens: 覆盖默认输出 token 数。用于骨架生成等需要大量输出的节点。
|
||||
"""
|
||||
inner, model, backend = _build_raw_llm(caller, max_tokens=max_tokens)
|
||||
return _LLMLoggingWrapper(inner, model=model, backend=backend, caller=caller)
|
||||
|
||||
|
||||
|
||||
+12
-2
@@ -4,6 +4,7 @@ import os
|
||||
|
||||
import httpx
|
||||
from dotenv import load_dotenv
|
||||
from httpx import ConnectError, HTTPStatusError
|
||||
|
||||
from backend.logger import get_logger
|
||||
|
||||
@@ -31,10 +32,19 @@ def validate_jrxml(jrxml_text: str) -> dict:
|
||||
},
|
||||
)
|
||||
return result
|
||||
except httpx.ConnectError:
|
||||
except ConnectError:
|
||||
error_msg = f"无法连接到验证服务 ({VALIDATION_URL})。是否正在运行?"
|
||||
_val_log.error("验证服务连接失败", extra={"error": error_msg, "url": VALIDATION_URL})
|
||||
return {"valid": False, "error": error_msg}
|
||||
return {"valid": False, "error": error_msg, "service_unavailable": True}
|
||||
except HTTPStatusError as e:
|
||||
status_code = e.response.status_code
|
||||
error_msg = f"验证服务返回错误 ({status_code}): {str(e)}"
|
||||
_val_log.error("验证请求异常", extra={"error": str(e), "url": VALIDATION_URL, "status_code": status_code})
|
||||
return {
|
||||
"valid": False,
|
||||
"error": error_msg,
|
||||
"service_unavailable": status_code >= 500,
|
||||
}
|
||||
except Exception as e:
|
||||
error_msg = f"验证请求失败: {str(e)}"
|
||||
_val_log.error("验证请求异常", extra={"error": str(e), "url": VALIDATION_URL})
|
||||
|
||||
Reference in New Issue
Block a user