fix: per-node max_tokens + validation 502 guard + correct_jrxml output validity
- backend/llm.py: per-node max_tokens via get_llm(max_tokens=N), LLM_MAX_TOKENS env var (default 8192)
- agent/nodes.py: 5 generation nodes use max_tokens=32768, generate_skeleton retries at 65536
- agent/nodes.py: fix ns:field regex (<field → <[\w:]*field) to handle namespace prefixes
- agent/nodes.py: fix correct_jrxml never writing back to state["current_jrxml"]
- agent/nodes.py: correct_jrxml rejects non-JRXML output (no <jasperReport tag)
- agent/nodes.py: _strip_continuation_wrapper strips markdown/prefixes from continuation rounds
- agent/nodes.py: _extract_jrxml iterates multiple markdown code blocks, skips fragments
- agent/graph.py: route_after_validate skips correction loop when service_unavailable
- agent/graph.py: route_after_save skips validation for empty JRXML
- backend/validation.py: returns service_unavailable: True for ConnectError and HTTP 5xx
- Docs: CLAUDE.md v14 changelog, README.md LLM_MAX_TOKENS, .env.example LLM_MAX_TOKENS
This commit is contained in:
+18
-8
@@ -156,8 +156,14 @@ class _LLMLoggingWrapper(_BaseLLM):
|
||||
raise
|
||||
|
||||
|
||||
def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
"""构造原始 LLM 实例,返回 (实例, model名, backend名)。"""
|
||||
# Default output-token limit: read from the LLM_MAX_TOKENS environment variable
# when this line runs, falling back to 8192 if it is unset. int() raises
# ValueError if the variable is set to a non-integer string.
DEFAULT_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "8192"))
|
||||
|
||||
|
||||
def _build_raw_llm(caller: str = "", max_tokens: int | None = None) -> tuple[_BaseLLM, str, str]:
|
||||
"""构造原始 LLM 实例,返回 (实例, model名, backend名)。
|
||||
|
||||
max_tokens: 覆盖默认输出 token 数。None 使用 LLM_MAX_TOKENS 环境变量或 8192。
|
||||
"""
|
||||
backend = os.getenv("LLM_BACKEND", "cloud")
|
||||
if backend == "local":
|
||||
from langchain_ollama import ChatOllama
|
||||
@@ -183,18 +189,19 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
base_url = os.getenv("ANTHROPIC_BASE_URL") or os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic")
|
||||
model = os.getenv("LLM_MODEL", "MiniMax-M2.7")
|
||||
temperature = 0.1
|
||||
max_tokens = 8192
|
||||
_default_max_tokens = max_tokens if max_tokens is not None else DEFAULT_MAX_TOKENS
|
||||
|
||||
client = Anthropic(api_key=api_key, base_url=base_url, timeout=120)
|
||||
|
||||
class MiniMaxLLM(_BaseLLM):
|
||||
def __init__(self):
|
||||
self._last_stop_reason = None
|
||||
self._max_tokens = _default_max_tokens
|
||||
|
||||
def invoke(self, prompt: str) -> Any:
|
||||
resp = client.messages.create(
|
||||
model=model,
|
||||
max_tokens=max_tokens,
|
||||
max_tokens=self._max_tokens,
|
||||
temperature=temperature,
|
||||
messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
|
||||
)
|
||||
@@ -208,7 +215,7 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
self._last_stop_reason = None
|
||||
with client.messages.stream(
|
||||
model=model,
|
||||
max_tokens=max_tokens,
|
||||
max_tokens=self._max_tokens,
|
||||
temperature=temperature,
|
||||
messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
|
||||
) as s:
|
||||
@@ -250,9 +257,12 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
return OpenAIWrapper(), model, f"cloud/openai/{model}"
|
||||
|
||||
|
||||
def get_llm(caller: str = "") -> _BaseLLM:
|
||||
"""返回带日志的 LLM 实例。caller 用于标识调用来源(如 generate、classify_intent)。"""
|
||||
inner, model, backend = _build_raw_llm(caller)
|
||||
def get_llm(caller: str = "", max_tokens: int | None = None) -> _BaseLLM:
    """Return an LLM instance wrapped with logging.

    caller: label identifying the call site (e.g. generate, classify_intent).
    max_tokens: overrides the default output token count; intended for nodes
        that need large outputs, such as skeleton generation.
    """
    raw_llm, model_name, backend_label = _build_raw_llm(caller, max_tokens=max_tokens)
    return _LLMLoggingWrapper(
        raw_llm,
        model=model_name,
        backend=backend_label,
        caller=caller,
    )
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user