fix: MAX_RETRY 5 + rolling continuation + namespace-aware JRXML extraction
- MAX_RETRY: 3→5 (graph.py:35, nodes.py:25), with env override
- Rolling continuation: _generate_with_continuation() auto-detects truncated JRXML and sends an anchor-based continuation (max 3 rounds)
- JRXML extraction: regex and end-tag matching are now namespace-prefix aware (ns0:jasperReport, ns:jasperReport, etc.)
- All 5 generation nodes refactored to use the continuation helper
- Tests updated: scenario1 accepts an ns-prefixed root; max_retry verifies graph termination
- stop_reason capture + WARNING log on max_tokens truncation
- Correction prompt now injects OCR context + layout schema
This commit is contained in:
+40
-14
@@ -109,19 +109,36 @@ class _LLMLoggingWrapper(_BaseLLM):
|
||||
resp_text = "".join(full)
|
||||
resp_len = len(resp_text)
|
||||
resp_preview = resp_text[:500]
|
||||
_llm_log.info(
|
||||
"LLM stream 完成",
|
||||
extra={
|
||||
"direction": "response",
|
||||
"model": self._model,
|
||||
"backend": self._backend,
|
||||
"caller": self._caller,
|
||||
"duration_ms": elapsed,
|
||||
"response_length": resp_len,
|
||||
"response_preview": resp_preview,
|
||||
"response": resp_text[:10000],
|
||||
},
|
||||
)
|
||||
stop_reason = getattr(self._inner, '_last_stop_reason', None)
|
||||
self._last_stop_reason = stop_reason
|
||||
if stop_reason == "max_tokens":
|
||||
_llm_log.warning(
|
||||
"LLM stream 截断 (max_tokens),输出可能不完整",
|
||||
extra={
|
||||
"direction": "response",
|
||||
"model": self._model,
|
||||
"backend": self._backend,
|
||||
"caller": self._caller,
|
||||
"duration_ms": elapsed,
|
||||
"response_length": resp_len,
|
||||
"stop_reason": stop_reason,
|
||||
},
|
||||
)
|
||||
else:
|
||||
_llm_log.info(
|
||||
"LLM stream 完成",
|
||||
extra={
|
||||
"direction": "response",
|
||||
"model": self._model,
|
||||
"backend": self._backend,
|
||||
"caller": self._caller,
|
||||
"duration_ms": elapsed,
|
||||
"response_length": resp_len,
|
||||
"response_preview": resp_preview,
|
||||
"response": resp_text[:10000],
|
||||
"stop_reason": stop_reason,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
elapsed = round((time.time() - t0) * 1000)
|
||||
_llm_log.error(
|
||||
@@ -166,11 +183,14 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
base_url = os.getenv("ANTHROPIC_BASE_URL") or os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic")
|
||||
model = os.getenv("LLM_MODEL", "MiniMax-M2.7")
|
||||
temperature = 0.1
|
||||
max_tokens = 4096
|
||||
max_tokens = 8192
|
||||
|
||||
client = Anthropic(api_key=api_key, base_url=base_url, timeout=120)
|
||||
|
||||
class MiniMaxLLM(_BaseLLM):
|
||||
def __init__(self):
|
||||
self._last_stop_reason = None
|
||||
|
||||
def invoke(self, prompt: str) -> Any:
|
||||
resp = client.messages.create(
|
||||
model=model,
|
||||
@@ -185,6 +205,7 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
return type("Response", (), {"content": ""})()
|
||||
|
||||
def stream(self, prompt: str):
|
||||
self._last_stop_reason = None
|
||||
with client.messages.stream(
|
||||
model=model,
|
||||
max_tokens=max_tokens,
|
||||
@@ -193,6 +214,11 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
||||
) as s:
|
||||
for text in s.text_stream:
|
||||
yield text
|
||||
try:
|
||||
final_msg = s.get_final_message()
|
||||
self._last_stop_reason = getattr(final_msg, 'stop_reason', None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def get_num_tokens(self, text: str) -> int:
|
||||
resp = client.messages.count_tokens(
|
||||
|
||||
Reference in New Issue
Block a user