fix: MAX_RETRY 5 + rolling continuation + namespace-aware JRXML extraction

- MAX_RETRY: 3→5 (graph.py:35, nodes.py:25) with env override
- Rolling continuation: _generate_with_continuation() auto-detects truncated JRXML and sends anchor-based continuation, max 3 rounds
- JRXML extraction: regex/end-tag now namespace-prefix aware (ns0:jasperReport, ns:jasperReport, etc.)
- All 5 generation nodes refactored to use continuation helper
- Tests updated: scenario1 accepts ns-prefixed root, max_retry verifies graph termination
- stop_reason capture + WARNING log on max_tokens truncation
- Correction prompt now injects OCR context + layout schema
2026-05-23 10:58:46 +08:00
parent 83e801a0b8
commit 1210b926c3
5 changed files with 187 additions and 50 deletions
@@ -109,19 +109,36 @@ class _LLMLoggingWrapper(_BaseLLM):
            resp_text = "".join(full)
            resp_len = len(resp_text)
            resp_preview = resp_text[:500]
-            _llm_log.info(
-                "LLM stream 完成",
-                extra={
-                    "direction": "response",
-                    "model": self._model,
-                    "backend": self._backend,
-                    "caller": self._caller,
-                    "duration_ms": elapsed,
-                    "response_length": resp_len,
-                    "response_preview": resp_preview,
-                    "response": resp_text[:10000],
-                },
-            )
+            stop_reason = getattr(self._inner, '_last_stop_reason', None)
+            self._last_stop_reason = stop_reason
+            if stop_reason == "max_tokens":
+                _llm_log.warning(
+                    "LLM stream 截断 (max_tokens)，输出可能不完整",
+                    extra={
+                        "direction": "response",
+                        "model": self._model,
+                        "backend": self._backend,
+                        "caller": self._caller,
+                        "duration_ms": elapsed,
+                        "response_length": resp_len,
+                        "stop_reason": stop_reason,
+                    },
+                )
+            else:
+                _llm_log.info(
+                    "LLM stream 完成",
+                    extra={
+                        "direction": "response",
+                        "model": self._model,
+                        "backend": self._backend,
+                        "caller": self._caller,
+                        "duration_ms": elapsed,
+                        "response_length": resp_len,
+                        "response_preview": resp_preview,
+                        "response": resp_text[:10000],
+                        "stop_reason": stop_reason,
+                    },
+                )
        except Exception as e:
            elapsed = round((time.time() - t0) * 1000)
            _llm_log.error(
@@ -166,11 +183,14 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
        base_url = os.getenv("ANTHROPIC_BASE_URL") or os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic")
        model = os.getenv("LLM_MODEL", "MiniMax-M2.7")
        temperature = 0.1
-        max_tokens = 4096
+        max_tokens = 8192

        client = Anthropic(api_key=api_key, base_url=base_url, timeout=120)

        class MiniMaxLLM(_BaseLLM):
+            def __init__(self):
+                self._last_stop_reason = None
+
            def invoke(self, prompt: str) -> Any:
                resp = client.messages.create(
                    model=model,
@@ -185,6 +205,7 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
                return type("Response", (), {"content": ""})()

            def stream(self, prompt: str):
+                self._last_stop_reason = None
                with client.messages.stream(
                    model=model,
                    max_tokens=max_tokens,
@@ -193,6 +214,11 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
                ) as s:
                    for text in s.text_stream:
                        yield text
+                    try:
+                        final_msg = s.get_final_message()
+                        self._last_stop_reason = getattr(final_msg, 'stop_reason', None)
+                    except Exception:
+                        pass

            def get_num_tokens(self, text: str) -> int:
                resp = client.messages.count_tokens(