feat: v3 robustness upgrade — EasyOCR, failure recovery, minimum content check

- OCR: EasyOCR (primary, ch_sim+en) with PaddleOCR fallback for Windows compatibility
- Validation: _check_minimum_content() rejects empty-shell JRXML (no band/textField)
- Retry: MAX_RETRY 3→5; on exhaustion, records pending_failure_context for next-turn auto-injection
- Finalize: only saves jrxml_versions on pass, preserves last good final_jrxml on fail
- Extract JRXML: improved empty markdown block handling and XML fragment fallback
- UI: real-time node progress via placeholder updates, initial "analyzing" feedback
- UI: use agent_state (full) instead of node_state (partial) for summary card routing
- UI: unknown template_type now gives LLM meaningful image context instead of metadata
- Docs: updated CLAUDE.md and CODE_GUIDE.md to reflect all v3 changes

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-19 19:15:30 +08:00
parent 70614dff5e
commit 6467fd4ae5
9 changed files with 1297 additions and 51 deletions
@@ -65,17 +65,36 @@ def parse_file(file_path: str, file_type: str = "") -> dict:
 # ---------------------------------------------------------------------------

 def _parse_image(path: Path) -> dict:
-    """OCR 提取图片中的文字。"""
+    """OCR 提取图片中的文字。优先 EasyOCR，回退 PaddleOCR。"""
    try:
        img = PIL.Image.open(path)
        info = f"[图片: {img.size[0]}x{img.size[1]}, {img.mode}]"
    except Exception:
        info = "[图片: 无法读取元数据]"

-    # 尝试 PaddleOCR
+    # 优先 EasyOCR（Windows 兼容性更好）
+    try:
+        import easyocr
+        import numpy as np
+        reader = easyocr.Reader(["ch_sim", "en"], gpu=False, verbose=False)
+        result = reader.readtext(np.array(img))
+        lines = [text.strip() for (_, text, _) in result if text.strip()]
+        if lines:
+            return {
+                "text": f"{info}\n识别文本:\n" + "\n".join(lines),
+                "file_type": "image",
+                "method": "easyocr",
+                "error": None,
+            }
+    except ImportError:
+        pass
+    except Exception:
+        pass
+
+    # 回退 PaddleOCR
    try:
        from paddleocr import PaddleOCR
-        ocr = PaddleOCR(lang="ch", use_angle_cls=False, show_log=False)
+        ocr = PaddleOCR(lang="ch")
        result = ocr.ocr(str(path))
        lines = []
        if result and result[0]:
@@ -97,7 +116,7 @@ def _parse_image(path: Path) -> dict:

    # OCR 不可用 → 返回图片元信息 + 安装提示
    return {
-        "text": f"{info}\n(如需 OCR 文字识别，请安装: pip install paddleocr)",
+        "text": f"{info}\n(如需 OCR 文字识别，请安装: pip install easyocr)",
        "file_type": "image",
        "method": "metadata_only",
        "error": "OCR 引擎未安装，已返回图片元信息",
@@ -371,11 +371,47 @@ def _load_image(path: Path) -> Optional[PIL.Image.Image]:


 def _ocr_elements(img: PIL.Image.Image, file_path: str) -> list[dict]:
+    """OCR 提取图片中的文字元素（位置+内容）。优先 EasyOCR，回退 PaddleOCR。"""
+
+    # 优先 EasyOCR
+    try:
+        import easyocr
+        import numpy as np
+
+        reader = easyocr.Reader(["ch_sim", "en"], gpu=False, verbose=False)
+        result = reader.readtext(np.array(img))
+
+        elements = []
+        for (bbox, text, confidence) in result:
+            if not text.strip():
+                continue
+            xs = [p[0] for p in bbox]
+            ys = [p[1] for p in bbox]
+            x_min, x_max = min(xs), max(xs)
+            y_min, y_max = min(ys), max(ys)
+
+            elements.append({
+                "x": round(x_min, 1),
+                "y": round(y_min, 1),
+                "w": round(x_max - x_min, 1),
+                "h": round(y_max - y_min, 1),
+                "font_size": round(y_max - y_min, 1),
+                "text": text.strip(),
+            })
+
+        elements.sort(key=lambda e: (e["y"], e["x"]))
+        return elements
+    except ImportError:
+        pass
+    except Exception:
+        pass
+
+    # 回退 PaddleOCR
    try:
        from paddleocr import PaddleOCR
        import numpy as np

-        ocr = PaddleOCR(lang="ch", use_angle_cls=True, show_log=False)
+        ocr = PaddleOCR(lang="ch")
        result = ocr.ocr(np.array(img))

        elements = []
@@ -405,6 +441,8 @@ def _ocr_elements(img: PIL.Image.Image, file_path: str) -> list[dict]:

        elements.sort(key=lambda e: (e["y"], e["x"]))
        return elements
+    except ImportError:
+        pass
    except Exception:
        pass