fix: nodes.py 调用 detect_annotations 前将 bbox 从 [x_min,y_min,x_max,y_max] 转为 {x,y,w,h}

annotation_detector._correlate_with_ocr 期望 bbox 格式为 {x,y,w,h}，但 OcrTextElement.to_dict() 返回的是 [x_min,y_min,x_max,y_max]。Bug3 的根因在 nodes.py，而非 layout_analyzer。
2026-05-25 22:24:29 +08:00
parent c9344a2715
commit 963c5e41c8
1 changed files with 19 additions and 4 deletions
@@ -155,7 +155,18 @@ def process_input(state: AgentState) -> Dict:
                    if elements:
                        try:
                            from backend.annotation_detector import detect_annotations
-                            ann_result = detect_annotations(uploaded_path, elements)
+                            elem_dicts = []
+                            for e in elements:
+                                d = e.to_dict() if hasattr(e, "to_dict") else (e if isinstance(e, dict) else {"text": str(e), "bbox": [], "confidence": 0})
+                                # annotation_detector 期望 bbox 为 {x,y,w,h}，但 OcrTextElement.to_dict() 返回 [x_min,y_min,x_max,y_max]
+                                b = d.get("bbox", [])
+                                if isinstance(b, (list, tuple)) and len(b) == 4:
+                                    d["bbox"] = {"x": b[0], "y": b[1], "w": b[2] - b[0], "h": b[3] - b[1]}
+                                elif isinstance(b, dict) and "x" not in b:
+                                # bbox 是不含 "x" 键的 dict：按整数下标 0–3 取值，并视为 [x_min,y_min,x_max,y_max] 换算成 {x,y,w,h}
+                                    d["bbox"] = {"x": b.get(0, 0), "y": b.get(1, 0), "w": b.get(2, 0) - b.get(0, 0), "h": b.get(3, 0) - b.get(1, 0)}
+                                elem_dicts.append(d)
+                            ann_result = detect_annotations(uploaded_path, elem_dicts)
                            if ann_result.get("total", 0) > 0:
                                state["annotation_result"] = ann_result
                                _node_log.info(
@@ -667,10 +678,14 @@ def _format_ocr_context(state: AgentState) -> str:
    if elements:
        parts.append("\n全部文本元素（含坐标）:")
        for e in elements:
-            bbox = e.get("bbox", {})
-            x, y, w, h = bbox.get("x", 0), bbox.get("y", 0), bbox.get("w", 0), bbox.get("h", 0)
+            bbox = e.get("bbox", [])
+            if isinstance(bbox, list) and len(bbox) >= 4:
+                x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
+                x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min
+            else:
+                x, y, w, h = 0, 0, 0, 0
            parts.append(
-                f"  [{x},{y} {w}×{h}] {e['text']} "
+                f"  [{x},{y} {w}×{h}] {e.get('text','')} "
                f"(置信度={e.get('confidence',0):.2f})"
            )