fix: nodes.py 调用 detect_annotations 前将 bbox 从 [x_min,y_min,x_max,y_max] 转为 {x,y,w,h}

annotation_detector._correlate_with_ocr 期望 bbox 格式为 {x,y,w,h},
但 OcrTextElement.to_dict() 返回 [x_min,y_min,x_max,y_max]。
Bug3 的根因在 nodes.py 而非 layout_analyzer。
同时修正 _format_ocr_context:按 [x_min,y_min,x_max,y_max] 解析 bbox 并换算出 w/h,缺少 text 字段时以空字符串代替。
This commit is contained in:
2026-05-25 22:24:29 +08:00
parent c9344a2715
commit 963c5e41c8
+19 -4
View File
@@ -155,7 +155,18 @@ def process_input(state: AgentState) -> Dict:
if elements:
try:
from backend.annotation_detector import detect_annotations
ann_result = detect_annotations(uploaded_path, elements)
elem_dicts = []
for e in elements:
d = e.to_dict() if hasattr(e, "to_dict") else (e if isinstance(e, dict) else {"text": str(e), "bbox": [], "confidence": 0})
# annotation_detector 期望 bbox 为 {x,y,w,h},但 OcrTextElement.to_dict() 返回 [x_min,y_min,x_max,y_max]
b = d.get("bbox", [])
if isinstance(b, (list, tuple)) and len(b) == 4:
d["bbox"] = {"x": b[0], "y": b[1], "w": b[2] - b[0], "h": b[3] - b[1]}
elif isinstance(b, dict) and "x" not in b:
# bbox 是以整数下标 0–3 为键的 dict(值按 [x_min,y_min,x_max,y_max] 解释)的情况,同样转换为 {x,y,w,h}
d["bbox"] = {"x": b.get(0, 0), "y": b.get(1, 0), "w": b.get(2, 0) - b.get(0, 0), "h": b.get(3, 0) - b.get(1, 0)}
elem_dicts.append(d)
ann_result = detect_annotations(uploaded_path, elem_dicts)
if ann_result.get("total", 0) > 0:
state["annotation_result"] = ann_result
_node_log.info(
@@ -667,10 +678,14 @@ def _format_ocr_context(state: AgentState) -> str:
if elements:
parts.append("\n全部文本元素(含坐标):")
for e in elements:
bbox = e.get("bbox", {})
x, y, w, h = bbox.get("x", 0), bbox.get("y", 0), bbox.get("w", 0), bbox.get("h", 0)
bbox = e.get("bbox", [])
if isinstance(bbox, list) and len(bbox) >= 4:
x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min
else:
x, y, w, h = 0, 0, 0, 0
parts.append(
f" [{x},{y} {w}×{h}] {e['text']} "
f" [{x},{y} {w}×{h}] {e.get('text','')} "
f"(置信度={e.get('confidence',0):.2f})"
)