diff --git a/agent/nodes.py b/agent/nodes.py index c678c39..d95d44a 100644 --- a/agent/nodes.py +++ b/agent/nodes.py @@ -155,7 +155,18 @@ def process_input(state: AgentState) -> Dict: if elements: try: from backend.annotation_detector import detect_annotations - ann_result = detect_annotations(uploaded_path, elements) + elem_dicts = [] + for e in elements: + d = e.to_dict() if hasattr(e, "to_dict") else (e if isinstance(e, dict) else {"text": str(e), "bbox": [], "confidence": 0}) + # annotation_detector 期望 bbox 为 {x,y,w,h},但 OcrTextElement.to_dict() 返回 [x_min,y_min,x_max,y_max] + b = d.get("bbox", []) + if isinstance(b, (list, tuple)) and len(b) == 4: + d["bbox"] = {"x": b[0], "y": b[1], "w": b[2] - b[0], "h": b[3] - b[1]} + elif isinstance(b, dict) and "x" not in b: + # 已经是 [x,y,w,h] 形式的 list 但被当成 dict 的情况 + d["bbox"] = {"x": b.get(0, 0), "y": b.get(1, 0), "w": b.get(2, 0) - b.get(0, 0), "h": b.get(3, 0) - b.get(1, 0)} + elem_dicts.append(d) + ann_result = detect_annotations(uploaded_path, elem_dicts) if ann_result.get("total", 0) > 0: state["annotation_result"] = ann_result _node_log.info( @@ -667,10 +678,14 @@ def _format_ocr_context(state: AgentState) -> str: if elements: parts.append("\n全部文本元素(含坐标):") for e in elements: - bbox = e.get("bbox", {}) - x, y, w, h = bbox.get("x", 0), bbox.get("y", 0), bbox.get("w", 0), bbox.get("h", 0) + bbox = e.get("bbox", []) + if isinstance(bbox, list) and len(bbox) >= 4: + x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3] + x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min + else: + x, y, w, h = 0, 0, 0, 0 parts.append( - f" [{x},{y} {w}×{h}] {e['text']} " + f" [{x},{y} {w}×{h}] {e.get('text','')} " f"(置信度={e.get('confidence',0):.2f})" )