diff --git a/agent/nodes.py b/agent/nodes.py
index c678c39..d95d44a 100644
--- a/agent/nodes.py
+++ b/agent/nodes.py
@@ -155,7 +155,18 @@ def process_input(state: AgentState) -> Dict:
                     if elements:
                         try:
                             from backend.annotation_detector import detect_annotations
-                            ann_result = detect_annotations(uploaded_path, elements)
+                            elem_dicts = []
+                            for e in elements:
+                                d = e.to_dict() if hasattr(e, "to_dict") else (e if isinstance(e, dict) else {"text": str(e), "bbox": [], "confidence": 0})
+                                # annotation_detector expects bbox as {x,y,w,h}, but OcrTextElement.to_dict() returns [x_min,y_min,x_max,y_max]
+                                b = d.get("bbox", [])
+                                if isinstance(b, (list, tuple)) and len(b) == 4:
+                                    d["bbox"] = {"x": b[0], "y": b[1], "w": b[2] - b[0], "h": b[3] - b[1]}
+                                elif isinstance(b, dict) and "x" not in b:
+                                    # dict without an "x" key: read integer keys 0-3 as x_min,y_min,x_max,y_max and convert to {x,y,w,h}
+                                    d["bbox"] = {"x": b.get(0, 0), "y": b.get(1, 0), "w": b.get(2, 0) - b.get(0, 0), "h": b.get(3, 0) - b.get(1, 0)}
+                                elem_dicts.append(d)
+                            ann_result = detect_annotations(uploaded_path, elem_dicts)
                             if ann_result.get("total", 0) > 0:
                                 state["annotation_result"] = ann_result
                                 _node_log.info(
@@ -667,10 +678,14 @@ def _format_ocr_context(state: AgentState) -> str:
     if elements:
         parts.append("\n全部文本元素（含坐标）:")
         for e in elements:
-            bbox = e.get("bbox", {})
-            x, y, w, h = bbox.get("x", 0), bbox.get("y", 0), bbox.get("w", 0), bbox.get("h", 0)
+            bbox = e.get("bbox", [])
+            if isinstance(bbox, list) and len(bbox) >= 4:
+                x_min, y_min, x_max, y_max = bbox[0], bbox[1], bbox[2], bbox[3]
+                x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min
+            else:
+                x, y, w, h = 0, 0, 0, 0
             parts.append(
-                f"  [{x},{y} {w}×{h}] {e['text']} "
+                f"  [{x},{y} {w}×{h}] {e.get('text','')} "
                 f"(置信度={e.get('confidence',0):.2f})"
             )