feat: 5-issue fix — OCR image parse bug + Vue frontend feature parity + streaming UX

Fix 1 (CRITICAL): file_parser.py suffix normalization ".jpg", api_server.py Path.suffix
Fix 2: Sidebar version history download, ProcessSection replaces old components
Fix 3: OCR content/position layer structured logging in agent/nodes.py
Fix 4: collapsible process sections with per-section stream routing + auto-fold
Fix 5: agent_complete total_duration_ms, SummaryCard duration display

- backend/file_parser.py: normalize suffix to always include leading dot
- api_server.py: step_index in node_start, total_duration_ms in agent_complete
- agent/nodes.py: _log_ocr_layers() for [内容层]/[位置层]/[合并] logging
- frontend: ProcessSection.vue (NEW), chat.ts sections model, Sidebar versions
- CLAUDE.md: updated component list and v6 changelog
2026-05-21 23:43:21 +08:00
parent 60e2f520ba
commit a364e1de81
9 changed files with 492 additions and 21 deletions
@@ -176,6 +176,9 @@ def process_input(state: AgentState) -> Dict:
                state["ocr_extraction_result"] = {"error": str(e)}
        state["uploaded_file_path"] = ""

+    # ── OCR 两层日志：内容层 + 位置层 ──
+    _log_ocr_layers(state)
+
    # 重置本轮请求字段
    state["retry_count"] = 0
    state["user_modification_request"] = user_input
@@ -532,6 +535,76 @@ def _format_ocr_context(state: AgentState) -> str:
    return "\n".join(parts)


def _log_ocr_layers(state: AgentState) -> None:
    """Log the OCR content layer, the position layer, and a merge summary.

    Reads ``ocr_extraction_result``, ``ocr_elements`` and ``layout_schema``
    from ``state`` and emits up to three INFO records on ``_node_log``:

    * ``[内容层]`` (content layer): OCR text-element and field counts.
    * ``[位置层]`` (position layer): layout schema columns, rows and regions.
    * ``[合并]`` (merge): which layers are present, with the pipeline label
      recorded for that combination.

    This helper is diagnostic only, so malformed entries (non-dict rows or
    fields, ``None`` collections, non-numeric row counts) are skipped rather
    than allowed to raise and abort the calling node.

    Args:
        state: Agent state mapping; only read, never modified here.
    """
    ocr_result = state.get("ocr_extraction_result")
    ocr_elements = state.get("ocr_elements", [])
    layout = state.get("layout_schema")

    # An extraction result counts only when it is a dict without an "error".
    ocr_ok = isinstance(ocr_result, dict) and not ocr_result.get("error")
    has_elements = isinstance(ocr_elements, list) and bool(ocr_elements)

    # -- Content layer: OCR text elements + extracted fields --
    content_parts = []
    if ocr_ok:
        total = ocr_result.get("total_elements", 0)
        fields = ocr_result.get("fields")
        if not isinstance(fields, (list, tuple)):
            # Missing, None, or otherwise unusable "fields" means no fields.
            fields = []
        non_empty = [
            f for f in fields
            if isinstance(f, dict) and f.get("field_value")
        ]
        if total or non_empty:
            content_parts.append(
                f"OCR 提取: {total} 个文本元素, {len(non_empty)} 个有效字段"
            )
    if has_elements:
        elem_count = 0
        for row in ocr_elements:
            # Rows that are not dicts, or that carry no element list,
            # contribute zero instead of raising.
            row_items = row.get("elements") if isinstance(row, dict) else None
            if isinstance(row_items, (list, tuple)):
                elem_count += len(row_items)
        content_parts.append(
            f"API 注入 OCR 元素: {len(ocr_elements)} 行, {elem_count} 个文本"
        )

    if content_parts:
        _node_log.info(
            "[内容层] " + " | ".join(content_parts),
            extra={"layer": "content", "phase": "ocr_extraction"},
        )

    # -- Position layer: layout schema (rows / columns / regions) --
    rows = layout.get("total_rows", 0) if isinstance(layout, dict) else 0
    # A None or non-numeric row count is treated as "no layout".
    has_layout = isinstance(rows, (int, float)) and rows > 0
    if has_layout:
        regions = layout.get("regions", {})
        region_names = list(regions) if isinstance(regions, dict) else []
        cols = layout.get("total_columns", 0)
        if region_names:
            regions_label = ", ".join(map(str, region_names))
        else:
            regions_label = "标题/表头/数据/表尾"
        _node_log.info(
            f"[位置层] 布局 schema: {cols} 列 × {rows} 行, 区域: {regions_label}",
            extra={
                "layer": "position",
                "phase": "layout_analysis",
                "columns": cols,
                "rows": rows,
                "regions": region_names,
                "a4_confidence": layout.get("a4_confidence", ""),
            },
        )

    # -- Merge: summary of which layers are available --
    has_content = ocr_ok or has_elements

    if has_content and has_layout:
        _node_log.info(
            "[合并] 内容层 + 位置层均已就绪 — "
            "注入 prompt: 骨架生成 → 精调布局 → 字段映射",
            extra={"layer": "merge", "pipeline": "skeleton→refine→map_fields"},
        )
    elif has_content:
        _node_log.info(
            "[合并] 仅有内容层 — 使用单阶段 generate（无布局 schema）",
            extra={"layer": "merge", "pipeline": "generate_only"},
        )
    elif has_layout:
        _node_log.info(
            "[合并] 仅有位置层 — 使用布局 schema 指导生成",
            extra={"layer": "merge", "pipeline": "layout_only"},
        )
+
+
@log_node("retrieve")
 def retrieve(state: AgentState) -> Dict:
    """在 ChromaDB + 错误知识库中搜索相关的 JRXML 模板和组件。"""