Merge remote v4/v5 features (multimodal chat input, layered generation, annotation detection) with local v3 features (dialog file upload, XLSX support, session fix)

Key resolutions:
- agent/nodes.py: Merged the session_id exclusion fix with the new persistable fields (ocr_extraction_result, annotation_result, layout_schema, ocr_elements)
- app.py: Adopted st-multimodal-chatinput for unified paste/drop/upload and removed the custom JS paste bridge
- backend/file_parser.py: Kept the local XLSX parser and added the remote XLS/DOC parsers
- CLAUDE.md + CODE_GUIDE.md: Merged documentation from both branches

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-21 10:05:43 +08:00
parent 87ead4fa6a 43a0542a11
commit 2befd44430
22 changed files with 2114 additions and 507 deletions
@@ -21,7 +21,6 @@ import time
 from pathlib import Path

 import streamlit as st
-import streamlit.components.v1 as components

 from dotenv import load_dotenv
 load_dotenv()
@@ -81,6 +80,9 @@ NODE_LABELS = {
    "handle_undo":        "↩ 撤销操作",
    "handle_reset":       "🔄 重置会话",
    "save_session":       "💾 保存会话",
+    "generate_skeleton": "🏗 生成骨架",
+    "refine_layout":     "📐 精调布局",
+    "map_fields":        "🏷 映射字段",
 }

 INTENT_LABELS = {
@@ -107,6 +109,86 @@ def _render_jrxml(jrxml: str, max_lines: int = 30):
    st.code(preview, language="xml")


+# ---- 共享文件上传处理 ----
+def _process_uploaded_file(uploaded_file, suffix: str) -> dict:
+    """Parse one uploaded file and run layout analysis on images/PDFs.
+
+    The upload is written to a temporary file, parsed with ``parse_file``
+    and, for image/PDF suffixes, passed to ``analyze_layout``.  When the
+    layout is classified as ``full_a4`` the extracted layout schema and the
+    layout rows are stored in ``st.session_state.agent_state`` under
+    ``layout_schema`` and ``ocr_elements``.
+
+    Args:
+        uploaded_file: Object exposing ``name`` and ``getvalue()`` (bytes).
+        suffix: File extension including the leading dot (callers pass it
+            lower-cased).
+
+    Returns:
+        ``{"name": str, "text": str, "type": str, "tmp_path": str | None}``.
+        ``tmp_path`` is set only for image files whose parse result reports
+        a method other than ``"metadata_only"``/``None``; in every other
+        case the temporary file is deleted before returning.
+
+    Raises:
+        Whatever ``parse_file`` / ``analyze_layout`` raise; the temporary
+        file is removed before the exception propagates.
+    """
+    import tempfile
+    from backend.file_parser import parse_file
+    from backend.layout_analyzer import analyze_layout
+
+    image_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")
+
+    # delete=False: the parsers reopen the file by path after this handle closes.
+    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
+        tmp.write(uploaded_file.getvalue())
+        tmp_path = tmp.name
+
+    keep_temp = False
+    try:
+        result = parse_file(tmp_path, suffix)
+        parsed_text = result["text"]
+        parsed_type = result["file_type"]
+        has_ocr = result.get("method") not in ("metadata_only", None)
+
+        # Images and PDFs additionally go through A4 template layout analysis.
+        if suffix in image_suffixes + (".pdf",):
+            layout = analyze_layout(tmp_path)
+            tt = layout.get("template_type", "unknown")
+            current_jrxml = st.session_state.agent_state.get("current_jrxml", "")
+
+            if tt == "full_a4":
+                parsed_text = layout["description"]
+                parsed_type = "a4_template"
+                # Store the layout schema for the layered generation nodes.
+                from backend.layout_analyzer import extract_layout_schema
+                schema = extract_layout_schema(layout)
+                st.session_state.agent_state["layout_schema"] = schema
+                st.session_state.agent_state["ocr_elements"] = layout.get("rows", [])
+            elif tt == "partial_rows":
+                parsed_type = "a4_partial"
+                if current_jrxml.strip():
+                    # A report already exists: try to match the rows against it.
+                    from backend.layout_analyzer import match_rows_to_jrxml
+                    match = match_rows_to_jrxml(layout, current_jrxml)
+                    parsed_text = (
+                        f"[行片段修改] 上传图片包含 {layout['total_rows']} 行，"
+                        f"视为 A4 报表的一部分。\n\n"
+                        f"{match['description']}\n\n"
+                        f"--- 行结构 ---\n{layout['description']}"
+                    )
+                else:
+                    # No report yet: fall back to the plain layout description.
+                    parsed_text = layout["description"]
+            else:
+                # Any other template type: the image is only a style reference.
+                img_w, img_h = layout["image_size"]
+                ratio = layout["aspect_ratio"]
+                if has_ocr:
+                    parsed_text = (
+                        f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}。"
+                        f"未检测到 A4 报表结构，图片将被视为参考样式。\n"
+                        f"请根据用户的文字描述生成报表。"
+                    )
+                else:
+                    parsed_text = (
+                        f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}。\n"
+                        f"⚠ OCR 引擎未安装，无法识别图片中的文字内容。\n"
+                        f"请严格根据用户的文字描述来推断图片中的报表需求。\n"
+                        f"（提示：如需图片文字识别，请运行 pip install paddleocr）"
+                    )
+                parsed_type = "image_reference"
+
+        # ".pdf" is already handled by the branch above, so it is not listed here.
+        elif suffix in (".docx", ".xlsx", ".xls", ".doc"):
+            parsed_type = suffix.lstrip(".")
+
+        # Only images with a usable parse method keep their temp file; callers
+        # record the returned path in session state for later use.
+        keep_temp = suffix in image_suffixes and has_ocr
+    finally:
+        # Remove the temp file unless its path is handed back to the caller;
+        # this also covers the case where parsing or layout analysis raised.
+        if not keep_temp:
+            Path(tmp_path).unlink(missing_ok=True)
+
+    return {
+        "name": uploaded_file.name,
+        "text": parsed_text,
+        "type": parsed_type,
+        "tmp_path": tmp_path,
+    }
+
+
 # ---- URL 参数 ----
 query_params = st.query_params
 url_session_id = query_params.get("session_id", "")
@@ -118,11 +200,6 @@ if "graph" not in st.session_state:
    st.session_state.graph = build_graph()
 if "pending_action" not in st.session_state:
    st.session_state.pending_action = None
-if "chat_attached_files" not in st.session_state:
-    st.session_state.chat_attached_files = []  # [{name, text, type, path}]
-if "_paste_processed_ts" not in st.session_state:
-    st.session_state._paste_processed_ts = 0
-
 if "agent_state" not in st.session_state:
    if url_session_id:
        data = load_session(url_session_id)
@@ -220,7 +297,8 @@ def run_agent(user_input: str):
                            f"找到 {len(ctx)} 字符参考模板" if ctx else "未匹配到模板"
                        )

-                    elif node_name in ("generate", "modify_jrxml", "correct_jrxml"):
+                    elif node_name in ("generate", "modify_jrxml", "correct_jrxml",
+                                       "generate_skeleton", "refine_layout", "map_fields"):
                        jrxml = node_state.get("current_jrxml", "")
                        executed_nodes[-1]["detail"] = f"生成 {len(jrxml)} 字符 JRXML"

@@ -491,7 +569,8 @@ with st.sidebar:

    uploaded = st.file_uploader(
        "选择文件",
-        type=["png", "jpg", "jpeg", "bmp", "webp", "pdf", "docx", "xlsx", "txt", "csv", "json", "xml"],
+        type=["png", "jpg", "jpeg", "bmp", "webp", "pdf", "docx", "xlsx", "xls", "doc",
+              "txt", "csv", "json", "xml"],
        accept_multiple_files=True,
        key="file_uploader",
        label_visibility="collapsed",
@@ -502,77 +581,21 @@ with st.sidebar:
            # 去重
            if any(f["name"] == uf.name for f in st.session_state.uploaded_files):
                continue
-            import tempfile
-            from backend.file_parser import parse_file
-            from backend.layout_analyzer import analyze_layout

            suffix = Path(uf.name).suffix.lower()
-            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
-                tmp.write(uf.getvalue())
-                tmp_path = tmp.name
+            result = _process_uploaded_file(uf, suffix)

-            result = parse_file(tmp_path, suffix)
-
-            # 对图片/PDF 进行 A4 模板布局分析
-            parsed_text = result["text"]
-            parsed_type = result["file_type"]
-            if suffix in (".png", ".jpg", ".jpeg", ".bmp", ".webp", ".pdf"):
-                layout = analyze_layout(tmp_path)
-                tt = layout.get("template_type", "unknown")
-                current_jrxml = st.session_state.agent_state.get("current_jrxml", "")
-
-                if tt == "full_a4":
-                    parsed_text = layout["description"]
-                    parsed_type = "a4_template"
-                elif tt == "partial_rows":
-                    parsed_type = "a4_partial"
-                    if current_jrxml.strip():
-                        # 修改模式：尝试行匹配
-                        from backend.layout_analyzer import match_rows_to_jrxml
-                        match = match_rows_to_jrxml(layout, current_jrxml)
-                        parsed_text = (
-                            f"[行片段修改] 上传图片包含 {layout['total_rows']} 行，"
-                            f"视为 A4 报表的一部分。\n\n"
-                            f"{match['description']}\n\n"
-                            f"--- 行结构 ---\n{layout['description']}"
-                        )
-                    else:
-                        # 新建模式：按 A4 模板处理
-                        parsed_text = layout["description"]
-                else:
-                    # tt == "unknown": OCR 不可用或未检测到文字元素
-                    has_ocr = result.get("method") not in ("metadata_only", None)
-                    img_w, img_h = layout["image_size"]
-                    ratio = layout["aspect_ratio"]
-                    if has_ocr:
-                        parsed_text = (
-                            f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}。"
-                            f"未检测到 A4 报表结构，图片将被视为参考样式。\n"
-                            f"请根据用户的文字描述生成报表。"
-                        )
-                    else:
-                        parsed_text = (
-                            f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}。\n"
-                            f"⚠ OCR 引擎未安装，无法识别图片中的文字内容。\n"
-                            f"请严格根据用户的文字描述来推断图片中的报表需求。\n"
-                            f"（提示：如需图片文字识别，请运行 pip install paddleocr）"
-                        )
-                    parsed_type = "image_reference"
-
-            if parsed_text:
+            if result["text"]:
                st.session_state.uploaded_files.append({
-                    "name": uf.name,
-                    "text": parsed_text,
-                    "type": parsed_type,
+                    "name": result["name"],
+                    "text": result["text"],
+                    "type": result["type"],
                })

-            # 对图片类型，保存路径以便 OCR 字段提取（延迟到 process_input 阶段）
-            img_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")
-            if suffix in img_suffixes and result.get("method") not in ("metadata_only", None):
+            tmp_path = result["tmp_path"]
+            if tmp_path:
                st.session_state.agent_state["uploaded_file_path"] = tmp_path
                st.session_state.uploaded_temp_paths.append(tmp_path)
-            else:
-                Path(tmp_path).unlink(missing_ok=True)

    if st.session_state.uploaded_files:
        for i, f in enumerate(st.session_state.uploaded_files):
@@ -624,95 +647,6 @@ with st.sidebar:
                    key=f"dl_v{i}",
                )

-# ---- 文件粘贴/拖拽全局处理器 ----
-st.html("""
-<script>
-(function() {
-    if (window.__jrxml_drop_paste) return;
-    window.__jrxml_drop_paste = true;
-    var MAX_SIZE = 20 * 1024 * 1024;
-    function handleFiles(files) {
-        var fd = []; var n = 0; var total = Math.min(files.length, 10);
-        for (var i = 0; i < total; i++) {
-            var f = files[i];
-            if (f.size > MAX_SIZE) { n++; continue; }
-            var reader = new FileReader();
-            reader.onload = (function(file) {
-                return function(e) {
-                    fd.push({name: file.name, size: file.size, data: e.target.result});
-                    n++;
-                    if (n === total && fd.length) {
-                        sessionStorage.setItem('_jrxml_paste', JSON.stringify({ts: Date.now(), files: fd}));
-                    }
-                };
-            })(f);
-            reader.readAsDataURL(f);
-        }
-    }
-    document.addEventListener('paste', function(e) {
-        var fs = e.clipboardData && e.clipboardData.files;
-        if (fs && fs.length) { e.preventDefault(); handleFiles(fs); }
-    });
-    document.addEventListener('dragover', function(e) {
-        e.preventDefault(); e.dataTransfer.dropEffect = 'copy';
-    });
-    document.addEventListener('drop', function(e) {
-        var fs = e.dataTransfer && e.dataTransfer.files;
-        if (fs && fs.length) { e.preventDefault(); handleFiles(fs); }
-    });
-})();
-</script>
-""")
-
-# ---- 粘贴桥接组件 ----
-paste_data = components.html("""
-<script>
-(function poll() {
-    var raw = sessionStorage.getItem('_jrxml_paste');
-    if (raw) {
-        try { sessionStorage.removeItem('_jrxml_paste'); Streamlit.setComponentValue(JSON.parse(raw)); return; }
-        catch(e) {}
-    }
-    setTimeout(poll, 800);
-})();
-</script>
-""", height=0, default=0)
-
-if paste_data and paste_data != 0:
-    pts = paste_data.get("ts", 0)
-    if pts > st.session_state._paste_processed_ts:
-        st.session_state._paste_processed_ts = pts
-        import base64, tempfile
-        from backend.file_parser import parse_file
-        from backend.layout_analyzer import analyze_layout
-        for fi in paste_data.get("files", []):
-            if not any(f["name"] == fi["name"] for f in st.session_state.chat_attached_files):
-                header, b64 = fi["data"].split(",", 1)
-                raw = base64.b64decode(b64)
-                suffix = Path(fi["name"]).suffix.lower()
-                with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
-                    tmp.write(raw)
-                    tmp_path = tmp.name
-                result = parse_file(tmp_path, suffix)
-                text = result["text"]
-                file_type = result["file_type"]
-                img_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")
-                if suffix in img_suffixes and result.get("method") not in ("metadata_only", None):
-                    try:
-                        layout = analyze_layout(tmp_path)
-                        tt = layout.get("template_type", "unknown")
-                        if tt == "full_a4":
-                            text = layout["description"]
-                            file_type = "a4_template"
-                        elif tt == "partial_rows":
-                            file_type = "a4_partial"
-                    except Exception:
-                        pass
-                st.session_state.chat_attached_files.append({
-                    "name": fi["name"], "text": text, "type": file_type, "path": tmp_path
-                })
-        st.rerun()
-
 # ---- 标题 ----
 st.title("📝 JRXML 报表生成器")
 st.caption("用自然语言描述您的报表需求，我将逐步生成可用的 JRXML 模板。")
@@ -732,127 +666,106 @@ for msg in st.session_state.messages:
        else:
            st.markdown(msg["content"])

-# ---- 已附加文件预览 ----
-if st.session_state.chat_attached_files:
-    n_files = len(st.session_state.chat_attached_files)
-    chip_cols = st.columns(min(n_files, 4))
-    files_to_remove = []
-    for i, f in enumerate(st.session_state.chat_attached_files):
-        with chip_cols[i % len(chip_cols)]:
-            c1, c2 = st.columns([5, 1])
-            with c1:
-                name = f["name"]
-                short_name = name[:16] + ("…" if len(name) > 16 else "")
-                emoji_map = {"a4_template": "📷", "image": "🖼", "pdf": "📄", "docx": "📝", "xlsx": "📊"}
-                emoji = emoji_map.get(f["type"], "📎")
-                st.caption(f"{emoji} {short_name}")
-            with c2:
-                if st.button("✕", key=f"rm_chip_{i}"):
-                    files_to_remove.append(i)
-    if files_to_remove:
-        for i in sorted(files_to_remove, reverse=True):
-            try:
-                Path(st.session_state.chat_attached_files[i]["path"]).unlink(missing_ok=True)
-            except Exception:
-                pass
-            st.session_state.chat_attached_files.pop(i)
-        st.rerun()
+# ---- 聊天输入（支持粘贴/拖拽文件） ----
+from st_multimodal_chatinput import multimodal_chatinput
+import base64
+import io
+from pathlib import Path as _Path

-# ---- 对话区域文件上传 ----
-col_fu, col_hint = st.columns([5, 1])
-with col_fu:
-    chat_uploads = st.file_uploader(
-        "附加文件",
-        type=["png", "jpg", "jpeg", "bmp", "webp", "pdf", "docx", "xlsx", "txt", "csv", "json", "xml"],
-        accept_multiple_files=True,
-        key="chat_file_uploader",
-        label_visibility="visible",
-    )
-with col_hint:
-    st.caption("Ctrl+V 粘贴\n或拖拽到页面")
+# MIME type → 文件扩展名映射（用于剪贴板粘贴无扩展名的文件）
+MIME_TO_EXT = {
+    "image/png": ".png",
+    "image/jpeg": ".jpg",
+    "image/bmp": ".bmp",
+    "image/webp": ".webp",
+    "application/pdf": ".pdf",
+    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
+    "application/vnd.ms-excel": ".xls",
+    "application/msword": ".doc",
+    "text/plain": ".txt",
+    "text/csv": ".csv",
+    "application/json": ".json",
+    "text/xml": ".xml",
+}

-if chat_uploads:
-    newly_added = False
-    import tempfile
-    from backend.file_parser import parse_file
-    from backend.layout_analyzer import analyze_layout
-    for uf in chat_uploads:
-        if not any(f["name"] == uf.name for f in st.session_state.chat_attached_files):
-            suffix = Path(uf.name).suffix.lower()
-            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
-                tmp.write(uf.getvalue())
-                tmp_path = tmp.name
-            result = parse_file(tmp_path, suffix)
-            text = result["text"]
-            file_type = result["file_type"]
-            img_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")
-            if suffix in img_suffixes and result.get("method") not in ("metadata_only", None):
-                try:
-                    layout = analyze_layout(tmp_path)
-                    tt = layout.get("template_type", "unknown")
-                    if tt == "full_a4":
-                        text = layout["description"]
-                        file_type = "a4_template"
-                    elif tt == "partial_rows":
-                        file_type = "a4_partial"
-                except Exception:
-                    pass
-            st.session_state.chat_attached_files.append({
-                "name": uf.name, "text": text, "type": file_type, "path": tmp_path
-            })
-            newly_added = True
-    if newly_added:
-        st.session_state.chat_file_uploader = []
-        st.rerun()
+chat_result = multimodal_chatinput()
+if chat_result:
+    prompt = (chat_result.get("textInput") or "").strip()
+    chat_files = chat_result.get("uploadedFiles") or []

-# ---- 聊天输入 ----
-if prompt := st.chat_input("描述您的报表需求..."):
-    # 拼接对话区域附加文件的文本
-    file_texts = []
-    attached_info = []
-    for f in st.session_state.chat_attached_files:
-        file_texts.append(f"[附加文件: {f['name']} ({f['type']})]\n{f['text']}")
-        attached_info.append({"name": f["name"], "type": f["type"], "length": len(f["text"])})
+    # 处理聊天中上传/粘贴的文件
+    uploaded_texts = []
+    uploaded_files_info = []

-    # 同时拼接侧边栏上传的文件（向后兼容）
+    # 先收集侧边栏已上传的文件
    if st.session_state.get("uploaded_files"):
        for f in st.session_state.uploaded_files:
-            file_texts.append(f"[上传文件: {f['name']}]\n{f['text']}")
-            attached_info.append({"name": f["name"], "type": f["type"], "length": len(f["text"])})
+            uploaded_texts.append(f"[上传文件: {f['name']}]\n{f['text']}")
+            uploaded_files_info.append({"name": f["name"], "type": f["type"], "length": len(f["text"])})
+        st.session_state.uploaded_files = []

-    if file_texts:
-        full_prompt = "\n\n".join(file_texts) + "\n\n---\n用户需求:\n" + prompt
-    else:
-        full_prompt = prompt
+    # 处理聊天中的文件
+    class _Base64File:
+        """包装 base64 文件为类 UploadedFile 接口。"""
+        def __init__(self, name, data_bytes):
+            self.name = name
+            self._data = data_bytes

-    # 将第一个图片文件的路径传给 agent，供 OCR 字段精确提取
-    for f in st.session_state.chat_attached_files:
-        if f["type"] in ("image", "a4_template", "a4_partial"):
-            st.session_state.agent_state["uploaded_file_path"] = f["path"]
-            break
+        def getvalue(self):
+            return self._data

-    # 清理临时文件和状态
-    st.session_state.uploaded_files = []
-    for f in st.session_state.chat_attached_files:
+    for cf in chat_files:
+        name = cf.get("name", "clipboard_file")
+        mime = cf.get("type", "")
+        content_b64 = cf.get("content", "")
+        if not content_b64:
+            continue
        try:
-            Path(f["path"]).unlink(missing_ok=True)
+            data = base64.b64decode(content_b64)
        except Exception:
-            pass
-    st.session_state.chat_attached_files = []
+            continue

-    _app_log.info(
-        "收到用户输入",
-        extra={
-            "session_id": current_session_id,
-            "prompt_preview": prompt[:200],
-            "prompt_length": len(prompt),
-            "has_uploaded_files": bool(attached_info),
-            "uploaded_files": attached_info,
-        },
-    )
+        suffix = _Path(name).suffix.lower()
+        if not suffix and mime in MIME_TO_EXT:
+            suffix = MIME_TO_EXT[mime]
+            name = f"{_Path(name).stem}{suffix}"

-    st.session_state.messages.append({"role": "user", "content": prompt})
-    with st.chat_message("user"):
-        st.markdown(prompt)
-    run_agent(full_prompt)
-    st.rerun()
+        wrapper = _Base64File(name, data)
+        result = _process_uploaded_file(wrapper, suffix)
+
+        if result["text"]:
+            uploaded_texts.append(f"[上传文件: {result['name']}]\n{result['text']}")
+            uploaded_files_info.append({"name": result["name"], "type": result["type"], "length": len(result["text"])})
+
+        tmp_path = result["tmp_path"]
+        if tmp_path:
+            st.session_state.agent_state["uploaded_file_path"] = tmp_path
+            st.session_state.uploaded_temp_paths.append(tmp_path)
+
+    if prompt or uploaded_texts:
+        if uploaded_texts:
+            full_prompt = "\n\n".join(uploaded_texts)
+            if prompt:
+                full_prompt += "\n\n---\n用户需求:\n" + prompt
+        else:
+            full_prompt = prompt
+
+        displayed_prompt = prompt or "(已上传文件，未输入文字)"
+
+        _app_log.info(
+            "收到用户输入",
+            extra={
+                "session_id": current_session_id,
+                "prompt_preview": displayed_prompt[:200],
+                "prompt_length": len(full_prompt),
+                "has_uploaded_files": bool(uploaded_files_info),
+                "uploaded_files": uploaded_files_info,
+            },
+        )
+
+        st.session_state.messages.append({"role": "user", "content": displayed_prompt})
+        with st.chat_message("user"):
+            st.markdown(displayed_prompt)
+        run_agent(full_prompt)
+        st.rerun()