Merge remote v4/v5 features (multimodal chat input, layered generation, annotation detection) with local v3 features (dialog file upload, XLSX support, session fix)

Key resolutions:
- agent/nodes.py: Merged session_id exclusion fix with new persistable fields (ocr_extraction_result, annotation_result, layout_schema, ocr_elements)
- app.py: Adopted st-multimodal-chatinput for unified paste/drop/upload, removed custom JS paste bridge
- backend/file_parser.py: Kept local XLSX parser, added remote XLS/DOC parsers
- CLAUDE.md + CODE_GUIDE.md: Merged documentation from both branches

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-21 10:05:43 +08:00
22 changed files with 2114 additions and 507 deletions
+184 -271
View File
@@ -21,7 +21,6 @@ import time
from pathlib import Path
import streamlit as st
import streamlit.components.v1 as components
from dotenv import load_dotenv
load_dotenv()
@@ -81,6 +80,9 @@ NODE_LABELS = {
"handle_undo": "↩ 撤销操作",
"handle_reset": "🔄 重置会话",
"save_session": "💾 保存会话",
"generate_skeleton": "🏗 生成骨架",
"refine_layout": "📐 精调布局",
"map_fields": "🏷 映射字段",
}
INTENT_LABELS = {
@@ -107,6 +109,86 @@ def _render_jrxml(jrxml: str, max_lines: int = 30):
st.code(preview, language="xml")
# ---- 共享文件上传处理 ----
def _process_uploaded_file(uploaded_file, suffix: str) -> dict:
    """Parse one uploaded file and, for images/PDFs, run A4 layout analysis.

    The upload is spooled to a temporary file, parsed with ``parse_file`` and,
    for image/PDF suffixes, classified with ``analyze_layout``.  The temporary
    file is deleted before returning unless it is handed back to the caller.

    Args:
        uploaded_file: Object exposing ``.name`` and ``.getvalue()``; the
            value returned by ``getvalue()`` is written to the temp file.
        suffix: File extension including the leading dot.  It is compared
            against lower-case literals, so pass it lower-cased.

    Returns:
        ``{"name": str, "text": str, "type": str, "tmp_path": str | None}``.
        ``tmp_path`` is set only for image suffixes whose parse result reports
        a ``method`` other than ``"metadata_only"``/``None``; the caller then
        owns that file and is responsible for deleting it.

    Side effects:
        For a ``full_a4`` layout, stores ``layout_schema`` and
        ``ocr_elements`` in ``st.session_state.agent_state``.
    """
    import tempfile

    from backend.file_parser import parse_file
    from backend.layout_analyzer import analyze_layout

    image_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")

    # delete=False: the parsers reopen the file by path after this handle closes.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
        tmp_path = tmp.name

    keep_temp = False
    try:
        result = parse_file(tmp_path, suffix)
        parsed_text = result["text"]
        parsed_type = result["file_type"]
        # A parse method other than "metadata_only"/None means OCR produced output.
        has_ocr = result.get("method") not in ("metadata_only", None)

        if suffix in image_suffixes or suffix == ".pdf":
            # Images and PDFs get A4 template layout analysis.
            layout = analyze_layout(tmp_path)
            tt = layout.get("template_type", "unknown")
            current_jrxml = st.session_state.agent_state.get("current_jrxml", "")
            if tt == "full_a4":
                parsed_text = layout["description"]
                parsed_type = "a4_template"
                # Keep the layout schema for the layered generation nodes.
                from backend.layout_analyzer import extract_layout_schema

                schema = extract_layout_schema(layout)
                st.session_state.agent_state["layout_schema"] = schema
                st.session_state.agent_state["ocr_elements"] = layout.get("rows", [])
            elif tt == "partial_rows":
                parsed_type = "a4_partial"
                if current_jrxml.strip():
                    # Modify mode: try to match the rows against the current JRXML.
                    from backend.layout_analyzer import match_rows_to_jrxml

                    match = match_rows_to_jrxml(layout, current_jrxml)
                    parsed_text = (
                        f"[行片段修改] 上传图片包含 {layout['total_rows']} 行,"
                        f"视为 A4 报表的一部分。\n\n"
                        f"{match['description']}\n\n"
                        f"--- 行结构 ---\n{layout['description']}"
                    )
                else:
                    # No JRXML yet: treat the fragment like a template description.
                    parsed_text = layout["description"]
            else:
                # Any other template type: fall back to a size/ratio summary.
                img_w, img_h = layout["image_size"]
                ratio = layout["aspect_ratio"]
                if has_ocr:
                    # Fix: newline after the ratio so it does not run into the
                    # next sentence (matches the sibling branch below).
                    parsed_text = (
                        f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}\n"
                        f"未检测到 A4 报表结构,图片将被视为参考样式。\n"
                        f"请根据用户的文字描述生成报表。"
                    )
                else:
                    parsed_text = (
                        f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}\n"
                        f"⚠ OCR 引擎未安装,无法识别图片中的文字内容。\n"
                        f"请严格根据用户的文字描述来推断图片中的报表需求。\n"
                        f"(提示:如需图片文字识别,请运行 pip install paddleocr"
                    )
                parsed_type = "image_reference"
        elif suffix in (".docx", ".xlsx", ".xls", ".doc"):
            # ".pdf" is handled by the branch above and can never reach here.
            parsed_type = suffix.lstrip(".")

        # Only OCR-capable image uploads hand their temp file to the caller.
        keep_temp = suffix in image_suffixes and has_ocr
    finally:
        # Fix: previously the temp file leaked whenever it was not returned
        # (tmp_path=None gives callers nothing to delete) or when parsing raised.
        if not keep_temp:
            Path(tmp_path).unlink(missing_ok=True)

    return {
        "name": uploaded_file.name,
        "text": parsed_text,
        "type": parsed_type,
        "tmp_path": tmp_path if keep_temp else None,
    }
# ---- URL 参数 ----
query_params = st.query_params
url_session_id = query_params.get("session_id", "")
@@ -118,11 +200,6 @@ if "graph" not in st.session_state:
st.session_state.graph = build_graph()
if "pending_action" not in st.session_state:
st.session_state.pending_action = None
if "chat_attached_files" not in st.session_state:
st.session_state.chat_attached_files = [] # [{name, text, type, path}]
if "_paste_processed_ts" not in st.session_state:
st.session_state._paste_processed_ts = 0
if "agent_state" not in st.session_state:
if url_session_id:
data = load_session(url_session_id)
@@ -220,7 +297,8 @@ def run_agent(user_input: str):
f"找到 {len(ctx)} 字符参考模板" if ctx else "未匹配到模板"
)
elif node_name in ("generate", "modify_jrxml", "correct_jrxml"):
elif node_name in ("generate", "modify_jrxml", "correct_jrxml",
"generate_skeleton", "refine_layout", "map_fields"):
jrxml = node_state.get("current_jrxml", "")
executed_nodes[-1]["detail"] = f"生成 {len(jrxml)} 字符 JRXML"
@@ -491,7 +569,8 @@ with st.sidebar:
uploaded = st.file_uploader(
"选择文件",
type=["png", "jpg", "jpeg", "bmp", "webp", "pdf", "docx", "xlsx", "txt", "csv", "json", "xml"],
type=["png", "jpg", "jpeg", "bmp", "webp", "pdf", "docx", "xlsx", "xls", "doc",
"txt", "csv", "json", "xml"],
accept_multiple_files=True,
key="file_uploader",
label_visibility="collapsed",
@@ -502,77 +581,21 @@ with st.sidebar:
# 去重
if any(f["name"] == uf.name for f in st.session_state.uploaded_files):
continue
import tempfile
from backend.file_parser import parse_file
from backend.layout_analyzer import analyze_layout
suffix = Path(uf.name).suffix.lower()
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
tmp.write(uf.getvalue())
tmp_path = tmp.name
result = _process_uploaded_file(uf, suffix)
result = parse_file(tmp_path, suffix)
# 对图片/PDF 进行 A4 模板布局分析
parsed_text = result["text"]
parsed_type = result["file_type"]
if suffix in (".png", ".jpg", ".jpeg", ".bmp", ".webp", ".pdf"):
layout = analyze_layout(tmp_path)
tt = layout.get("template_type", "unknown")
current_jrxml = st.session_state.agent_state.get("current_jrxml", "")
if tt == "full_a4":
parsed_text = layout["description"]
parsed_type = "a4_template"
elif tt == "partial_rows":
parsed_type = "a4_partial"
if current_jrxml.strip():
# 修改模式:尝试行匹配
from backend.layout_analyzer import match_rows_to_jrxml
match = match_rows_to_jrxml(layout, current_jrxml)
parsed_text = (
f"[行片段修改] 上传图片包含 {layout['total_rows']} 行,"
f"视为 A4 报表的一部分。\n\n"
f"{match['description']}\n\n"
f"--- 行结构 ---\n{layout['description']}"
)
else:
# 新建模式:按 A4 模板处理
parsed_text = layout["description"]
else:
# tt == "unknown": OCR 不可用或未检测到文字元素
has_ocr = result.get("method") not in ("metadata_only", None)
img_w, img_h = layout["image_size"]
ratio = layout["aspect_ratio"]
if has_ocr:
parsed_text = (
f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}"
f"未检测到 A4 报表结构,图片将被视为参考样式。\n"
f"请根据用户的文字描述生成报表。"
)
else:
parsed_text = (
f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}\n"
f"⚠ OCR 引擎未安装,无法识别图片中的文字内容。\n"
f"请严格根据用户的文字描述来推断图片中的报表需求。\n"
f"(提示:如需图片文字识别,请运行 pip install paddleocr"
)
parsed_type = "image_reference"
if parsed_text:
if result["text"]:
st.session_state.uploaded_files.append({
"name": uf.name,
"text": parsed_text,
"type": parsed_type,
"name": result["name"],
"text": result["text"],
"type": result["type"],
})
# 对图片类型,保存路径以便 OCR 字段提取(延迟到 process_input 阶段)
img_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")
if suffix in img_suffixes and result.get("method") not in ("metadata_only", None):
tmp_path = result["tmp_path"]
if tmp_path:
st.session_state.agent_state["uploaded_file_path"] = tmp_path
st.session_state.uploaded_temp_paths.append(tmp_path)
else:
Path(tmp_path).unlink(missing_ok=True)
if st.session_state.uploaded_files:
for i, f in enumerate(st.session_state.uploaded_files):
@@ -624,95 +647,6 @@ with st.sidebar:
key=f"dl_v{i}",
)
# ---- 文件粘贴/拖拽全局处理器 ----
st.html("""
<script>
(function() {
if (window.__jrxml_drop_paste) return;
window.__jrxml_drop_paste = true;
var MAX_SIZE = 20 * 1024 * 1024;
function handleFiles(files) {
var fd = []; var n = 0; var total = Math.min(files.length, 10);
for (var i = 0; i < total; i++) {
var f = files[i];
if (f.size > MAX_SIZE) { n++; continue; }
var reader = new FileReader();
reader.onload = (function(file) {
return function(e) {
fd.push({name: file.name, size: file.size, data: e.target.result});
n++;
if (n === total && fd.length) {
sessionStorage.setItem('_jrxml_paste', JSON.stringify({ts: Date.now(), files: fd}));
}
};
})(f);
reader.readAsDataURL(f);
}
}
document.addEventListener('paste', function(e) {
var fs = e.clipboardData && e.clipboardData.files;
if (fs && fs.length) { e.preventDefault(); handleFiles(fs); }
});
document.addEventListener('dragover', function(e) {
e.preventDefault(); e.dataTransfer.dropEffect = 'copy';
});
document.addEventListener('drop', function(e) {
var fs = e.dataTransfer && e.dataTransfer.files;
if (fs && fs.length) { e.preventDefault(); handleFiles(fs); }
});
})();
</script>
""")
# ---- 粘贴桥接组件 ----
paste_data = components.html("""
<script>
(function poll() {
var raw = sessionStorage.getItem('_jrxml_paste');
if (raw) {
try { sessionStorage.removeItem('_jrxml_paste'); Streamlit.setComponentValue(JSON.parse(raw)); return; }
catch(e) {}
}
setTimeout(poll, 800);
})();
</script>
""", height=0, default=0)
if paste_data and paste_data != 0:
pts = paste_data.get("ts", 0)
if pts > st.session_state._paste_processed_ts:
st.session_state._paste_processed_ts = pts
import base64, tempfile
from backend.file_parser import parse_file
from backend.layout_analyzer import analyze_layout
for fi in paste_data.get("files", []):
if not any(f["name"] == fi["name"] for f in st.session_state.chat_attached_files):
header, b64 = fi["data"].split(",", 1)
raw = base64.b64decode(b64)
suffix = Path(fi["name"]).suffix.lower()
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
tmp.write(raw)
tmp_path = tmp.name
result = parse_file(tmp_path, suffix)
text = result["text"]
file_type = result["file_type"]
img_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")
if suffix in img_suffixes and result.get("method") not in ("metadata_only", None):
try:
layout = analyze_layout(tmp_path)
tt = layout.get("template_type", "unknown")
if tt == "full_a4":
text = layout["description"]
file_type = "a4_template"
elif tt == "partial_rows":
file_type = "a4_partial"
except Exception:
pass
st.session_state.chat_attached_files.append({
"name": fi["name"], "text": text, "type": file_type, "path": tmp_path
})
st.rerun()
# ---- 标题 ----
st.title("📝 JRXML 报表生成器")
st.caption("用自然语言描述您的报表需求,我将逐步生成可用的 JRXML 模板。")
@@ -732,127 +666,106 @@ for msg in st.session_state.messages:
else:
st.markdown(msg["content"])
# ---- 已附加文件预览 ----
if st.session_state.chat_attached_files:
n_files = len(st.session_state.chat_attached_files)
chip_cols = st.columns(min(n_files, 4))
files_to_remove = []
for i, f in enumerate(st.session_state.chat_attached_files):
with chip_cols[i % len(chip_cols)]:
c1, c2 = st.columns([5, 1])
with c1:
name = f["name"]
short_name = name[:16] + ("" if len(name) > 16 else "")
emoji_map = {"a4_template": "📷", "image": "🖼", "pdf": "📄", "docx": "📝", "xlsx": "📊"}
emoji = emoji_map.get(f["type"], "📎")
st.caption(f"{emoji} {short_name}")
with c2:
if st.button("", key=f"rm_chip_{i}"):
files_to_remove.append(i)
if files_to_remove:
for i in sorted(files_to_remove, reverse=True):
try:
Path(st.session_state.chat_attached_files[i]["path"]).unlink(missing_ok=True)
except Exception:
pass
st.session_state.chat_attached_files.pop(i)
st.rerun()
# ---- 聊天输入(支持粘贴/拖拽文件) ----
from st_multimodal_chatinput import multimodal_chatinput
import base64
import io
from pathlib import Path as _Path
# ---- 对话区域文件上传 ----
col_fu, col_hint = st.columns([5, 1])
with col_fu:
chat_uploads = st.file_uploader(
"附加文件",
type=["png", "jpg", "jpeg", "bmp", "webp", "pdf", "docx", "xlsx", "txt", "csv", "json", "xml"],
accept_multiple_files=True,
key="chat_file_uploader",
label_visibility="visible",
)
with col_hint:
st.caption("Ctrl+V 粘贴\n或拖拽到页面")
# MIME type -> file extension mapping.  Used to pick a suffix for chat/clipboard
# files whose name carries no extension, so they can still be routed to a parser.
MIME_TO_EXT = {
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/bmp": ".bmp",
    "image/webp": ".webp",
    "application/pdf": ".pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
    "application/vnd.ms-excel": ".xls",
    "application/msword": ".doc",
    "text/plain": ".txt",
    "text/csv": ".csv",
    "application/json": ".json",
    "text/xml": ".xml",
}
if chat_uploads:
newly_added = False
import tempfile
from backend.file_parser import parse_file
from backend.layout_analyzer import analyze_layout
for uf in chat_uploads:
if not any(f["name"] == uf.name for f in st.session_state.chat_attached_files):
suffix = Path(uf.name).suffix.lower()
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
tmp.write(uf.getvalue())
tmp_path = tmp.name
result = parse_file(tmp_path, suffix)
text = result["text"]
file_type = result["file_type"]
img_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")
if suffix in img_suffixes and result.get("method") not in ("metadata_only", None):
try:
layout = analyze_layout(tmp_path)
tt = layout.get("template_type", "unknown")
if tt == "full_a4":
text = layout["description"]
file_type = "a4_template"
elif tt == "partial_rows":
file_type = "a4_partial"
except Exception:
pass
st.session_state.chat_attached_files.append({
"name": uf.name, "text": text, "type": file_type, "path": tmp_path
})
newly_added = True
if newly_added:
st.session_state.chat_file_uploader = []
st.rerun()
chat_result = multimodal_chatinput()
if chat_result:
prompt = (chat_result.get("textInput") or "").strip()
chat_files = chat_result.get("uploadedFiles") or []
# ---- 聊天输入 ----
if prompt := st.chat_input("描述您的报表需求..."):
# 拼接对话区域附加文件的文本
file_texts = []
attached_info = []
for f in st.session_state.chat_attached_files:
file_texts.append(f"[附加文件: {f['name']} ({f['type']})]\n{f['text']}")
attached_info.append({"name": f["name"], "type": f["type"], "length": len(f["text"])})
# 处理聊天中上传/粘贴的文件
uploaded_texts = []
uploaded_files_info = []
# 同时拼接侧边栏上传的文件(向后兼容)
# 先收集侧边栏上传的文件
if st.session_state.get("uploaded_files"):
for f in st.session_state.uploaded_files:
file_texts.append(f"[上传文件: {f['name']}]\n{f['text']}")
attached_info.append({"name": f["name"], "type": f["type"], "length": len(f["text"])})
uploaded_texts.append(f"[上传文件: {f['name']}]\n{f['text']}")
uploaded_files_info.append({"name": f["name"], "type": f["type"], "length": len(f["text"])})
st.session_state.uploaded_files = []
if file_texts:
full_prompt = "\n\n".join(file_texts) + "\n\n---\n用户需求:\n" + prompt
else:
full_prompt = prompt
# 处理聊天中的文件
class _Base64File:
"""包装 base64 文件为类 UploadedFile 接口。"""
def __init__(self, name, data_bytes):
self.name = name
self._data = data_bytes
# 将第一个图片文件的路径传给 agent,供 OCR 字段精确提取
for f in st.session_state.chat_attached_files:
if f["type"] in ("image", "a4_template", "a4_partial"):
st.session_state.agent_state["uploaded_file_path"] = f["path"]
break
def getvalue(self):
return self._data
# 清理临时文件和状态
st.session_state.uploaded_files = []
for f in st.session_state.chat_attached_files:
for cf in chat_files:
name = cf.get("name", "clipboard_file")
mime = cf.get("type", "")
content_b64 = cf.get("content", "")
if not content_b64:
continue
try:
Path(f["path"]).unlink(missing_ok=True)
data = base64.b64decode(content_b64)
except Exception:
pass
st.session_state.chat_attached_files = []
continue
_app_log.info(
"收到用户输入",
extra={
"session_id": current_session_id,
"prompt_preview": prompt[:200],
"prompt_length": len(prompt),
"has_uploaded_files": bool(attached_info),
"uploaded_files": attached_info,
},
)
suffix = _Path(name).suffix.lower()
if not suffix and mime in MIME_TO_EXT:
suffix = MIME_TO_EXT[mime]
name = f"{_Path(name).stem}{suffix}"
st.session_state.messages.append({"role": "user", "content": prompt})
with st.chat_message("user"):
st.markdown(prompt)
run_agent(full_prompt)
st.rerun()
wrapper = _Base64File(name, data)
result = _process_uploaded_file(wrapper, suffix)
if result["text"]:
uploaded_texts.append(f"[上传文件: {result['name']}]\n{result['text']}")
uploaded_files_info.append({"name": result["name"], "type": result["type"], "length": len(result["text"])})
tmp_path = result["tmp_path"]
if tmp_path:
st.session_state.agent_state["uploaded_file_path"] = tmp_path
st.session_state.uploaded_temp_paths.append(tmp_path)
if prompt or uploaded_texts:
if uploaded_texts:
full_prompt = "\n\n".join(uploaded_texts)
if prompt:
full_prompt += "\n\n---\n用户需求:\n" + prompt
else:
full_prompt = prompt
displayed_prompt = prompt or "(已上传文件,未输入文字)"
_app_log.info(
"收到用户输入",
extra={
"session_id": current_session_id,
"prompt_preview": displayed_prompt[:200],
"prompt_length": len(full_prompt),
"has_uploaded_files": bool(uploaded_files_info),
"uploaded_files": uploaded_files_info,
},
)
st.session_state.messages.append({"role": "user", "content": displayed_prompt})
with st.chat_message("user"):
st.markdown(displayed_prompt)
run_agent(full_prompt)
st.rerun()