feat: 对话区域文件上传(粘贴/拖拽) + XLSX支持 + 会话切换无限循环修复

- 对话区域: st.file_uploader + 全局 paste/drop 事件监听 + sessionStorage 桥接
- 文件预览芯片: 上传后显示在对话区域,可逐文件移除
- OCR 双层解析全面接入: file_parser(文字) + ocr_extractor(字段提取)
- XLSX 解析: openpyxl 逐工作表/逐行读取
- 修复: create_session 强制写入 agent_state.session_id
- 修复: load_session_node 不再从磁盘覆盖 session_id
- 修复: 切换会话时通过 _last_switched_to 哨兵防止无限 rerun

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-20 12:04:02 +08:00
parent da79640259
commit 87ead4fa6a
6 changed files with 268 additions and 29 deletions
+32
View File
@@ -51,6 +51,7 @@ def parse_file(file_path: str, file_type: str = "") -> dict:
".webp": _parse_image,
".pdf": _parse_pdf,
".docx": _parse_docx,
".xlsx": _parse_xlsx,
}
parser = parsers.get(suffix)
@@ -195,6 +196,37 @@ def _parse_docx(path: Path) -> dict:
"error": "DOCX 解析需要安装 python-docx"}
def _parse_xlsx(path: Path) -> dict:
    """Extract the cell contents of an Excel (.xlsx) workbook as plain text.

    Every worksheet is read row by row. Cells are stringified (``None``
    becomes an empty string) and joined with ``" | "``. Rows whose cells are
    all blank are skipped, and worksheets left without any row are omitted.
    Each kept worksheet is introduced by a ``--- 工作表: <name> ---`` header
    line, and worksheets are separated by a blank line.

    This function never raises; every failure is reported through the
    ``error`` field of the returned dict.

    Args:
        path: Location of the ``.xlsx`` file.

    Returns:
        A dict with the keys ``text``, ``file_type`` (always ``"xlsx"``),
        ``method`` and ``error``. On success ``method`` is ``"openpyxl"`` and
        ``error`` is ``None``. Otherwise ``text`` is empty, ``method`` is
        ``"none"`` and ``error`` names the actual cause: openpyxl is not
        installed, the workbook could not be read, or it contains no
        non-blank cell.
    """

    def failure(message: str) -> dict:
        # Single place that defines the shape of an unsuccessful result.
        return {"text": "", "file_type": "xlsx", "method": "none",
                "error": message}

    # Keep the import guard separate so that only a genuinely missing
    # dependency produces the "please install openpyxl" message.
    try:
        import openpyxl
    except ImportError:
        return failure("XLSX 解析需要安装 openpyxl")

    sheets_text = []
    try:
        wb = openpyxl.load_workbook(path, read_only=True, data_only=True)
        try:
            for sheet_name in wb.sheetnames:
                ws = wb[sheet_name]
                rows = []
                for row in ws.iter_rows(values_only=True):
                    cells = [str(c) if c is not None else "" for c in row]
                    # Drop rows that contain nothing but blank cells.
                    if any(c.strip() for c in cells):
                        rows.append(" | ".join(cells))
                if rows:
                    sheets_text.append(
                        f"--- 工作表: {sheet_name} ---\n" + "\n".join(rows)
                    )
        finally:
            # Read-only workbooks keep the underlying file open until they
            # are closed explicitly, so release it even when reading fails.
            wb.close()
    except Exception as exc:
        # Best-effort parser: report the real cause instead of raising or
        # blaming a missing dependency.
        return failure(f"XLSX 解析失败: {exc}")

    if not sheets_text:
        return failure("XLSX 文件中没有可提取的文本内容")

    return {
        "text": "\n\n".join(sheets_text),
        "file_type": "xlsx",
        "method": "openpyxl",
        "error": None,
    }
def _parse_text(path: Path) -> dict:
"""读取纯文本文件。"""
try:
+3 -1
View File
@@ -38,12 +38,14 @@ def create_session(name: str = "", agent_state: Optional[dict] = None) -> dict:
_ensure_dir()
sid = generate_session_id()
now = _now_iso()
agent_state = agent_state or {}
agent_state["session_id"] = sid
data = {
"session_id": sid,
"session_name": name or f"新建报表 {now[:10]}",
"created_at": now,
"updated_at": now,
"agent_state": agent_state or {},
"agent_state": agent_state,
}
with open(_session_path(sid), "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)