feat: 对话区域文件上传(粘贴/拖拽) + XLSX支持 + 会话切换无限循环修复
- 对话区域: st.file_uploader + 全局 paste/drop 事件监听 + sessionStorage 桥接
- 文件预览芯片: 上传后显示在对话区域,可逐文件移除
- OCR 双层解析全面接入: file_parser(文字) + ocr_extractor(字段提取)
- XLSX 解析: openpyxl 逐工作表/逐行读取
- 修复: create_session 强制写入 agent_state.session_id
- 修复: load_session_node 不再从磁盘覆盖 session_id
- 修复: 切换会话 _last_switched_to 哨兵防止无限 rerun

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -51,6 +51,7 @@ def parse_file(file_path: str, file_type: str = "") -> dict:
|
||||
".webp": _parse_image,
|
||||
".pdf": _parse_pdf,
|
||||
".docx": _parse_docx,
|
||||
".xlsx": _parse_xlsx,
|
||||
}
|
||||
|
||||
parser = parsers.get(suffix)
|
||||
@@ -195,6 +196,37 @@ def _parse_docx(path: Path) -> dict:
|
||||
"error": "DOCX 解析需要安装 python-docx"}
|
||||
|
||||
|
||||
def _parse_xlsx(path: Path) -> dict:
|
||||
"""提取 Excel (.xlsx) 表格内容为文本。"""
|
||||
try:
|
||||
import openpyxl
|
||||
wb = openpyxl.load_workbook(path, read_only=True, data_only=True)
|
||||
sheets_text = []
|
||||
for sheet_name in wb.sheetnames:
|
||||
ws = wb[sheet_name]
|
||||
rows = []
|
||||
for row in ws.iter_rows(values_only=True):
|
||||
cells = [str(c) if c is not None else "" for c in row]
|
||||
if any(c.strip() for c in cells):
|
||||
rows.append(" | ".join(cells))
|
||||
if rows:
|
||||
sheets_text.append(f"--- 工作表: {sheet_name} ---\n" + "\n".join(rows))
|
||||
wb.close()
|
||||
if sheets_text:
|
||||
return {
|
||||
"text": "\n\n".join(sheets_text),
|
||||
"file_type": "xlsx",
|
||||
"method": "openpyxl",
|
||||
"error": None,
|
||||
}
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
return {"text": "", "file_type": "xlsx", "method": "none",
|
||||
"error": "XLSX 解析需要安装 openpyxl"}
|
||||
|
||||
|
||||
def _parse_text(path: Path) -> dict:
|
||||
"""读取纯文本文件。"""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user