9bb011e429
- Replace st.chat_input with st-multimodal-chatinput (Ctrl+V paste, drag-drop, file button) - Extract _process_uploaded_file() shared handler (eliminates ~70 duplicated lines) - Add XLSX (openpyxl), XLS (xlrd), DOC (olefile) parsers to file_parser.py - Add backend/annotation_detector.py: circle detection (HoughCircles) + arrow detection (HoughLinesP clustering) + OCR correlation + LLM context formatting - Add annotation_result field to AgentState with session persistence - Wire annotation detection into process_input and _format_ocr_context - Add 11 new tests: 7 annotation detector + 4 multi-format parser - Update all docs: CLAUDE.md, README.md, CODE_GUIDE.md, ROADMAP.md
91 lines
2.7 KiB
Python
91 lines
2.7 KiB
Python
"""测试多格式文件解析器:XLSX, XLS, DOC。"""
|
|
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
|
|
def _make_xlsx(path: str) -> None:
    """Write a minimal .xlsx fixture workbook to ``path``.

    The workbook contains a single sheet titled "Sheet1" with a two-column
    header row followed by two data rows.
    """
    from openpyxl import Workbook

    # (cell coordinate, value) pairs, listed row by row.
    cells = (
        ("A1", "名称"),
        ("B1", "金额"),
        ("A2", "项目A"),
        ("B2", 100),
        ("A3", "项目B"),
        ("B3", 200),
    )

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Sheet1"
    for coordinate, value in cells:
        sheet[coordinate] = value
    workbook.save(path)
|
|
|
|
|
|
def _make_xls(path: str) -> None:
    """Write a minimal legacy .xls fixture workbook to ``path``.

    The workbook contains a single sheet named "Sheet1" with a two-column
    header row followed by two data rows.
    """
    from xlwt import Workbook

    # One tuple per spreadsheet row; position in the tuple is the column.
    rows = (
        ("名称", "金额"),
        ("项目A", 100),
        ("项目B", 200),
    )

    workbook = Workbook()
    sheet = workbook.add_sheet("Sheet1")
    for row_index, row in enumerate(rows):
        for col_index, value in enumerate(row):
            sheet.write(row_index, col_index, value)
    workbook.save(path)
|
|
|
|
|
|
class TestMultiFormatParsers:
    """Tests for the multi-format parsers reachable through ``parse_file``.

    Every test imports ``backend.file_parser.parse_file`` lazily and checks
    keys of the dict it returns (``file_type``, ``method``, ``error``,
    ``text``).
    """

    def test_parse_xlsx(self):
        """A generated .xlsx workbook is parsed with openpyxl and no error."""
        from backend.file_parser import parse_file

        # Only reserve a unique file name here. The handle is closed before
        # the workbook is written so the file can be reopened on platforms
        # that forbid opening an already-open temporary file a second time.
        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
            path = tmp.name
        try:
            _make_xlsx(path)
            result = parse_file(path, ".xlsx")
            assert result["file_type"] == "xlsx"
            assert result["method"] == "openpyxl"
            assert result["error"] is None
            assert "Sheet1" in result["text"]
            assert "项目A" in result["text"]
            assert "100" in result["text"]
        finally:
            # delete=False means cleanup is our responsibility.
            Path(path).unlink(missing_ok=True)

    def test_parse_xls(self):
        """A generated .xls workbook is parsed with xlrd and no error."""
        from backend.file_parser import parse_file

        # See test_parse_xlsx: close the handle first, write afterwards.
        with tempfile.NamedTemporaryFile(suffix=".xls", delete=False) as tmp:
            path = tmp.name
        try:
            _make_xls(path)
            result = parse_file(path, ".xls")
            assert result["file_type"] == "xls"
            assert result["method"] == "xlrd"
            assert result["error"] is None
            assert "Sheet1" in result["text"]
            assert "项目A" in result["text"]
            # The integer 100 written by the fixture is expected back in
            # float form ("100.0") on the xlrd path.
            assert "100.0" in result["text"]
        finally:
            Path(path).unlink(missing_ok=True)

    def test_parse_doc_nonexistent(self):
        """A missing .doc file yields an error result instead of raising."""
        from backend.file_parser import parse_file

        result = parse_file("/nonexistent/file.doc", ".doc")
        assert result["file_type"] == ".doc"
        assert result["method"] == "none"
        assert result.get("error") is not None

    def test_dispatch_adds_new_formats(self):
        """Each new extension is registered in the ``parse_file`` dispatch."""
        from backend.file_parser import parse_file

        # Probe with paths inside a fresh, empty directory: they are
        # guaranteed not to exist and do not depend on a POSIX-only "/tmp".
        with tempfile.TemporaryDirectory() as tmp_dir:
            for ext in [".xlsx", ".xls", ".doc"]:
                missing_path = str(Path(tmp_dir) / ("test" + ext))
                result = parse_file(missing_path, ext)
                # Accept only the type belonging to *this* extension (with or
                # without the leading dot), so a cross-wired dispatch entry
                # such as ".xls" handled as "xlsx" is caught.
                expected = (ext, ext.lstrip("."))
                assert result["file_type"] in expected, (
                    f"unexpected file_type {result['file_type']!r} for {ext}"
                )
|