Files
agent_jrxml/tests/test_file_parser_formats.py
panda 9bb011e429 feat: v4 multimodal chat input, multi-format support, and annotation detection
- Replace st.chat_input with st-multimodal-chatinput (Ctrl+V paste, drag-drop, file button)
- Extract _process_uploaded_file() shared handler (eliminates ~70 duplicated lines)
- Add XLSX (openpyxl), XLS (xlrd), DOC (olefile) parsers to file_parser.py
- Add backend/annotation_detector.py: circle detection (HoughCircles) + arrow detection (HoughLinesP clustering) + OCR correlation + LLM context formatting
- Add annotation_result field to AgentState with session persistence
- Wire annotation detection into process_input and _format_ocr_context
- Add 11 new tests: 7 annotation detector + 4 multi-format parser
- Update all docs: CLAUDE.md, README.md, CODE_GUIDE.md, ROADMAP.md
2026-05-20 23:43:16 +08:00

91 lines
2.7 KiB
Python

"""测试多格式文件解析器:XLSX, XLS, DOC。"""
import tempfile
from pathlib import Path
import pytest
def _make_xlsx(path: str) -> None:
    """Write a minimal .xlsx workbook to *path* for use as a test fixture.

    The workbook contains a single sheet titled "Sheet1" with a header row
    (A1:B1) followed by two data rows (A2:B3).
    """
    from openpyxl import Workbook

    # Cell reference -> value, listed in the order the cells are written.
    cell_values = {
        "A1": "名称",
        "B1": "金额",
        "A2": "项目A",
        "B2": 100,
        "A3": "项目B",
        "B3": 200,
    }

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Sheet1"
    for cell_ref, value in cell_values.items():
        sheet[cell_ref] = value
    workbook.save(path)
def _make_xls(path: str) -> None:
    """Write a minimal legacy .xls workbook to *path* for use as a test fixture.

    The workbook contains a single sheet named "Sheet1" with a header row
    followed by two data rows, two columns each.
    """
    from xlwt import Workbook

    # One tuple per spreadsheet row; position in the tuple is the column index.
    rows = (
        ("名称", "金额"),
        ("项目A", 100),
        ("项目B", 200),
    )

    workbook = Workbook()
    sheet = workbook.add_sheet("Sheet1")
    for row_idx, row in enumerate(rows):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)
    workbook.save(path)
class TestMultiFormatParsers:
    """Tests for the multi-format parsers reached through file_parser.parse_file."""

    def test_parse_xlsx(self):
        """A generated .xlsx file is parsed with openpyxl and exposes its content."""
        from backend.file_parser import parse_file

        # TemporaryDirectory removes the fixture even when an assertion fails,
        # so no manual try/finally cleanup is required.
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = str(Path(tmp_dir) / "sample.xlsx")
            _make_xlsx(path)
            result = parse_file(path, ".xlsx")

            assert result["file_type"] == "xlsx"
            assert result["method"] == "openpyxl"
            assert result["error"] is None
            assert "Sheet1" in result["text"]
            assert "项目A" in result["text"]
            assert "100" in result["text"]

    def test_parse_xls(self):
        """A generated .xls file is parsed with xlrd and exposes its content."""
        from backend.file_parser import parse_file

        with tempfile.TemporaryDirectory() as tmp_dir:
            path = str(Path(tmp_dir) / "sample.xls")
            _make_xls(path)
            result = parse_file(path, ".xls")

            assert result["file_type"] == "xls"
            assert result["method"] == "xlrd"
            assert result["error"] is None
            assert "Sheet1" in result["text"]
            assert "项目A" in result["text"]
            # The integer 100 written by the fixture is expected back as
            # "100.0" (presumably xlrd yields numeric cells as floats).
            assert "100.0" in result["text"]

    def test_parse_doc_nonexistent(self):
        """A missing .doc file yields an error result instead of raising."""
        from backend.file_parser import parse_file

        result = parse_file("/nonexistent/file.doc", ".doc")

        assert result["file_type"] == ".doc"
        assert result["method"] == "none"
        assert result.get("error") is not None

    def test_dispatch_adds_new_formats(self):
        """Each new extension is handled by parse_file and reported as its own type."""
        from backend.file_parser import parse_file

        # Use paths inside a fresh temporary directory: they are guaranteed
        # not to exist and do not depend on a platform-specific /tmp.
        with tempfile.TemporaryDirectory() as tmp_dir:
            for ext in (".xlsx", ".xls", ".doc"):
                missing_path = str(Path(tmp_dir) / f"test{ext}")
                result = parse_file(missing_path, ext)
                # Accept only this extension's own name (with or without the
                # leading dot) so a mis-dispatch to another format fails.
                # NOTE(review): the files do not exist, so this may only
                # exercise the missing-file path -- confirm that parse_file
                # actually reaches the dispatch table here.
                assert result["file_type"] in (ext, ext.lstrip(".")), (
                    f"unexpected file_type {result['file_type']!r} for {ext}"
                )