"""测试多格式文件解析器:XLSX, XLS, DOC。""" import tempfile from pathlib import Path import pytest def _make_xlsx(path: str) -> None: """生成最小 .xlsx 测试文件。""" from openpyxl import Workbook wb = Workbook() ws = wb.active ws.title = "Sheet1" ws["A1"] = "名称" ws["B1"] = "金额" ws["A2"] = "项目A" ws["B2"] = 100 ws["A3"] = "项目B" ws["B3"] = 200 wb.save(path) def _make_xls(path: str) -> None: """生成最小 .xls 测试文件。""" from xlwt import Workbook wb = Workbook() ws = wb.add_sheet("Sheet1") ws.write(0, 0, "名称") ws.write(0, 1, "金额") ws.write(1, 0, "项目A") ws.write(1, 1, 100) ws.write(2, 0, "项目B") ws.write(2, 1, 200) wb.save(path) class TestMultiFormatParsers: """测试 file_parser.py 的多格式解析器。""" def test_parse_xlsx(self): from backend.file_parser import parse_file with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp: path = tmp.name try: _make_xlsx(path) result = parse_file(path, ".xlsx") assert result["file_type"] == "xlsx" assert result["method"] == "openpyxl" assert result["error"] is None assert "Sheet1" in result["text"] assert "项目A" in result["text"] assert "100" in result["text"] finally: Path(path).unlink(missing_ok=True) def test_parse_xls(self): from backend.file_parser import parse_file with tempfile.NamedTemporaryFile(suffix=".xls", delete=False) as tmp: path = tmp.name try: _make_xls(path) result = parse_file(path, ".xls") assert result["file_type"] == "xls" assert result["method"] == "xlrd" assert result["error"] is None assert "Sheet1" in result["text"] assert "项目A" in result["text"] assert "100.0" in result["text"] finally: Path(path).unlink(missing_ok=True) def test_parse_doc_nonexistent(self): """测试 .doc 文件不存在时的错误处理。""" from backend.file_parser import parse_file result = parse_file("/nonexistent/file.doc", ".doc") assert result["file_type"] == ".doc" assert result["method"] == "none" assert result.get("error") is not None def test_dispatch_adds_new_formats(self): """验证新格式已在 parse_file 调度表中注册。""" from backend.file_parser import parse_file for ext in [".xlsx", ".xls", ".doc"]: result = parse_file("/tmp/test" + ext, ext) assert result["file_type"] in (ext, "xlsx", "xls", "doc")