fix: image files silently falling to text parser due to suffix dot mismatch

api_server.py passed the extension without a leading dot ("jpg", taken
from rsplit), but the parser dict keys in file_parser.py all include the
dot (".jpg"). Image files therefore fell through to _parse_text(), which
fails on binary data, so OCR and layout analysis were skipped entirely.
Every image upload was affected.

- file_parser.py: normalize file_type to always have a leading dot
- api_server.py: use Path.suffix instead of manual rsplit
This commit is contained in:
2026-05-21 23:05:27 +08:00
parent 83c7da7517
commit 60e2f520ba
2 changed files with 4 additions and 2 deletions
+1 -1
View File
@@ -400,7 +400,7 @@ def _process_files(file_ids: list[str], session_id: str) -> dict:
file_path = info["path"]
uploaded_paths.append(file_path)
parsed = parse_file(file_path, info["filename"].rsplit(".", 1)[-1] if "." in info["filename"] else "")
parsed = parse_file(file_path, Path(info["filename"]).suffix)
if parsed.get("error"):
parts.append(f"[文件: {info['filename']}]\n解析失败: {parsed['error']}")
continue
+3 -1
View File
@@ -41,7 +41,9 @@ def parse_file(file_path: str, file_type: str = "") -> dict:
if not path.exists():
return {"text": "", "file_type": file_type, "method": "none", "error": "文件不存在"}
suffix = file_type or path.suffix.lower()
suffix = path.suffix.lower()
if file_type:
suffix = file_type if file_type.startswith(".") else f".{file_type}"
parsers = {
".png": _parse_image,