From 60e2f520ba47679453feaf87a362a583fcdc9c8f Mon Sep 17 00:00:00 2001 From: panda <1415243231@qq.com> Date: Thu, 21 May 2026 23:05:27 +0800 Subject: [PATCH] fix: image files silently falling to text parser due to suffix dot mismatch api_server.py passed "jpg" (no dot) from rsplit, but file_parser.py parser dict keys all have dots (".jpg"), causing image files to fall through to _parse_text() which fails on binary data, skipping ALL OCR and layout analysis. Every image upload was affected. - file_parser.py: normalize file_type to always have leading dot - api_server.py: use Path.suffix instead of manual rsplit --- api_server.py | 2 +- backend/file_parser.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/api_server.py b/api_server.py index feca84f..86c143e 100644 --- a/api_server.py +++ b/api_server.py @@ -400,7 +400,7 @@ def _process_files(file_ids: list[str], session_id: str) -> dict: file_path = info["path"] uploaded_paths.append(file_path) - parsed = parse_file(file_path, info["filename"].rsplit(".", 1)[-1] if "." in info["filename"] else "") + parsed = parse_file(file_path, Path(info["filename"]).suffix) if parsed.get("error"): parts.append(f"[文件: {info['filename']}]\n解析失败: {parsed['error']}") continue diff --git a/backend/file_parser.py b/backend/file_parser.py index b767ee2..88ee2cd 100644 --- a/backend/file_parser.py +++ b/backend/file_parser.py @@ -41,7 +41,9 @@ def parse_file(file_path: str, file_type: str = "") -> dict: if not path.exists(): return {"text": "", "file_type": file_type, "method": "none", "error": "文件不存在"} - suffix = file_type or path.suffix.lower() + suffix = path.suffix.lower() + if file_type: + suffix = file_type if file_type.startswith(".") else f".{file_type}" parsers = { ".png": _parse_image,