fix: image files silently falling to text parser due to suffix dot mismatch
api_server.py passed "jpg" (no dot) from rsplit, but the parser dict keys in
file_parser.py all have a leading dot (".jpg"). Image files therefore fell
through to _parse_text(), which fails on binary data, so ALL OCR and layout
analysis was skipped. Every image upload was affected.
- file_parser.py: normalize file_type to always have leading dot
- api_server.py: use Path.suffix instead of manual rsplit
This commit is contained in:
+1
-1
@@ -400,7 +400,7 @@ def _process_files(file_ids: list[str], session_id: str) -> dict:
|
||||
file_path = info["path"]
|
||||
uploaded_paths.append(file_path)
|
||||
|
||||
parsed = parse_file(file_path, info["filename"].rsplit(".", 1)[-1] if "." in info["filename"] else "")
|
||||
parsed = parse_file(file_path, Path(info["filename"]).suffix)
|
||||
if parsed.get("error"):
|
||||
parts.append(f"[文件: {info['filename']}]\n解析失败: {parsed['error']}")
|
||||
continue
|
||||
|
||||
@@ -41,7 +41,9 @@ def parse_file(file_path: str, file_type: str = "") -> dict:
|
||||
if not path.exists():
|
||||
return {"text": "", "file_type": file_type, "method": "none", "error": "文件不存在"}
|
||||
|
||||
suffix = file_type or path.suffix.lower()
|
||||
suffix = path.suffix.lower()
|
||||
if file_type:
|
||||
suffix = file_type if file_type.startswith(".") else f".{file_type}"
|
||||
|
||||
parsers = {
|
||||
".png": _parse_image,
|
||||
|
||||
Reference in New Issue
Block a user