fix: image files silently falling to text parser due to suffix dot mismatch

api_server.py passed "jpg" (no leading dot, taken from rsplit), but the parser dict keys in file_parser.py all have a leading dot (".jpg"). As a result, image files fell through to _parse_text(), which fails on binary data, so ALL OCR and layout analysis was skipped. Every image upload was affected.
- file_parser.py: normalize file_type to always have a leading dot
- api_server.py: use Path.suffix instead of a manual rsplit
2026-05-21 23:05:27 +08:00
parent 83c7da7517
commit 60e2f520ba
2 changed files with 4 additions and 2 deletions
@@ -400,7 +400,7 @@ def _process_files(file_ids: list[str], session_id: str) -> dict:
        file_path = info["path"]
        uploaded_paths.append(file_path)

-        parsed = parse_file(file_path, info["filename"].rsplit(".", 1)[-1] if "." in info["filename"] else "")
+        parsed = parse_file(file_path, Path(info["filename"]).suffix)
        if parsed.get("error"):
            parts.append(f"[文件: {info['filename']}]\n解析失败: {parsed['error']}")
            continue
@@ -41,7 +41,9 @@ def parse_file(file_path: str, file_type: str = "") -> dict:
    if not path.exists():
        return {"text": "", "file_type": file_type, "method": "none", "error": "文件不存在"}

-    suffix = file_type or path.suffix.lower()
+    suffix = path.suffix.lower()
+    if file_type:
+        suffix = file_type if file_type.startswith(".") else f".{file_type}"

    parsers = {
        ".png":  _parse_image,