fix: band-level windowed refine_layout + programmatic map_fields to prevent 91.5% content loss
Root cause: when given the full 34k-char JRXML, the LLM regenerated the report
from scratch instead of modifying coordinates in place, shrinking the output to ~3k chars.
Solution (programmatic node control, not prompt engineering):
- New agent/jrxml_windower.py: decompose JRXML into header (never sent to
LLM) + individual bands. Split bands >4000 chars at element boundaries.
Reassemble with element count validation (>10% change = rollback).
- Rewrite refine_layout: per-band windowed LLM processing (~2-4k chars
each). LLM cannot "reimagine" the entire report.
- Rewrite map_fields: 100% programmatic regex $F{field_N} -> real name
replacement. Zero LLM calls, zero content loss.
- _sanitize_field_name: non-ASCII chars escaped to _uXXXX_ format for
valid JRXML identifiers.
- Tests: 48 new unit tests (windower 28 + map_fields 20). All passing.
Full suite 385 tests, zero regressions.
This commit is contained in:
@@ -265,3 +265,249 @@ class TestBoundaries:
|
||||
json={"text": large_text, "file_ids": []},
|
||||
) as resp:
|
||||
assert resp.status_code == 200
|
||||
|
||||
|
||||
# ── 用户 CRUD API ───────────────────────────────────────────────
|
||||
|
||||
class TestUserAPI:
    """Exercise the user CRUD endpoints under ``/api/users``."""

    @pytest.fixture(autouse=True)
    def temp_kb_data(self, monkeypatch, tmp_path):
        """Point the ``backend.kb_manager`` storage paths at a per-test temp dir."""
        data_root = tmp_path / "kb_data"
        users_file = data_root / "users.json"
        monkeypatch.setattr("backend.kb_manager.KB_DATA_DIR", data_root)
        monkeypatch.setattr("backend.kb_manager._USERS_FILE", users_file)
        yield data_root

    def test_create_user(self, client):
        """Creating a user echoes the name and returns an id of 12+ characters."""
        response = client.post("/api/users", json={"name": "测试用户"})
        assert response.status_code == 200
        payload = response.json()
        assert payload["name"] == "测试用户"
        assert len(payload["user_id"]) >= 12

    def test_create_user_empty_name_rejected(self, client):
        """An empty user name is rejected with HTTP 400."""
        response = client.post("/api/users", json={"name": ""})
        assert response.status_code == 400

    def test_list_users(self, client):
        """Listing returns every user created so far."""
        for user_name in ("A", "B"):
            client.post("/api/users", json={"name": user_name})
        response = client.get("/api/users")
        assert response.status_code == 200
        listed = response.json()["users"]
        assert len(listed) == 2

    def test_get_user(self, client):
        """A created user can be fetched back by its id."""
        created = client.post("/api/users", json={"name": "张三"})
        user_id = created.json()["user_id"]
        response = client.get(f"/api/users/{user_id}")
        assert response.status_code == 200
        assert response.json()["name"] == "张三"

    def test_get_user_not_found(self, client):
        """Fetching an unknown user id yields HTTP 404."""
        response = client.get("/api/users/deadbeef1234567890abcd")
        assert response.status_code == 404

    def test_delete_user(self, client):
        """Deleting a user reports ``deleted`` and later lookups return 404."""
        created = client.post("/api/users", json={"name": "待删除"})
        user_id = created.json()["user_id"]
        response = client.delete(f"/api/users/{user_id}")
        assert response.status_code == 200
        assert response.json()["status"] == "deleted"
        lookup = client.get(f"/api/users/{user_id}")
        assert lookup.status_code == 404

    def test_delete_nonexistent_user(self, client):
        """Deleting an unknown user id yields HTTP 404."""
        response = client.delete("/api/users/deadbeef1234567890abcd")
        assert response.status_code == 404
|
||||
|
||||
|
||||
# ── 知识库 CRUD API ─────────────────────────────────────────────
|
||||
|
||||
class TestKbAPI:
    """Exercise the knowledge-base CRUD endpoints for a pre-created user."""

    @pytest.fixture(autouse=True)
    def setup_kb(self, monkeypatch, tmp_path):
        """Redirect KB storage to a temp dir and create the owning user.

        The id of the created user is stored on ``self.uid`` for the tests.
        """
        data_root = tmp_path / "kb_data"
        users_file = data_root / "users.json"
        monkeypatch.setattr("backend.kb_manager.KB_DATA_DIR", data_root)
        monkeypatch.setattr("backend.kb_manager._USERS_FILE", users_file)
        # Use a raw TestClient to create the prerequisite user.
        from fastapi.testclient import TestClient as TC
        raw_client = TC(app)
        created = raw_client.post("/api/users", json={"name": "KB测试用户"})
        self.uid = created.json()["user_id"]

    def test_create_kb(self, client):
        """A new KB echoes its name and starts with parse status ``empty``."""
        body = {"name": "测试库", "description": "描述"}
        response = client.post(f"/api/users/{self.uid}/kbs", json=body)
        assert response.status_code == 200
        payload = response.json()
        assert payload["name"] == "测试库"
        assert payload["parse_status"] == "empty"

    def test_create_kb_empty_name_rejected(self, client):
        """An empty KB name is rejected with HTTP 400."""
        response = client.post(f"/api/users/{self.uid}/kbs", json={"name": ""})
        assert response.status_code == 400

    def test_list_kbs(self, client):
        """Listing a user's KBs returns every KB created for that user."""
        kbs_url = f"/api/users/{self.uid}/kbs"
        for kb_name in ("KB1", "KB2"):
            client.post(kbs_url, json={"name": kb_name})
        response = client.get(kbs_url)
        assert response.status_code == 200
        listed = response.json()["kbs"]
        assert len(listed) == 2

    def test_get_kb(self, client):
        """A created KB can be fetched back by its id."""
        created = client.post(f"/api/users/{self.uid}/kbs", json={"name": "查询库"})
        kb_id = created.json()["kb_id"]
        response = client.get(f"/api/kbs/{kb_id}")
        assert response.status_code == 200
        assert response.json()["name"] == "查询库"

    def test_get_kb_not_found(self, client):
        """Fetching an unknown KB id yields HTTP 404."""
        response = client.get("/api/kbs/deadbeef1234567890abcd")
        assert response.status_code == 404

    def test_delete_kb(self, client):
        """Deleting a KB succeeds and reports status ``deleted``."""
        created = client.post(f"/api/users/{self.uid}/kbs", json={"name": "待删库"})
        kb_id = created.json()["kb_id"]
        response = client.delete(f"/api/kbs/{kb_id}")
        assert response.status_code == 200
        assert response.json()["status"] == "deleted"

    def test_kb_status(self, client):
        """A fresh KB reports parse status ``empty`` and zero files."""
        created = client.post(f"/api/users/{self.uid}/kbs", json={"name": "状态库"})
        kb_id = created.json()["kb_id"]
        response = client.get(f"/api/kbs/{kb_id}/status")
        assert response.status_code == 200
        assert response.json()["parse_status"] == "empty"
        assert response.json()["file_count"] == 0

    def test_kb_fields(self, client):
        """A fresh KB exposes empty ``fields`` and ``templates`` lists."""
        created = client.post(f"/api/users/{self.uid}/kbs", json={"name": "字段库"})
        kb_id = created.json()["kb_id"]
        response = client.get(f"/api/kbs/{kb_id}/fields")
        assert response.status_code == 200
        assert response.json()["fields"] == []
        assert response.json()["templates"] == []
|
||||
|
||||
|
||||
# ── KB 文件上传 & 构建 API ──────────────────────────────────────
|
||||
|
||||
class TestKbUploadBuild:
    """Exercise the KB upload, build and search endpoints."""

    @pytest.fixture(autouse=True)
    def setup_up(self, monkeypatch, tmp_path):
        """Redirect KB storage, stub out file processing and create a user.

        The id of the created user is stored on ``self.uid`` for the tests.
        """
        data_root = tmp_path / "kb_data"
        data_root.mkdir(parents=True, exist_ok=True)
        monkeypatch.setattr("backend.kb_manager.KB_DATA_DIR", data_root)
        monkeypatch.setattr("backend.kb_manager._USERS_FILE", data_root / "users.json")

        # Mock process_file_for_kb to avoid SameFileError (API already writes to raw_dir)
        def _fake_process_file(kb_id, file_path, source_name=""):
            return {"filename": source_name, "type": "txt", "error": None}

        monkeypatch.setattr("backend.kb_parser.process_file_for_kb", _fake_process_file)

        from fastapi.testclient import TestClient as TC
        raw_client = TC(app)
        created = raw_client.post("/api/users", json={"name": "上传测试用户"})
        self.uid = created.json()["user_id"]

    def test_upload_to_kb(self, client):
        """Uploading a file to an existing KB returns the uploaded filename."""
        created = client.post(f"/api/users/{self.uid}/kbs", json={"name": "上传库"})
        kb_id = created.json()["kb_id"]
        upload = {"file": ("readme.md", io.BytesIO(b"# test"), "text/markdown")}
        response = client.post(f"/api/kbs/{kb_id}/upload", files=upload)
        assert response.status_code == 200
        assert response.json()["filename"] == "readme.md"

    def test_upload_to_nonexistent_kb(self, client):
        """Uploading to an unknown KB id yields HTTP 404."""
        upload = {"file": ("x.txt", io.BytesIO(b"x"), "text/plain")}
        response = client.post("/api/kbs/deadbeef1234567890abcd/upload", files=upload)
        assert response.status_code == 404

    def test_build_empty_kb_fails(self, client):
        """Building a KB that has no files is rejected with HTTP 400."""
        created = client.post(f"/api/users/{self.uid}/kbs", json={"name": "空库"})
        kb_id = created.json()["kb_id"]
        response = client.post(f"/api/kbs/{kb_id}/build")
        assert response.status_code == 400

    def test_search_kb_empty_query_rejected(self, client):
        """Searching a KB without a query is rejected with HTTP 400."""
        created = client.post(f"/api/users/{self.uid}/kbs", json={"name": "搜索库"})
        kb_id = created.json()["kb_id"]
        response = client.get(f"/api/kbs/{kb_id}/search")
        assert response.status_code == 400
|
||||
|
||||
|
||||
# ── 会话-KB 绑定 API ────────────────────────────────────────────
|
||||
|
||||
class TestSessionKbBinding:
    """Exercise binding and unbinding a KB on ``/api/sessions/{id}/kb``."""

    @pytest.fixture(autouse=True)
    def setup_bind(self, monkeypatch, tmp_path):
        """Redirect KB, session and upload storage to per-test temp paths."""
        data_root = tmp_path / "kb_data"
        data_root.mkdir(parents=True, exist_ok=True)
        session_root = tmp_path / "sessions"
        monkeypatch.setattr("backend.kb_manager.KB_DATA_DIR", data_root)
        monkeypatch.setattr("backend.kb_manager._USERS_FILE", data_root / "users.json")
        monkeypatch.setattr("backend.session.SESSIONS_DIR", session_root)
        monkeypatch.setattr("api_server.UPLOADS_DIR", tmp_path / "uploads")

    def test_bind_kb_to_session(self, client):
        """Binding an existing KB to a session echoes the KB id."""
        user = client.post("/api/users", json={"name": "绑定用户"})
        user_id = user.json()["user_id"]
        kb = client.post(f"/api/users/{user_id}/kbs", json={"name": "绑定库"})
        kb_id = kb.json()["kb_id"]
        session_id = client.post("/api/sessions").json()["session_id"]
        response = client.put(f"/api/sessions/{session_id}/kb", json={"kb_id": kb_id})
        assert response.status_code == 200
        assert response.json()["kb_id"] == kb_id

    def test_get_session_kb(self, client):
        """After binding, the session reports the bound KB's id and name."""
        user = client.post("/api/users", json={"name": "查询用户"})
        user_id = user.json()["user_id"]
        kb = client.post(f"/api/users/{user_id}/kbs", json={"name": "查询KB"})
        kb_id = kb.json()["kb_id"]
        session_id = client.post("/api/sessions").json()["session_id"]
        binding_url = f"/api/sessions/{session_id}/kb"
        client.put(binding_url, json={"kb_id": kb_id})
        response = client.get(binding_url)
        assert response.status_code == 200
        assert response.json()["kb_id"] == kb_id
        assert response.json()["kb_name"] == "查询KB"

    def test_unbind_kb(self, client):
        """Sending an empty KB id leaves the session with ``kb_id`` of None."""
        session_id = client.post("/api/sessions").json()["session_id"]
        response = client.put(f"/api/sessions/{session_id}/kb", json={"kb_id": ""})
        assert response.status_code == 200
        assert response.json()["kb_id"] is None

    def test_bind_nonexistent_kb(self, client):
        """Binding an unknown KB id yields HTTP 404."""
        session_id = client.post("/api/sessions").json()["session_id"]
        body = {"kb_id": "deadbeef1234567890abcd"}
        response = client.put(f"/api/sessions/{session_id}/kb", json=body)
        assert response.status_code == 404

    def test_bind_to_nonexistent_session(self, client):
        """Binding on an unknown session id yields HTTP 404."""
        response = client.put("/api/sessions/deadbeef1234567890abcd/kb", json={"kb_id": ""})
        assert response.status_code == 404
|
||||
|
||||
|
||||
# ── 用户-KB 端到端流程 ──────────────────────────────────────────
|
||||
|
||||
class TestUserKbE2E:
    """End-to-end flow: user -> KB -> upload -> session -> KB binding."""

    @pytest.fixture(autouse=True)
    def setup_e2e(self, monkeypatch, tmp_path):
        """Redirect all storage to temp paths and stub out file processing."""
        data_root = tmp_path / "kb_data"
        data_root.mkdir(parents=True, exist_ok=True)
        session_root = tmp_path / "sessions"
        monkeypatch.setattr("backend.kb_manager.KB_DATA_DIR", data_root)
        monkeypatch.setattr("backend.kb_manager._USERS_FILE", data_root / "users.json")
        monkeypatch.setattr("backend.session.SESSIONS_DIR", session_root)
        monkeypatch.setattr("api_server.UPLOADS_DIR", tmp_path / "uploads")

        # Mock process_file_for_kb to avoid SameFileError
        def _fake_process_file(kb_id, file_path, source_name=""):
            return {"filename": source_name, "type": "txt", "error": None}

        monkeypatch.setattr("backend.kb_parser.process_file_for_kb", _fake_process_file)

    def test_full_flow(self, client):
        """Walk the whole user/KB/session flow and check each response."""
        # 1. Create the user
        user = client.post("/api/users", json={"name": "E2E用户"})
        user_id = user.json()["user_id"]
        # 2. Create the KB
        kb = client.post(f"/api/users/{user_id}/kbs", json={"name": "E2E库"})
        kb_id = kb.json()["kb_id"]
        # 3. Upload a file
        upload = {"file": ("readme.md", io.BytesIO(b"# E2E test"), "text/markdown")}
        uploaded = client.post(f"/api/kbs/{kb_id}/upload", files=upload)
        assert uploaded.status_code == 200
        # 4. Create the session
        session_id = client.post("/api/sessions").json()["session_id"]
        # 5. Bind the KB to the session
        binding_url = f"/api/sessions/{session_id}/kb"
        bound = client.put(binding_url, json={"kb_id": kb_id})
        assert bound.status_code == 200
        assert bound.json()["kb_id"] == kb_id
        # 6. Query the session's KB
        session_kb = client.get(binding_url)
        assert session_kb.json()["kb_name"] == "E2E库"
|
||||
|
||||
Reference in New Issue
Block a user