Files
agent_jrxml/backend/session.py
panda bd5bfbac2d fix: band-level windowed refine_layout + programmatic map_fields to prevent 91.5% content loss
Root cause: LLM receiving full 34k-char JRXML would regenerate from scratch
instead of modifying coordinates in-place, shrinking output to ~3k chars.

Solution (programmatic node control, not prompt engineering):

- New agent/jrxml_windower.py: decompose JRXML into header (never sent to
  LLM) + individual bands. Split bands >4000 chars at element boundaries.
  Reassemble with element count validation (>10% change = rollback).

- Rewrite refine_layout: per-band windowed LLM processing (~2-4k chars
  each). LLM cannot "reimagine" the entire report.

- Rewrite map_fields: 100% programmatic regex $F{field_N} -> real name
  replacement. Zero LLM calls, zero content loss.

- _sanitize_field_name: non-ASCII chars escaped to _uXXXX_ format for
  valid JRXML identifiers.

- Tests: 48 new unit tests (windower 28 + map_fields 20). All passing.
  Full suite 385 tests, zero regressions.
2026-05-24 08:55:38 +08:00

161 lines
4.8 KiB
Python

"""多会话持久化管理模块。
每个会话对应一个独立的 JSON 文件存储在 ./sessions/ 目录下。
"""
import json
import os
import re
import uuid
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from dotenv import load_dotenv
from backend.logger import get_logger
load_dotenv()
_session_log = get_logger("session")
SESSIONS_DIR = Path(os.getenv("SESSIONS_DIR", "./sessions"))
def _ensure_dir():
SESSIONS_DIR.mkdir(parents=True, exist_ok=True)
_VALID_SESSION_ID_RE = re.compile(r'^[a-fA-F0-9]{12,}$')
def validate_session_id(session_id: str) -> bool:
"""校验 session_id 仅含合法 hex 字符(防路径穿越)。"""
return bool(_VALID_SESSION_ID_RE.match(session_id))
def _session_path(session_id: str) -> Path:
if not validate_session_id(session_id):
raise ValueError(f"Invalid session_id: {session_id!r}")
return SESSIONS_DIR / f"{session_id}.json"
def generate_session_id() -> str:
return uuid.uuid4().hex
def create_session(name: str = "", agent_state: Optional[dict] = None,
session_id: Optional[str] = None) -> dict:
"""创建新会话,返回会话元数据。session_id 可选——传入时使用指定 ID。"""
_ensure_dir()
sid = session_id or generate_session_id()
now = _now_iso()
agent_state = agent_state or {}
agent_state["session_id"] = sid
data = {
"session_id": sid,
"session_name": name or f"新建报表 {now[:10]}",
"created_at": now,
"updated_at": now,
"kb_id": agent_state.get("kb_id", "") if agent_state else "",
"agent_state": agent_state,
}
with open(_session_path(sid), "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
_session_log.info("创建会话", extra={"session_id": sid, "session_name": data["session_name"]})
return data
def load_session(session_id: str) -> Optional[dict]:
"""按 ID 加载会话数据。未找到则返回 None。"""
_ensure_dir()
try:
fp = _session_path(session_id)
except ValueError:
return None
if not fp.exists():
return None
with open(fp, "r", encoding="utf-8") as f:
return json.load(f)
def save_session(session_id: str, agent_state: dict, session_name: str = ""):
"""将会话状态原子保存至磁盘(temp file + rename,避免崩溃时截断)。"""
_ensure_dir()
fp = _session_path(session_id)
data = {}
if fp.exists():
with open(fp, "r", encoding="utf-8") as f:
data = json.load(f)
data["session_id"] = session_id
if session_name:
data["session_name"] = session_name
if not data.get("session_name"):
data["session_name"] = f"报表 {data.get('created_at', _now_iso())[:10]}"
data["updated_at"] = _now_iso()
if not data.get("created_at"):
data["created_at"] = data["updated_at"]
data["agent_state"] = agent_state
# 原子写入:先写临时文件,再 replace,避免崩溃时截断 JSON
tmp = tempfile.NamedTemporaryFile(
mode="w", suffix=".json", delete=False,
dir=SESSIONS_DIR, encoding="utf-8",
)
try:
json.dump(data, tmp, ensure_ascii=False, indent=2)
tmp.flush()
os.fsync(tmp.fileno())
tmp.close()
os.replace(tmp.name, str(fp))
except Exception:
tmp.close()
Path(tmp.name).unlink(missing_ok=True)
raise
def get_session_state(session_id: str) -> Optional[dict]:
"""获取会话的完整 agent_state,用于 REST API。
返回 dict 包含 session_id, session_name, created_at, updated_at, agent_state。
未找到则返回 None。
"""
return load_session(session_id)
def list_all_sessions() -> list[dict]:
"""列出所有历史会话(仅摘要,不含完整 agent_state)。"""
_ensure_dir()
sessions = []
for fp in sorted(SESSIONS_DIR.glob("*.json"), key=os.path.getmtime, reverse=True):
try:
with open(fp, "r", encoding="utf-8") as f:
data = json.load(f)
sessions.append({
"session_id": data.get("session_id", fp.stem),
"session_name": data.get("session_name", fp.stem),
"created_at": data.get("created_at", ""),
"updated_at": data.get("updated_at", ""),
})
except (json.JSONDecodeError, KeyError):
continue
return sessions
def delete_session(session_id: str) -> bool:
"""按 ID 删除会话文件。"""
_ensure_dir()
try:
fp = _session_path(session_id)
except ValueError:
return False
if fp.exists():
fp.unlink()
_session_log.info("删除会话", extra={"session_id": session_id})
return True
return False
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()