chore: remove 13 stale files and clean up project structure

Removed:
- app.py (deprecated Streamlit UI, replaced by api_server.py + frontend/)
- start_agent_jrxml.py (old launcher, replaced by start.py)
- test_reorder.py, e2e_test.py (ad-hoc/outdated test scripts)
- ocr_raw_positions.json (debug output)
- ARCHITECTURE.md, CODE_GUIDE.md, RAG_INTEGRATION.md, ROADMAP.md (superseded by CLAUDE.md)
- EVALUATION_REPORT.md (auto-generated)
- scripts/init_kb.py (replaced by init_default_kb.py)
- validation_service/validate.bat (redundant, start.py covers it)
- sessions/*.json (34 test session files, already gitignored)

Updated:
- CLAUDE.md: removed stale file entries from key mapping table
- README.md: updated init script reference and removed validate.bat
- .gitignore: removed EVALUATION_REPORT.md entry
2026-05-24 09:07:15 +08:00
parent bd5bfbac2d
commit e362f530ea
14 changed files with 587 additions and 3240 deletions
@@ -15,7 +15,6 @@ logs/
 db/
 # 自动评测 (Mavis AI)
 .mavis/
 EVALUATION_REPORT.md
 # 上传文件
 uploads/
@@ -1,341 +0,0 @@
 # JRXML 生成代理 — 架构文档
 ## 概览
 一个三层架构的桌面应用，通过自然语言多轮对话帮助非技术用户创建 JasperReports 模板（JRXML）。核心流程：用户输入 → 意图识别 → 模板检索 → LLM 生成/修改 → 自动验证修正 → 输出可编译的 JRXML。
 ```
 ┌──────────────────────────────────────────────────────────────┐
 │                Vue 3 + Vite 前端 (:5173)                      │
 │               frontend/ (聊天界面 + SSE 流式)                  │
 │  聊天界面 / 会话管理 / JRXML 预览 / 下载 / 快捷操作            │
 └─────────────────────┬────────────────────────────────────────┘
                      │ HTTP + SSE (/api/*)
                      ▼
 ┌──────────────────────────────────────────────────────────────┐
 │               FastAPI SSE 后端 (:8000)                        │
 │                    api_server.py                              │
 │  REST: /api/sessions, /api/upload, /api/.../download/latest  │
 │  SSE:  /api/sessions/{id}/chat (流式推送)                     │
 │  事件: node_start | node_complete | stream_token              │
 │        agent_complete | agent_error                           │
 └─────────────────────┬────────────────────────────────────────┘
                      │ run_agent(user_input)
                      ▼
 ┌──────────────────────────────────────────────────────────────┐
 │               LangGraph 状态机 (agent/)                       │
 │                                                              │
 │  load_session → process_input → manage_context               │
 │     → save_state_snapshot → classify_intent                  │
 │        │          │           │         │        │           │
 │        ▼          ▼           ▼         ▼        ▼           │
 │    retrieve   modify_jrxml  preview   consult  undo/reset    │
 │        │          │         /export                          │
 │        ▼          ▼                                         │
 │     generate     save_session                                │
 │        │          │                                          │
 │        └────┬─────┘                                          │
 │             ▼                                                │
 │  (jrxml_reorder 自动规范化元素顺序)                            │
 │             ▼                                                │
 │         validate ──(fail)──► explain_error ──► correct_jrxml │
 │            │                       ▲              │          │
 │          (pass)                    └──(retry<N)───┘          │
 │             ▼                                                │
 │         finalize (失败版本 → jrxml_versions, 提示下载)         │
 └──────────┬──────────────┬─────────────────────┬──────────────┘
           │              │                     │
           ▼              ▼                     ▼
 ┌──────────────┐  ┌──────────────┐  ┌──────────────────────────┐
 │  LLM 后端    │  │ 向量知识库   │  │  验证服务 (:8001)         │
 │ backend/llm  │  │ ChromaDB +   │  │  FastAPI                  │
 │              │  │ RAGSearcher  │  │  结构检查 + 严格 XSD 校验  │
 │ Anthropic SDK│  │              │  │                           │
 │ OpenAI SDK   │  │ Sentence-    │  │  /validate                │
 │ Ollama       │  │ Transformer  │  │  /health                  │
 └──────────────┘  └──────────────┘  └──────────────────────────┘
 ```
 ## 目录结构
 ```
 agent_jrxml/
 ├── api_server.py                   # FastAPI SSE 后端（REST + 流式推送）
 │
 ├── frontend/                       # Vue 3 + Vite 前端
 │   └── src/
 │       ├── api/client.ts           # SSE 客户端 + fetch 封装
 │       ├── stores/                 # Pinia 状态管理（chat + session）
 │       └── components/             # 聊天界面组件
 │
 ├── agent/                          # LangGraph 工作流层
 │   ├── __init__.py
 │   ├── state.py                    # AgentState TypedDict 定义（~28 字段）
 │   ├── nodes.py                    # 18 个工作流节点（生成/修改/验证/修正/意图识别...）
 │   └── graph.py                    # 状态图编译 + 路由逻辑 + 初始状态工厂
 │
 ├── backend/                        # 基础设施层
 │   ├── __init__.py
 │   ├── llm.py                      # LLM 工厂：Anthropic(MiniMax) / OpenAI / Ollama
 │   ├── embeddings.py               # 嵌入模型工厂：HuggingFace / OpenAI
 │   ├── validation.py               # 验证服务 HTTP 客户端
 │   ├── session.py                  # 会话持久化（JSON CRUD + flush/fsync）
 │   ├── jrxml_reorder.py            # JRXML 元素自动排序（匹配 XSD sequence）
 │   └── rag_adapter.py              # RAG 适配层：连接 ChromaDB 做语义搜索
 │
 ├── validation_service/             # 独立验证微服务
 │   ├── main.py                     # FastAPI 服务：结构检查 + 严格 XSD 校验
 │   └── schemas/
 │       └── jasperreport_7_0_6.xsd  # JasperReports 7.0.6 XSD（286KB）
 │
 ├── scripts/
 │   └── init_kb.py                  # 知识库初始化（预下载嵌入模型）
 │
 ├── tests/
 │   ├── __init__.py
 │   ├── test_validation.py          # 验证服务单元测试
 │   └── test_agent.py               # 代理集成测试
 │
 ├── data/                           # 数据目录
 │   ├── sample_templates/           # 示例 JRXML 模板
 │   └── corrections/                # 错误修正案例
 │
 ├── db/chroma/                      # ChromaDB 持久化存储
 ├── sessions/                       # 会话 JSON 文件存储
 ├── jrxml_versions/                 # 失败版本归档存储
 ├── rag/                            # RAG 子模块（独立管线）
 ├── requirements.txt                # Python 依赖
 ├── start_all.bat                   # 一键启动全部服务
 ├── start.bat                       # 启动脚本
 ├── stop.bat                        # 一键停止全部服务
 ├── .env.example                    # 环境变量模板
 └── README.md                       # 使用说明
 ```
 ## 数据流详解
 ### 1. 请求生命周期
 ```
 用户输入 "创建员工名册，包含 id、name、department"
  │
  ├─ load_session      从 sessions/{id}.json 恢复历史状态
  ├─ process_input     记录用户消息到 conversation_history
  ├─ manage_context    检查 token 数，超阈值则 LLM 压缩早期对话
  ├─ save_state_snapshot  保存当前状态快照（用于撤销）
  ├─ classify_intent   LLM 分类 → initial_generation
  ├─ retrieve          RAGSearcher.search_as_context() → 注入 prompt
  ├─ generate          LLM 生成初始 JRXML
  ├─ save_session      持久化到磁盘
  ├─ validate          调用 FastAPI 验证服务
  │   ├─ pass → finalize
  │   └─ fail → explain_error → correct_jrxml → validate (最多 5 次)
  └─ finalize          保存最终 JRXML，UI 展示结果
 ```
 ### 2. 意图路由（8 种意图）
 | 意图 | 条件 | 路由目标 |
 |------|------|---------|
 | `initial_generation` | 无现有报表 | retrieve → generate |
 | `modify_report` | 有现有报表 | modify_jrxml |
 | `preview_report` | — | 直接展示 current_jrxml |
 | `export_jrxml` | — | 触发下载 |
 | `export_pdf` | — | 触发下载 |
 | `consult_question` | — | handle_consult（独立回答） |
 | `undo_modification` | history_states 非空 | 恢复上一个快照 |
 | `reset_session` | — | 清空所有报表状态 |
 ### 3. 自动修正循环
 ```
 validate ──fail──► explain_error ──► correct_jrxml ──► validate
   ▲                                                       │
   └──────────── retry_count < MAX_RETRY (5) ──────────────┘
 ```
 每次修正都会递增 `retry_count`，达到上限后直接 `finalize`（即使仍有错误），在 UI 上展示错误信息。
 ## 核心组件
 ### AgentState（agent/state.py）
 ```python
 class AgentState(TypedDict, total=False):
    # 工作流核心
    conversation_history: List[dict]     # 当前上下文的对话（可能被压缩裁剪）
    current_jrxml: str                   # 当前 JRXML 文本
    user_input: str                      # 本轮用户输入
    status: str                          # "pass" | "fail"
    error_msg: str                       # 验证错误信息
    natural_explanation: str             # 错误的人类可读解释
    retry_count: int                     # 当前修正尝试次数
    user_modification_request: str       # 修改请求文本
    final_jrxml: str                     # 最终验证通过的 JRXML
    stage: str                           # 当前阶段标识
    retrieved_context: str               # RAG 检索到的模板上下文
    # 上下文压缩
    full_conversation_history: List[dict]  # 完整对话（含时间戳）
    compressed_history: str                # 早期对话的压缩摘要
    current_token_count: int               # 当前估算 token 数
    # 会话持久化
    session_id: str
    session_name: str
    created_at: str
    updated_at: str
    # 意图识别 + 撤销
    intent: str                          # 8 种意图之一
    history_states: List[dict]           # 状态快照栈（最多 10 个）
 ```
 ### 工作流节点（agent/nodes.py）
 | 节点 | 职责 | 调用外部 |
 |------|------|---------|
 | `load_session_node` | 从磁盘恢复会话状态 | `backend.session.load_session` |
 | `process_input` | 记录用户输入到对话历史 | — |
 | `manage_context` | token 超阈值时 LLM 压缩早期对话 | `get_llm()` |
 | `save_state_snapshot` | 保存快照到 history_states | — |
 | `classify_intent` | LLM 分类用户意图（8 类） | `get_llm()` |
 | `retrieve` | 从 ChromaDB 搜索相关模板 | `backend.rag_adapter.search_chunks` |
 | `generate` | 首次生成 JRXML | `get_llm()` |
 | `modify_jrxml` | 根据用户需求修改现有 JRXML | `get_llm()` |
 | `validate` | 调用验证服务检查 JRXML | `backend.validation.validate_jrxml` |
 | `explain_error` | LLM 将编译错误翻译为人话 | `get_llm()` |
 | `correct_jrxml` | LLM 自动修正验证失败 | `get_llm()` |
 | `finalize` | 保存最终 JRXML，标记完成 | — |
 | `handle_consult` | 回答 JasperReports 咨询 | `get_llm()` |
 | `handle_undo` | 从 history_states 恢复上一状态 | — |
 | `handle_reset` | 清空报表，重置会话 | — |
 | `save_session_node` | 持久化当前状态到磁盘 | `backend.session.save_session` |
 ### LLM 工厂（backend/llm.py）
 ```
 get_llm()
  ├─ LLM_BACKEND=local  → langchain_ollama.ChatOllama
  └─ LLM_BACKEND=cloud
       ├─ LLM_PROVIDER=anthropic  → raw anthropic.Anthropic SDK
       │    适配 MiniMax Anthropic 兼容 API
       │    包装为 MiniMaxLLM（提供 .invoke() 接口）
       └─ LLM_PROVIDER=openai     → langchain_openai.ChatOpenAI
 ```
 **MiniMaxLLM 适配器**：将 Anthropic SDK 的 `client.messages.create()` 包装成与 LangChain 兼容的 `.invoke(prompt) → Response.content` 接口，供所有节点统一调用。
 ### RAG 适配层（backend/rag_adapter.py）
 ```
 search_chunks(query, k=5)
  └─ RAGSearcher（单例）
       ├─ 懒加载 SentenceTransformer 模型
       ├─ 懒连接 ChromaDB PersistentClient
       ├─ query → 向量编码 → collection.query() → top-k 结果
       └─ search_as_context() → 拼接带元数据标签的上下文字符串
 ```
 ### 验证服务（validation_service/main.py）
 独立的 FastAPI 进程（端口 8001），提供两级验证：
 1. **结构检查**（始终执行）：
   - XML 语法正确性
   - `$F{field}` 引用一致性（表达式 vs `<field>` 声明）
   - `<queryString>` 是否含有效 SQL SELECT
   - `<jasperReport>` 必需属性（pageWidth, pageHeight, name）
 2. **XSD Schema 校验**（可选）：
   - 需要 `validation_service/schemas/jasperreport_7_0_6.xsd` 文件
   - 使用 `lxml.etree.XMLSchema` 进行完整 schema 校验
 ### 会话持久化（backend/session.py）
 ```
 sessions/{session_id}.json
  {
    "session_id": "abc123def456",
    "session_name": "员工名册报表",
    "created_at": "2026-05-19T09:00:00+00:00",
    "updated_at": "2026-05-19T09:30:00+00:00",
    "agent_state": { ... }   // 完整的 AgentState 字段
  }
 ```
 ## 关键 Prompt 设计
 | Prompt | 用途 | 输出约束 |
 |--------|------|---------|
 | `INTENT_CLASSIFY_PROMPT` | 8 分类意图识别 | 只输出意图名称 |
 | `INITIAL_GENERATION_PROMPT` | 首次生成 JRXML | 只输出 JRXML，无 markdown |
 | `MODIFICATION_PROMPT` | 修改现有 JRXML | 只输出完整 JRXML |
 | `CORRECTION_PROMPT` | 自动修正错误 | 只输出修复后 JRXML |
 | `EXPLAIN_PROMPT` | 错误转人话 | 2-3 句话 |
 | `COMPRESSION_PROMPT` | 对话压缩 | ≤200 字摘要 |
 | `CONSULT_PROMPT` | 咨询解答 | 简洁中文 |
 ## 配置参数（.env）
 | 参数 | 默认值 | 说明 |
 |------|--------|------|
 | `LLM_BACKEND` | cloud | cloud / local |
 | `LLM_PROVIDER` | openai | openai / anthropic |
 | `OPENAI_API_KEY` | — | API 密钥 |
 | `OPENAI_BASE_URL` | https://api.openai.com/v1 | API 端点 |
 | `LLM_MODEL` | gpt-4o | 模型名称 |
 | `LOCAL_LLM_MODEL` | qwen2.5-coder:7b | Ollama 模型 |
 | `EMBED_BACKEND` | local | local / cloud |
 | `LOCAL_EMBED_MODEL` | Qwen/Qwen3-Embedding-0.6B | 本地嵌入模型 |
 | `VALIDATION_SERVICE_URL` | http://localhost:8001/validate | 验证端点 |
 | `CHROMA_PERSIST_DIR` | ./db/chroma | ChromaDB 路径 |
 | `MAX_RETRY` | 5 | 自动修正最大尝试次数 |
 | `CONTEXT_MAX_TOKENS` | 6000 | 触发压缩的 token 阈值 |
 | `CONTEXT_KEEP_RECENT` | 4 | 保留最近 N 轮完整对话 |
 | `SESSIONS_DIR` | ./sessions | 会话 JSON 存储目录 |
 | `HISTORY_MAX_SNAPSHOTS` | 10 | 撤销快照保留数量 |
 ## 启动流程
 ```bash
 # 1. 安装依赖
 pip install -r requirements.txt
 # 2. 配置环境
 cp .env.example .env
 # 编辑 .env 填入 API 密钥
 # 3. 初始化知识库（预下载嵌入模型）
 python scripts/init_kb.py --download-model
 # 4. 启动验证服务（终端 1）
 python -m uvicorn validation_service.main:app --port 8001 --host 0.0.0.0
 # 5. 启动旧版 Streamlit 界面（终端 2；该界面已由 api_server.py + frontend/ 替代）
 STREAMLIT_SERVER_HEADLESS=true streamlit run app.py --server.port 8501
 # 6. 访问 http://localhost:8501
 ```
 ## 测试
 ```bash
 pytest tests/test_validation.py -v    # 验证服务单元测试
 pytest tests/test_agent.py -v         # 代理集成测试
 pytest tests/ -v                      # 全部测试
 ```
 ## 技术栈
 | 层 | 技术 |
 |----|------|
 | UI | Vue 3 + Vite（旧版：Streamlit 1.57） |
 | 工作流引擎 | LangGraph 1.2 |
 | LLM 接入 | Anthropic SDK / LangChain-OpenAI / LangChain-Ollama |
 | 向量数据库 | ChromaDB 1.5 |
 | 嵌入模型 | Sentence-Transformers (HuggingFace) |
 | 验证服务 | FastAPI + lxml XMLSchema |
 | HTTP 客户端 | httpx |
 | Token 计算 | tiktoken |
 | 持久化 | JSON 文件 + ChromaDB PersistentClient |
@@ -98,9 +98,7 @@ validation_service/ (FastAPI, 端口 8001) — 不变
 | `agent/datasource.py` | 数据源模式解析：`$P{xxx}` 参数 vs JDBC 直连 | 低 |
 | `agent/jrxml_windower.py` | JRXML Band 级窗口化引擎：拆解/切分/重组/元素计数校验 | 中 |
 | `validation_service/main.py` | FastAPI 验证服务 | 低 |
 | `scripts/init_kb.py` | 旧 RAG 知识库初始化/模型下载 | 低 |
 | `scripts/init_default_kb.py` | 多租户默认 KB 初始化（默认用户 + 预置 KB） | 低 |
 | `app.py` | ~~旧 Streamlit UI~~（已由 api_server.py + frontend/ 替代） | 废弃 |
 ## 关键约定
@@ -1,91 +0,0 @@
 # RAG 知识库集成说明
 ## 概述
 使用 `rag_jrxml` 子项目的语义分块管线替换原有的简单向量知识库。`rag_jrxml` 独立运行产出 ChromaDB，主项目通过 `backend/rag_adapter.py` 查询。
 ## 架构
 ```
 rag/                              ← git submodule (rag_jrxml)
 ├── jrxml_source/                 ← 源数据目录 (242 .jrxml + 16 .md)
 ├── models/                       ← 嵌入模型本地存放
 │   └── paraphrase-multilingual-MiniLM-L12-v2/   (449MB, 384维)
 ├── jrxml_source_chunks/          ← 分块产物 (all_chunks.json, 15,510 chunks)
 ├── embeddings/                   ← 向量产物 (embeddings.npy, 23MB)
 db/chroma/                        ← ChromaDB 持久化 (主项目查询端读取)
 │   集合: jrxml_chunks (15,510 条记录, cosine 距离)
 backend/rag_adapter.py            ← RAGSearcher: 加载模型 + 连接 ChromaDB + 搜索
 agent/nodes.py                    ← retrieve() 调用 search_chunks()
 ```
 ## 管线流程
 ```
 源文件 (.jrxml + .md)
  → batch_chunker.py    语义分块 (按 XML 元素/标题层级切分)
  → embed_chunks.py     向量化 (Sentence-Transformers, CPU)
  → import_to_chroma.py 导入 ChromaDB
  → rag_adapter.py      主项目查询
 ```
 ## 当前数据
 | 指标 | 数值 |
 |---|---|
 | 源文件 | 258 (242 JRXML + 16 MD) |
 | Chunks 总数 | 15,510 |
 | 嵌入维度 | 384 |
 | 嵌入模型 | sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 |
 | 分块类型 | query, field, parameter, variable, band_*, chart, crosstab, element_*, section_* 等 |
 | 知识库大小 | embeddings.npy 23MB, ChromaDB ~50MB |
 ## 主项目配置
 `.env` 中相关变量：
 ```env
 # 嵌入模型 (本地路径优先)
 RAG_EMBED_MODEL=./rag/models/paraphrase-multilingual-MiniLM-L12-v2
 # ChromaDB 路径
 RAG_CHROMA_PATH=./db/chroma
 # 集合名称 (与 rag 子项目一致)
 RAG_COLLECTION_NAME=jrxml_chunks
 ```
 ## 全量构建
 ```bash
 cd rag
 python batch_chunker.py jrxml_source
 python embed_chunks.py jrxml_source_chunks/all_chunks.json
 python import_to_chroma.py --chroma_path ../db/chroma
 ```
 ## 增量更新
 ```bash
 # 1. 将新的 .jrxml / .md 放入 rag/jrxml_source/
 # 2. 增量运行
 cd rag
 python batch_chunker.py jrxml_source --incremental
 python embed_chunks.py --incremental
 python import_to_chroma.py --chroma_path ../db/chroma --incremental
 ```
 ## 更新 rag 子项目
 ```bash
 git submodule update --remote rag
 ```
 ## 搜索接口
 ```python
 from backend.rag_adapter import search_chunks
 # 返回拼接好的上下文字符串，可直接注入 LLM prompt
 context = search_chunks("如何创建饼图", k=5)
 ```
@@ -144,7 +144,6 @@ jrxml-agent/
    *.md                    10 个 Prompt 模板文件
  validation_service/
    main.py                 FastAPI 验证服务器
    validate.bat            Windows 启动器
  data/
    sample_templates/       知识库的 JRXML 模板
    corrections/            错误修正案例
@@ -152,7 +151,7 @@ jrxml-agent/
    app.log                 应用日志（节点流转、路由、用户交互）
    llm.log                 LLM 调用日志（完整 prompt / response）
  scripts/
-    init_kb.py              Chroma 知识库初始化脚本
+    init_default_kb.py      多租户默认知识库初始化脚本
  tests/
    test_validation.py      验证服务测试
    test_agent.py           代理集成测试
@@ -1,202 +0,0 @@
 # 改进路线图
 ## 阶段一：代码质量（低风险，快速交付）
 ### 1. Prompt 拆分 ✓
 - [x] 创建 `prompts/` 目录
 - [x] 7 个 prompt 各拆为独立 `.md` 文件
 - [x] `nodes.py` 改为从文件加载
 - [x] 支持热重载（文件变更无需重启）
 ### 2. 修复无效代码 ✓
 - [x] `backend/llm.py` — `get_num_tokens()` 修复为正确 API
 - [x] `backend/embeddings.py` — 修复 docstring 函数名不一致
 - [x] `backend/llm.py` — 统一 LLM 接口基类 `_BaseLLM`
 ---
 ## 阶段二：用户体验（核心改造）
 ### 3. 流式输出 + 节点平铺 ✓
 - [x] `backend/llm.py` — LLM 工厂支持 `stream()` 统一接口
 - [x] `agent/nodes.py` — generate/modify/correct 节点使用流式 + `get_stream_writer()`
 - [x] `app.py` — 使用 `stream_mode=["updates", "custom"]` 捕获流式事件
 - [x] 节点状态平铺（处理过程 expander 逐节点展示）
 - [x] 流式完成后节点自动折叠
 - [x] 完成后单独展示「总结卡片」
 ### 4. 错误自增长知识库 ✓
 - [x] `backend/error_kb.py` — ErrorKB 类（ChromaDB 持久化）
 - [x] 错误指纹去重（标准化 + MD5）
 - [x] `correct_jrxml` — 保存修正前状态到 `last_error_case`
 - [x] `validate` — 修正成功时自动记录（仅新错误，自动去重）
 - [x] `retrieve` — 搜索错误知识库，注入历史修正案例
 - [x] 记录内容：错误 + 修正前后 JRXML + prompt + 工具链 + 模型
 ### 5. 文件上传支持 ✓
 - [x] `backend/file_parser.py` — 统一解析接口
  - [x] 图片 → PIL 元信息 + PaddleOCR（可选安装后自动识别）
  - [x] PDF → pdfplumber / PyMuPDF 文本提取
  - [x] DOCX → python-docx 文本提取
  - [x] 纯文本 (.txt/.csv/.json/.xml) → 直接读取
 - [x] `can_use_vision()` — 根据模型名判断是否支持原生多模态
 - [x] `app.py` — 侧边栏文件上传组件（多文件，可移除）
 - [x] 上传文本自动注入下一条消息前缀
 ### 6. A4 图片模板识别 ✓
 - [x] `backend/layout_analyzer.py` — 完整布局分析模块
 - [x] A4 比例判定：exact(±3%) / close(±8%) / not_a4 三档
 - [x] PaddleOCR 布局分析：逐元素提取坐标(x,y,w,h)、字号、文本
 - [x] 行分组：Y 轴容差自动聚类
 - [x] 结构化输出：`图片模板共 X 行，第 1 行有 Y 个元素，其中元素 a 长...高...字体...内容是...`
 - [x] 检测门槛：≥2 个 OCR 元素 + A4 比例 → 标记为模板
 - [x] `app.py` — 上传图片/PDF 时自动触发布局分析，替换为布局描述
 ### 7. 会话历史 JRXML 下载 ✓
 - [x] `agent/state.py` — 新增 `jrxml_versions` 字段
 - [x] `agent/nodes.py` — `finalize` 节点追加版本记录
 - [x] `app.py` — 侧边栏"历史版本"折叠区，每版本独立下载按钮
 ### 8. 预览功能修复 ✓
 - [x] 根因：`preview_report` 路由到 `save_session` → `validate` 触发不必要的验证修正循环
 - [x] 修复：`route_after_save` — 预览/导出意图跳过验证直接 `finalize`
 ---
 ## 阶段三：细节修复
 ### 9. Ctrl+C 修复 ✓
 - [x] `app.py` — 注入 JS 拦截裸 `c` 键，保留 Ctrl+C 复制行为
 ---
 ## 阶段四：可观测性
 ### 10. 结构化日志系统 ✓
 - [x] `backend/logger.py` — 集中日志配置模块
  - [x] JSON 格式化（每行一条记录，便于 jq/pandas 分析）
  - [x] 请求级 trace_id（contextvars 自动传播，一次用户请求贯穿全链路）
  - [x] 独立 LLM 日志文件 `logs/llm.log`（记录完整 prompt 和 response）
  - [x] 时区：UTC+8（中国时区）
  - [x] 日志轮转（单文件 10MB，保留 5 备份）
 - [x] `backend/llm.py` — `_LLMLoggingWrapper` 包装所有 LLM 后端
  - [x] 记录每次 invoke/stream 的请求 prompt、响应内容、耗时、模型、调用来源
  - [x] 异常时也记录完整 prompt
 - [x] `agent/nodes.py` — `@log_node` 装饰器覆盖 18 个节点
  - [x] 入口/出口/异常三个阶段的日志
  - [x] 自动记录 state 关键字段摘要（session_id、intent、status、jrxml_length 等）
  - [x] 每个节点耗时（duration_ms）
 - [x] `agent/graph.py` — `@_log_route` 装饰器覆盖 9 个路由函数
  - [x] 记录每次路由决策（来源 → 目标）
 - [x] `app.py` — 用户交互日志
  - [x] 收到用户输入（含上传文件信息）
  - [x] 代理执行开始/完成（含最终 intent、status、jrxml_length）
  - [x] 异常时记录错误详情
  - [x] 会话新建/切换/删除操作日志
 - [x] `backend/session.py` — 会话创建/删除日志
 - [x] `backend/validation.py` — 验证完成/连接失败日志
 - [x] `.env.example` — 新增 `LOG_DIR`、`LOG_LEVEL` 配置项
 - [x] `.gitignore` — 新增 `logs/` 忽略规则
 ---
 ## 执行顺序建议
 ```
 1. Prompt 拆分 ──► 2. 无效代码修复
                        │
                        ▼
              3. 流式输出 + 节点平铺
                        │
          ┌─────────────┼─────────────┐
          ▼             ▼             ▼
    4. 错误自增长   5. 文件上传   7. 下载历史
          │             │
          ▼             ▼
    6. A4 模板识别  8. 预览修复
                        │
                        ▼
                 9. Ctrl+C 修复
                        │
                        ▼
               10. 结构化日志系统
 ```
 ---
 ## 阶段五：OCR 与智能上传 (v3/v4) ✓
 ### 11. OCR 单据字段精确提取 ✓
 - [x] `backend/ocr_extractor.py` — 4 策略优先级提取 (exact_match → kv_pair → regex → table_match)
 - [x] PaddleOCR 首次识别后将原始结果（含所有文本元素 + bbox坐标）持久化
 - [x] `_format_ocr_context()` — OCR 结果格式化为 LLM prompt 注入
 - [x] `process_input` 节点在上传图片时自动触发 OCR 字段提取
 - [x] OCR 结果持久化到会话文件
 ### 12. 多模态聊天输入 ✓
 - [x] `app.py` — `st.chat_input` 替换为 `st_multimodal_chatinput`
 - [x] 支持 Ctrl+V 粘贴文件 + 拖拽 + 文件按钮
 - [x] `_process_uploaded_file()` — 提取共享文件处理逻辑（消除 ~70 行重复代码）
 - [x] 剪贴板文件 base64 解码 + MIME type → 扩展名推断
 ### 13. 多格式文件支持 ✓
 - [x] `backend/file_parser.py` — 新增 XLSX (openpyxl)、XLS (xlrd)、DOC (olefile)
 - [x] 侧边栏上传器类型列表中新增 xlsx/xls/doc
 - [x] 单元测试: `tests/test_file_parser_formats.py` (4 tests)
 ### 14. 批注检测 ✓
 - [x] `backend/annotation_detector.py` — 圈选 + 箭头 + OCR 关联
 - [x] 圆圈检测: 红色通道增强 → HoughCircles
 - [x] 箭头检测: Canny → HoughLinesP → 线段聚类 → 端点方向判定
 - [x] `format_annotation_context()` — 批注结果格式化为中文提示
 - [x] `process_input` 节点在 OCR 提取后自动运行批注检测
 - [x] `annotation_result` 字段持久化到 AgentState + 会话文件
 - [x] 单元测试: `tests/test_annotation_detector.py` (7 tests)
 ### 15. OCR 上下文 LLM 注入 ✓
 - [x] `prompts/modification.md` — 新增 `{ocr_context}` 占位符
 - [x] `modify_jrxml` + `generate` 节点注入 OCR 上下文
 - [x] OCR 上下文包含: 结构化字段、全部文本元素（含坐标）、批注检测结果
 ---
 ## 阶段六：分层精确生成 (v5) ✓
 ### 16. 布局 Schema 提取 ✓
 - [x] `backend/layout_analyzer.py` — 新增 `extract_layout_schema()` 函数（+107 行）
 - [x] X 坐标聚类列检测（avg_width * 0.5 阈值）
 - [x] 区域分类：标题/表头/数据/表尾（启发式算法）
 - [x] `schema_text` 紧凑中文描述（列定义 + 区域 + 宽度分类）
 - [x] 空行/单行/双行边界情况处理
 - [x] 单元测试: `tests/test_layered_generation.py::TestExtractLayoutSchema` (9 tests)
 ### 17. 3 阶段生成管线 ✓
 - [x] Phase 1: `generate_skeleton` — 压缩布局 schema → 骨架 JRXML (`$F{field_N}` 占位)
 - [x] Phase 2: `refine_layout` — 采样坐标（表头+首行数据+末行）→ 像素级位置精调
 - [x] Phase 3: `map_fields` — OCR 字段名 → 替换占位符为真实字段名
 - [x] 中间阶段跳过验证（仅最终 mapped 结果进入 validate 循环）
 - [x] 流式输出支持（每阶段逐字生成）
 - [x] 单元测试: `tests/test_layered_generation.py::TestIntegration` (4 tests)
 ### 18. 路由与状态 ✓
 - [x] `agent/graph.py` — 新增 `route_after_retrieve()` 条件路由
 - [x] `layout_schema.total_rows > 0` → 3 阶段，否则 → 原有 1-shot
 - [x] `agent/state.py` — 新增 `layout_schema: dict` 和 `ocr_elements: list`
 - [x] 会话持久化支持（`save_session_node` / `load_session_node`）
 - [x] 文本请求和其他意图零行为变更
 - [x] 单元测试: `tests/test_layered_generation.py::TestRouting` (4 tests)
 ### 19. Prompt 模板 ✓
 - [x] `prompts/skeleton_generation.md` — 骨架生成 prompt
 - [x] `prompts/refine_layout.md` — 布局精调 prompt
 - [x] `prompts/field_mapping.md` — 字段映射 prompt
 - [x] `prompts/loader.py` — 注册 3 个新模板（热重载）
 ### 20. UI 集成 ✓
 - [x] `app.py` — 上传 A4 图片时自动调用 `extract_layout_schema()`
 - [x] 新增节点标签：`🏗 生成骨架` / `📐 精调布局` / `🏷 映射字段`
 - [x] 3 个新节点的详情渲染
 ---
 阶段一立即可做，无外部依赖。阶段二是主要工作量。阶段三是收尾。阶段四是可观测性基础。阶段五是 OCR 智能增强和用户体验改进。阶段六解决 A4 报表图片 OCR 元素过多（数百个）导致 LLM prompt 超长的问题。
@@ -1,926 +0,0 @@
 """Streamlit 多轮对话 UI，用于 JRXML 生成代理。
 支持:
 - 流式输出（LLM 逐字展示）
 - 节点平铺展开（每个处理阶段独立展示）
 - 完成后自动折叠节点区
 - 过程总结卡片
 """
 import os
 import sys
 # Default TRANSFORMERS_VERBOSITY to "error" unless the environment already sets it.
 os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
 # Best-effort import: any failure while importing torchvision is deliberately ignored.
 # NOTE(review): presumably a workaround for an import-order problem — confirm why this is needed.
 try:
    import torchvision
 except Exception:
    pass
 import base64
 import tempfile
 import time
 from pathlib import Path
 import streamlit as st
 import streamlit.components.v1 as components
 from dotenv import load_dotenv
 # Load .env before the project imports below; override=True lets .env values
 # replace variables that are already present in the process environment.
 load_dotenv(override=True)
 from agent.graph import build_graph, create_initial_state
 from backend.session import (
    create_session,
    load_session,
    delete_session,
    list_all_sessions,
 )
 from backend.logger import get_logger, set_trace_id, generate_trace_id
 # Module-level logger used for the user-interaction log entries in this script.
 _app_log = get_logger("app")
 # Page-level Streamlit configuration (title, icon, wide layout, sidebar expanded).
 st.set_page_config(
    page_title="JRXML 代理",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
 )
 # Keep Streamlit's bare 'c' shortcut (clear cache) from firing while leaving
 # Ctrl+C / Cmd+C copy untouched. The script registers a capture-phase keydown
 # listener on the parent document and swallows a plain 'c' unless the user is
 # typing in an input, textarea or content-editable element.
 # Fix: document.activeElement can be null; the original guarded only the
 # tagName read and then dereferenced it unguarded for isContentEditable, which
 # throws inside the listener and lets the shortcut through.
 st.html("""
 <script>
 (function() {
    const parent = window.parent.document;
    parent.addEventListener('keydown', function(e) {
        // 仅拦截裸 'c' 键（非 Ctrl/Cmd 组合）
        if (e.key === 'c' && !e.ctrlKey && !e.metaKey && !e.altKey) {
            // activeElement may be null: treat that as "not typing in a field".
            const active = parent.activeElement;
            const tag = active ? active.tagName : '';
            const editable = active ? active.isContentEditable : false;
            if (tag !== 'INPUT' && tag !== 'TEXTAREA' && !editable) {
                e.stopImmediatePropagation();
                e.preventDefault();
            }
        }
    }, true);
 })();
 </script>
 """)
 # ---- Node name → Chinese display label ----
 # Looked up by run_agent when it renders node progress; unknown node names
 # fall back to the raw node name there.
 NODE_LABELS = {
    "load_session":       "📂 加载会话",
    "process_input":      "📝 记录输入",
    "manage_context":     "🧠 管理上下文",
    "save_state_snapshot": "💾 保存快照",
    "classify_intent":    "🔍 识别意图",
    "retrieve":           "📚 检索模板",
    "generate":           "⚙️ 生成 JRXML",
    "modify_jrxml":       "🔧 修改 JRXML",
    "validate":           "✅ 验证",
    "explain_error":      "🔎 分析错误",
    "correct_jrxml":      "🛠 自动修正",
    "finalize":           "📋 完成",
    "handle_consult":     "💬 咨询回答",
    "handle_undo":        "↩ 撤销操作",
    "handle_reset":       "🔄 重置会话",
    "save_session":       "💾 保存会话",
    "generate_skeleton": "🏗 生成骨架",
    "refine_layout":     "📐 精调布局",
    "map_fields":        "🏷 映射字段",
 }
 # Intent identifier → Chinese display label (used for the classify_intent detail text).
 INTENT_LABELS = {
    "initial_generation": "新建报表",
    "modify_report":      "修改报表",
    "preview_report":     "预览报表",
    "export_pdf":         "导出 PDF",
    "export_jrxml":       "下载 JRXML",
    "undo_modification":  "撤销修改",
    "consult_question":   "咨询问题",
    "reset_session":      "重置会话",
 }
 # Nodes that run_agent does not add to the progress list it displays.
 SKIP_NODES = {"load_session", "process_input", "manage_context",
              "save_state_snapshot", "save_session"}
 def _render_jrxml(jrxml: str, max_lines: int = 30):
    """Show JRXML source in an XML code widget, limited to ``max_lines`` lines.

    The text is stripped and split on newlines; only the first ``max_lines``
    lines are displayed. When the document is longer, a trailing marker line
    reporting the total line count is appended.

    Args:
        jrxml: JRXML document text to display.
        max_lines: Maximum number of source lines to show.
    """
    all_lines = jrxml.strip().split("\n")
    total = len(all_lines)
    shown = "\n".join(all_lines[:max_lines])
    if total > max_lines:
        # Tell the reader how much was cut off.
        shown = f"{shown}\n... (共 {total} 行)"
    st.code(shown, language="xml")
 # ---- URL parameters ----
 query_params = st.query_params
 url_session_id = query_params.get("session_id", "")
 # ---- Session state initialisation ----
 # Each st.session_state slot is seeded only when it is missing.
 if "messages" not in st.session_state:
    st.session_state.messages = []
 if "graph" not in st.session_state:
    st.session_state.graph = build_graph()
 if "pending_action" not in st.session_state:
    st.session_state.pending_action = None
 if "agent_state" not in st.session_state:
    # Restore the session named in the URL when it exists and carries an
    # agent_state; in every other case start a brand-new persisted session.
    stored = load_session(url_session_id) if url_session_id else None
    if stored and stored.get("agent_state"):
        st.session_state.agent_state = stored["agent_state"]
        st.session_state.agent_state["session_id"] = url_session_id
    else:
        st.session_state.agent_state = create_initial_state()
        new_data = create_session(name="", agent_state=st.session_state.agent_state)
        # Copy the identity fields assigned by create_session into the live state.
        for _field in ("session_id", "session_name", "created_at"):
            st.session_state.agent_state[_field] = new_data[_field]
 current_session_id = st.session_state.agent_state.get("session_id", "")
 def run_agent(user_input: str):
    """运行代理图：流式渲染节点进度 + LLM 文本。"""
    trace_id = generate_trace_id()
    set_trace_id(trace_id)
    agent_state = st.session_state.agent_state
    session_id = agent_state.get("session_id", "")
    _app_log.info(
        "代理执行开始",
        extra={
            "session_id": session_id,
            "trace_id": trace_id,
            "user_input_preview": user_input[:200],
            "user_input_length": len(user_input),
            "has_jrxml": bool(agent_state.get("current_jrxml", "").strip()),
            "intent": agent_state.get("intent", ""),
        },
    )
    if agent_state.get("current_jrxml") and agent_state.get("status") == "pass":
        agent_state["user_modification_request"] = user_input
    agent_state["user_input"] = user_input
    agent_state["retry_count"] = 0
    # ---- UI 占位 ----
    progress_placeholder = st.empty()     # 实时节点进度
    streaming_placeholder = st.empty()    # 流式文本
    summary_placeholder = st.empty()      # 总结卡片
    # 初始状态提示
    progress_placeholder.info("⏳ 正在分析您的需求...")
    executed_nodes: list[dict] = []
    stream_text = ""
    stream_active = False
    final_state = None
    def _render_progress(nodes: list[dict]):
        """渲染实时节点进度到占位符。"""
        if not nodes:
            return
        lines = []
        for i, node in enumerate(nodes):
            icon = "●" if i == len(nodes) - 1 else "✓"
            detail = f" — {node['detail']}" if node.get("detail") else ""
            lines.append(f"{icon} {node['label']}{detail}")
        progress_placeholder.markdown("\n\n".join(lines))
    try:
        for event in st.session_state.graph.stream(
            agent_state, stream_mode=["updates", "custom"]
        ):
            mode, data = event
            if mode == "updates":
                for node_name, node_state in data.items():
                    label = NODE_LABELS.get(node_name, node_name)
                    if node_name not in SKIP_NODES:
                        executed_nodes.append({
                            "name": node_name,
                            "label": label,
                        })
                    if node_name == "classify_intent":
                        intent = node_state.get("intent", "")
                        il = INTENT_LABELS.get(intent, intent)
                        executed_nodes[-1]["detail"] = f"意图: {il}"
                    elif node_name == "retrieve":
                        ctx = node_state.get("retrieved_context", "")
                        executed_nodes[-1]["detail"] = (
                            f"找到 {len(ctx)} 字符参考模板" if ctx else "未匹配到模板"
                        )
                    elif node_name in ("generate", "modify_jrxml", "correct_jrxml",
                                       "generate_skeleton", "refine_layout", "map_fields"):
                        jrxml = node_state.get("current_jrxml", "")
                        executed_nodes[-1]["detail"] = f"生成 {len(jrxml)} 字符 JRXML"
                    elif node_name == "validate":
                        status = node_state.get("status", "")
                        if status == "pass":
                            executed_nodes[-1]["detail"] = "验证通过 ✓"
                        else:
                            err = node_state.get("error_msg", "")
                            executed_nodes[-1]["detail"] = f"验证失败: {err[:80]}"
                    elif node_name == "explain_error":
                        expl = node_state.get("natural_explanation", "")
                        executed_nodes[-1]["detail"] = expl[:120]
                    elif node_name == "handle_consult":
                        ans = node_state.get("consult_answer", "")
                        executed_nodes[-1]["detail"] = ans[:150]
                    final_state = node_state
                # 每个节点完成后立即更新进度
                _render_progress(executed_nodes)
            elif mode == "custom":
                cd = data
                if cd.get("type") == "stream":
                    stream_text += cd.get("text", "")
                    stream_active = True
                    streaming_placeholder.code(stream_text, language="xml")
    except Exception as e:
        progress_placeholder.empty()
        _app_log.error(
            f"代理执行异常: {e}",
            extra={"session_id": session_id, "error": str(e)},
        )
        st.error(f"工作流异常: {e}")
        return
    # ---- 清理临时占位 ----
    progress_placeholder.empty()
    if stream_active:
        streaming_placeholder.empty()
    # ---- 总结卡片 ----
    # 注：node_state 只含变更字段，用 agent_state（被所有节点就地修改）获取完整状态
    final_state = agent_state
    if final_state:
        st.session_state.agent_state = final_state
        intent = final_state.get("intent", "")
        status = final_state.get("status", "")
        with summary_placeholder.container(border=True):
            if intent == "consult_question":
                answer = final_state.get("consult_answer", "")
                st.info(answer)
                st.session_state.messages.append({
                    "role": "assistant", "content": answer, "type": "consult",
                })
            elif intent in ("undo_modification", "reset_session"):
                st.success("操作已完成")
            elif intent in ("preview_report", "export_pdf", "export_jrxml"):
                jrxml = final_state.get("current_jrxml", "")
                if jrxml:
                    st.success("✅ 当前报表")
                    _render_jrxml(jrxml)
                    st.session_state.messages.append({
                        "role": "assistant", "content": jrxml, "type": "jrxml",
                    })
                else:
                    st.warning("⚠ 当前没有报表可以展示。")
            elif status == "pass":
                jrxml = final_state.get("current_jrxml", "")
                st.success("✅ JRXML 生成成功")
                st.markdown("**生成结果:**")
                _render_jrxml(jrxml)
                st.caption("您可以从侧边栏下载文件，或继续对话进行修改。")
                st.session_state.messages.append({
                    "role": "assistant", "content": jrxml, "type": "jrxml",
                })
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": "✅ JRXML 生成成功！您可以从侧边栏下载文件，或继续修改。",
                    "type": "success",
                })
            else:
                jrxml = final_state.get("current_jrxml", "")
                error_msg = final_state.get("error_msg", "未知错误")
                explanation = final_state.get("natural_explanation", "")
                retries = final_state.get("retry_count", 0)
                st.error(f"❌ 经过 {retries} 次重试后仍无法生成有效的 JRXML")
                st.markdown(f"**错误:** {error_msg}")
                if explanation:
                    st.markdown(f"**原因:** {explanation}")
                if jrxml:
                    with st.expander("查看当前 JRXML"):
                        _render_jrxml(jrxml, max_lines=80)
                st.caption("💡 下次输入修改需求时，系统会自动加载失败上下文继续修复。")
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": f"❌ 经过 {retries} 次重试后仍无法生成有效的 JRXML。\n\n**错误:** {error_msg}\n\n💡 请直接描述修改需求，系统会自动加载失败上下文。",
                    "type": "error_explanation",
                })
            # OCR 字段提取结果展示
            ocr_result = agent_state.get("ocr_extraction_result", {})
            if ocr_result and ocr_result.get("ocr_available") and ocr_result.get("fields"):
                with st.expander("🔍 OCR 单据字段提取结果", expanded=False):
                    fields = ocr_result.get("fields", [])
                    non_empty = [f for f in fields if f.get("field_value")]
                    empty = [f for f in fields if not f.get("field_value")]
                    if non_empty:
                        st.markdown("**已提取字段:**")
                        for f in non_empty:
                            method = f.get("extraction_method", "")
                            conf = f.get("confidence", 0)
                            st.markdown(
                                f"- **{f['field_name']}**: `{f['field_value']}` "
                                f"（置信度: {conf:.0%}, 方法: {method}）"
                            )
                    if empty:
                        st.caption(
                            f"未提取到值的字段: {', '.join(f['field_name'] for f in empty)}"
                        )
                    st.caption(
                        f"共检测到 {ocr_result.get('total_elements', 0)} 个文本元素"
                    )
    else:
        st.error("未产生结果，请重试。")
    _app_log.info(
        "代理执行完成",
        extra={
            "session_id": session_id,
            "intent": final_state.get("intent", ""),
            "status": final_state.get("status", ""),
            "jrxml_length": len(final_state.get("current_jrxml", "")),
            "retry_count": final_state.get("retry_count", 0),
        },
    )
 # ---- Sidebar ----
 # Session management, quick actions, read-only configuration and downloads.
 with st.sidebar:
    st.title("📊 JRXML 代理")
    st.markdown("通过自然语言生成 JasperReports 模板。")
    st.divider()
    # Session management: switch to, create or delete a stored session.
    st.markdown("### 会话管理")
    sessions = list_all_sessions()
    # Maps the label shown in the selectbox to the session id behind it.
    session_options = {}
    for s in sessions:
        sid = s["session_id"]
        name = s.get("session_name", sid)
        # `or ""` keeps the slice valid when the stored timestamp is None.
        updated = (s.get("updated_at") or "")[:16]
        option_label = f"{name} ({updated})"
        if option_label in session_options:
            # Two sessions sharing a name and an update minute would map to
            # the same key and one would silently vanish from the list, so
            # disambiguate the later one with its session id.
            option_label = f"{option_label} [{sid}]"
        session_options[option_label] = sid
    option_labels = list(session_options)
    # Pre-select the entry that belongs to the currently active session.
    selected_label = None
    for label, sid in session_options.items():
        if sid == current_session_id:
            selected_label = label
            break
    selected = st.selectbox(
        "切换会话",
        options=option_labels,
        index=option_labels.index(selected_label) if selected_label else 0,
        key="session_selector",
    )
    if selected and session_options.get(selected) != current_session_id:
        new_sid = session_options[selected]
        if st.session_state.get("_last_switched_to") == new_sid:
            # Guard against an endless rerun loop caused by switching to the
            # same session repeatedly.
            st.session_state._last_switched_to = ""
        else:
            data = load_session(new_sid)
            if data and data.get("agent_state"):
                _app_log.info(
                    "切换会话",
                    extra={"from_session": current_session_id, "to_session": new_sid},
                )
                data["agent_state"]["session_id"] = new_sid
                st.session_state.agent_state = data["agent_state"]
                st.session_state.messages = []
                st.session_state._last_switched_to = new_sid
                st.rerun()
    col1, col2 = st.columns(2)
    with col1:
        if st.button("➕ 新建", use_container_width=True):
            new_data = create_session(name="", agent_state=create_initial_state())
            _app_log.info(
                "新建会话",
                extra={"session_id": new_data["session_id"]},
            )
            # Start from a fresh state and copy over the identity fields that
            # create_session returned.
            st.session_state.agent_state = create_initial_state()
            st.session_state.agent_state["session_id"] = new_data["session_id"]
            st.session_state.agent_state["session_name"] = new_data["session_name"]
            st.session_state.agent_state["created_at"] = new_data["created_at"]
            st.session_state.messages = []
            st.rerun()
    with col2:
        if st.button("🗑 删除", use_container_width=True):
            if current_session_id:
                _app_log.info(
                    "删除会话",
                    extra={"session_id": current_session_id},
                )
                delete_session(current_session_id)
            # Deleting always lands the user in a newly created session.
            st.session_state.agent_state = create_initial_state()
            new_data = create_session(name="", agent_state=st.session_state.agent_state)
            st.session_state.agent_state["session_id"] = new_data["session_id"]
            st.session_state.agent_state["session_name"] = new_data["session_name"]
            st.session_state.agent_state["created_at"] = new_data["created_at"]
            st.session_state.messages = []
            st.rerun()
    current_name = st.session_state.agent_state.get("session_name", "")
    st.caption(f"当前: {current_name} (`{current_session_id}`)")
    st.divider()
    # Quick actions: each button feeds a canned natural-language command to
    # run_agent and then reruns the script.
    st.markdown("### 快捷操作")
    has_jrxml = bool(st.session_state.agent_state.get("current_jrxml", "").strip())
    has_history = bool(st.session_state.agent_state.get("history_states", []))
    qcol1, qcol2 = st.columns(2)
    with qcol1:
        if st.button("👁 预览", use_container_width=True, disabled=not has_jrxml):
            with st.spinner("正在准备预览..."):
                run_agent("预览报表")
            st.rerun()
    with qcol2:
        if st.button("↩ 撤销", use_container_width=True, disabled=not has_history):
            with st.spinner("正在撤销..."):
                run_agent("撤销上一步修改")
            st.rerun()
    if st.button("🔄 重置会话", use_container_width=True):
        with st.spinner("正在重置..."):
            run_agent("重新来，清空当前报表")
        st.rerun()
    st.divider()
    # Configuration: read-only view of the environment-driven settings.
    st.markdown("### 配置")
    llm_backend = os.getenv("LLM_BACKEND", "cloud")
    llm_model = os.getenv("LLM_MODEL", os.getenv("LOCAL_LLM_MODEL", "gpt-4o"))
    st.caption(f"大语言模型: {llm_backend} / {llm_model}")
    st.caption(f"最大重试次数: {os.getenv('MAX_RETRY', '5')}")
    st.caption(f"验证服务: {os.getenv('VALIDATION_SERVICE_URL', 'http://localhost:8001/validate')}")
    st.divider()
    # Downloads: the latest final JRXML plus every recorded version.
    st.markdown("### 下载")
    final = st.session_state.agent_state.get("final_jrxml", "")
    versions = st.session_state.agent_state.get("jrxml_versions", [])
    if final:
        st.download_button(
            label="📥 下载最新 JRXML",
            data=final,
            file_name="report.jrxml",
            mime="application/xml",
            use_container_width=True,
        )
    if versions:
        with st.expander("📋 历史版本", expanded=False):
            # Newest first; version numbers count down from len(versions).
            for i, v in enumerate(reversed(versions)):
                version_no = len(versions) - i
                ts = v.get("ts", "")[:16]
                label = v.get("label", "版本")
                status = v.get("status", "")
                icon = "✅" if status == "pass" else "❌"
                dl_label = f"{icon} v{version_no} — {label} ({ts})"
                st.download_button(
                    label=dl_label,
                    data=v.get("jrxml", ""),
                    file_name=f"report_v{version_no}.jrxml",
                    mime="application/xml",
                    use_container_width=True,
                    key=f"dl_v{i}",
                )
 # ---- Page header ----
 st.title("📝 JRXML 报表生成器")
 st.caption("用自然语言描述您的报表需求，我将逐步生成可用的 JRXML 模板。")
 # ---- Chat history ----
 # Replay every stored message; the optional "type" tag selects the widget
 # used to render it, and untagged messages fall back to plain markdown.
 for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        kind = msg.get("type")
        if kind == "jrxml":
            # Generated templates are shown collapsed inside an expander.
            with st.expander("查看生成的 JRXML", expanded=False):
                st.code(msg["content"], language="xml")
            continue
        render = {
            "error_explanation": st.warning,
            "success": st.success,
            "consult": st.info,
        }.get(kind, st.markdown)
        render(msg["content"])
 # ---- Unified chat input component ----
 # Self-contained HTML/CSS/JS for the chat box: a textarea, an attach button,
 # removable file chips, paste and drag-and-drop support. On send it reads
 # every attached file as a data URL and posts {text, files} through
 # Streamlit.setComponentValue. File names are user-controlled, so they are
 # written into the chips with textContent rather than as markup.
 UNIFIED_CHAT_HTML = r"""
 <!DOCTYPE html>
 <html lang="zh-CN">
 <head>
 <meta charset="utf-8">
 <style>
  * { box-sizing: border-box; margin: 0; padding: 0; }
  body {
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
    background: transparent;
    padding: 4px 0;
  }
  .chat-container {
    position: relative;
    border: 1px solid #d1d5db;
    border-radius: 12px;
    padding: 8px 12px;
    background: #ffffff;
    transition: border-color 0.2s, box-shadow 0.2s;
  }
  .chat-container:focus-within {
    border-color: #3b82f6;
    box-shadow: 0 0 0 2px rgba(59,130,246,0.15);
  }
  .chat-container.drag-active {
    border-color: #3b82f6;
    background: rgba(59,130,246,0.04);
  }
  .file-chips {
    display: flex;
    flex-wrap: wrap;
    gap: 6px;
    margin-bottom: 6px;
  }
  .file-chips:empty { display: none; }
  .file-chip {
    display: inline-flex;
    align-items: center;
    gap: 4px;
    padding: 2px 8px;
    background: #f3f4f6;
    border-radius: 14px;
    font-size: 12px;
    color: #374151;
    max-width: 200px;
  }
  .file-chip .chip-icon { font-size: 13px; }
  .file-chip .chip-name {
    overflow: hidden;
    text-overflow: ellipsis;
    white-space: nowrap;
  }
  .file-chip .chip-remove {
    border: none;
    background: none;
    cursor: pointer;
    color: #9ca3af;
    font-size: 14px;
    line-height: 1;
    padding: 0 2px;
    flex-shrink: 0;
  }
  .file-chip .chip-remove:hover { color: #ef4444; }
  .input-row {
    display: flex;
    align-items: flex-end;
    gap: 8px;
  }
  .attach-btn {
    border: none;
    background: none;
    cursor: pointer;
    padding: 4px 6px;
    font-size: 20px;
    line-height: 1;
    color: #6b7280;
    border-radius: 6px;
    transition: background 0.15s, color 0.15s;
    flex-shrink: 0;
  }
  .attach-btn:hover { background: #f3f4f6; color: #374151; }
  textarea {
    flex: 1;
    border: none;
    outline: none;
    resize: none;
    font-size: 15px;
    line-height: 1.5;
    font-family: inherit;
    color: #111827;
    background: transparent;
    padding: 4px 0;
    min-height: 24px;
    max-height: 120px;
    overflow-y: auto;
  }
  textarea::placeholder { color: #9ca3af; }
  .send-btn {
    border: none;
    cursor: pointer;
    padding: 4px 10px;
    font-size: 16px;
    background: #e5e7eb;
    color: #9ca3af;
    border-radius: 8px;
    transition: all 0.15s;
    flex-shrink: 0;
  }
  .send-btn.active { background: #3b82f6; color: #fff; }
  .send-btn.active:hover { background: #2563eb; }
  .send-btn:disabled { opacity: 0.5; cursor: default; }
  .error-toast {
    position: fixed;
    bottom: 12px;
    left: 50%;
    transform: translateX(-50%);
    background: #ef4444;
    color: #fff;
    padding: 6px 16px;
    border-radius: 8px;
    font-size: 13px;
    z-index: 9999;
    animation: toastOut 2.5s forwards;
    pointer-events: none;
  }
  @keyframes toastOut {
    0%, 70% { opacity: 1; }
    100% { opacity: 0; }
  }
  @media (prefers-color-scheme: dark) {
    .chat-container { background: #1f2937; border-color: #374151; }
    .chat-container:focus-within { border-color: #3b82f6; }
    .file-chip { background: #374151; color: #e5e7eb; }
    .file-chip .chip-remove { color: #6b7280; }
    .attach-btn { color: #9ca3af; }
    .attach-btn:hover { background: #374151; color: #e5e7eb; }
    textarea { color: #f9fafb; }
    textarea::placeholder { color: #6b7280; }
    .send-btn { background: #374151; }
  }
 </style>
 </head>
 <body>
 <div class="chat-container" id="container">
  <div class="file-chips" id="chips"></div>
  <div class="input-row">
    <button class="attach-btn" id="attachBtn" title="附加文件">&#x1F4CE;</button>
    <textarea id="textInput" placeholder="描述您的报表需求..." rows="1"></textarea>
    <button class="send-btn" id="sendBtn" title="发送">&#x27A4;</button>
  </div>
  <input type="file" id="fileInput" multiple hidden
    accept=".png,.jpg,.jpeg,.bmp,.webp,.pdf,.docx,.xlsx,.xls,.doc,.txt">
 </div>
 <script>
  const container = document.getElementById('container');
  const chipsEl = document.getElementById('chips');
  const textInput = document.getElementById('textInput');
  const sendBtn = document.getElementById('sendBtn');
  const attachBtn = document.getElementById('attachBtn');
  const fileInput = document.getElementById('fileInput');
  let attachedFiles = [];
  const MAX_FILES = 10;
  const MAX_SIZE = 20 * 1024 * 1024;
  function getIcon(type) {
    if (type.startsWith('image/')) return '🖼';
    if (type.includes('pdf')) return '📄';
    if (type.includes('document')) return '📝';
    if (type.includes('spreadsheet') || type.includes('excel')) return '📊';
    return '📎';
  }
  function updateSendBtn() {
    var canSend = textInput.value.trim() || attachedFiles.length > 0;
    sendBtn.classList.toggle('active', canSend);
  }
  function renderChips() {
    chipsEl.innerHTML = '';
    attachedFiles.forEach(function(f, i) {
      var chip = document.createElement('span');
      chip.className = 'file-chip';
      var name = f.name.length > 16 ? f.name.slice(0,14)+'..' : f.name;
      chip.innerHTML = '<span class="chip-icon">'+getIcon(f.type)+'</span>' +
        '<span class="chip-name"></span>' +
        '<button class="chip-remove">&times;</button>';
      // The file name is user-controlled: set it as text, never as markup.
      chip.querySelector('.chip-name').textContent = name;
      chip.querySelector('.chip-remove').onclick = (function(idx) {
        return function() {
          attachedFiles.splice(idx, 1);
          renderChips();
          updateSendBtn();
        };
      })(i);
      chipsEl.appendChild(chip);
    });
    updateSendBtn();
  }
  function addFiles(fileList) {
    for (var i = 0; i < fileList.length; i++) {
      var file = fileList[i];
      if (attachedFiles.length >= MAX_FILES) { showToast('最多附加 '+MAX_FILES+' 个文件'); break; }
      if (file.size > MAX_SIZE) { showToast(file.name+' 超过 20MB 限制'); continue; }
      if (attachedFiles.some(function(f) { return f.name === file.name && f.size === file.size; })) continue;
      attachedFiles.push({name: file.name, type: file.type, file: file});
    }
    renderChips();
  }
  function showToast(msg) {
    var t = document.createElement('div');
    t.className = 'error-toast';
    t.textContent = msg;
    document.body.appendChild(t);
    setTimeout(function() { t.remove(); }, 2600);
  }
  function readFile(file) {
    return new Promise(function(resolve, reject) {
      var reader = new FileReader();
      reader.onload = function() { resolve(reader.result); };
      reader.onerror = reject;
      reader.readAsDataURL(file);
    });
  }
  async function handleSend() {
    var text = textInput.value.trim();
    if (!text && attachedFiles.length === 0) return;
    sendBtn.disabled = true;
    var files = [];
    for (var i = 0; i < attachedFiles.length; i++) {
      var f = attachedFiles[i];
      try {
        var dataUrl = await readFile(f.file);
        files.push({name: f.name, type: f.type, data: dataUrl, size: f.file.size});
      } catch(e) {
        showToast(f.name+' 读取失败');
      }
    }
    Streamlit.setComponentValue({text: text, files: files});
    textInput.value = '';
    attachedFiles = [];
    renderChips();
    sendBtn.disabled = false;
    textInput.style.height = 'auto';
  }
  attachBtn.onclick = function() { fileInput.click(); };
  fileInput.onchange = function() { addFiles(fileInput.files); fileInput.value = ''; };
  textInput.oninput = function() {
    updateSendBtn();
    textInput.style.height = 'auto';
    textInput.style.height = Math.min(textInput.scrollHeight, 120) + 'px';
  };
  textInput.onkeydown = function(e) {
    if (e.key === 'Enter' && !e.shiftKey) {
      e.preventDefault();
      handleSend();
    }
  };
  sendBtn.onclick = handleSend;
  document.addEventListener('paste', function(e) {
    var items = e.clipboardData && e.clipboardData.items;
    if (!items) return;
    var files = [];
    for (var i = 0; i < items.length; i++) {
      if (items[i].kind === 'file') files.push(items[i].getAsFile());
    }
    if (files.length) { e.preventDefault(); addFiles(files); }
  });
  var containerDiv = document.getElementById('container');
  containerDiv.addEventListener('dragover', function(e) {
    e.preventDefault();
    containerDiv.classList.add('drag-active');
  });
  containerDiv.addEventListener('dragleave', function() {
    containerDiv.classList.remove('drag-active');
  });
  containerDiv.addEventListener('drop', function(e) {
    e.preventDefault();
    containerDiv.classList.remove('drag-active');
    addFiles(e.dataTransfer.files);
  });
  updateSendBtn();
 </script>
 </body>
 </html>
 """
 # ---- Chat submission handling ----
 # A submission is expected as {"text": str, "files": [{name, type, data, size}]}
 # where `data` is a data URL. Each attachment is decoded into a temp file,
 # parsed to text, and prepended to the user's prompt before the agent runs.
 # NOTE(review): st.components.v1.html is documented as a one-way static embed;
 # confirm it can actually deliver the value posted via setComponentValue.
 chat_result = components.html(UNIFIED_CHAT_HTML, height=180)
 if chat_result and isinstance(chat_result, dict):
    prompt = chat_result.get("text", "")
    files = chat_result.get("files", [])
    from backend.file_parser import parse_file
    from backend.layout_analyzer import analyze_layout, extract_layout_schema
    # Loop invariants: built once rather than once per attachment.
    mime_to_suffix = {
        "image/png": ".png", "image/jpeg": ".jpg", "image/bmp": ".bmp",
        "image/webp": ".webp", "application/pdf": ".pdf",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
        "application/vnd.ms-excel": ".xls", "application/msword": ".doc",
        "text/plain": ".txt",
    }
    img_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")
    file_texts = []
    attached_info = []
    first_image_path = None
    temp_paths = []
    try:
        for f in files:
            # Split "data:<mime>;base64,<payload>" without assuming the comma
            # is present; a malformed attachment is skipped, not fatal.
            _, sep, b64data = f.get("data", "").partition(",")
            try:
                if not sep:
                    raise ValueError("data URL has no ',' separator")
                # binascii.Error is a ValueError subclass, so one handler
                # covers both the missing separator and bad base64.
                raw = base64.b64decode(b64data)
            except ValueError as exc:
                _app_log.warning(
                    f"附件解码失败，已跳过: {exc}",
                    extra={"session_id": current_session_id, "error": str(exc)},
                )
                continue
            mime = f.get("type", "")
            # Prefer the MIME type; fall back to the extension in the name.
            suffix = mime_to_suffix.get(mime, Path(f["name"]).suffix.lower())
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                # Register the path before writing so the finally clause
                # removes the file even if the write fails.
                tmp_path = tmp.name
                temp_paths.append(tmp_path)
                tmp.write(raw)
            result = parse_file(tmp_path, suffix)
            text = result["text"]
            file_type = result["file_type"]
            if suffix in img_suffixes and result.get("method") not in ("metadata_only", None):
                try:
                    layout = analyze_layout(tmp_path)
                    tt = layout.get("template_type", "unknown")
                    if tt == "full_a4":
                        # A full-page template replaces the parsed text with
                        # the layout description and stores the schema.
                        text = layout["description"]
                        file_type = "a4_template"
                        schema = extract_layout_schema(layout)
                        st.session_state.agent_state["layout_schema"] = schema
                        st.session_state.agent_state["ocr_elements"] = layout.get("rows", [])
                    elif tt == "partial_rows":
                        file_type = "a4_partial"
                except Exception as exc:
                    # Layout analysis is best-effort: keep the parsed text,
                    # but leave a trace instead of failing silently.
                    _app_log.warning(
                        f"布局分析失败，已跳过: {exc}",
                        extra={"session_id": current_session_id, "error": str(exc)},
                    )
            file_texts.append(f"[附加文件: {f['name']} ({file_type})]\n{text}")
            attached_info.append({"name": f["name"], "type": file_type, "length": len(text)})
            if not first_image_path and file_type in ("image", "a4_template", "a4_partial"):
                first_image_path = tmp_path
        if file_texts:
            full_prompt = "\n\n".join(file_texts) + "\n\n---\n用户需求:\n" + prompt
        else:
            full_prompt = prompt
        if first_image_path:
            st.session_state.agent_state["uploaded_file_path"] = first_image_path
        _app_log.info(
            "收到用户输入",
            extra={
                "session_id": current_session_id,
                "prompt_preview": prompt[:200],
                "prompt_length": len(prompt),
                "has_uploaded_files": bool(attached_info),
                "uploaded_files": attached_info,
            },
        )
        # The history shows only what the user typed; the agent receives the
        # prompt with the attachment text prepended.
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        run_agent(full_prompt)
    finally:
        # The temp files were created with delete=False, so remove them
        # whether or not parsing or the agent run succeeded.
        for p in temp_paths:
            try:
                Path(p).unlink(missing_ok=True)
            except OSError as exc:
                _app_log.warning(
                    f"临时文件清理失败: {exc}",
                    extra={"session_id": current_session_id, "error": str(exc)},
                )
    st.rerun()
@@ -0,0 +1,586 @@
 # 对话场景遍历文档
 > 从 `agent/graph.py` 状态图递归遍历生成，覆盖所有用户意图 → 节点路径 → 退出条件。
 > 最后更新: 2026-05-24
 ---
 ## 状态图总览
 ```
                        ┌──────────────────────────────────────────────────┐
                        │              修正循环 (最多 MAX_RETRY=5 次)       │
                        │    ┌─────────┐    ┌──────────────┐    ┌────────┐ │
                        │    │ validate │───→│ explain_error│───→│correct │ │
                        │    └────┬─────┘    └──────────────┘    │_jrxml  │ │
                        │         │ pass                          └───┬────┘ │
                        │         ▼                                   │      │
                        │    ┌─────────┐                         retry<5  │
                        │    │finalize │◄────────────────────────────────┘ │
                        │    └─────────┘  retry>=5                        │
                        └──────────────────────────────────────────────────┘
 load_session ──→ process_input ──→ manage_context ──→ save_state_snapshot
                                                           │
                                                           ▼
                                                    classify_intent
                                                           │
              ┌────────────┬──────────┬────────┬───────────┼───────────┬──────────┐
              ▼            ▼          ▼        ▼           ▼           ▼          ▼
         retrieve    modify_jrxml  save_   handle_     handle_     handle_    (兜底)
         (新建报表)   (修改报表)   session  consult     undo        reset
              │            │       (预览)  (咨询)      (撤销)      (重置)
     ┌────────┴────┐       │          │        │           │           │
     ▼             ▼       │          │        │           │           │
  generate    generate_    │          │        │           │           │
  (1-shot)    skeleton     │          │        │           │           │
     │             │        │          │        │           │           │
     │        refine_       │          │        │           │           │
     │        layout        │          │        │           │           │
     │             │        │          │        │           │           │
     │        map_fields    │          │        │           │           │
     │             │        │          │        │           │           │
     └──────┬──────┘        │          │        │           │           │
            ▼               ▼          ▼        ▼           ▼           ▼
         save_session ◄─────┴──────────┘    finalize ◄─── finalize ◄── finalize
            │                                     ▲
            │ (预览/导出跳过验证)                    │
            ├───────────────────────────────────────┘
            │ (其他意图走验证)
            ▼
         validate ──→ explain_error ──→ correct_jrxml ──→ validate (循环)
            │ pass                                     │ retry>=MAX
            ▼                                          ▼
         finalize ────────────────────────────────→ finalize
 ```
 ---
 ## 节点详细清单
 每个节点标注了 **代码行号** (`agent/nodes.py` 或 `agent/graph.py`)、**前驱节点** (predecessors)、**后继节点** (successors)。
 ### 1. load_session — 加载会话
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:77` |
 | 前驱 | (入口节点, graph entry_point) |
 | 后继 | `process_input` (固定边 graph.py:198) |
 | 功能 | 从磁盘文件 `sessions/{session_id}.json` 加载状态并注入 `agent_state`；当前的 `session_id` 不会被磁盘中的值覆盖。 |
 | LLM | 否 |
 ### 2. process_input — 处理用户输入
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:98` |
 | 前驱 | `load_session` (graph.py:198) |
 | 后继 | `manage_context` (graph.py:199) |
 | 功能 | 文件解析（PDF/DOCX/XLSX/图片/文本）→ OCR 字段提取 → 批注检测 → 模板 JRXML 解析。注入 `ocr_extraction_result`、`layout_schema`、`ocr_elements`、`uploaded_template_jrxml`。 |
 | LLM | 否（OCR 用 PaddleOCR/EasyOCR） |
 ### 3. manage_context — 上下文管理
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:143` |
 | 前驱 | `process_input` (graph.py:199) |
 | 后继 | `save_state_snapshot` (graph.py:200) |
 | 功能 | Token 计数 → 对话压缩（超限时 LLM 压缩为摘要）→ `compressed_history`。 |
 | LLM | 是（压缩时调 LLM） |
 ### 4. save_state_snapshot — 状态快照
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:191` |
 | 前驱 | `manage_context` (graph.py:200) |
 | 后继 | `classify_intent` (graph.py:201) |
 | 功能 | 深拷贝当前状态 → 推入 `history_states` 列表。最多保留 5 个快照。撤销时恢复到最新快照。 |
 | LLM | 否 |
 ### 5. classify_intent — 意图分类
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:200` |
 | 前驱 | `save_state_snapshot` (graph.py:201) |
 | 后继 | 6 路条件分发 (graph.py:204-215) |
 | 功能 | LLM 分类用户意图为 8 种之一。prompt: `prompts/intent_classify.md`。 |
 | LLM | 是 |
 | 路由函数 | `route_by_intent` (graph.py:67) |
 **分类逻辑与路由目标**:
 | 意图值 | 路由目标 | 说明 |
 |--------|---------|------|
 | `initial_generation` | → `retrieve` | 新建报表 |
 | `modify_report` | → `modify_jrxml` | 修改现有报表 |
 | `preview_report` | → `save_session` | 预览（跳过生成） |
 | `export_pdf` | → `save_session` | 导出 PDF（跳过生成） |
 | `export_jrxml` | → `save_session` | 下载 JRXML（跳过生成） |
 | `consult_question` | → `handle_consult` | 咨询问答 |
 | `undo_modification` | → `handle_undo` | 撤销 |
 | `reset_session` | → `handle_reset` | 重置 |
 | 未知/兜底 | 有 `current_jrxml` → `modify_jrxml`; 无 → `retrieve` | |
 ### 6. retrieve — RAG/知识库检索
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:442` |
 | 前驱 | `classify_intent` (graph.py:204-215, intent=initial_generation) |
 | 后继 | 条件分发: `generate_skeleton` 或 `generate` (graph.py:218-224) |
 | 功能 | ① ErrorKB 检索历史修正案例 → ② KB 模板检索 → ③ KB 字段定义检索。注入 `retrieved_context`、`kb_template_jrxml`、`kb_fields`。 |
 | LLM | 否（向量搜索 + 字段匹配） |
 | 路由函数 | `route_after_retrieve` (graph.py:94) |
 **路由逻辑** (`route_after_retrieve`, graph.py:94-99):
 - `layout_schema.total_rows > 0` → `generate_skeleton` (3 阶段)
 - 否则 → `generate` (1-shot)
 ### 7. generate — 1-shot 生成
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:578` |
 | 前驱 | `retrieve` (graph.py:218-224, 无 layout_schema 时) |
 | 后继 | `save_session` (graph.py:227-231) |
 | 功能 | LLM 一次生成完整 JRXML。注入 OCR 上下文 + 模板上下文。流式输出。截断时续写（最多 3 轮）。 |
 | LLM | 是 |
 | Prompt | `prompts/initial_generation.md` |
 ### 8. generate_skeleton — 骨架生成（3 阶段-1）
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:657` |
 | 前驱 | `retrieve` (graph.py:218-224, 有 layout_schema 时) |
 | 后继 | `refine_layout` (固定边 graph.py:233) |
 | 功能 | 压缩布局 schema → LLM 生成骨架 JRXML。字段用 `$F{field_N}` 占位。流式输出 + 续写。 |
 | LLM | 是 |
 | Prompt | `prompts/skeleton_generation.md` |
 ### 9. refine_layout — 坐标精调（3 阶段-2）
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:879` |
 | 前驱 | `generate_skeleton` (graph.py:233) |
 | 后继 | `map_fields` (固定边 graph.py:234) |
 | 功能 | ① `decompose_jrxml()` 拆解为 header + bands → ② 每个 band 窗口化（>4000 字符切分）→ ③ 逐窗口 LLM 精调坐标 → ④ `reassemble_jrxml()` 重组 → ⑤ `validate_element_count()` 校验（>10% 回退）。header 完全不发给 LLM。 |
 | LLM | 是（N 次，N = band 窗口数） |
 | Prompt | `prompts/refine_layout.md` |
 ### 10. map_fields — 字段映射（3 阶段-3）
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:978` |
 | 前驱 | `refine_layout` (graph.py:234) |
 | 后继 | `save_session` (graph.py:235-239) |
 | 功能 | 纯程序化正则替换 `$F{field_N}` → OCR 真实字段名。`_sanitize_field_name()` 净化非 ASCII 字符。零 LLM 调用。 |
 | LLM | 否 |
 ### 11. modify_jrxml — 修改报表
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:1022` |
 | 前驱 | `classify_intent` (graph.py:204-215, intent=modify_report) |
 | 后继 | `save_session` (graph.py:242-246) |
 | 功能 | 基于现有 JRXML + 用户修改描述 + OCR 上下文 + 模板上下文 → LLM 修改。流式输出 + 续写。空响应守卫。 |
 | LLM | 是 |
 | Prompt | `prompts/modification.md` |
 ### 12. handle_consult — 咨询解答
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:261` |
 | 前驱 | `classify_intent` (graph.py:204-215, intent=consult_question) |
 | 后继 | `finalize` (固定边 graph.py:280) |
 | 功能 | LLM 回答 JasperReports 相关知识问题。回答写入 `conversation_history`。 |
 | LLM | 是 |
 | Prompt | `prompts/consult.md` |
 ### 13. handle_undo — 撤销
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:281` |
 | 前驱 | `classify_intent` (graph.py:204-215, intent=undo_modification) |
 | 后继 | `save_session` (graph.py:249-253) |
 | 功能 | 从 `history_states` 弹出最近快照，恢复 `current_jrxml`、`conversation_history`、`status`。无快照时提示"无可撤销状态"。 |
 | LLM | 否 |
 ### 14. handle_reset — 重置
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:309` |
 | 前驱 | `classify_intent` (graph.py:204-215, intent=reset_session) |
 | 后继 | `finalize` (固定边 graph.py:281) |
 | 功能 | 清空所有状态到 `create_initial_state()` 默认值（保留 `session_id`、`session_name`）。 |
 | LLM | 否 |
 ### 15. save_session — 保存会话
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:325` |
 | 前驱 | `generate`、`map_fields`、`modify_jrxml`、`handle_undo`、`classify_intent`(预览/导出) |
 | 后继 | 条件分发: `validate` 或 `finalize` (graph.py:256-260) |
 | 功能 | 原子持久化会话 JSON (`tempfile + os.replace`)。序列化 `agent_state` 到 `sessions/{session_id}.json`。 |
 | LLM | 否 |
 | 路由函数 | `route_after_save` (graph.py:118) |
 **路由逻辑** (`route_after_save`, graph.py:118-123):
 - `intent in (preview_report, export_pdf, export_jrxml)` → `finalize` (跳过验证)
 - 其他 → `validate`
 ### 16. validate — 验证
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:1235` |
 | 前驱 | `save_session` (graph.py:256-260)、`correct_jrxml` (graph.py:273-277) |
 | 后继 | 条件分发: `finalize` 或 `explain_error` (graph.py:263-267) |
 | 功能 | ① 结构检查（字段引用一致性/SQL 存在/pageWidth/pageHeight/name）→ ② XSD 校验（可选）→ ③ 像素对比（有上传图片时 Java 渲染 JRXML→PNG + OpenCV SSIM）。 |
 | LLM | 否 |
 | 路由函数 | `route_after_validate` (graph.py:127) |
 **路由逻辑** (`route_after_validate`, graph.py:127-131):
 - `status == "pass"` → `finalize`
 - `status == "fail"` → `explain_error`
 ### 17. explain_error — 错误解释
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:1310` |
 | 前驱 | `validate` (graph.py:263-267, status=fail) |
 | 后继 | `correct_jrxml` (graph.py:268-272) |
 | 功能 | LLM 将编译错误翻译为自然语言解释。注入 `natural_explanation`。 |
 | LLM | 是 |
 | Prompt | `prompts/explain_error.md` |
 ### 18. correct_jrxml — 自动修正
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:1355` |
 | 前驱 | `explain_error` (graph.py:268-272) |
 | 后继 | 条件分发: `validate` 或 `finalize` (graph.py:273-277) |
 | 功能 | 基于错误解释 + OCR 上下文 + 模板上下文 → LLM 修正 JRXML。注入 `last_error_case`。去重检测（输入输出相同则 `retry_count+=2`）。 |
 | LLM | 是 |
 | Prompt | `prompts/correction.md` |
 | 路由函数 | `route_after_correct` (graph.py:139) |
 **路由逻辑** (`route_after_correct`, graph.py:139-143):
 - `retry_count >= MAX_RETRY` (默认5) → `finalize` (放弃修正)
 - `retry_count < MAX_RETRY` → `validate` (重新验证)
 ### 19. finalize — 最终处理
 | 属性 | 值 |
 |------|-----|
 | 代码位置 | `agent/nodes.py:1452` |
 | 前驱 | `validate`(pass)、`correct_jrxml`(retry>=MAX)、`handle_consult`、`handle_reset`、`save_session`(预览/导出) |
 | 后继 | `END` (graph.py:284) |
 | 功能 | 记录 `jrxml_versions` 版本历史。验证通过时设置 `final_jrxml`。失败时记录 `pending_failure_context` 供下次输入自动注入。 |
 | LLM | 否 |
 ---
 ## 路由函数索引
 | # | 路由函数 | 代码位置 | 条件 | 分支 |
 |---|---------|---------|------|------|
 | R1 | `route_by_intent` | `graph.py:67` | `state.intent` | 6 路: retrieve / modify_jrxml / save_session / handle_consult / handle_undo / handle_reset |
 | R2 | `route_after_retrieve` | `graph.py:94` | `layout_schema.total_rows > 0` | 2 路: generate_skeleton / generate |
 | R3 | `route_after_generate` | `graph.py:103` | 无条件 | save_session |
 | R4 | `route_after_modify` | `graph.py:108` | 无条件 | save_session |
 | R5 | `route_after_undo` | `graph.py:113` | 无条件 | save_session |
 | R6 | `route_after_save` | `graph.py:118` | `intent in (preview, export)` | 2 路: finalize / validate |
 | R7 | `route_after_validate` | `graph.py:127` | `status == "pass"` | 2 路: finalize / explain_error |
 | R8 | `route_after_explain` | `graph.py:133` | 无条件 | correct_jrxml |
 | R9 | `route_after_correct` | `graph.py:139` | `retry_count >= MAX_RETRY` | 2 路: finalize / validate |
 ---
 ## 完整对话场景
 ### 场景 1: 新建报表 — 1-shot（无布局 schema）
 **触发**: `intent=initial_generation` + 无图片/无结构化布局
 **用户示例**: "帮我生成一个销售报表"、"生成一个包含客户名和金额的表格"
 ```
  load_session                 nodes.py:77
 → process_input                nodes.py:98
 → manage_context               nodes.py:143
 → save_state_snapshot          nodes.py:191
 → classify_intent              nodes.py:200   意图=initial_generation
  └─ R1: route_by_intent       graph.py:67    → retrieve
 → retrieve                     nodes.py:442
  └─ R2: route_after_retrieve  graph.py:94    layout_schema 为空 → generate
 → generate                     nodes.py:578    LLM 1-shot 生成完整 JRXML
  └─ R3: route_after_generate  graph.py:103   → save_session
 → save_session                 nodes.py:325    持久化到磁盘
  └─ R6: route_after_save      graph.py:118   intent=initial_generation → validate
 → validate                     nodes.py:1235   结构检查 + XSD + 像素对比
  └─ R7: route_after_validate  graph.py:127
       ├─ status=pass → finalize              nodes.py:1452 → END ✓
       └─ status=fail → explain_error         nodes.py:1310
                         └─ R8 → correct_jrxml nodes.py:1355
                                  └─ R9:
                                     retry<5  → validate (循环)
                                     retry>=5 → finalize → END ✗
 ```
 **LLM 调用**: `classify_intent` + `generate` + 最多 5× (`explain_error` + `correct_jrxml`)
 **退出好结局**: `final_jrxml` 有值, `status=pass`
 **退出坏结局**: `pending_failure_context` 有值, `retry_count>=5`（去重检测会一次 +2，因此可能超过 5）
 ---
 ### 场景 2: 新建报表 — 3 阶段分层生成（有布局 schema）
 **触发**: `intent=initial_generation` + 上传图片 + OCR 提取到 `layout_schema.total_rows > 0`
 **用户示例**: 上传销售单图片 → "根据这个模板生成报表"
 ```
  load_session                 nodes.py:77
 → process_input                nodes.py:98    OCR提取 + 布局分析
 → manage_context               nodes.py:143
 → save_state_snapshot          nodes.py:191
 → classify_intent              nodes.py:200   意图=initial_generation
  └─ R1: route_by_intent       graph.py:67    → retrieve
 → retrieve                     nodes.py:442   KB检索模板+字段
  └─ R2: route_after_retrieve  graph.py:94    layout_schema.total_rows>0 → generate_skeleton
 → generate_skeleton            nodes.py:657   阶段1: 骨架JRXML ($F{field_N}占位)
 → refine_layout                nodes.py:879   阶段2: Band级窗口化坐标精调
 → map_fields                   nodes.py:978   阶段3: 程序化字段映射
  └─ R3: route_after_generate  graph.py:103   → save_session
 → save_session                 nodes.py:325
  └─ R6: route_after_save      graph.py:118   → validate
 → validate                     nodes.py:1235
  └─ R7                        同场景1的验证循环
 ```
 **内容保护**:
 - `refine_layout`: header (field/param/queryString) 完全不发给 LLM
 - `refine_layout`: 每窗口 ~4000 字符, LLM 无法重写整个报表
 - `map_fields`: 纯正则替换, 零 LLM, 100% 确定性
 - `validate_element_count()`: 每阶段后校验, >10% 变化回退
 **LLM 调用**: `classify_intent` + `generate_skeleton` + N×`refine_layout`(N=band窗口数) + 可能的修正循环
 ---
 ### 场景 3: 修改已有报表
 **触发**: `intent=modify_report`（已有 `current_jrxml`）
 **用户示例**: "把标题字体改大"、"在底部加合计行"、"删除第三列"
 ```
  load_session → process_input → manage_context → save_state_snapshot
 → classify_intent              nodes.py:200   意图=modify_report
  └─ R1: route_by_intent       graph.py:67    → modify_jrxml
 → modify_jrxml                 nodes.py:1022  LLM修改现有JRXML
  └─ R4: route_after_modify    graph.py:108   → save_session
 → save_session                 nodes.py:325
  └─ R6: route_after_save      graph.py:118   → validate
 → (同场景1的验证循环)
 ```
 **特殊逻辑**: `correct_jrxml` 去重检测: 输入输出相同 → `retry_count += 2`
 ---
 ### 场景 4: 预览 / 导出（跳过验证）
 **触发**: `intent in (preview_report, export_pdf, export_jrxml)`
 **用户示例**: "预览报表"、"导出 PDF"、"下载 JRXML"
 ```
  load_session → process_input → manage_context → save_state_snapshot
 → classify_intent              nodes.py:200   意图=preview/export
  └─ R1: route_by_intent       graph.py:67    → save_session
 → save_session                 nodes.py:325
  └─ R6: route_after_save      graph.py:118   intent=preview/export → finalize
 → finalize                     nodes.py:1452  → END ✓
 ```
 **LLM 调用**: 仅 `classify_intent` (1次)
 **跳过**: generate / modify_jrxml / validate / correct_jrxml
 ---
 ### 场景 5: 咨询问答
 **触发**: `intent=consult_question`
 **用户示例**: "JasperReports 里 $F 和 $P 有什么区别？"、"怎么设置页脚？"
 ```
  load_session → process_input → manage_context → save_state_snapshot
 → classify_intent              nodes.py:200   意图=consult_question
  └─ R1: route_by_intent       graph.py:67    → handle_consult
 → handle_consult               nodes.py:261   LLM回答
 → finalize                     nodes.py:1452  → END ✓
 ```
 **LLM 调用**: `classify_intent` + `handle_consult` (2次)
 ---
 ### 场景 6: 撤销
 **触发**: `intent=undo_modification`
 **用户示例**: "撤销"、"回退"、"恢复到修改前"
 ```
  load_session → process_input → manage_context → save_state_snapshot
 → classify_intent              nodes.py:200   意图=undo_modification
  └─ R1: route_by_intent       graph.py:67    → handle_undo
 → handle_undo                  nodes.py:281   恢复history_states快照
  └─ R5: route_after_undo      graph.py:113   → save_session
 → save_session                 nodes.py:325
  └─ R6 → validate → (验证循环)
 ```
 **LLM 调用**: 仅 `classify_intent` (1次)
 **特殊**: 无快照时提示"无可撤销状态"，不改变当前状态
 ---
 ### 场景 7: 重置
 **触发**: `intent=reset_session`
 **用户示例**: "重置"、"重新开始"、"清空对话"
 ```
  load_session → process_input → manage_context → save_state_snapshot
 → classify_intent              nodes.py:200   意图=reset_session
  └─ R1: route_by_intent       graph.py:67    → handle_reset
 → handle_reset                 nodes.py:309   清空到初始状态
 → finalize                     nodes.py:1452  → END ✓
 ```
 **LLM 调用**: 仅 `classify_intent` (1次)
 ---
 ### 场景 8: 兜底路由（未知意图）
 **触发**: LLM 分类返回非标准意图
 ```
  load_session → ... → classify_intent → [未知意图]
  └─ R1 fallback (graph.py:87-90):
       ├─ state有current_jrxml → modify_jrxml (走修改路径, →场景3)
       └─ state无current_jrxml → retrieve      (走生成路径, →场景1/2)
 ```
 ---
 ## AgentState 字段速查
 | 字段 | 类型 | 写节点 | 读节点 |
 |------|------|--------|--------|
 | `intent` | `str` | classify_intent | R1 route_by_intent, R6 route_after_save |
 | `current_jrxml` | `str` | generate, generate_skeleton, refine_layout, map_fields, modify_jrxml, correct_jrxml, handle_undo | validate, save_session, finalize |
 | `user_input` | `str` | process_input | classify_intent, manage_context |
 | `user_modification_request` | `str` | process_input | modify_jrxml |
 | `conversation_history` | `list` | process_input, finalize, handle_consult | manage_context, classify_intent, modify_jrxml |
 | `full_conversation_history` | `list` | process_input | manage_context |
 | `compressed_history` | `str` | manage_context | modify_jrxml, handle_consult |
 | `retry_count` | `int` | correct_jrxml, validate | R9 route_after_correct |
 | `status` | `str` | validate | R7 route_after_validate, finalize |
 | `error_msg` | `str` | validate | explain_error, finalize |
 | `natural_explanation` | `str` | explain_error | correct_jrxml |
 | `final_jrxml` | `str` | finalize | (用户下载) |
 | `jrxml_versions` | `list` | finalize | (前端展示) |
 | `last_error_case` | `dict` | correct_jrxml | retrieve |
 | `pending_failure_context` | `dict` | finalize | process_input (下次) |
 | `layout_schema` | `dict` | process_input | R2 route_after_retrieve, generate_skeleton |
 | `ocr_elements` | `list` | process_input | refine_layout, generate_skeleton |
 | `ocr_extraction_result` | `dict` | process_input | map_fields, modify_jrxml, correct_jrxml |
 | `history_states` | `list` | save_state_snapshot | handle_undo |
 | `kb_id` | `str` | process_input | retrieve |
 | `kb_fields` | `list` | retrieve | generate_skeleton |
 | `uploaded_template_jrxml` | `str` | process_input | generate, generate_skeleton, modify_jrxml, correct_jrxml |
 ---
 ## LLM 调用统计
 | 场景 | classify | 生成节点 | 窗口数 | 修正循环 | 总计(最小~最大) |
 |------|----------|---------|--------|---------|----------------|
 | 1-shot 生成 | 1 | generate=1 | - | 0~5×2 | 2 ~ 12 |
 | 3 阶段生成 | 1 | skeleton+refine×N | N | 0~5×2 | 2+N ~ 12+N |
 | 修改报表 | 1 | modify=1 | - | 0~5×2 | 2 ~ 12 |
 | 预览/导出 | 1 | - | - | - | 1 |
 | 咨询 | 1 | consult=1 | - | - | 2 |
 | 撤销 | 1 | - | - | - | 1 |
 | 重置 | 1 | - | - | - | 1 |
 > N = band 窗口数。`销售单.jrxml` (73k 字符) 拆解后 N≈17。
 ---
 ## 修正循环流程
 ```
 validate ──fail──→ explain_error ──→ correct_jrxml
    ▲                                      │
    │         retry_count < MAX_RETRY(5)    │
    └──────────────────────────────────────┘
                     │
                     │ retry_count >= 5
                     ▼
                  finalize (放弃, 记录pending_failure_context)
 ```
 **修正轮次推进**:
 1. `validate` 失败 → `status="fail"`, `error_msg` 有值
 2. `explain_error` → LLM 翻译错误 → `natural_explanation` 有值
 3. `correct_jrxml` → LLM 修正 → `retry_count += 1`。去重检测：输入输出相同 → `retry_count += 2`
 4. `route_after_correct` → retry<5 → 回到 `validate`; retry>=5 → `finalize`
 **失败上下文** (`pending_failure_context`): 重试耗尽后记录 `{error_msg, bad_jrxml, retry_count, ts}`，下次用户消息时 `process_input` 自动注入到 prompt。
 ---
 ## 边定义索引（graph.py 全部边）
 | 类型 | 源节点 | 目标节点 | 位置 |
 |------|--------|---------|------|
 | 固定边 | load_session | process_input | line 198 |
 | 固定边 | process_input | manage_context | line 199 |
 | 固定边 | manage_context | save_state_snapshot | line 200 |
 | 固定边 | save_state_snapshot | classify_intent | line 201 |
 | 条件边 | classify_intent | retrieve / modify_jrxml / save_session / handle_consult / handle_undo / handle_reset | lines 204-215 |
 | 条件边 | retrieve | generate / generate_skeleton | lines 218-224 |
 | 条件边 | generate | save_session | lines 227-231 |
 | 固定边 | generate_skeleton | refine_layout | line 233 |
 | 固定边 | refine_layout | map_fields | line 234 |
 | 条件边 | map_fields | save_session | lines 235-239 |
 | 条件边 | modify_jrxml | save_session | lines 242-246 |
 | 条件边 | handle_undo | save_session | lines 249-253 |
 | 条件边 | save_session | validate / finalize | lines 256-260 |
 | 条件边 | validate | finalize / explain_error | lines 263-267 |
 | 条件边 | explain_error | correct_jrxml | lines 268-272 |
 | 条件边 | correct_jrxml | validate / finalize | lines 273-277 |
 | 固定边 | handle_consult | finalize | line 280 |
 | 固定边 | handle_reset | finalize | line 281 |
 | 固定边 | finalize | END | line 284 |
@@ -1,114 +0,0 @@
 """
 JRXML Agent E2E test — Playwright automation.
 Tests: page load, upload image, send message, wait for response.
 Usage: python e2e_test.py
 Prerequisites: Servers must be running (start.bat or with_server.py)
 """
 import os, sys, time, base64, tempfile
 from playwright.sync_api import sync_playwright
 # Base URL of the frontend that run() opens in the browser.
 FRONTEND = "http://localhost:5173"
 # Base URL of the backend API; not referenced by the visible part of this script.
 API = "http://localhost:8000"
 # Absolute, machine-specific path of the image uploaded in step 3 of run().
 TEST_IMAGE = r"D:\Idea Project\agent_jrxml\test_invoice_e2e.png"
 def run():
    """Drive one upload-and-generate round trip through the frontend.

    Launches headless Chromium, opens FRONTEND, creates a session, uploads
    TEST_IMAGE, sends a prompt and waits for an assistant message, saving
    screenshots to a hard-coded directory along the way.

    Only the sidebar and file-chip checks are hard assertions; the response,
    download-button and console-error checks just print their findings.

    Raises:
        AssertionError: if the sidebar or the uploaded-file chip is not visible.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page(viewport={"width": 1280, "height": 900})
        # Capture console errors
        errors = []
        page.on("console", lambda msg: errors.append(msg.text) if msg.type == "error" else None)
        # 1. Navigate and wait
        print("[1] Loading frontend...")
        page.goto(FRONTEND, timeout=15000)
        page.wait_for_load_state("networkidle")
        page.wait_for_timeout(1000)
        # Screenshot initial state
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_01_initial.png", full_page=True)
        print("    Screenshot: e2e_01_initial.png")
        # Verify sidebar loads
        sidebar = page.locator(".sidebar")
        assert sidebar.is_visible(), "Sidebar not visible"
        print("    OK: Sidebar visible")
        # 2. Create new session (click +)
        print("[2] Creating new session...")
        page.locator(".btn-icon").click()
        page.wait_for_timeout(500)
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_02_session.png")
        print("    OK: New session created")
        # 3. Upload test image
        print("[3] Uploading test image...")
        upload_input = page.locator('input[type="file"]')
        upload_input.set_input_files(TEST_IMAGE)
        page.wait_for_timeout(500)
        # Verify file chip appears
        chip = page.locator(".chip").first
        assert chip.is_visible(), "File chip not visible after upload"
        print(f"    OK: File chip visible — {chip.inner_text()}")
        # 4. Type message and send
        print('[4] Sending message...')
        textarea = page.locator("textarea").first
        textarea.fill("根据这张图片生成车历卡报表模板")
        page.wait_for_timeout(200)
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_03_input.png")
        # Click send button or press Enter
        page.locator('button[type="submit"]').click()
        print("    Sent!")
        # 5. Wait for streaming response
        print("[5] Waiting for AI response...")
        try:
            # Wait up to 3 minutes for a success or error message
            page.wait_for_selector('.message.assistant', timeout=180000)
            page.wait_for_timeout(2000)
            page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_04_response.png", full_page=True)
            # Check for success/error
            # Classification is by keyword in the message text; the first matching branch wins.
            messages = page.locator('.message.assistant').all()
            for m in messages:
                text = m.inner_text()
                if "成功" in text:
                    print(f"    ✅ SUCCESS: {text[:100]}")
                elif "失败" in text or "错误" in text:
                    print(f"    ❌ ERROR: {text[:100]}")
                elif "JRXML" in text:
                    print(f"    📄 JRXML generated ({len(text)} chars)")
        except Exception as e:
            # NOTE: any exception raised in this step is reported as a timeout,
            # and the run continues instead of failing.
            page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_04_timeout.png", full_page=True)
            print(f"    ⚠️  Timeout waiting for response: {e}")
        # 6. Check download button
        print("[6] Checking download button...")
        download_btn = page.locator(".btn-download").first
        if download_btn.is_visible():
            text = download_btn.inner_text()
            print(f"    Download button: '{text}'")
            # A label containing "暂无" is treated as "nothing to download yet".
            if "暂无" not in text:
                print("    ✅ Download link available!")
            else:
                print("    ⚠️  Download shows '暂无下载文件'")
        else:
            print("    ⚠️  Download button not found")
        # Console errors
        # Only the first five errors are shown, each cut to 200 characters.
        if errors:
            print(f"\n[!] Console errors ({len(errors)}):")
            for e in errors[:5]:
                print(f"    {e[:200]}")
        else:
            print("\n    ✅ No console errors")
        print("\n=== E2E test complete ===")
        browser.close()
 if __name__ == "__main__":
    # Make sure the hard-coded screenshot directory exists before run() writes to it.
    os.makedirs(r"D:\Idea Project\agent_jrxml", exist_ok=True)
    run()
@@ -1,55 +0,0 @@
 """初始化 JRXML 向量知识库。
 rag_jrxml 子项目独立运行管线（分块→向量化→导入），本脚本仅用于预下载嵌入模型。
 用法:
    python scripts/init_kb.py --download-model   # 预下载嵌入模型
 """
 import os
 import sys
 import argparse
 from pathlib import Path
 from dotenv import load_dotenv
 # Put the directory two levels above this file at the front of the import path.
 sys.path.insert(0, str(Path(__file__).parent.parent))
 # Presumably loads settings such as RAG_EMBED_MODEL from a .env file — confirm against python-dotenv usage.
 load_dotenv()
 def download_model():
    """Pre-download the embedding model to the local machine.

    The model id comes from the ``RAG_EMBED_MODEL`` environment variable and
    falls back to ``Qwen/Qwen3-Embedding-0.6B``. The model is instantiated and
    asked to encode one short probe string; progress messages are printed
    before and after.
    """
    name = os.getenv("RAG_EMBED_MODEL", "Qwen/Qwen3-Embedding-0.6B")
    banner = (
        f"正在下载嵌入模型: {name}",
        "如遇网络超时，可设置环境变量 HF_ENDPOINT=https://hf-mirror.com 使用镜像",
        "",
    )
    for line in banner:
        print(line)

    # Imported only here, after the banner has already been printed.
    from sentence_transformers import SentenceTransformer

    SentenceTransformer(name).encode("测试下载")
    print(f"嵌入模型下载完成: {name}")
 def main():
    """Parse the command line, then download the model or print usage help.

    With ``--download-model`` the embedding model is fetched through
    ``download_model()``. Without it, a short usage text is printed that
    points to the separate knowledge-base build steps.
    """
    cli = argparse.ArgumentParser(description="JRXML 向量知识库工具")
    cli.add_argument(
        "--download-model",
        action="store_true",
        help="预下载嵌入模型到本地",
    )
    if cli.parse_args().download_model:
        download_model()
        return

    usage_lines = [
        "用法: python scripts/init_kb.py --download-model",
        "",
        "知识库构建请在 rag/ 子项目中独立运行:",
        "  cd rag",
        "  python batch_chunker.py jrxml_source",
        "  python embed_chunks.py",
        "  python import_to_chroma.py",
    ]
    print("\n".join(usage_lines))
 # Run the CLI only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
@@ -1,144 +0,0 @@
 """
 agent_jrxml 统一启动/停止脚本
 用法: python start.py [--frontend]
 """
 import subprocess
 import sys
 import time
 import signal
 import os
 import socket
 # (port, Popen) pairs for every child process launched by this script; cleanup() iterates it.
 PROCESSES = []
 def kill_port(port):
    """Kill every process that is listening on ``port``.

    Reads ``netstat -ano`` and force-kills each owning PID with
    ``taskkill /F /PID``. This is best-effort: if either tool is missing,
    hangs or fails, the affected step is skipped instead of raising.

    Args:
        port: TCP port number to free.

    Returns:
        Number of distinct processes that ``taskkill`` reported as terminated.
    """
    try:
        result = subprocess.run(
            ['netstat', '-ano'], capture_output=True, text=True, timeout=10
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # netstat unavailable, timed out, or produced undecodable output.
        return 0

    suffix = f':{port}'
    pids = []
    for line in result.stdout.splitlines():
        if 'LISTENING' not in line:
            continue
        parts = line.split()
        # Compare against the local-address column only, and match the whole
        # port, so that port 80 no longer also selects 8000 or 8080.
        if len(parts) < 2 or not parts[1].endswith(suffix):
            continue
        pid = parts[-1]
        # The same PID appears once per address family (IPv4 and IPv6 rows).
        if pid.isdigit() and pid not in pids:
            pids.append(pid)

    killed = []
    for pid in pids:
        try:
            done = subprocess.run(['taskkill', '/F', '/PID', pid],
                                  capture_output=True, timeout=5)
        except (OSError, subprocess.SubprocessError):
            continue
        # Only count processes that taskkill actually terminated.
        if done.returncode == 0:
            killed.append(pid)
    if killed:
        print(f"[清理] 端口 {port} 已清理 {len(killed)} 个进程: {', '.join(killed)}")
    return len(killed)
 def wait_port(port, timeout=30):
    """Wait until something accepts TCP connections on localhost ``port``.

    Makes up to ``timeout * 2`` connection attempts, sleeping 0.5 s after each
    failure. Each attempt itself may take up to 1 s before it gives up.

    Args:
        port: TCP port on 127.0.0.1 to probe.
        timeout: Whole-number budget; the number of attempts is twice this value.

    Returns:
        True as soon as one connection succeeds, False when all attempts fail.
    """
    for _ in range(timeout * 2):
        try:
            # The context manager closes the probe socket on every path, and
            # catching only OSError lets Ctrl+C interrupt the wait.
            with socket.create_connection(('127.0.0.1', port), timeout=1):
                return True
        except OSError:
            time.sleep(0.5)
    return False
 def start(port, module, cwd=None):
    """Launch one uvicorn server as a child process and register it.

    Args:
        port: Port the server binds to (host is always 0.0.0.0).
        module: App import string handed to ``uvicorn.run``, e.g. ``api_server:app``.
        cwd: Working directory for the child; None inherits the current one.

    Returns:
        The ``subprocess.Popen`` handle, which is also appended to PROCESSES
        together with its port.
    """
    # The server is started through "python -c" using the current interpreter.
    bootstrap = f"import uvicorn; uvicorn.run('{module}', host='0.0.0.0', port={port}, reload=False)"
    child = subprocess.Popen([sys.executable, '-c', bootstrap], cwd=cwd)
    PROCESSES.append((port, child))
    print(f"[启动] {module} -> :{port} (PID: {child.pid})")
    return child
 def cleanup():
    """Stop every registered child process and free its port.

    Asks all children to terminate, waits two seconds, then kills each one
    and runs ``kill_port`` on its port. Errors from terminate/kill are ignored.
    """
    print("\n[清理] 正在停止所有服务...")

    # First pass: ask every child to terminate.
    for _, child in PROCESSES:
        try:
            child.terminate()
        except BaseException:
            pass

    # Fixed pause between the terminate pass and the kill pass.
    time.sleep(2)

    # Second pass: kill each child, then clear whatever still holds its port.
    for bound_port, child in PROCESSES:
        try:
            child.kill()
        except BaseException:
            pass
        kill_port(bound_port)

    print("[清理] 完成")
 def main():
    """Free the service ports, launch the services, and block until Ctrl+C.

    Passing ``--frontend`` on the command line also starts the frontend dev
    server. Once the first child has been launched, every exit path (normal
    stop, Ctrl+C, startup failure or an unexpected error) goes through
    ``cleanup()`` so that no child process is left running.

    Raises:
        SystemExit: with status 1 when a service does not become reachable.
    """
    frontend = '--frontend' in sys.argv

    # 1. Clear leftovers from a previous run.
    print("=" * 50)
    print("agent_jrxml 启动脚本")
    print("=" * 50)
    kill_port(8000)
    kill_port(8001)
    if frontend:
        kill_port(5173)

    try:
        # 2. Launch the services; the project root is the directory of this script.
        project = os.path.dirname(os.path.abspath(__file__))
        start(8000, 'api_server:app', cwd=project)
        start(8001, 'validation_service.main:app', cwd=project)
        if frontend:
            # The frontend is started through npm. A single command string is
            # used because shell=True with a list only passes the first item
            # to the shell on POSIX.
            frontend_dir = os.path.join(project, 'frontend')
            proc = subprocess.Popen(
                'npm run dev', cwd=frontend_dir,
                shell=True
            )
            PROCESSES.append((5173, proc))
            print("[启动] frontend (Vite) -> :5173")

        # 3. Wait for every launched service to accept connections.
        print("\n[等待] 等待服务就绪...")
        ok = True
        for port, _ in PROCESSES:
            if wait_port(port):
                print(f"  :{port} ✓")
            else:
                print(f"  :{port} ✗ 超时!")
                ok = False
        if not ok:
            print("\n[错误] 部分服务启动失败")
            # cleanup() runs in the finally clause below.
            sys.exit(1)

        print(f"\n{'='*50}")
        print("服务就绪:")
        print("  API:    http://localhost:8000/docs")
        print("  验证:   http://localhost:8001/health")
        if frontend:
            print("  前端:   http://localhost:5173")
        print("\n按 Ctrl+C 停止所有服务")
        print(f"{'='*50}")

        # 4. Idle until the user interrupts.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        cleanup()
 # Run the launcher only when this file is executed as a script, not when imported.
 if __name__ == '__main__':
    main()
@@ -1,29 +0,0 @@
 import sys, io
 # Re-wrap stdout as UTF-8 (presumably so non-ASCII output prints on consoles with another encoding).
 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
 import xml.etree.ElementTree as ET
 from backend.jrxml_reorder import normalize_jrxml
 # Sample report whose top-level children appear in the order
 # queryString, style, field, property, parameter, title, detail.
 # NOTE(review): presumably not the order the JRXML schema expects — confirm.
 bad = '''<?xml version="1.0" encoding="UTF-8"?>
 <jasperReport xmlns="http://jasperreports.sourceforge.net/jasperreports" name="Test" pageWidth="595" pageHeight="842">
    <queryString><![CDATA[SELECT 1]]></queryString>
    <style name="s1"/>
    <field name="f1" class="java.lang.String"/>
    <property name="p1" value="v1"/>
    <parameter name="param1" class="java.lang.String"/>
    <title><band height="50"><textField><reportElement x="0" y="0" width="100" height="20"/></textField></band></title>
    <detail><band height="30"><staticText><reportElement x="0" y="0" width="100" height="20"/><text>Hi</text></staticText></band></detail>
 </jasperReport>'''
 fixed = normalize_jrxml(bad)
 # Print the local tag names (namespace prefix stripped) of the root's children, before and after.
 print('=== Before ===')
 root = ET.fromstring(bad)
 print('Children:', [c.tag.split('}')[-1] for c in root])
 print('\n=== After ===')
 root2 = ET.fromstring(fixed)
 print('Children:', [c.tag.split('}')[-1] for c in root2])
 # Validate: POST the normalized JRXML to the service on port 8001 and print its JSON reply.
 import requests
 r = requests.post('http://localhost:8001/validate', json={'jrxml': fixed}, timeout=10)
 print(f'\nValidation: {r.json()}')
@@ -1,6 +0,0 @@
@echo off
 REM Start the JRXML validation service with uvicorn on port 8001 (auto-reload enabled).
 echo 正在启动 JRXML 验证服务...
 echo.
 REM Run from the parent of this script's directory: the app is addressed as
 REM validation_service.main:app, so that package must be importable from the
 REM working directory.
 cd /d "%~dp0.."
 python -m uvicorn validation_service.main:app --host 0.0.0.0 --port 8001 --reload
 REM Keep the console window open after the server exits.
 pause