feat: FastAPI+SSE API server, JRXML auto-reorder, session integrity fixes

This commit is contained in:
2026-05-22 17:53:59 +08:00
parent 1144a86d02
commit 1e5ce9725b
32 changed files with 9189 additions and 309 deletions
+46
View File
@@ -0,0 +1,46 @@
{
"permissions": {
"allow": [
"Bash(git submodule *)",
"Bash(python -c \"import py_compile; py_compile.compile\\('scripts/init_kb.py', doraise=True\\); print\\('init_kb.py OK'\\)\")",
"Bash(python -c \"import py_compile; py_compile.compile\\('agent/nodes.py', doraise=True\\); print\\('nodes.py OK'\\)\")",
"Bash(python -c \"import py_compile; py_compile.compile\\('backend/embeddings.py', doraise=True\\); print\\('embeddings.py OK'\\)\")",
"Bash(python *)",
"Bash(PYTHONIOENCODING=utf-8 python batch_chunker.py jrxml_source)",
"Bash(taskkill /F /IM python.exe)",
"Bash(pkill -f embed_chunks)",
"Bash(pip show *)",
"Bash(streamlit run *)",
"Bash(curl -s http://localhost:8001/validate -X POST -H \"Content-Type: application/json\" -d '{\"jrxml\":\"\"}')",
"Bash(STREAMLIT_SERVER_HEADLESS=true streamlit run app.py --server.port 8501)",
"Bash(git add *)",
"Bash(git commit -m ' *)",
"Bash(pip install *)",
"Bash(git push *)",
"Bash(claude mcp *)",
"mcp__zai-mcp-server__extract_text_from_screenshot",
"mcp__MiniMax__understand_image",
"Bash(curl -s http://localhost:8001/health)",
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:8501)",
"Bash(curl -s -X POST http://localhost:8001/validate -H \"Content-Type: application/json\" -d \"{\\\\\"jrxml\\\\\": \\\\\"\\\\\"}\")",
"Bash(curl -s -X POST http://localhost:8001/validate -H \"Content-Type: application/json\" -d \"{\\\\\"jrxml\\\\\": \\\\\"<?xml version=\\\\\\\\\\\\\"1.0\\\\\\\\\\\\\"?><jasperReport name=\\\\\\\\\\\\\"test\\\\\\\\\\\\\" pageWidth=\\\\\\\\\\\\\"595\\\\\\\\\\\\\" pageHeight=\\\\\\\\\\\\\"842\\\\\\\\\\\\\"><queryString><![CDATA[SELECT 1]]></queryString></jasperReport>\\\\\"}\")",
"Bash(curl -s -X POST http://localhost:8001/validate -H \"Content-Type: application/json\" -d \"{\\\\\"jrxml\\\\\": \\\\\"<?xml version=\\\\\\\\\\\\\"1.0\\\\\\\\\\\\\"?><jasperReport name=\\\\\\\\\\\\\"test\\\\\\\\\\\\\" pageWidth=\\\\\\\\\\\\\"595\\\\\\\\\\\\\" pageHeight=\\\\\\\\\\\\\"842\\\\\\\\\\\\\"><queryString><![CDATA[SELECT 1]]></queryString><detail><band height=\\\\\\\\\\\\\"50\\\\\\\\\\\\\"/></detail></jasperReport>\\\\\"}\")",
"Bash(curl -s -X POST http://localhost:8001/validate -H 'Content-Type: application/json' -d '{\"jrxml\": \"<?xml version=\\\\\"1.0\\\\\"?><jasperReport name=\\\\\"test\\\\\" pageWidth=\\\\\"595\\\\\" pageHeight=\\\\\"842\\\\\"><queryString><![CDATA[SELECT name FROM users]]></queryString><field name=\\\\\"name\\\\\" class=\\\\\"java.lang.String\\\\\"/><detail><band height=\\\\\"50\\\\\"><textField><reportElement x=\\\\\"0\\\\\" y=\\\\\"0\\\\\" width=\\\\\"100\\\\\" height=\\\\\"20\\\\\"/><textFieldExpression><![CDATA[$F{name}]]></textFieldExpression></textField></band></detail></jasperReport>\"}')",
"Bash(curl -s -o /dev/null -w \"Streamlit: %{http_code}\\\\n\" http://localhost:8501)",
"Bash(grep -v \"Complete$\")",
"Bash(git pull *)",
"Bash(pip search *)",
"Read",
"Write",
"Edit",
"Bash",
"Git",
"Npm",
"Pip",
"Grep",
"Glob",
"Bash(rm -rf components/* assets/* style.css)",
"Bash(mkdir -p api stores components utils)"
]
}
}
+1 -1
View File
@@ -48,7 +48,7 @@ RAG_USE_FP16=true
RAG_BATCH_SIZE=64
# 最大自动修正尝试次数
MAX_RETRY=3
MAX_RETRY=5
# 上下文压缩阈值(token 数)
CONTEXT_MAX_TOKENS=6000
+341
View File
@@ -0,0 +1,341 @@
# JRXML 生成代理 — 架构文档
## 概览
一个三层架构的桌面应用,通过自然语言多轮对话帮助非技术用户创建 JasperReports 模板(JRXML)。核心流程:用户输入 → 意图识别 → 模板检索 → LLM 生成/修改 → 自动验证修正 → 输出可编译的 JRXML。
```
┌──────────────────────────────────────────────────────────────┐
│ Vue 3 + Vite 前端 (:5173) │
│ frontend/ (聊天界面 + SSE 流式) │
│ 聊天界面 / 会话管理 / JRXML 预览 / 下载 / 快捷操作 │
└─────────────────────┬────────────────────────────────────────┘
│ HTTP + SSE (/api/*)
┌──────────────────────────────────────────────────────────────┐
│ FastAPI SSE 后端 (:8000) │
│ api_server.py │
│ REST: /api/sessions, /api/upload, /api/.../download/latest │
│ SSE: /api/sessions/{id}/chat (流式推送) │
│ 事件: node_start | node_complete | stream_token │
│ agent_complete | agent_error │
└─────────────────────┬────────────────────────────────────────┘
│ run_agent(user_input)
┌──────────────────────────────────────────────────────────────┐
│ LangGraph 状态机 (agent/) │
│ │
│ load_session → process_input → manage_context │
│ → save_state_snapshot → classify_intent │
│ │ │ │ │ │ │
│ ▼ ▼ ▼ ▼ ▼ │
│ retrieve modify_jrxml preview consult undo/reset │
│ │ │ /export │
│ ▼ ▼ │
│ generate save_session │
│ │ │ │
│ └────┬─────┘ │
│ ▼ │
│ (jrxml_reorder 自动规范化元素顺序) │
│ ▼ │
│ validate ──(fail)──► explain_error ──► correct_jrxml │
│ │ ▲ │ │
│ (pass) └──(retry<N)───┘ │
│ ▼ │
│ finalize (失败版本 → jrxml_versions, 提示下载) │
└──────────┬──────────────┬─────────────────────┬──────────────┘
│ │ │
▼ ▼ ▼
┌──────────────┐ ┌──────────────┐ ┌──────────────────────────┐
│ LLM 后端 │ │ 向量知识库 │ │ 验证服务 (:8001) │
│ backend/llm │ │ ChromaDB + │ │ FastAPI │
│ │ │ RAGSearcher │ │ 结构检查 + 严格 XSD 校验 │
│ Anthropic SDK│ │ │ │ │
│ OpenAI SDK │ │ Sentence- │ │ /validate │
│ Ollama │ │ Transformer │ │ /health │
└──────────────┘ └──────────────┘ └──────────────────────────┘
```
## 目录结构
```
agent_jrxml/
├── api_server.py # FastAPI SSE 后端(REST + 流式推送)
├── frontend/ # Vue 3 + Vite 前端
│ └── src/
│ ├── api/client.ts # SSE 客户端 + fetch 封装
│ ├── stores/ # Pinia 状态管理(chat + session)
│ └── components/ # 聊天界面组件
├── agent/ # LangGraph 工作流层
│ ├── __init__.py
│ ├── state.py # AgentState TypedDict 定义(~28 字段)
│ ├── nodes.py # 18 个工作流节点(生成/修改/验证/修正/意图识别...)
│ └── graph.py # 状态图编译 + 路由逻辑 + 初始状态工厂
├── backend/ # 基础设施层
│ ├── __init__.py
│ ├── llm.py # LLM 工厂:Anthropic(MiniMax) / OpenAI / Ollama
│ ├── embeddings.py # 嵌入模型工厂:HuggingFace / OpenAI
│ ├── validation.py # 验证服务 HTTP 客户端
│ ├── session.py # 会话持久化(JSON CRUD + flush/fsync)
│ ├── jrxml_reorder.py # JRXML 元素自动排序(匹配 XSD sequence)
│ └── rag_adapter.py # RAG 适配层:连接 ChromaDB 做语义搜索
├── validation_service/ # 独立验证微服务
│ ├── main.py # FastAPI 服务:结构检查 + 严格 XSD 校验
│ └── schemas/
│ └── jasperreport_7_0_6.xsd # JasperReports 7.0.6 XSD(286KB)
├── scripts/
│ └── init_kb.py # 知识库初始化(预下载嵌入模型)
├── tests/
│ ├── __init__.py
│ ├── test_validation.py # 验证服务单元测试
│ └── test_agent.py # 代理集成测试
├── data/ # 数据目录
│ ├── sample_templates/ # 示例 JRXML 模板
│ └── corrections/ # 错误修正案例
├── db/chroma/ # ChromaDB 持久化存储
├── sessions/ # 会话 JSON 文件存储
├── jrxml_versions/ # 失败版本归档存储
├── rag/ # RAG 子模块(独立管线)
├── requirements.txt # Python 依赖
├── start_all.bat # 一键启动全部服务
├── start.bat # 启动脚本
├── stop.bat # 一键停止全部服务
├── .env.example # 环境变量模板
└── README.md # 使用说明
```
## 数据流详解
### 1. 请求生命周期
```
用户输入 "创建员工名册,包含 id、name、department"
├─ load_session 从 sessions/{id}.json 恢复历史状态
├─ process_input 记录用户消息到 conversation_history
├─ manage_context 检查 token 数,超阈值则 LLM 压缩早期对话
├─ save_state_snapshot 保存当前状态快照(用于撤销)
├─ classify_intent LLM 分类 → initial_generation
├─ retrieve RAGSearcher.search_as_context() → 注入 prompt
├─ generate LLM 生成初始 JRXML
├─ save_session 持久化到磁盘
├─ validate 调用 FastAPI 验证服务
│ ├─ pass → finalize
│ └─ fail → explain_error → correct_jrxml → validate (最多 5 次)
└─ finalize 保存最终 JRXML,UI 展示结果
```
### 2. 意图路由(8 种意图)
| 意图 | 条件 | 路由目标 |
|------|------|---------|
| `initial_generation` | 无现有报表 | retrieve → generate |
| `modify_report` | 有现有报表 | modify_jrxml |
| `preview_report` | — | 直接展示 current_jrxml |
| `export_jrxml` | — | 触发下载 |
| `export_pdf` | — | 触发下载 |
| `consult_question` | — | handle_consult(独立回答) |
| `undo_modification` | history_states 非空 | 恢复上一个快照 |
| `reset_session` | — | 清空所有报表状态 |
### 3. 自动修正循环
```
validate ──fail──► explain_error ──► correct_jrxml ──► validate
▲ │
└──────────── retry_count < MAX_RETRY (5) ──────────────┘
```
每次修正都会递增 `retry_count`,达到上限后直接 `finalize`(即使仍有错误),在 UI 上展示错误信息。
## 核心组件
### AgentState(agent/state.py)
```python
class AgentState(TypedDict, total=False):
# 工作流核心
conversation_history: List[dict] # 当前上下文的对话(可能被压缩裁剪)
current_jrxml: str # 当前 JRXML 文本
user_input: str # 本轮用户输入
status: str # "pass" | "fail"
error_msg: str # 验证错误信息
natural_explanation: str # 错误的人类可读解释
retry_count: int # 当前修正尝试次数
user_modification_request: str # 修改请求文本
final_jrxml: str # 最终验证通过的 JRXML
stage: str # 当前阶段标识
retrieved_context: str # RAG 检索到的模板上下文
# 上下文压缩
full_conversation_history: List[dict] # 完整对话(含时间戳)
compressed_history: str # 早期对话的压缩摘要
current_token_count: int # 当前估算 token 数
# 会话持久化
session_id: str
session_name: str
created_at: str
updated_at: str
# 意图识别 + 撤销
intent: str # 8 种意图之一
history_states: List[dict] # 状态快照栈(最多 10 个)
```
### 工作流节点(agent/nodes.py)
| 节点 | 职责 | 调用外部 |
|------|------|---------|
| `load_session_node` | 从磁盘恢复会话状态 | `backend.session.load_session` |
| `process_input` | 记录用户输入到对话历史 | — |
| `manage_context` | token 超阈值时 LLM 压缩早期对话 | `get_llm()` |
| `save_state_snapshot` | 保存快照到 history_states | — |
| `classify_intent` | LLM 分类用户意图(8 类) | `get_llm()` |
| `retrieve` | 从 ChromaDB 搜索相关模板 | `backend.rag_adapter.search_chunks` |
| `generate` | 首次生成 JRXML | `get_llm()` |
| `modify_jrxml` | 根据用户需求修改现有 JRXML | `get_llm()` |
| `validate` | 调用验证服务检查 JRXML | `backend.validation.validate_jrxml` |
| `explain_error` | LLM 将编译错误翻译为人话 | `get_llm()` |
| `correct_jrxml` | LLM 自动修正验证失败 | `get_llm()` |
| `finalize` | 保存最终 JRXML,标记完成 | — |
| `handle_consult` | 回答 JasperReports 咨询 | `get_llm()` |
| `handle_undo` | 从 history_states 恢复上一状态 | — |
| `handle_reset` | 清空报表,重置会话 | — |
| `save_session_node` | 持久化当前状态到磁盘 | `backend.session.save_session` |
### LLM 工厂(backend/llm.py)
```
get_llm()
├─ LLM_BACKEND=local → langchain_ollama.ChatOllama
└─ LLM_BACKEND=cloud
├─ LLM_PROVIDER=anthropic → raw anthropic.Anthropic SDK
│ 适配 MiniMax Anthropic 兼容 API
│ 包装为 MiniMaxLLM(提供 .invoke() 接口)
└─ LLM_PROVIDER=openai → langchain_openai.ChatOpenAI
```
**MiniMaxLLM 适配器**:将 Anthropic SDK 的 `client.messages.create()` 包装成与 LangChain 兼容的 `.invoke(prompt) → Response.content` 接口,供所有节点统一调用。
### RAG 适配层(backend/rag_adapter.py)
```
search_chunks(query, k=5)
└─ RAGSearcher(单例)
├─ 懒加载 SentenceTransformer 模型
├─ 懒连接 ChromaDB PersistentClient
├─ query → 向量编码 → collection.query() → top-k 结果
└─ search_as_context() → 拼接带元数据标签的上下文字符串
```
### 验证服务(validation_service/main.py)
独立的 FastAPI 进程(端口 8001),提供两级验证:
1. **结构检查**(始终执行):
- XML 语法正确性
- `$F{field}` 引用一致性(表达式 vs `<field>` 声明)
- `<queryString>` 是否含有效 SQL SELECT
- `<jasperReport>` 必需属性(pageWidth, pageHeight, name)
2. **XSD Schema 校验**(可选):
- 需要 `validation_service/schemas/jasperreport_7_0_6.xsd` 文件
- 使用 `lxml.etree.XMLSchema` 进行完整 schema 校验
### 会话持久化(backend/session.py)
```
sessions/{session_id}.json
{
"session_id": "abc123def456",
"session_name": "员工名册报表",
"created_at": "2026-05-19T09:00:00+00:00",
"updated_at": "2026-05-19T09:30:00+00:00",
"agent_state": { ... } // 完整的 AgentState 字段
}
```
## 关键 Prompt 设计
| Prompt | 用途 | 输出约束 |
|--------|------|---------|
| `INTENT_CLASSIFY_PROMPT` | 8 分类意图识别 | 只输出意图名称 |
| `INITIAL_GENERATION_PROMPT` | 首次生成 JRXML | 只输出 JRXML,无 markdown |
| `MODIFICATION_PROMPT` | 修改现有 JRXML | 只输出完整 JRXML |
| `CORRECTION_PROMPT` | 自动修正错误 | 只输出修复后 JRXML |
| `EXPLAIN_PROMPT` | 错误转人话 | 2-3 句话 |
| `COMPRESSION_PROMPT` | 对话压缩 | ≤200 字摘要 |
| `CONSULT_PROMPT` | 咨询解答 | 简洁中文 |
## 配置参数(.env)
| 参数 | 默认值 | 说明 |
|------|--------|------|
| `LLM_BACKEND` | cloud | cloud / local |
| `LLM_PROVIDER` | openai | openai / anthropic |
| `OPENAI_API_KEY` | — | API 密钥 |
| `OPENAI_BASE_URL` | https://api.openai.com/v1 | API 端点 |
| `LLM_MODEL` | gpt-4o | 模型名称 |
| `LOCAL_LLM_MODEL` | qwen2.5-coder:7b | Ollama 模型 |
| `EMBED_BACKEND` | local | local / cloud |
| `LOCAL_EMBED_MODEL` | Qwen/Qwen3-Embedding-0.6B | 本地嵌入模型 |
| `VALIDATION_SERVICE_URL` | http://localhost:8001/validate | 验证端点 |
| `CHROMA_PERSIST_DIR` | ./db/chroma | ChromaDB 路径 |
| `MAX_RETRY` | 5 | 自动修正最大尝试次数 |
| `CONTEXT_MAX_TOKENS` | 6000 | 触发压缩的 token 阈值 |
| `CONTEXT_KEEP_RECENT` | 4 | 保留最近 N 轮完整对话 |
| `SESSIONS_DIR` | ./sessions | 会话 JSON 存储目录 |
| `HISTORY_MAX_SNAPSHOTS` | 10 | 撤销快照保留数量 |
## 启动流程
```bash
# 1. 安装依赖
pip install -r requirements.txt
# 2. 配置环境
cp .env.example .env
# 编辑 .env 填入 API 密钥
# 3. 初始化知识库(预下载嵌入模型)
python scripts/init_kb.py --download-model
# 4. 启动验证服务(终端 1)
python -m uvicorn validation_service.main:app --port 8001 --host 0.0.0.0
# 5. 启动 Streamlit 界面(终端 2)
STREAMLIT_SERVER_HEADLESS=true streamlit run app.py --server.port 8501
# 6. 访问 http://localhost:8501
```
## 测试
```bash
pytest tests/test_validation.py -v # 验证服务单元测试
pytest tests/test_agent.py -v # 代理集成测试
pytest tests/ -v # 全部测试
```
## 技术栈
| 层 | 技术 |
|----|------|
| UI | Vue 3 + Vite(frontend/)/ Streamlit 1.57(app.py) |
| 工作流引擎 | LangGraph 1.2 |
| LLM 接入 | Anthropic SDK / LangChain-OpenAI / LangChain-Ollama |
| 向量数据库 | ChromaDB 1.5 |
| 嵌入模型 | Sentence-Transformers (HuggingFace) |
| 验证服务 | FastAPI + lxml XMLSchema |
| HTTP 客户端 | httpx |
| Token 计算 | tiktoken |
| 持久化 | JSON 文件 + ChromaDB PersistentClient |
+7 -7
View File
@@ -31,7 +31,7 @@
## 1. 项目是什么
**一句话**:用户用中文描述报表需求 → LLM 生成 JRXML 模板 → 自动验证 → 失败则自动修正(最多 3 次)→ 重试耗尽后失败上下文自动注入下一轮 → 返回可用的 JRXML 文件。
**一句话**:用户用中文描述报表需求 → LLM 生成 JRXML 模板 → 自动验证 → 失败则自动修正(最多 5 次)→ 重试耗尽后失败上下文自动注入下一轮 → 返回可用的 JRXML 文件。
**技术栈**:Streamlit(UI) + LangGraph(状态机) + LLM(MiniMax/OpenAI/Ollama) + ChromaDB(向量库) + FastAPI(验证微服务)
@@ -110,7 +110,7 @@ streamlit run app.py --server.port 8501
│ │ │ │
│ │ correct_jrxml │
│ │ │ │
│ │ (retry < 3) ────┘ │
│ │ (retry < 5) ────┘ │
│ ▼ │
│ finalize → END │
└──────────────────────────┬───────────────────────────────────┘
@@ -251,14 +251,14 @@ def route_after_correct(state) -> Literal["validate", "finalize"]:
return "validate" if state.get("retry_count", 0) < MAX_RETRY else "finalize"
```
**MAX_RETRY 默认为 3**(`.env` 中配置)。重试耗尽后进入 finalize,finalize 会将失败上下文写入 `pending_failure_context`,下次用户输入时 `process_input` 自动注入。
**MAX_RETRY 默认为 5**(`.env` 中配置)。重试耗尽后进入 finalize,finalize 会将失败上下文写入 `pending_failure_context`,下次用户输入时 `process_input` 自动注入。
```
**关键路由逻辑**
- `route_by_intent`:8 种意图分叉,是整个系统的"交通枢纽"
- `route_after_retrieve`:有 layout_schema → 3 阶段精确生成(generate_skeleton → refine_layout → map_fields),无 schema → 原 1-shot generate
- `route_after_save`:预览/导出意图**跳过验证**直通 finalize(这是修复预览问题的关键)
- `route_after_correct`:重试次数 < 3 则继续验证循环,否则认输
- `route_after_correct`:重试次数 < 5 则继续验证循环,否则认输
### 5.2 图构建
@@ -347,9 +347,9 @@ def build_graph():
│ ▼ │
│ correct_jrxml │
│ │ │
│ ├── retry < 3? ──► validate (循环) │
│ ├── retry < 5? ──► validate (循环) │
│ │ │
│ └── retry >= 3? ──► finalize (放弃) │
│ └── retry >= 5? ──► finalize (放弃) │
│ │
▼ │
finalize ──► END │
@@ -1169,7 +1169,7 @@ parent.addEventListener('keydown', function(e) {
| `RAG_USE_GPU` | `true` | GPU 加速 |
| `RAG_USE_FP16` | `true` | 半精度推理 |
| `VALIDATION_SERVICE_URL` | `http://localhost:8001/validate` | 验证服务地址 |
| `MAX_RETRY` | `3` | 最大自动修正次数 |
| `MAX_RETRY` | `5` | 最大自动修正次数 |
| `CONTEXT_MAX_TOKENS` | `6000` | 触发压缩的 token 阈值 |
| `CONTEXT_KEEP_RECENT` | `4` | 压缩时保留最近 N 轮 |
| `SESSIONS_DIR` | `./sessions` | 会话文件目录 |
+91
View File
@@ -0,0 +1,91 @@
# RAG 知识库集成说明
## 概述
使用 `rag_jrxml` 子项目的语义分块管线替换原有的简单向量知识库。`rag_jrxml` 独立运行产出 ChromaDB,主项目通过 `backend/rag_adapter.py` 查询。
## 架构
```
rag/ ← git submodule (rag_jrxml)
├── jrxml_source/ ← 源数据目录 (242 .jrxml + 16 .md)
├── models/ ← 嵌入模型本地存放
│ └── paraphrase-multilingual-MiniLM-L12-v2/ (449MB, 384维)
├── jrxml_source_chunks/ ← 分块产物 (all_chunks.json, 15,510 chunks)
├── embeddings/ ← 向量产物 (embeddings.npy, 23MB)
db/chroma/ ← ChromaDB 持久化 (主项目查询端读取)
│ 集合: jrxml_chunks (15,510 条记录, cosine 距离)
backend/rag_adapter.py ← RAGSearcher: 加载模型 + 连接 ChromaDB + 搜索
agent/nodes.py ← retrieve() 调用 search_chunks()
```
## 管线流程
```
源文件 (.jrxml + .md)
→ batch_chunker.py 语义分块 (按 XML 元素/标题层级切分)
→ embed_chunks.py 向量化 (Sentence-Transformers, CPU)
→ import_to_chroma.py 导入 ChromaDB
→ rag_adapter.py 主项目查询
```
## 当前数据
| 指标 | 数值 |
|---|---|
| 源文件 | 258 (242 JRXML + 16 MD) |
| Chunks 总数 | 15,510 |
| 嵌入维度 | 384 |
| 嵌入模型 | sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 |
| 分块类型 | query, field, parameter, variable, band_*, chart, crosstab, element_*, section_* 等 |
| 知识库大小 | embeddings.npy 23MB, ChromaDB ~50MB |
## 主项目配置
`.env` 中相关变量:
```env
# 嵌入模型 (本地路径优先)
RAG_EMBED_MODEL=./rag/models/paraphrase-multilingual-MiniLM-L12-v2
# ChromaDB 路径
RAG_CHROMA_PATH=./db/chroma
# 集合名称 (与 rag 子项目一致)
RAG_COLLECTION_NAME=jrxml_chunks
```
## 全量构建
```bash
cd rag
python batch_chunker.py jrxml_source
python embed_chunks.py jrxml_source_chunks/all_chunks.json
python import_to_chroma.py --chroma_path ../db/chroma
```
## 增量更新
```bash
# 1. 将新的 .jrxml / .md 放入 rag/jrxml_source/
# 2. 增量运行
cd rag
python batch_chunker.py jrxml_source --incremental
python embed_chunks.py --incremental
python import_to_chroma.py --chroma_path ../db/chroma --incremental
```
## 更新 rag 子项目
```bash
git submodule update --remote rag
```
## 搜索接口
```python
from backend.rag_adapter import search_chunks
# 返回拼接好的上下文字符串,可直接注入 LLM prompt
context = search_chunks("如何创建饼图", k=5)
```
+2 -2
View File
@@ -6,7 +6,7 @@
- **多轮聊天**:通过对话优化报表 -- 添加列、更改标题、添加汇总
- **自动验证**:每次生成或修改后都会验证 JRXML
- **自动修正**:如果验证失败,代理会分析错误并自动修正(最多 3 次)
- **自动修正**:如果验证失败,代理会分析错误并自动修正(最多 5 次)
- **模板检索**:使用 Chroma 向量数据库检索相关的 JRXML 示例以获得更好的生成效果
- **文件上传**:支持图片(OCR识别)、PDF、Word、Excel、文本文件等
- **聊天粘贴/拖拽**:支持直接在对话框中 Ctrl+V 粘贴或拖拽文件(图片/PDF/Excel/Word)
@@ -182,7 +182,7 @@ jrxml-agent/
| LOCAL_EMBED_MODEL | 嵌入模型 | Qwen/Qwen3-Embedding-0.6B |
| VALIDATION_SERVICE_URL | 验证端点 | http://localhost:8001/validate |
| CHROMA_PERSIST_DIR | Chroma 存储位置 | ./db/chroma |
| MAX_RETRY | 自动修正尝试次数 | 3 |
| MAX_RETRY | 自动修正尝试次数 | 5 |
| CONTEXT_MAX_TOKENS | 上下文压缩阈值 | 6000 |
| LOG_DIR | 日志目录 | ./logs |
| LOG_LEVEL | 日志级别 | DEBUG |
+27 -5
View File
@@ -18,11 +18,11 @@ from backend.logger import get_logger, set_trace_id
from backend.validation import validate_jrxml
from prompts.loader import load_prompt
load_dotenv()
load_dotenv(override=True)
_node_log = get_logger("agent")
MAX_RETRY = int(os.getenv("MAX_RETRY", "3"))
MAX_RETRY = int(os.getenv("MAX_RETRY", "5"))
CONTEXT_MAX_TOKENS = int(os.getenv("CONTEXT_MAX_TOKENS", "6000"))
CONTEXT_KEEP_RECENT = int(os.getenv("CONTEXT_KEEP_RECENT", "4"))
HISTORY_MAX_SNAPSHOTS = int(os.getenv("HISTORY_MAX_SNAPSHOTS", "10"))
@@ -815,6 +815,14 @@ def validate(state: AgentState) -> Dict:
state["error_msg"] = f"JRXML 内容过短({len(jrxml.strip())} 字符),可能为不完整或空内容。"
return state
# 自动规范化 JRXML 元素顺序(符合 XSD sequence 要求)
try:
from backend.jrxml_reorder import normalize_jrxml
jrxml = normalize_jrxml(jrxml)
state["current_jrxml"] = jrxml
except Exception:
pass # 规范化失败不影响后续流程
result = validate_jrxml(jrxml)
state["status"] = "pass" if result.get("valid") else "fail"
state["error_msg"] = result.get("error", "")
@@ -923,6 +931,20 @@ def finalize(state: AgentState) -> Dict:
# 验证未通过:不覆盖 final_jrxml,保留上一次成功的版本
retries = state.get("retry_count", 0)
error_msg = state.get("error_msg", "未知错误")
# 保存失败版本到 jrxml_versions(用户可以选择下载)
if jrxml.strip():
versions = state.get("jrxml_versions", [])
if not isinstance(versions, list):
versions = []
versions.append({
"ts": _now_iso(),
"jrxml": jrxml,
"intent": state.get("intent", ""),
"label": f"失败版本 (第{retries}次重试)",
"status": "fail",
"error_msg": error_msg,
})
state["jrxml_versions"] = versions
# 记录失败上下文,下次用户输入时自动注入
state["pending_failure_context"] = {
"error_msg": error_msg,
@@ -934,8 +956,8 @@ def finalize(state: AgentState) -> Dict:
"role": "assistant",
"content": (
f"❌ 经过 {retries} 次重试后仍无法生成有效的 JRXML。\n"
f"错误: {error_msg}\n"
f"请描述您想要的修改,系统自动加载失败上下文继续修复"
f"错误: {error_msg}\n\n"
f"您可以:\n1. 继续描述修改要求,系统自动重试修复\n2. 点击下载按钮获取当前版本(虽未通过 XSD 验证,但可能可在 Studio 中手动修复"
),
})
return state
@@ -966,4 +988,4 @@ def _extract_jrxml(text: str) -> str:
if xml_start >= 0 and jr_end > xml_start:
return text[xml_start:jr_end + len("</jasperreport>")].strip()
return text
return text
+24 -2
View File
@@ -181,6 +181,13 @@ def _run_graph_sync(agent_state: AgentState, event_q: queue.Queue):
for node_state in data.values():
if isinstance(node_state, dict):
agent_state.update(node_state)
# 在 graph 完成后立即保存 session,防止 SSE 流中断导致数据丢失
sid = agent_state.get("session_id", "")
if sid:
try:
save_session(sid, agent_state)
except Exception:
pass # 静默失败,SSE 流中还有一次保存机会
event_q.put(("done", {"reason": "graph_completed"}))
except Exception as exc:
event_q.put(("error", {
@@ -218,6 +225,8 @@ async def _sse_generator(agent_state: AgentState, session_id: str = "") -> str:
total_ms = round((time.time() - t_start) * 1000)
if session_id:
save_session(session_id, agent_state)
versions = agent_state.get("jrxml_versions", [])
last_ver = versions[-1] if versions else {}
yield _sse_line("agent_complete", {
"reason": "done",
"intent": agent_state.get("intent", ""),
@@ -228,6 +237,9 @@ async def _sse_generator(agent_state: AgentState, session_id: str = "") -> str:
"retry_count": agent_state.get("retry_count", 0),
"total_duration_ms": total_ms,
"ocr_extraction_result": agent_state.get("ocr_extraction_result", {}),
"versions": len(versions),
"has_failed_version": last_ver.get("status") == "fail" if last_ver else False,
"failed_version_index": len(versions) - 1 if last_ver.get("status") == "fail" else -1,
})
await future
return
@@ -532,7 +544,17 @@ async def chat(session_id: str, payload: dict):
# ── 返回 SSE 流 ──
async def stream_and_save():
final_state = None
# 如果上传了附件,先发送处理状态
if file_ids:
yield _sse_line("node_start", {
"node": "process_attachments",
"label": "正在处理附件",
})
yield _sse_line("node_complete", {
"node": "process_attachments",
"label": "正在处理附件",
"detail": f"已解析 {len(file_ids)} 个文件",
})
async for sse_chunk in _sse_generator(agent_state, session_id):
yield sse_chunk
@@ -622,4 +644,4 @@ async def download_file(file_id: str):
if __name__ == "__main__":
import uvicorn
port = int(os.getenv("API_PORT", "8000"))
uvicorn.run("api_server:app", host="0.0.0.0", port=port, reload=True)
uvicorn.run("api_server:app", host="0.0.0.0", port=port, reload=False)
+388 -233
View File
@@ -17,13 +17,16 @@ try:
except Exception:
pass
import base64
import tempfile
import time
from pathlib import Path
import streamlit as st
import streamlit.components.v1 as components
from dotenv import load_dotenv
load_dotenv()
load_dotenv(override=True)
from agent.graph import build_graph, create_initial_state
from backend.session import (
@@ -109,86 +112,6 @@ def _render_jrxml(jrxml: str, max_lines: int = 30):
st.code(preview, language="xml")
# ---- 共享文件上传处理 ----
def _process_uploaded_file(uploaded_file, suffix: str) -> dict:
"""Process one uploaded file: save it to a temp file, parse it, analyze its layout.

Args:
    uploaded_file: object exposing ``getvalue()`` (raw bytes) and ``name``.
    suffix: lower-cased file extension including the dot (e.g. ".png").

Returns: {"name": str, "text": str, "type": str, "tmp_path": str|None}
"""
import tempfile
from backend.file_parser import parse_file
from backend.layout_analyzer import analyze_layout
# delete=False: the file is read again by path (parse_file / analyze_layout)
# after this "with" block has closed it.
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
tmp.write(uploaded_file.getvalue())
tmp_path = tmp.name
result = parse_file(tmp_path, suffix)
parsed_text = result["text"]
parsed_type = result["file_type"]
# Run A4 template layout analysis for images / PDFs
if suffix in (".png", ".jpg", ".jpeg", ".bmp", ".webp", ".pdf"):
layout = analyze_layout(tmp_path)
tt = layout.get("template_type", "unknown")
current_jrxml = st.session_state.agent_state.get("current_jrxml", "")
if tt == "full_a4":
parsed_text = layout["description"]
parsed_type = "a4_template"
# Store the layout schema for the layered, precise generation path
from backend.layout_analyzer import extract_layout_schema
schema = extract_layout_schema(layout)
st.session_state.agent_state["layout_schema"] = schema
st.session_state.agent_state["ocr_elements"] = layout.get("rows", [])
elif tt == "partial_rows":
parsed_type = "a4_partial"
# With an existing report, describe how the uploaded rows match it;
# otherwise fall back to the plain layout description.
if current_jrxml.strip():
from backend.layout_analyzer import match_rows_to_jrxml
match = match_rows_to_jrxml(layout, current_jrxml)
parsed_text = (
f"[行片段修改] 上传图片包含 {layout['total_rows']} 行,"
f"视为 A4 报表的一部分。\n\n"
f"{match['description']}\n\n"
f"--- 行结构 ---\n{layout['description']}"
)
else:
parsed_text = layout["description"]
else:
# Any other template_type: the upload is only a style reference.
# has_ocr is True when the parse result's "method" is neither
# "metadata_only" nor missing.
has_ocr = result.get("method") not in ("metadata_only", None)
img_w, img_h = layout["image_size"]
ratio = layout["aspect_ratio"]
if has_ocr:
parsed_text = (
f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}"
f"未检测到 A4 报表结构,图片将被视为参考样式。\n"
f"请根据用户的文字描述生成报表。"
)
else:
parsed_text = (
f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}\n"
f"⚠ OCR 引擎未安装,无法识别图片中的文字内容。\n"
f"请严格根据用户的文字描述来推断图片中的报表需求。\n"
f"(提示:如需图片文字识别,请运行 pip install paddleocr"
)
parsed_type = "image_reference"
# NOTE(review): ".pdf" is already matched by the first branch above, so it
# can never reach this elif — confirm whether PDFs were meant to get "pdf".
elif suffix in (".pdf", ".docx", ".xlsx", ".xls", ".doc"):
parsed_type = suffix.lstrip(".")
# The temp path is handed back to the caller only for image files whose
# parse "method" is neither "metadata_only" nor None.
# NOTE(review): when keep_temp is False the path is dropped here without
# unlinking the file (it was created with delete=False) — confirm that
# cleanup happens elsewhere, otherwise temp files accumulate.
keep_temp = (
suffix in (".png", ".jpg", ".jpeg", ".bmp", ".webp")
and result.get("method") not in ("metadata_only", None)
)
return {
"name": uploaded_file.name,
"text": parsed_text,
"type": parsed_type,
"tmp_path": tmp_path if keep_temp else None,
}
# ---- URL 参数 ----
query_params = st.query_params
url_session_id = query_params.get("session_id", "")
@@ -344,14 +267,6 @@ def run_agent(user_input: str):
if stream_active:
streaming_placeholder.empty()
# 清理已处理的临时文件
for p in st.session_state.get("uploaded_temp_paths", []):
try:
Path(p).unlink(missing_ok=True)
except Exception:
pass
st.session_state.uploaded_temp_paths = []
# ---- 总结卡片 ----
# 注:node_state 只含变更字段,用 agent_state(被所有节点就地修改)获取完整状态
final_state = agent_state
@@ -557,62 +472,12 @@ with st.sidebar:
run_agent("重新来,清空当前报表")
st.rerun()
st.divider()
st.markdown("### 上传文件")
st.caption("支持图片 (OCR)、PDF、Word、文本文件。内容将附加到您的下一条消息中。")
if "uploaded_files" not in st.session_state:
st.session_state.uploaded_files = [] # [{name, text, type}]
if "uploaded_temp_paths" not in st.session_state:
st.session_state.uploaded_temp_paths = [] # 待清理的临时文件路径
uploaded = st.file_uploader(
"选择文件",
type=["png", "jpg", "jpeg", "bmp", "webp", "pdf", "docx", "xlsx", "xls", "doc",
"txt", "csv", "json", "xml"],
accept_multiple_files=True,
key="file_uploader",
label_visibility="collapsed",
)
if uploaded:
for uf in uploaded:
# 去重
if any(f["name"] == uf.name for f in st.session_state.uploaded_files):
continue
suffix = Path(uf.name).suffix.lower()
result = _process_uploaded_file(uf, suffix)
if result["text"]:
st.session_state.uploaded_files.append({
"name": result["name"],
"text": result["text"],
"type": result["type"],
})
tmp_path = result["tmp_path"]
if tmp_path:
st.session_state.agent_state["uploaded_file_path"] = tmp_path
st.session_state.uploaded_temp_paths.append(tmp_path)
if st.session_state.uploaded_files:
for i, f in enumerate(st.session_state.uploaded_files):
cols = st.columns([5, 1])
with cols[0]:
st.caption(f"📎 {f['name']} ({f['type']}, {len(f['text'])} 字符)")
with cols[1]:
if st.button("", key=f"rm_uf_{i}", help="移除"):
st.session_state.uploaded_files.pop(i)
st.rerun()
st.divider()
st.markdown("### 配置")
llm_backend = os.getenv("LLM_BACKEND", "cloud")
llm_model = os.getenv("LLM_MODEL", os.getenv("LOCAL_LLM_MODEL", "gpt-4o"))
st.caption(f"大语言模型: {llm_backend} / {llm_model}")
st.caption(f"最大重试次数: {os.getenv('MAX_RETRY', '3')}")
st.caption(f"最大重试次数: {os.getenv('MAX_RETRY', '5')}")
st.caption(f"验证服务: {os.getenv('VALIDATION_SERVICE_URL', 'http://localhost:8001/validate')}")
st.divider()
@@ -666,106 +531,396 @@ for msg in st.session_state.messages:
else:
st.markdown(msg["content"])
# ---- 聊天输入(支持粘贴/拖拽文件) ----
from st_multimodal_chatinput import multimodal_chatinput
import base64
import io
from pathlib import Path as _Path
# ---- 统一聊天输入组件 ----
UNIFIED_CHAT_HTML = r"""
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background: transparent;
padding: 4px 0;
}
.chat-container {
position: relative;
border: 1px solid #d1d5db;
border-radius: 12px;
padding: 8px 12px;
background: #ffffff;
transition: border-color 0.2s, box-shadow 0.2s;
}
.chat-container:focus-within {
border-color: #3b82f6;
box-shadow: 0 0 0 2px rgba(59,130,246,0.15);
}
.chat-container.drag-active {
border-color: #3b82f6;
background: rgba(59,130,246,0.04);
}
.file-chips {
display: flex;
flex-wrap: wrap;
gap: 6px;
margin-bottom: 6px;
}
.file-chips:empty { display: none; }
.file-chip {
display: inline-flex;
align-items: center;
gap: 4px;
padding: 2px 8px;
background: #f3f4f6;
border-radius: 14px;
font-size: 12px;
color: #374151;
max-width: 200px;
}
.file-chip .chip-icon { font-size: 13px; }
.file-chip .chip-name {
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.file-chip .chip-remove {
border: none;
background: none;
cursor: pointer;
color: #9ca3af;
font-size: 14px;
line-height: 1;
padding: 0 2px;
flex-shrink: 0;
}
.file-chip .chip-remove:hover { color: #ef4444; }
.input-row {
display: flex;
align-items: flex-end;
gap: 8px;
}
.attach-btn {
border: none;
background: none;
cursor: pointer;
padding: 4px 6px;
font-size: 20px;
line-height: 1;
color: #6b7280;
border-radius: 6px;
transition: background 0.15s, color 0.15s;
flex-shrink: 0;
}
.attach-btn:hover { background: #f3f4f6; color: #374151; }
textarea {
flex: 1;
border: none;
outline: none;
resize: none;
font-size: 15px;
line-height: 1.5;
font-family: inherit;
color: #111827;
background: transparent;
padding: 4px 0;
min-height: 24px;
max-height: 120px;
overflow-y: auto;
}
textarea::placeholder { color: #9ca3af; }
.send-btn {
border: none;
cursor: pointer;
padding: 4px 10px;
font-size: 16px;
background: #e5e7eb;
color: #9ca3af;
border-radius: 8px;
transition: all 0.15s;
flex-shrink: 0;
}
.send-btn.active { background: #3b82f6; color: #fff; }
.send-btn.active:hover { background: #2563eb; }
.send-btn:disabled { opacity: 0.5; cursor: default; }
.error-toast {
position: fixed;
bottom: 12px;
left: 50%;
transform: translateX(-50%);
background: #ef4444;
color: #fff;
padding: 6px 16px;
border-radius: 8px;
font-size: 13px;
z-index: 9999;
animation: toastOut 2.5s forwards;
pointer-events: none;
}
@keyframes toastOut {
0%, 70% { opacity: 1; }
100% { opacity: 0; }
}
# MIME type → 文件扩展名映射(用于剪贴板粘贴无扩展名的文件)
MIME_TO_EXT = {
"image/png": ".png",
"image/jpeg": ".jpg",
"image/bmp": ".bmp",
"image/webp": ".webp",
"application/pdf": ".pdf",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
"application/vnd.ms-excel": ".xls",
"application/msword": ".doc",
"text/plain": ".txt",
"text/csv": ".csv",
"application/json": ".json",
"text/xml": ".xml",
}
@media (prefers-color-scheme: dark) {
.chat-container { background: #1f2937; border-color: #374151; }
.chat-container:focus-within { border-color: #3b82f6; }
.file-chip { background: #374151; color: #e5e7eb; }
.file-chip .chip-remove { color: #6b7280; }
.attach-btn { color: #9ca3af; }
.attach-btn:hover { background: #374151; color: #e5e7eb; }
textarea { color: #f9fafb; }
textarea::placeholder { color: #6b7280; }
.send-btn { background: #374151; }
}
</style>
</head>
<body>
<div class="chat-container" id="container">
<div class="file-chips" id="chips"></div>
<div class="input-row">
<button class="attach-btn" id="attachBtn" title="附加文件">&#x1F4CE;</button>
<textarea id="textInput" placeholder="描述您的报表需求..." rows="1"></textarea>
<button class="send-btn" id="sendBtn" title="发送">&#x27A4;</button>
</div>
<input type="file" id="fileInput" multiple hidden
accept=".png,.jpg,.jpeg,.bmp,.webp,.pdf,.docx,.xlsx,.xls,.doc,.txt">
</div>
<script>
const container = document.getElementById('container');
const chipsEl = document.getElementById('chips');
const textInput = document.getElementById('textInput');
const sendBtn = document.getElementById('sendBtn');
const attachBtn = document.getElementById('attachBtn');
const fileInput = document.getElementById('fileInput');
chat_result = multimodal_chatinput()
if chat_result:
prompt = (chat_result.get("textInput") or "").strip()
chat_files = chat_result.get("uploadedFiles") or []
let attachedFiles = [];
const MAX_FILES = 10;
const MAX_SIZE = 20 * 1024 * 1024;
# 处理聊天中上传/粘贴的文件
uploaded_texts = []
uploaded_files_info = []
// Pick the emoji shown on an attachment chip from a file's MIME type.
// `type` is the File.type string; an empty or unknown type falls back to
// the generic paperclip.
function getIcon(type) {
  var mime = type || '';
  if (mime.startsWith('image/')) return '🖼';
  if (mime.includes('pdf')) return '📄';
  // Spreadsheets must be tested before the generic 'document' check: the
  // xlsx MIME type is
  // "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  // which also contains the substring "document".
  if (mime.includes('spreadsheet') || mime.includes('excel')) return '📊';
  // 'msword' covers legacy .doc files ("application/msword"), which the
  // file picker accepts but which do not contain "document".
  if (mime.includes('document') || mime.includes('msword')) return '📝';
  return '📎';
}
# 先收集侧边栏已上传的文件
if st.session_state.get("uploaded_files"):
for f in st.session_state.uploaded_files:
uploaded_texts.append(f"[上传文件: {f['name']}]\n{f['text']}")
uploaded_files_info.append({"name": f["name"], "type": f["type"], "length": len(f["text"])})
st.session_state.uploaded_files = []
function updateSendBtn() {
var canSend = textInput.value.trim() || attachedFiles.length > 0;
sendBtn.classList.toggle('active', canSend);
}
# 处理聊天中的文件
class _Base64File:
"""包装 base64 文件为类 UploadedFile 接口。"""
def __init__(self, name, data_bytes):
self.name = name
self._data = data_bytes
function renderChips() {
  // Rebuild the chip strip from scratch so it always mirrors attachedFiles.
  chipsEl.innerHTML = '';
  attachedFiles.forEach(function(f, i) {
    var chip = document.createElement('span');
    chip.className = 'file-chip';

    var icon = document.createElement('span');
    icon.className = 'chip-icon';
    icon.textContent = getIcon(f.type);

    // File names come from the user's disk/clipboard and are untrusted:
    // assign them via textContent so markup in a name is shown, not parsed.
    var label = document.createElement('span');
    label.className = 'chip-name';
    label.textContent = f.name.length > 16 ? f.name.slice(0, 14) + '..' : f.name;

    var removeBtn = document.createElement('button');
    removeBtn.className = 'chip-remove';
    removeBtn.textContent = '×';
    // `i` is a fresh binding for every forEach callback, so the handler can
    // close over it directly.
    removeBtn.onclick = function() {
      attachedFiles.splice(i, 1);
      renderChips();
      updateSendBtn();
    };

    chip.appendChild(icon);
    chip.appendChild(label);
    chip.appendChild(removeBtn);
    chipsEl.appendChild(chip);
  });
  updateSendBtn();
}
def getvalue(self):
return self._data
function addFiles(fileList) {
for (var i = 0; i < fileList.length; i++) {
var file = fileList[i];
if (attachedFiles.length >= MAX_FILES) { showToast('最多附加 '+MAX_FILES+' 个文件'); break; }
if (file.size > MAX_SIZE) { showToast(file.name+' 超过 20MB 限制'); continue; }
if (attachedFiles.some(function(f) { return f.name === file.name && f.size === file.size; })) continue;
attachedFiles.push({name: file.name, type: file.type, file: file});
}
renderChips();
}
for cf in chat_files:
name = cf.get("name", "clipboard_file")
mime = cf.get("type", "")
content_b64 = cf.get("content", "")
if not content_b64:
continue
function showToast(msg) {
var t = document.createElement('div');
t.className = 'error-toast';
t.textContent = msg;
document.body.appendChild(t);
setTimeout(function() { t.remove(); }, 2600);
}
function readFile(file) {
return new Promise(function(resolve, reject) {
var reader = new FileReader();
reader.onload = function() { resolve(reader.result); };
reader.onerror = reject;
reader.readAsDataURL(file);
});
}
async function handleSend() {
var text = textInput.value.trim();
if (!text && attachedFiles.length === 0) return;
sendBtn.disabled = true;
var files = [];
for (var i = 0; i < attachedFiles.length; i++) {
var f = attachedFiles[i];
try {
var dataUrl = await readFile(f.file);
files.push({name: f.name, type: f.type, data: dataUrl, size: f.file.size});
} catch(e) {
showToast(f.name+' 读取失败');
}
}
Streamlit.setComponentValue({text: text, files: files});
textInput.value = '';
attachedFiles = [];
renderChips();
sendBtn.disabled = false;
textInput.style.height = 'auto';
}
attachBtn.onclick = function() { fileInput.click(); };
fileInput.onchange = function() { addFiles(fileInput.files); fileInput.value = ''; };
textInput.oninput = function() {
updateSendBtn();
textInput.style.height = 'auto';
textInput.style.height = Math.min(textInput.scrollHeight, 120) + 'px';
};
textInput.onkeydown = function(e) {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
handleSend();
}
};
sendBtn.onclick = handleSend;
document.addEventListener('paste', function(e) {
var items = e.clipboardData && e.clipboardData.items;
if (!items) return;
var files = [];
for (var i = 0; i < items.length; i++) {
if (items[i].kind === 'file') files.push(items[i].getAsFile());
}
if (files.length) { e.preventDefault(); addFiles(files); }
});
var containerDiv = document.getElementById('container');
containerDiv.addEventListener('dragover', function(e) {
e.preventDefault();
containerDiv.classList.add('drag-active');
});
containerDiv.addEventListener('dragleave', function() {
containerDiv.classList.remove('drag-active');
});
containerDiv.addEventListener('drop', function(e) {
e.preventDefault();
containerDiv.classList.remove('drag-active');
addFiles(e.dataTransfer.files);
});
updateSendBtn();
</script>
</body>
</html>
"""
chat_result = components.html(UNIFIED_CHAT_HTML, height=180)
if chat_result and isinstance(chat_result, dict):
prompt = chat_result.get("text", "")
files = chat_result.get("files", [])
from backend.file_parser import parse_file
from backend.layout_analyzer import analyze_layout, extract_layout_schema
file_texts = []
attached_info = []
first_image_path = None
temp_paths = []
for f in files:
header, b64data = f.get("data", ",").split(",", 1)
raw = base64.b64decode(b64data)
mime = f.get("type", "")
mime_to_suffix = {
"image/png": ".png", "image/jpeg": ".jpg", "image/bmp": ".bmp",
"image/webp": ".webp", "application/pdf": ".pdf",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
"application/vnd.ms-excel": ".xls", "application/msword": ".doc",
"text/plain": ".txt",
}
suffix = mime_to_suffix.get(mime, Path(f["name"]).suffix.lower())
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
tmp.write(raw)
tmp_path = tmp.name
temp_paths.append(tmp_path)
result = parse_file(tmp_path, suffix)
text = result["text"]
file_type = result["file_type"]
img_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")
if suffix in img_suffixes and result.get("method") not in ("metadata_only", None):
try:
layout = analyze_layout(tmp_path)
tt = layout.get("template_type", "unknown")
if tt == "full_a4":
text = layout["description"]
file_type = "a4_template"
schema = extract_layout_schema(layout)
st.session_state.agent_state["layout_schema"] = schema
st.session_state.agent_state["ocr_elements"] = layout.get("rows", [])
elif tt == "partial_rows":
file_type = "a4_partial"
except Exception:
pass
file_texts.append(f"[附加文件: {f['name']} ({file_type})]\n{text}")
attached_info.append({"name": f["name"], "type": file_type, "length": len(text)})
if not first_image_path and file_type in ("image", "a4_template", "a4_partial"):
first_image_path = tmp_path
if file_texts:
full_prompt = "\n\n".join(file_texts) + "\n\n---\n用户需求:\n" + prompt
else:
full_prompt = prompt
if first_image_path:
st.session_state.agent_state["uploaded_file_path"] = first_image_path
_app_log.info(
"收到用户输入",
extra={
"session_id": current_session_id,
"prompt_preview": prompt[:200],
"prompt_length": len(prompt),
"has_uploaded_files": bool(attached_info),
"uploaded_files": attached_info,
},
)
st.session_state.messages.append({"role": "user", "content": prompt})
with st.chat_message("user"):
st.markdown(prompt)
run_agent(full_prompt)
for p in temp_paths:
try:
data = base64.b64decode(content_b64)
Path(p).unlink(missing_ok=True)
except Exception:
continue
pass
suffix = _Path(name).suffix.lower()
if not suffix and mime in MIME_TO_EXT:
suffix = MIME_TO_EXT[mime]
name = f"{_Path(name).stem}{suffix}"
wrapper = _Base64File(name, data)
result = _process_uploaded_file(wrapper, suffix)
if result["text"]:
uploaded_texts.append(f"[上传文件: {result['name']}]\n{result['text']}")
uploaded_files_info.append({"name": result["name"], "type": result["type"], "length": len(result["text"])})
tmp_path = result["tmp_path"]
if tmp_path:
st.session_state.agent_state["uploaded_file_path"] = tmp_path
st.session_state.uploaded_temp_paths.append(tmp_path)
if prompt or uploaded_texts:
if uploaded_texts:
full_prompt = "\n\n".join(uploaded_texts)
if prompt:
full_prompt += "\n\n---\n用户需求:\n" + prompt
else:
full_prompt = prompt
displayed_prompt = prompt or "(已上传文件,未输入文字)"
_app_log.info(
"收到用户输入",
extra={
"session_id": current_session_id,
"prompt_preview": displayed_prompt[:200],
"prompt_length": len(full_prompt),
"has_uploaded_files": bool(uploaded_files_info),
"uploaded_files": uploaded_files_info,
},
)
st.session_state.messages.append({"role": "user", "content": displayed_prompt})
with st.chat_message("user"):
st.markdown(displayed_prompt)
run_agent(full_prompt)
st.rerun()
st.rerun()
+201
View File
@@ -0,0 +1,201 @@
"""
JRXML 元素自动排序 JasperReports XSD <xs:sequence> 要求重排子元素
XSD 要求 jasperReport 子元素严格按以下顺序
property, propertyExpression, import, template, reportFont,
style, subDataset, scriptlet, parameter, queryString, field,
sortField, variable, filterExpression, group, background, title,
pageHeader, columnHeader, detail, columnFooter, pageFooter,
lastPageFooter, summary, noData
以及 band 内部的 reportElement 必须在其他元素之前
"""
import re
import xml.etree.ElementTree as ET
from typing import Optional
# JasperReports XSD sequence 顺序(索引越小越靠前)
JASPERREPORT_ORDER = {
"property": 0,
"propertyExpression": 1,
"import": 2,
"template": 3,
"reportFont": 4,
"style": 5,
"subDataset": 6,
"scriptlet": 7,
"parameter": 8,
"queryString": 9,
"field": 10,
"sortField": 11,
"variable": 12,
"filterExpression": 13,
"group": 14,
"background": 15,
"title": 16,
"pageHeader": 17,
"columnHeader": 18,
"detail": 19,
"columnFooter": 20,
"pageFooter": 21,
"lastPageFooter": 22,
"summary": 23,
"noData": 24,
}
# 带命名空间的标签映射(去掉 ns 前缀后匹配)
NS = "http://jasperreports.sourceforge.net/jasperreports"
def _tag_local(tag: str) -> str:
"""提取标签本地名(去掉命名空间前缀)。"""
return tag.split("}")[-1] if "}" in tag else tag
def _sort_key(elem: ET.Element) -> int:
    """Return the rank of *elem* in ``JASPERREPORT_ORDER``; unknown tags rank last (999)."""
    return JASPERREPORT_ORDER.get(_tag_local(elem.tag), 999)
def reorder_jrxml_elements(xml_string: str) -> str:
    """Reorder the elements of a JRXML document and return it as a string.

    Two passes run over the parsed tree: ``_reorder_children`` sorts child
    elements into sequence order, then ``_reorder_bands`` moves
    ``reportElement`` children to the front of every ``band``.

    Args:
        xml_string: The JRXML document text.

    Returns:
        The re-serialized document, or ``xml_string`` unchanged when it
        cannot be parsed as XML.
    """
    try:
        root = ET.fromstring(xml_string)
    except ET.ParseError:
        return xml_string  # not parseable: hand the input back untouched

    _reorder_children(root)
    _reorder_bands(root)

    # Serialize the JasperReports namespace as the default namespace.
    # Without this ElementTree invents an "ns0:" prefix for every tag of a
    # namespaced document, which makes the output noisy and hides the
    # plain "<queryString>" tag from tag-based post-processing.
    ET.register_namespace("", NS)
    result = ET.tostring(root, encoding="unicode")

    # Put back the XML declaration and the query CDATA section.
    return _restore_formatting(xml_string, result)
# Parents whose children follow the JASPERREPORT_ORDER sequence. subDataset
# uses a subset of the same sequence. Other parents have their own content
# models (e.g. conditionalStyle needs conditionExpression before style, and
# table columns need tableHeader before columnHeader), so sorting them with
# the report-level table would produce invalid documents.
_ORDERED_PARENTS = frozenset({"jasperReport", "subDataset"})


def _reorder_children(parent: ET.Element):
    """Sort children into XSD sequence order, recursing through the tree.

    Only the children of ``jasperReport`` and ``subDataset`` elements are
    reordered; every other element is left as written but still descended
    into so nested ``subDataset`` elements are reached. The sort is stable,
    so elements of the same kind keep their relative order.

    Whitespace between siblings is kept in place: tails are reassigned by
    position, so indentation and the newline before the closing tag survive
    the reordering.
    """
    children = list(parent)
    if not children:
        return

    if _tag_local(parent.tag) in _ORDERED_PARENTS:
        ordered = sorted(children, key=_sort_key)
        if ordered != children:
            # Tails belong to positions, not to elements: capture them in
            # document order and hand them out again after the move.
            tails = [child.tail for child in children]
            parent[:] = ordered
            for child, tail in zip(ordered, tails):
                child.tail = tail

    for child in children:
        _reorder_children(child)
def _reorder_bands(root: ET.Element):
    """Move ``reportElement`` children to the front of every ``band`` under *root*."""
    # Collect the bands before touching any of them: ElementTree documents
    # the result of iter() as undefined if the tree is modified while the
    # iteration is still running.
    bands = [elem for elem in root.iter() if _tag_local(elem.tag) == "band"]
    for band in bands:
        _ensure_reportelement_first(band)
def _ensure_reportelement_first(band: ET.Element):
    """Place the ``reportElement`` children of *band* ahead of all other children.

    Relative order inside each of the two groups is preserved. A band with
    no ``reportElement`` child is left untouched. Tails between children are
    cleared; the text that followed the original last child is kept after
    the new last child.
    """
    children = list(band)
    report_elements = [c for c in children if _tag_local(c.tag) == "reportElement"]
    if not report_elements:
        return
    other_elements = [c for c in children if _tag_local(c.tag) != "reportElement"]

    # Read the trailing text BEFORE tails are reset below; reading it
    # afterwards would always yield "".
    closing_tail = children[-1].tail or ""

    ordered = report_elements + other_elements
    band[:] = ordered
    for child in ordered:
        child.tail = ""
    # `ordered` is non-empty here, so no truth test on an Element is needed
    # (an Element without children is falsy, which made the old check skip).
    ordered[-1].tail = closing_tail
def _restore_formatting(original: str, reordered: str) -> str:
"""恢复 XML 声明和 CDATA 段。"""
# 保留原始声明
decl = ""
if original.strip().startswith("<?xml"):
m = re.match(r'<\?xml[^?]*\?>', original)
if m:
decl = m.group()
if decl and not reordered.strip().startswith("<?xml"):
reordered = decl + "\n" + reordered
# 恢复 CDATAET 会把 CDATA 转成普通文本)
# 从原始 XML 提取所有 CDATA 块
cdata_pattern = re.compile(r'<!\[CDATA\[(.*?)\]\]>', re.DOTALL)
cdata_blocks = cdata_pattern.findall(original)
if cdata_blocks:
# 在重排后的 XML 中,对应位置的文本用 CDATA 包裹
def _restore_cdata(match):
nonlocal cdata_blocks
text = match.group(1)
for cdata in cdata_blocks:
if cdata.strip() == text.strip():
return f"<![CDATA[{cdata}]]>"
return match.group(0)
# 替换已转义的文本为 CDATA
reordered = re.sub(
r'(<queryString[^>]*>)\s*(.*?)\s*(</queryString>)',
lambda m: m.group(1) + f"\n <![CDATA[{m.group(2).strip()}]]>\n " + m.group(3),
reordered,
flags=re.DOTALL
)
return reordered
def normalize_jrxml(jrxml_text: str) -> str:
    """Normalize a JRXML document by reordering its elements.

    Empty, ``None`` or whitespace-only input is returned as is; anything
    else is passed through ``reorder_jrxml_elements``.
    """
    is_blank = not jrxml_text or not jrxml_text.strip()
    return jrxml_text if is_blank else reorder_jrxml_elements(jrxml_text)
+2 -1
View File
@@ -179,7 +179,8 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
)
for block in resp.content:
if block.type == "text":
block_type = getattr(block, "type", "")
if block_type == "text":
return type("Response", (), {"content": block.text})()
return type("Response", (), {"content": ""})()
+2
View File
@@ -90,6 +90,8 @@ def save_session(session_id: str, agent_state: dict, session_name: str = ""):
)
try:
json.dump(data, tmp, ensure_ascii=False, indent=2)
tmp.flush()
os.fsync(tmp.fileno())
tmp.close()
os.replace(tmp.name, str(fp))
except Exception:
+1 -1
View File
@@ -7,7 +7,7 @@ from dotenv import load_dotenv
from backend.logger import get_logger
load_dotenv()
load_dotenv(override=True)
_val_log = get_logger("validation")
+114
View File
@@ -0,0 +1,114 @@
"""
JRXML Agent E2E test Playwright automation.
Tests: page load, upload image, send message, wait for response.
Usage: python test_e2e.py
Prerequisites: Servers must be running (start.bat or with_server.py)
"""
import os, sys, time, base64, tempfile
from playwright.sync_api import sync_playwright
FRONTEND = "http://localhost:5173"
API = "http://localhost:8000"
TEST_IMAGE = r"D:\Idea Project\agent_jrxml\test_invoice_e2e.png"
def run():
    """Drive one end-to-end pass through the web frontend with headless Chromium.

    Steps: load the page at FRONTEND, create a session, attach TEST_IMAGE,
    send a prompt, wait for an assistant message, then inspect the download
    button. Progress is printed to stdout and screenshots are written to
    hard-coded absolute paths. Only the sidebar and file-chip checks are
    assertions; the response and download checks just print what they find.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page(viewport={"width": 1280, "height": 900})

        # Capture console errors
        errors = []
        page.on("console", lambda msg: errors.append(msg.text) if msg.type == "error" else None)

        # 1. Navigate and wait
        print("[1] Loading frontend...")
        page.goto(FRONTEND, timeout=15000)
        page.wait_for_load_state("networkidle")
        page.wait_for_timeout(1000)

        # Screenshot initial state
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_01_initial.png", full_page=True)
        print(" Screenshot: e2e_01_initial.png")

        # Verify sidebar loads
        sidebar = page.locator(".sidebar")
        assert sidebar.is_visible(), "Sidebar not visible"
        print(" OK: Sidebar visible")

        # 2. Create new session (click +)
        print("[2] Creating new session...")
        page.locator(".btn-icon").click()
        page.wait_for_timeout(500)
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_02_session.png")
        print(" OK: New session created")

        # 3. Upload test image
        print("[3] Uploading test image...")
        upload_input = page.locator('input[type="file"]')
        upload_input.set_input_files(TEST_IMAGE)
        page.wait_for_timeout(500)

        # Verify file chip appears
        chip = page.locator(".chip").first
        assert chip.is_visible(), "File chip not visible after upload"
        print(f" OK: File chip visible — {chip.inner_text()}")

        # 4. Type message and send
        print('[4] Sending message...')
        textarea = page.locator("textarea").first
        textarea.fill("根据这张图片生成车历卡报表模板")
        page.wait_for_timeout(200)
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_03_input.png")

        # Click the send button
        page.locator('button[type="submit"]').click()
        print(" Sent!")

        # 5. Wait for streaming response
        print("[5] Waiting for AI response...")
        try:
            # Wait up to 3 minutes for the first assistant message to appear
            page.wait_for_selector('.message.assistant', timeout=180000)
            page.wait_for_timeout(2000)
            page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_04_response.png", full_page=True)

            # Classify each assistant message by the keywords it contains
            messages = page.locator('.message.assistant').all()
            for m in messages:
                text = m.inner_text()
                if "成功" in text:
                    print(f" ✅ SUCCESS: {text[:100]}")
                elif "失败" in text or "错误" in text:
                    print(f" ❌ ERROR: {text[:100]}")
                elif "JRXML" in text:
                    print(f" 📄 JRXML generated ({len(text)} chars)")
        except Exception as e:
            # Any failure in this step (not only a timeout) is reported here
            # and the run continues with the remaining checks.
            page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_04_timeout.png", full_page=True)
            print(f" ⚠️ Timeout waiting for response: {e}")

        # 6. Check download button
        print("[6] Checking download button...")
        download_btn = page.locator(".btn-download").first
        if download_btn.is_visible():
            text = download_btn.inner_text()
            print(f" Download button: '{text}'")
            if "暂无" not in text:
                print(" ✅ Download link available!")
            else:
                print(" ⚠️ Download shows '暂无下载文件'")
        else:
            print(" ⚠️ Download button not found")

        # Console errors (at most the first five are printed)
        if errors:
            print(f"\n[!] Console errors ({len(errors)}):")
            for e in errors[:5]:
                print(f" {e[:200]}")
        else:
            print("\n ✅ No console errors")

        print("\n=== E2E test complete ===")
        browser.close()
if __name__ == "__main__":
os.makedirs(r"D:\Idea Project\agent_jrxml", exist_ok=True)
run()
-2
View File
@@ -33,8 +33,6 @@ openpyxl>=3.1.0
paddleocr>=2.9.0,<3.0.0
paddlepaddle>=2.6.0,<3.0.0
easyocr>=1.7.0
# 聊天输入增强(粘贴/拖拽上传)
st-multimodal-chatinput>=0.2.1
# 多格式文件解析
openpyxl>=3.1.0
+38 -44
View File
@@ -1,53 +1,47 @@
@echo off
setlocal enabledelayedexpansion
echo ================================================
echo agent_jrxml 启动 (API + 验证)
echo ================================================
cd /d "%~dp0"
echo ============================================
echo JRXML Agent - One-Click Start
echo ============================================
echo.
REM ========== Kill processes on ports ==========
echo [Pre-check] Cleaning up occupied ports...
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING" 2^>nul') do (
echo Killing PID %%a on port 8001...
taskkill /PID %%a /F 2>nul
:: 清理残留进程
echo [清理] 检查残留进程...
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8000.*LISTENING"') do (
taskkill /F /PID %%a >nul 2>&1 && echo 已清理 PID %%a
)
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8000.*LISTENING" 2^>nul') do (
echo Killing PID %%a on port 8000...
taskkill /PID %%a /F 2>nul
)
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":5173.*LISTENING" 2^>nul') do (
echo Killing PID %%a on port 5173...
taskkill /PID %%a /F 2>nul
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do (
taskkill /F /PID %%a >nul 2>&1 && echo 已清理 PID %%a
)
echo.
REM ========== Detect Python ==========
set PYTHON=python
if exist "%~dp0.venv\Scripts\python.exe" set "PYTHON=%~dp0.venv\Scripts\python.exe"
echo Using Python: %PYTHON%
:: 启动验证服务 (后台最小化)
echo [启动] 验证服务 :8001
start "jrxml-validator" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('validation_service.main:app',host='0.0.0.0',port=8001,reload=False)"
:: 等待验证服务就绪 (用 PowerShell 检测)
echo [等待] 验证服务就绪...
:wait_val
ping -n 2 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8001/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_val
echo :8001 就绪
:: 启动 API 服务 (前台,Ctrl+C 退出)
echo [启动] API 服务 :8000
echo ================================================
echo 服务已就绪:
echo API: http://localhost:8000/docs
echo 验证: http://localhost:8001/health
echo 按 Ctrl+C 停止 API 服务
echo 关闭窗口后会自动清理验证服务
echo ================================================
.venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('api_server:app',host='0.0.0.0',port=8000,reload=False)"
:: API 进程退出后自动清理
echo.
REM ========== Start services ==========
echo [1/3] Starting validation service on port 8001...
start "JRXML-Validator" cmd /k cd /d "%~dp0" ^&^& "%PYTHON%" -m uvicorn validation_service.main:app --port 8001 --host 0.0.0.0
timeout /t 3 /nobreak >nul
echo [2/3] Starting backend API on port 8000...
start "JRXML-API" cmd /k cd /d "%~dp0" ^&^& "%PYTHON%" -m uvicorn api_server:app --port 8000 --host 0.0.0.0
timeout /t 3 /nobreak >nul
echo [3/3] Starting frontend dev server on port 5173...
start "JRXML-Frontend" cmd /k cd /d "%~dp0frontend" ^&^& npm run dev
timeout /t 3 /nobreak >nul
echo.
echo ============================================
echo All services started!
echo Frontend : http://localhost:5173
echo Backend : http://localhost:8000
echo Validator : http://localhost:8001
echo ============================================
echo.
echo Close the service windows or run stop.bat to stop.
echo [清理] 停止验证服务...
taskkill /F /FI "WINDOWTITLE eq jrxml-validator*" >nul 2>&1
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
echo 已停止所有服务
pause
+144
View File
@@ -0,0 +1,144 @@
"""
agent_jrxml 统一启动/停止脚本
用法: python start.py [--frontend]
"""
import subprocess
import sys
import time
import signal
import os
import socket
PROCESSES = []
def kill_port(port):
    """Force-kill every process that is listening on *port*.

    Uses ``netstat -ano`` to find listeners and ``taskkill /F`` to stop
    them, so it is only effective on Windows. Failures to run either tool
    are treated as "nothing killed" rather than raised.

    Args:
        port: TCP port number to free.

    Returns:
        The number of distinct processes that taskkill reported as killed.
    """
    try:
        result = subprocess.run(
            ['netstat', '-ano'], capture_output=True, text=True, timeout=10
        )
    except (OSError, subprocess.SubprocessError):
        return 0

    wanted = str(port)
    pids = []
    for line in result.stdout.splitlines():
        # Expected row: TCP  0.0.0.0:8000  0.0.0.0:0  LISTENING  1234
        parts = line.split()
        if len(parts) < 5 or 'LISTENING' not in parts:
            continue
        # Compare the port of the LOCAL address exactly. A substring test
        # on the whole line would also hit e.g. :51730 when asked for 5173,
        # or a matching port in the foreign-address column.
        _, sep, local_port = parts[1].rpartition(':')
        if not sep or local_port != wanted:
            continue
        pid = parts[-1]
        if pid not in pids:  # IPv4 and IPv6 listeners share one PID
            pids.append(pid)

    killed = []
    for pid in pids:
        try:
            done = subprocess.run(['taskkill', '/F', '/PID', pid],
                                  capture_output=True, timeout=5)
        except (OSError, subprocess.SubprocessError):
            continue
        if done.returncode == 0:
            killed.append(pid)

    if killed:
        print(f"[清理] 端口 {port} 已清理 {len(killed)} 个进程: {', '.join(killed)}")
    return len(killed)
def wait_port(port, timeout=30):
    """Wait until something accepts TCP connections on 127.0.0.1:*port*.

    Args:
        port: TCP port to probe.
        timeout: Maximum time to keep trying, in seconds.

    Returns:
        True as soon as a connection succeeds, False once *timeout* seconds
        have passed without one (immediately when *timeout* is 0).
    """
    # A wall-clock deadline keeps the total wait close to `timeout`;
    # counting attempts would let slow connect timeouts stretch it.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # The context manager closes the probe socket on every path.
            with socket.create_connection(('127.0.0.1', port), timeout=1):
                return True
        except OSError:
            time.sleep(0.5)
    return False
def start(port, module, cwd=None):
    """Launch a uvicorn server for *module* on *port* as a child process.

    The child runs under the current interpreter, is recorded in PROCESSES
    together with its port, and is returned to the caller.
    """
    launch_code = (
        f"import uvicorn; uvicorn.run('{module}', host='0.0.0.0', port={port}, reload=False)"
    )
    child = subprocess.Popen([sys.executable, '-c', launch_code], cwd=cwd)
    PROCESSES.append((port, child))
    print(f"[启动] {module} -> :{port} (PID: {child.pid})")
    return child
def cleanup():
    """Stop every child process recorded in PROCESSES and free its port.

    Each process is asked to terminate first; the group then gets up to two
    seconds in total to exit, and only processes still running after that
    are force-killed. Finally ``kill_port`` sweeps each port to catch
    anything the child itself spawned.
    """
    print("\n[清理] 正在停止所有服务...")
    for _, proc in PROCESSES:
        try:
            proc.terminate()
        except OSError:
            pass  # process is already gone

    # One shared deadline: processes that exit promptly cost no waiting time.
    deadline = time.monotonic() + 2
    for port, proc in PROCESSES:
        try:
            proc.wait(timeout=max(0.0, deadline - time.monotonic()))
        except subprocess.TimeoutExpired:
            try:
                proc.kill()
            except OSError:
                pass  # exited between the wait and the kill
        kill_port(port)
    print("[清理] 完成")
def main():
    """Start the API and validation services (and optionally the frontend), then block.

    Passing ``--frontend`` on the command line also starts the frontend
    via ``npm run dev``. The function frees the ports, launches the
    services, waits for every port to accept connections, and then sleeps
    until Ctrl+C, after which ``cleanup`` stops everything. If any port
    does not come up, it cleans up and exits with status 1.
    """
    frontend = '--frontend' in sys.argv

    # 1. Kill leftover processes still holding the ports
    print("=" * 50)
    print("agent_jrxml 启动脚本")
    print("=" * 50)
    kill_port(8000)
    kill_port(8001)
    if frontend:
        kill_port(5173)

    # 2. Start the services (project root = directory containing this script)
    project = os.path.dirname(os.path.abspath(__file__))
    start(8000, 'api_server:app', cwd=project)
    start(8001, 'validation_service.main:app', cwd=project)
    if frontend:
        # The frontend is started through npm
        frontend_dir = os.path.join(project, 'frontend')
        proc = subprocess.Popen(
            ['npm', 'run', 'dev'], cwd=frontend_dir,
            shell=True
        )
        PROCESSES.append((5173, proc))
        print(f"[启动] frontend (Vite) -> :5173")

    # 3. Wait until every started service accepts connections
    print("\n[等待] 等待服务就绪...")
    ok = True
    for port, _ in PROCESSES:
        if wait_port(port):
            print(f" :{port}")
        else:
            print(f" :{port} ✗ 超时!")
            ok = False
    if not ok:
        print("\n[错误] 部分服务启动失败")
        cleanup()
        sys.exit(1)

    print(f"\n{'='*50}")
    print("服务就绪:")
    print(f" API: http://localhost:8000/docs")
    print(f" 验证: http://localhost:8001/health")
    if frontend:
        print(f" 前端: http://localhost:5173")
    print(f"\n按 Ctrl+C 停止所有服务")
    print(f"{'='*50}")

    # 4. Idle until interrupted; cleanup runs on every exit path of this block
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        cleanup()
if __name__ == '__main__':
main()
+50
View File
@@ -0,0 +1,50 @@
@echo off
:: Full-stack launcher: frees ports 8000/8001/5173, then starts the
:: validation service, the API service and the frontend dev server in
:: minimized windows, waiting for each one to answer HTTP before moving on.
setlocal enabledelayedexpansion
echo ================================================
echo agent_jrxml 启动 (全栈)
echo ================================================
cd /d "%~dp0"
:: Kill leftover processes still listening on the service ports
echo [清理] 检查残留进程...
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8000.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":5173.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
echo.
:: 1. Validation service
echo [1/3] 验证服务 :8001
start "jrxml-validator" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('validation_service.main:app',host='0.0.0.0',port=8001,reload=False)"
:: Poll /health until it answers.
:: NOTE(review): this loop has no retry limit, so it spins forever if the service never starts.
:wait_val
ping -n 2 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8001/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_val
echo :8001 就绪
:: 2. API service
echo [2/3] API 服务 :8000
start "jrxml-api" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('api_server:app',host='0.0.0.0',port=8000,reload=False)"
:: Poll /api/health until it answers (same unbounded loop as above)
:wait_api
ping -n 2 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8000/api/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_api
echo :8000 就绪
:: 3. Frontend dev server
echo [3/3] 前端 :5173
start "jrxml-frontend" /MIN cmd /c "cd /d "%~dp0frontend" && npm run dev"
:: Poll the dev server root until it answers (same unbounded loop as above)
:wait_fe
ping -n 3 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:5173 -TimeoutSec 3 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_fe
echo :5173 就绪
echo.
echo ================================================
echo 全部就绪:
echo 前端: http://localhost:5173
echo API: http://localhost:8000/docs
echo 验证: http://localhost:8001/health
echo 运行 stop.bat 停止所有服务
echo ================================================
pause
+6 -7
View File
@@ -1,9 +1,8 @@
@echo off
echo 正在停止 JRXML 代理服务...
taskkill /fi "WINDOWTITLE eq JRXML 验证服务*" /f 2>nul
taskkill /fi "WINDOWTITLE eq JRXML API*" /f 2>nul
taskkill /fi "WINDOWTITLE eq JRXML Frontend*" /f 2>nul
echo 已停止。
chcp 65001 >nul
echo [清理] 停止所有 agent_jrxml 服务...
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8000.*LISTENING"') do taskkill /F /PID %%a 2>nul
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do taskkill /F /PID %%a 2>nul
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":5173.*LISTENING"') do taskkill /F /PID %%a 2>nul
echo 已停止
pause
Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

+29
View File
@@ -0,0 +1,29 @@
# Manual smoke test for backend.jrxml_reorder.normalize_jrxml: reorders a
# deliberately mis-ordered JRXML document, prints the top-level child order
# before and after, then posts the result to the local validation service.
import sys, io
# Re-wrap stdout as UTF-8 (presumably so non-ASCII output prints on
# consoles whose default encoding differs).
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
import xml.etree.ElementTree as ET
from backend.jrxml_reorder import normalize_jrxml
# Top-level children are intentionally out of sequence (queryString and
# style come before property and parameter).
bad = '''<?xml version="1.0" encoding="UTF-8"?>
<jasperReport xmlns="http://jasperreports.sourceforge.net/jasperreports" name="Test" pageWidth="595" pageHeight="842">
<queryString><![CDATA[SELECT 1]]></queryString>
<style name="s1"/>
<field name="f1" class="java.lang.String"/>
<property name="p1" value="v1"/>
<parameter name="param1" class="java.lang.String"/>
<title><band height="50"><textField><reportElement x="0" y="0" width="100" height="20"/></textField></band></title>
<detail><band height="30"><staticText><reportElement x="0" y="0" width="100" height="20"/><text>Hi</text></staticText></band></detail>
</jasperReport>'''
fixed = normalize_jrxml(bad)
print('=== Before ===')
root = ET.fromstring(bad)
print('Children:', [c.tag.split('}')[-1] for c in root])
print('\n=== After ===')
root2 = ET.fromstring(fixed)
print('Children:', [c.tag.split('}')[-1] for c in root2])
# Validate the reordered document; requires the validation service to be
# running on localhost:8001.
import requests
r = requests.post('http://localhost:8001/validate', json={'jrxml': fixed}, timeout=10)
print(f'\nValidation: {r.json()}')
+3 -3
View File
@@ -57,7 +57,7 @@ class TestAcceptanceScenarios:
state["stage"] = "initial_generation"
final = run_graph(graph, state)
assert final.get("retry_count", 0) <= 3, "不应超过最大重试次数"
assert final.get("retry_count", 0) <= 5, "不应超过最大重试次数"
print(f"场景 2 状态: {final.get('status')}, 重试次数: {final.get('retry_count', 0)}")
def test_scenario3_multi_turn_modification(self, graph):
@@ -128,8 +128,8 @@ class TestAcceptanceScenarios:
state = create_initial_state()
state["current_jrxml"] = "<invalid>xml<<<"
state["user_input"] = "Fix this"
state["retry_count"] = 3 # 已达到最大重试次数
state["retry_count"] = 5 # 已达到最大重试次数
state["status"] = "fail"
final = run_graph(graph, state)
assert final.get("retry_count", 0) >= 3 or final.get("status") == "pass"
assert final.get("retry_count", 0) >= 5 or final.get("status") == "pass"
Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

@@ -0,0 +1 @@
测试文件内容
Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 MiB

+1 -1
View File
@@ -159,4 +159,4 @@ async def health():
"schema_available": schema_available,
"validation_type": "XSD" if schema_available else "仅结构检查",
"note": "如需完整的 JasperReports 7.0.6 编译验证,请使用基于 Java 的验证器",
}
}
File diff suppressed because it is too large Load Diff