feat: FastAPI+SSE API server, JRXML auto-reorder, session integrity fixes
@@ -0,0 +1,46 @@
|
|||||||
|
{
|
||||||
|
"permissions": {
|
||||||
|
"allow": [
|
||||||
|
"Bash(git submodule *)",
|
||||||
|
"Bash(python -c \"import py_compile; py_compile.compile\\('scripts/init_kb.py', doraise=True\\); print\\('init_kb.py OK'\\)\")",
|
||||||
|
"Bash(python -c \"import py_compile; py_compile.compile\\('agent/nodes.py', doraise=True\\); print\\('nodes.py OK'\\)\")",
|
||||||
|
"Bash(python -c \"import py_compile; py_compile.compile\\('backend/embeddings.py', doraise=True\\); print\\('embeddings.py OK'\\)\")",
|
||||||
|
"Bash(python *)",
|
||||||
|
"Bash(PYTHONIOENCODING=utf-8 python batch_chunker.py jrxml_source)",
|
||||||
|
"Bash(taskkill /F /IM python.exe)",
|
||||||
|
"Bash(pkill -f embed_chunks)",
|
||||||
|
"Bash(pip show *)",
|
||||||
|
"Bash(streamlit run *)",
|
||||||
|
"Bash(curl -s http://localhost:8001/validate -X POST -H \"Content-Type: application/json\" -d '{\"jrxml\":\"\"}')",
|
||||||
|
"Bash(STREAMLIT_SERVER_HEADLESS=true streamlit run app.py --server.port 8501)",
|
||||||
|
"Bash(git add *)",
|
||||||
|
"Bash(git commit -m ' *)",
|
||||||
|
"Bash(pip install *)",
|
||||||
|
"Bash(git push *)",
|
||||||
|
"Bash(claude mcp *)",
|
||||||
|
"mcp__zai-mcp-server__extract_text_from_screenshot",
|
||||||
|
"mcp__MiniMax__understand_image",
|
||||||
|
"Bash(curl -s http://localhost:8001/health)",
|
||||||
|
"Bash(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:8501)",
|
||||||
|
"Bash(curl -s -X POST http://localhost:8001/validate -H \"Content-Type: application/json\" -d \"{\\\\\"jrxml\\\\\": \\\\\"\\\\\"}\")",
|
||||||
|
"Bash(curl -s -X POST http://localhost:8001/validate -H \"Content-Type: application/json\" -d \"{\\\\\"jrxml\\\\\": \\\\\"<?xml version=\\\\\\\\\\\\\"1.0\\\\\\\\\\\\\"?><jasperReport name=\\\\\\\\\\\\\"test\\\\\\\\\\\\\" pageWidth=\\\\\\\\\\\\\"595\\\\\\\\\\\\\" pageHeight=\\\\\\\\\\\\\"842\\\\\\\\\\\\\"><queryString><![CDATA[SELECT 1]]></queryString></jasperReport>\\\\\"}\")",
|
||||||
|
"Bash(curl -s -X POST http://localhost:8001/validate -H \"Content-Type: application/json\" -d \"{\\\\\"jrxml\\\\\": \\\\\"<?xml version=\\\\\\\\\\\\\"1.0\\\\\\\\\\\\\"?><jasperReport name=\\\\\\\\\\\\\"test\\\\\\\\\\\\\" pageWidth=\\\\\\\\\\\\\"595\\\\\\\\\\\\\" pageHeight=\\\\\\\\\\\\\"842\\\\\\\\\\\\\"><queryString><![CDATA[SELECT 1]]></queryString><detail><band height=\\\\\\\\\\\\\"50\\\\\\\\\\\\\"/></detail></jasperReport>\\\\\"}\")",
|
||||||
|
"Bash(curl -s -X POST http://localhost:8001/validate -H 'Content-Type: application/json' -d '{\"jrxml\": \"<?xml version=\\\\\"1.0\\\\\"?><jasperReport name=\\\\\"test\\\\\" pageWidth=\\\\\"595\\\\\" pageHeight=\\\\\"842\\\\\"><queryString><![CDATA[SELECT name FROM users]]></queryString><field name=\\\\\"name\\\\\" class=\\\\\"java.lang.String\\\\\"/><detail><band height=\\\\\"50\\\\\"><textField><reportElement x=\\\\\"0\\\\\" y=\\\\\"0\\\\\" width=\\\\\"100\\\\\" height=\\\\\"20\\\\\"/><textFieldExpression><![CDATA[$F{name}]]></textFieldExpression></textField></band></detail></jasperReport>\"}')",
|
||||||
|
"Bash(curl -s -o /dev/null -w \"Streamlit: %{http_code}\\\\n\" http://localhost:8501)",
|
||||||
|
"Bash(grep -v \"Complete$\")",
|
||||||
|
"Bash(git pull *)",
|
||||||
|
"Bash(pip search *)",
|
||||||
|
"Read",
|
||||||
|
"Write",
|
||||||
|
"Edit",
|
||||||
|
"Bash",
|
||||||
|
"Git",
|
||||||
|
"Npm",
|
||||||
|
"Pip",
|
||||||
|
"Grep",
|
||||||
|
"Glob",
|
||||||
|
"Bash(rm -rf components/* assets/* style.css)",
|
||||||
|
"Bash(mkdir -p api stores components utils)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -48,7 +48,7 @@ RAG_USE_FP16=true
|
|||||||
RAG_BATCH_SIZE=64
|
RAG_BATCH_SIZE=64
|
||||||
|
|
||||||
# 最大自动修正尝试次数
|
# 最大自动修正尝试次数
|
||||||
MAX_RETRY=3
|
MAX_RETRY=5
|
||||||
|
|
||||||
# 上下文压缩阈值(token 数)
|
# 上下文压缩阈值(token 数)
|
||||||
CONTEXT_MAX_TOKENS=6000
|
CONTEXT_MAX_TOKENS=6000
|
||||||
|
|||||||
@@ -0,0 +1,341 @@
|
|||||||
|
# JRXML 生成代理 — 架构文档
|
||||||
|
|
||||||
|
## 概览
|
||||||
|
|
||||||
|
一个三层架构的桌面应用,通过自然语言多轮对话帮助非技术用户创建 JasperReports 模板(JRXML)。核心流程:用户输入 → 意图识别 → 模板检索 → LLM 生成/修改 → 自动验证修正 → 输出可编译的 JRXML。
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────────────────────────────────────────────────────┐
|
||||||
|
│ Vue 3 + Vite 前端 (:5173) │
|
||||||
|
│ frontend/ (聊天界面 + SSE 流式) │
|
||||||
|
│ 聊天界面 / 会话管理 / JRXML 预览 / 下载 / 快捷操作 │
|
||||||
|
└─────────────────────┬────────────────────────────────────────┘
|
||||||
|
│ HTTP + SSE (/api/*)
|
||||||
|
▼
|
||||||
|
┌──────────────────────────────────────────────────────────────┐
|
||||||
|
│ FastAPI SSE 后端 (:8000) │
|
||||||
|
│ api_server.py │
|
||||||
|
│ REST: /api/sessions, /api/upload, /api/.../download/latest │
|
||||||
|
│ SSE: /api/sessions/{id}/chat (流式推送) │
|
||||||
|
│ 事件: node_start | node_complete | stream_token │
|
||||||
|
│ agent_complete | agent_error │
|
||||||
|
└─────────────────────┬────────────────────────────────────────┘
|
||||||
|
│ run_agent(user_input)
|
||||||
|
▼
|
||||||
|
┌──────────────────────────────────────────────────────────────┐
|
||||||
|
│ LangGraph 状态机 (agent/) │
|
||||||
|
│ │
|
||||||
|
│ load_session → process_input → manage_context │
|
||||||
|
│ → save_state_snapshot → classify_intent │
|
||||||
|
│ │ │ │ │ │ │
|
||||||
|
│ ▼ ▼ ▼ ▼ ▼ │
|
||||||
|
│ retrieve modify_jrxml preview consult undo/reset │
|
||||||
|
│ │ │ /export │
|
||||||
|
│ ▼ ▼ │
|
||||||
|
│ generate save_session │
|
||||||
|
│ │ │ │
|
||||||
|
│ └────┬─────┘ │
|
||||||
|
│ ▼ │
|
||||||
|
│ (jrxml_reorder 自动规范化元素顺序) │
|
||||||
|
│ ▼ │
|
||||||
|
│ validate ──(fail)──► explain_error ──► correct_jrxml │
|
||||||
|
│ │ ▲ │ │
|
||||||
|
│ (pass) └──(retry<N)───┘ │
|
||||||
|
│ ▼ │
|
||||||
|
│ finalize (失败版本 → jrxml_versions, 提示下载) │
|
||||||
|
└──────────┬──────────────┬─────────────────────┬──────────────┘
|
||||||
|
│ │ │
|
||||||
|
▼ ▼ ▼
|
||||||
|
┌──────────────┐ ┌──────────────┐ ┌──────────────────────────┐
|
||||||
|
│ LLM 后端 │ │ 向量知识库 │ │ 验证服务 (:8001) │
|
||||||
|
│ backend/llm │ │ ChromaDB + │ │ FastAPI │
|
||||||
|
│ │ │ RAGSearcher │ │ 结构检查 + 严格 XSD 校验 │
|
||||||
|
│ Anthropic SDK│ │ │ │ │
|
||||||
|
│ OpenAI SDK │ │ Sentence- │ │ /validate │
|
||||||
|
│ Ollama │ │ Transformer │ │ /health │
|
||||||
|
└──────────────┘ └──────────────┘ └──────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## 目录结构
|
||||||
|
|
||||||
|
```
|
||||||
|
agent_jrxml/
|
||||||
|
├── api_server.py # FastAPI SSE 后端(REST + 流式推送)
|
||||||
|
│
|
||||||
|
├── frontend/ # Vue 3 + Vite 前端
|
||||||
|
│ └── src/
|
||||||
|
│ ├── api/client.ts # SSE 客户端 + fetch 封装
|
||||||
|
│ ├── stores/ # Pinia 状态管理(chat + session)
|
||||||
|
│ └── components/ # 聊天界面组件
|
||||||
|
│
|
||||||
|
├── agent/ # LangGraph 工作流层
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── state.py # AgentState TypedDict 定义(~28 字段)
|
||||||
|
│ ├── nodes.py # 18 个工作流节点(生成/修改/验证/修正/意图识别...)
|
||||||
|
│ └── graph.py # 状态图编译 + 路由逻辑 + 初始状态工厂
|
||||||
|
│
|
||||||
|
├── backend/ # 基础设施层
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── llm.py # LLM 工厂:Anthropic(MiniMax) / OpenAI / Ollama
|
||||||
|
│ ├── embeddings.py # 嵌入模型工厂:HuggingFace / OpenAI
|
||||||
|
│ ├── validation.py # 验证服务 HTTP 客户端
|
||||||
|
│ ├── session.py # 会话持久化(JSON CRUD + flush/fsync)
|
||||||
|
│ ├── jrxml_reorder.py # JRXML 元素自动排序(匹配 XSD sequence)
|
||||||
|
│ └── rag_adapter.py # RAG 适配层:连接 ChromaDB 做语义搜索
|
||||||
|
│
|
||||||
|
├── validation_service/ # 独立验证微服务
|
||||||
|
│ ├── main.py # FastAPI 服务:结构检查 + 严格 XSD 校验
|
||||||
|
│ └── schemas/
|
||||||
|
│ └── jasperreport_7_0_6.xsd # JasperReports 7.0.6 XSD(286KB)
|
||||||
|
│
|
||||||
|
├── scripts/
|
||||||
|
│ └── init_kb.py # 知识库初始化(预下载嵌入模型)
|
||||||
|
│
|
||||||
|
├── tests/
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── test_validation.py # 验证服务单元测试
|
||||||
|
│ └── test_agent.py # 代理集成测试
|
||||||
|
│
|
||||||
|
├── data/ # 数据目录
|
||||||
|
│ ├── sample_templates/ # 示例 JRXML 模板
|
||||||
|
│ └── corrections/ # 错误修正案例
|
||||||
|
│
|
||||||
|
├── db/chroma/ # ChromaDB 持久化存储
|
||||||
|
├── sessions/ # 会话 JSON 文件存储
|
||||||
|
├── jrxml_versions/ # 失败版本归档存储
|
||||||
|
├── rag/ # RAG 子模块(独立管线)
|
||||||
|
├── requirements.txt # Python 依赖
|
||||||
|
├── start_all.bat # 一键启动全部服务
|
||||||
|
├── start.bat # 启动脚本
|
||||||
|
├── stop.bat # 一键停止全部服务
|
||||||
|
├── .env.example # 环境变量模板
|
||||||
|
└── README.md # 使用说明
|
||||||
|
```
|
||||||
|
|
||||||
|
## 数据流详解
|
||||||
|
|
||||||
|
### 1. 请求生命周期
|
||||||
|
|
||||||
|
```
|
||||||
|
用户输入 "创建员工名册,包含 id、name、department"
|
||||||
|
│
|
||||||
|
├─ load_session 从 sessions/{id}.json 恢复历史状态
|
||||||
|
├─ process_input 记录用户消息到 conversation_history
|
||||||
|
├─ manage_context 检查 token 数,超阈值则 LLM 压缩早期对话
|
||||||
|
├─ save_state_snapshot 保存当前状态快照(用于撤销)
|
||||||
|
├─ classify_intent LLM 分类 → initial_generation
|
||||||
|
├─ retrieve RAGSearcher.search_as_context() → 注入 prompt
|
||||||
|
├─ generate LLM 生成初始 JRXML
|
||||||
|
├─ save_session 持久化到磁盘
|
||||||
|
├─ validate 调用 FastAPI 验证服务
|
||||||
|
│ ├─ pass → finalize
|
||||||
|
│ └─ fail → explain_error → correct_jrxml → validate (最多 5 次)
|
||||||
|
└─ finalize 保存最终 JRXML,UI 展示结果
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. 意图路由(8 种意图)
|
||||||
|
|
||||||
|
| 意图 | 条件 | 路由目标 |
|
||||||
|
|------|------|---------|
|
||||||
|
| `initial_generation` | 无现有报表 | retrieve → generate |
|
||||||
|
| `modify_report` | 有现有报表 | modify_jrxml |
|
||||||
|
| `preview_report` | — | 直接展示 current_jrxml |
|
||||||
|
| `export_jrxml` | — | 触发下载 |
|
||||||
|
| `export_pdf` | — | 触发下载 |
|
||||||
|
| `consult_question` | — | handle_consult(独立回答) |
|
||||||
|
| `undo_modification` | history_states 非空 | 恢复上一个快照 |
|
||||||
|
| `reset_session` | — | 清空所有报表状态 |
|
||||||
|
|
||||||
|
### 3. 自动修正循环
|
||||||
|
|
||||||
|
```
|
||||||
|
validate ──fail──► explain_error ──► correct_jrxml ──► validate
|
||||||
|
▲ │
|
||||||
|
└──────────── retry_count < MAX_RETRY (5) ──────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
每次修正都会递增 `retry_count`;达到上限后即使仍有错误也会直接进入 `finalize`,此时(非空的)失败版本会被记录到 `jrxml_versions` 供用户下载,并在 UI 上展示错误信息。
|
||||||
|
|
||||||
|
## 核心组件
|
||||||
|
|
||||||
|
### AgentState(agent/state.py)
|
||||||
|
|
||||||
|
```python
|
||||||
|
class AgentState(TypedDict, total=False):
|
||||||
|
# 工作流核心
|
||||||
|
conversation_history: List[dict] # 当前上下文的对话(可能被压缩裁剪)
|
||||||
|
current_jrxml: str # 当前 JRXML 文本
|
||||||
|
user_input: str # 本轮用户输入
|
||||||
|
status: str # "pass" | "fail"
|
||||||
|
error_msg: str # 验证错误信息
|
||||||
|
natural_explanation: str # 错误的人类可读解释
|
||||||
|
retry_count: int # 当前修正尝试次数
|
||||||
|
user_modification_request: str # 修改请求文本
|
||||||
|
final_jrxml: str # 最终验证通过的 JRXML
|
||||||
|
stage: str # 当前阶段标识
|
||||||
|
retrieved_context: str # RAG 检索到的模板上下文
|
||||||
|
|
||||||
|
# 上下文压缩
|
||||||
|
full_conversation_history: List[dict] # 完整对话(含时间戳)
|
||||||
|
compressed_history: str # 早期对话的压缩摘要
|
||||||
|
current_token_count: int # 当前估算 token 数
|
||||||
|
|
||||||
|
# 会话持久化
|
||||||
|
session_id: str
|
||||||
|
session_name: str
|
||||||
|
created_at: str
|
||||||
|
updated_at: str
|
||||||
|
|
||||||
|
# 意图识别 + 撤销
|
||||||
|
intent: str # 8 种意图之一
|
||||||
|
history_states: List[dict] # 状态快照栈(最多 10 个)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 工作流节点(agent/nodes.py)
|
||||||
|
|
||||||
|
| 节点 | 职责 | 调用外部 |
|
||||||
|
|------|------|---------|
|
||||||
|
| `load_session_node` | 从磁盘恢复会话状态 | `backend.session.load_session` |
|
||||||
|
| `process_input` | 记录用户输入到对话历史 | — |
|
||||||
|
| `manage_context` | token 超阈值时 LLM 压缩早期对话 | `get_llm()` |
|
||||||
|
| `save_state_snapshot` | 保存快照到 history_states | — |
|
||||||
|
| `classify_intent` | LLM 分类用户意图(8 类) | `get_llm()` |
|
||||||
|
| `retrieve` | 从 ChromaDB 搜索相关模板 | `backend.rag_adapter.search_chunks` |
|
||||||
|
| `generate` | 首次生成 JRXML | `get_llm()` |
|
||||||
|
| `modify_jrxml` | 根据用户需求修改现有 JRXML | `get_llm()` |
|
||||||
|
| `validate` | 调用验证服务检查 JRXML | `backend.validation.validate_jrxml` |
|
||||||
|
| `explain_error` | LLM 将编译错误翻译为人话 | `get_llm()` |
|
||||||
|
| `correct_jrxml` | LLM 自动修正验证失败 | `get_llm()` |
|
||||||
|
| `finalize` | 保存最终 JRXML 并标记完成;验证失败时保留上一次成功的版本,并把失败版本记入 `jrxml_versions` | — |
|
||||||
|
| `handle_consult` | 回答 JasperReports 咨询 | `get_llm()` |
|
||||||
|
| `handle_undo` | 从 history_states 恢复上一状态 | — |
|
||||||
|
| `handle_reset` | 清空报表,重置会话 | — |
|
||||||
|
| `save_session_node` | 持久化当前状态到磁盘 | `backend.session.save_session` |
|
||||||
|
|
||||||
|
### LLM 工厂(backend/llm.py)
|
||||||
|
|
||||||
|
```
|
||||||
|
get_llm()
|
||||||
|
├─ LLM_BACKEND=local → langchain_ollama.ChatOllama
|
||||||
|
└─ LLM_BACKEND=cloud
|
||||||
|
├─ LLM_PROVIDER=anthropic → raw anthropic.Anthropic SDK
|
||||||
|
│ 适配 MiniMax Anthropic 兼容 API
|
||||||
|
│ 包装为 MiniMaxLLM(提供 .invoke() 接口)
|
||||||
|
└─ LLM_PROVIDER=openai → langchain_openai.ChatOpenAI
|
||||||
|
```
|
||||||
|
|
||||||
|
**MiniMaxLLM 适配器**:将 Anthropic SDK 的 `client.messages.create()` 包装成与 LangChain 兼容的 `.invoke(prompt) → Response.content` 接口,供所有节点统一调用。
|
||||||
|
|
||||||
|
### RAG 适配层(backend/rag_adapter.py)
|
||||||
|
|
||||||
|
```
|
||||||
|
search_chunks(query, k=5)
|
||||||
|
└─ RAGSearcher(单例)
|
||||||
|
├─ 懒加载 SentenceTransformer 模型
|
||||||
|
├─ 懒连接 ChromaDB PersistentClient
|
||||||
|
├─ query → 向量编码 → collection.query() → top-k 结果
|
||||||
|
└─ search_as_context() → 拼接带元数据标签的上下文字符串
|
||||||
|
```
|
||||||
|
|
||||||
|
### 验证服务(validation_service/main.py)
|
||||||
|
|
||||||
|
独立的 FastAPI 进程(端口 8001),提供两级验证:
|
||||||
|
|
||||||
|
1. **结构检查**(始终执行):
|
||||||
|
- XML 语法正确性
|
||||||
|
- `$F{field}` 引用一致性(表达式 vs `<field>` 声明)
|
||||||
|
- `<queryString>` 是否含有效 SQL SELECT
|
||||||
|
- `<jasperReport>` 必需属性(pageWidth, pageHeight, name)
|
||||||
|
|
||||||
|
2. **XSD Schema 校验**(可选):
|
||||||
|
- 需要 `validation_service/schemas/jasperreport_7_0_6.xsd` 文件
|
||||||
|
- 使用 `lxml.etree.XMLSchema` 进行完整 schema 校验
|
||||||
|
|
||||||
|
### 会话持久化(backend/session.py)
|
||||||
|
|
||||||
|
```
|
||||||
|
sessions/{session_id}.json
|
||||||
|
{
|
||||||
|
"session_id": "abc123def456",
|
||||||
|
"session_name": "员工名册报表",
|
||||||
|
"created_at": "2026-05-19T09:00:00+00:00",
|
||||||
|
"updated_at": "2026-05-19T09:30:00+00:00",
|
||||||
|
"agent_state": { ... } // 完整的 AgentState 字段
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 关键 Prompt 设计
|
||||||
|
|
||||||
|
| Prompt | 用途 | 输出约束 |
|
||||||
|
|--------|------|---------|
|
||||||
|
| `INTENT_CLASSIFY_PROMPT` | 8 分类意图识别 | 只输出意图名称 |
|
||||||
|
| `INITIAL_GENERATION_PROMPT` | 首次生成 JRXML | 只输出 JRXML,无 markdown |
|
||||||
|
| `MODIFICATION_PROMPT` | 修改现有 JRXML | 只输出完整 JRXML |
|
||||||
|
| `CORRECTION_PROMPT` | 自动修正错误 | 只输出修复后 JRXML |
|
||||||
|
| `EXPLAIN_PROMPT` | 错误转人话 | 2-3 句话 |
|
||||||
|
| `COMPRESSION_PROMPT` | 对话压缩 | ≤200 字摘要 |
|
||||||
|
| `CONSULT_PROMPT` | 咨询解答 | 简洁中文 |
|
||||||
|
|
||||||
|
## 配置参数(.env)
|
||||||
|
|
||||||
|
| 参数 | 默认值 | 说明 |
|
||||||
|
|------|--------|------|
|
||||||
|
| `LLM_BACKEND` | cloud | cloud / local |
|
||||||
|
| `LLM_PROVIDER` | openai | openai / anthropic |
|
||||||
|
| `OPENAI_API_KEY` | — | API 密钥 |
|
||||||
|
| `OPENAI_BASE_URL` | https://api.openai.com/v1 | API 端点 |
|
||||||
|
| `LLM_MODEL` | gpt-4o | 模型名称 |
|
||||||
|
| `LOCAL_LLM_MODEL` | qwen2.5-coder:7b | Ollama 模型 |
|
||||||
|
| `EMBED_BACKEND` | local | local / cloud |
|
||||||
|
| `LOCAL_EMBED_MODEL` | Qwen/Qwen3-Embedding-0.6B | 本地嵌入模型 |
|
||||||
|
| `VALIDATION_SERVICE_URL` | http://localhost:8001/validate | 验证端点 |
|
||||||
|
| `CHROMA_PERSIST_DIR` | ./db/chroma | ChromaDB 路径 |
|
||||||
|
| `MAX_RETRY` | 5 | 自动修正最大尝试次数 |
|
||||||
|
| `CONTEXT_MAX_TOKENS` | 6000 | 触发压缩的 token 阈值 |
|
||||||
|
| `CONTEXT_KEEP_RECENT` | 4 | 保留最近 N 轮完整对话 |
|
||||||
|
| `SESSIONS_DIR` | ./sessions | 会话 JSON 存储目录 |
|
||||||
|
| `HISTORY_MAX_SNAPSHOTS` | 10 | 撤销快照保留数量 |
|
||||||
|
|
||||||
|
## 启动流程
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. 安装依赖
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# 2. 配置环境
|
||||||
|
cp .env.example .env
|
||||||
|
# 编辑 .env 填入 API 密钥
|
||||||
|
|
||||||
|
# 3. 初始化知识库(预下载嵌入模型)
|
||||||
|
python scripts/init_kb.py --download-model
|
||||||
|
|
||||||
|
# 4. 启动验证服务(终端 1)
|
||||||
|
python -m uvicorn validation_service.main:app --port 8001 --host 0.0.0.0
|
||||||
|
|
||||||
|
# 5. 启动 Streamlit 界面(终端 2)
|
||||||
|
STREAMLIT_SERVER_HEADLESS=true streamlit run app.py --server.port 8501
|
||||||
|
|
||||||
|
# 6. 访问 http://localhost:8501
|
||||||
|
```
|
||||||
|
|
||||||
|
## 测试
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_validation.py -v # 验证服务单元测试
|
||||||
|
pytest tests/test_agent.py -v # 代理集成测试
|
||||||
|
pytest tests/ -v # 全部测试
|
||||||
|
```
|
||||||
|
|
||||||
|
## 技术栈
|
||||||
|
|
||||||
|
| 层 | 技术 |
|
||||||
|
|----|------|
|
||||||
|
| UI | Vue 3 + Vite(frontend/,:5173)/ Streamlit 1.57(app.py,:8501) |
|
||||||
|
| 工作流引擎 | LangGraph 1.2 |
|
||||||
|
| LLM 接入 | Anthropic SDK / LangChain-OpenAI / LangChain-Ollama |
|
||||||
|
| 向量数据库 | ChromaDB 1.5 |
|
||||||
|
| 嵌入模型 | Sentence-Transformers (HuggingFace) |
|
||||||
|
| 验证服务 | FastAPI + lxml XMLSchema |
|
||||||
|
| HTTP 客户端 | httpx |
|
||||||
|
| Token 计算 | tiktoken |
|
||||||
|
| 持久化 | JSON 文件 + ChromaDB PersistentClient |
|
||||||
@@ -31,7 +31,7 @@
|
|||||||
|
|
||||||
## 1. 项目是什么
|
## 1. 项目是什么
|
||||||
|
|
||||||
**一句话**:用户用中文描述报表需求 → LLM 生成 JRXML 模板 → 自动验证 → 失败则自动修正(最多 3 次)→ 重试耗尽后失败上下文自动注入下一轮 → 返回可用的 JRXML 文件。
|
**一句话**:用户用中文描述报表需求 → LLM 生成 JRXML 模板 → 自动验证 → 失败则自动修正(最多 5 次)→ 重试耗尽后失败上下文自动注入下一轮 → 返回可用的 JRXML 文件。
|
||||||
|
|
||||||
**技术栈**:Streamlit(UI) + LangGraph(状态机) + LLM(MiniMax/OpenAI/Ollama) + ChromaDB(向量库) + FastAPI(验证微服务)
|
**技术栈**:Streamlit(UI) + LangGraph(状态机) + LLM(MiniMax/OpenAI/Ollama) + ChromaDB(向量库) + FastAPI(验证微服务)
|
||||||
|
|
||||||
@@ -110,7 +110,7 @@ streamlit run app.py --server.port 8501
|
|||||||
│ │ │ │
|
│ │ │ │
|
||||||
│ │ correct_jrxml │
|
│ │ correct_jrxml │
|
||||||
│ │ │ │
|
│ │ │ │
|
||||||
│ │ (retry < 3) ────┘ │
|
│ │ (retry < 5) ────┘ │
|
||||||
│ ▼ │
|
│ ▼ │
|
||||||
│ finalize → END │
|
│ finalize → END │
|
||||||
└──────────────────────────┬───────────────────────────────────┘
|
└──────────────────────────┬───────────────────────────────────┘
|
||||||
@@ -251,14 +251,14 @@ def route_after_correct(state) -> Literal["validate", "finalize"]:
|
|||||||
return "validate" if state.get("retry_count", 0) < MAX_RETRY else "finalize"
|
return "validate" if state.get("retry_count", 0) < MAX_RETRY else "finalize"
|
||||||
```
|
```
|
||||||
|
|
||||||
**MAX_RETRY 默认为 3**(`.env` 中配置)。重试耗尽后进入 finalize,finalize 会将失败上下文写入 `pending_failure_context`,下次用户输入时 `process_input` 自动注入。
|
**MAX_RETRY 默认为 5**(`.env` 中配置)。重试耗尽后进入 finalize,finalize 会将失败上下文写入 `pending_failure_context`,下次用户输入时 `process_input` 自动注入。
|
||||||
```
|
```
|
||||||
|
|
||||||
**关键路由逻辑**:
|
**关键路由逻辑**:
|
||||||
- `route_by_intent`:8 种意图分叉,是整个系统的"交通枢纽"
|
- `route_by_intent`:8 种意图分叉,是整个系统的"交通枢纽"
|
||||||
- `route_after_retrieve`:有 layout_schema → 3 阶段精确生成(generate_skeleton → refine_layout → map_fields),无 schema → 原 1-shot generate
|
- `route_after_retrieve`:有 layout_schema → 3 阶段精确生成(generate_skeleton → refine_layout → map_fields),无 schema → 原 1-shot generate
|
||||||
- `route_after_save`:预览/导出意图**跳过验证**直通 finalize(这是修复预览问题的关键)
|
- `route_after_save`:预览/导出意图**跳过验证**直通 finalize(这是修复预览问题的关键)
|
||||||
- `route_after_correct`:重试次数 < 3 则继续验证循环,否则认输
|
- `route_after_correct`:重试次数 < 5 则继续验证循环,否则认输
|
||||||
|
|
||||||
### 5.2 图构建
|
### 5.2 图构建
|
||||||
|
|
||||||
@@ -347,9 +347,9 @@ def build_graph():
|
|||||||
│ ▼ │
|
│ ▼ │
|
||||||
│ correct_jrxml │
|
│ correct_jrxml │
|
||||||
│ │ │
|
│ │ │
|
||||||
│ ├── retry < 3? ──► validate (循环) │
|
│ ├── retry < 5? ──► validate (循环) │
|
||||||
│ │ │
|
│ │ │
|
||||||
│ └── retry >= 3? ──► finalize (放弃) │
|
│ └── retry >= 5? ──► finalize (放弃) │
|
||||||
│ │
|
│ │
|
||||||
▼ │
|
▼ │
|
||||||
finalize ──► END │
|
finalize ──► END │
|
||||||
@@ -1169,7 +1169,7 @@ parent.addEventListener('keydown', function(e) {
|
|||||||
| `RAG_USE_GPU` | `true` | GPU 加速 |
|
| `RAG_USE_GPU` | `true` | GPU 加速 |
|
||||||
| `RAG_USE_FP16` | `true` | 半精度推理 |
|
| `RAG_USE_FP16` | `true` | 半精度推理 |
|
||||||
| `VALIDATION_SERVICE_URL` | `http://localhost:8001/validate` | 验证服务地址 |
|
| `VALIDATION_SERVICE_URL` | `http://localhost:8001/validate` | 验证服务地址 |
|
||||||
| `MAX_RETRY` | `3` | 最大自动修正次数 |
|
| `MAX_RETRY` | `5` | 最大自动修正次数 |
|
||||||
| `CONTEXT_MAX_TOKENS` | `6000` | 触发压缩的 token 阈值 |
|
| `CONTEXT_MAX_TOKENS` | `6000` | 触发压缩的 token 阈值 |
|
||||||
| `CONTEXT_KEEP_RECENT` | `4` | 压缩时保留最近 N 轮 |
|
| `CONTEXT_KEEP_RECENT` | `4` | 压缩时保留最近 N 轮 |
|
||||||
| `SESSIONS_DIR` | `./sessions` | 会话文件目录 |
|
| `SESSIONS_DIR` | `./sessions` | 会话文件目录 |
|
||||||
|
|||||||
@@ -0,0 +1,91 @@
|
|||||||
|
# RAG 知识库集成说明
|
||||||
|
|
||||||
|
## 概述
|
||||||
|
|
||||||
|
使用 `rag_jrxml` 子项目的语义分块管线替换原有的简单向量知识库。`rag_jrxml` 独立运行并产出 ChromaDB 向量库,主项目通过 `backend/rag_adapter.py` 对其进行查询。
|
||||||
|
|
||||||
|
## 架构
|
||||||
|
|
||||||
|
```
|
||||||
|
rag/ ← git submodule (rag_jrxml)
|
||||||
|
├── jrxml_source/ ← 源数据目录 (242 .jrxml + 16 .md)
|
||||||
|
├── models/ ← 嵌入模型本地存放
|
||||||
|
│ └── paraphrase-multilingual-MiniLM-L12-v2/ (449MB, 384维)
|
||||||
|
├── jrxml_source_chunks/ ← 分块产物 (all_chunks.json, 15,510 chunks)
|
||||||
|
├── embeddings/ ← 向量产物 (embeddings.npy, 23MB)
|
||||||
|
|
||||||
|
db/chroma/ ← ChromaDB 持久化 (主项目查询端读取)
|
||||||
|
│ 集合: jrxml_chunks (15,510 条记录, cosine 距离)
|
||||||
|
|
||||||
|
backend/rag_adapter.py ← RAGSearcher: 加载模型 + 连接 ChromaDB + 搜索
|
||||||
|
agent/nodes.py ← retrieve() 调用 search_chunks()
|
||||||
|
```
|
||||||
|
|
||||||
|
## 管线流程
|
||||||
|
|
||||||
|
```
|
||||||
|
源文件 (.jrxml + .md)
|
||||||
|
→ batch_chunker.py 语义分块 (按 XML 元素/标题层级切分)
|
||||||
|
→ embed_chunks.py 向量化 (Sentence-Transformers, CPU)
|
||||||
|
→ import_to_chroma.py 导入 ChromaDB
|
||||||
|
→ rag_adapter.py 主项目查询
|
||||||
|
```
|
||||||
|
|
||||||
|
## 当前数据
|
||||||
|
|
||||||
|
| 指标 | 数值 |
|
||||||
|
|---|---|
|
||||||
|
| 源文件 | 258 (242 JRXML + 16 MD) |
|
||||||
|
| Chunks 总数 | 15,510 |
|
||||||
|
| 嵌入维度 | 384 |
|
||||||
|
| 嵌入模型 | sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 |
|
||||||
|
| 分块类型 | query, field, parameter, variable, band_*, chart, crosstab, element_*, section_* 等 |
|
||||||
|
| 知识库大小 | embeddings.npy 23MB, ChromaDB ~50MB |
|
||||||
|
|
||||||
|
## 主项目配置
|
||||||
|
|
||||||
|
`.env` 中相关变量:
|
||||||
|
|
||||||
|
```env
|
||||||
|
# 嵌入模型 (本地路径优先)
|
||||||
|
RAG_EMBED_MODEL=./rag/models/paraphrase-multilingual-MiniLM-L12-v2
|
||||||
|
# ChromaDB 路径
|
||||||
|
RAG_CHROMA_PATH=./db/chroma
|
||||||
|
# 集合名称 (与 rag 子项目一致)
|
||||||
|
RAG_COLLECTION_NAME=jrxml_chunks
|
||||||
|
```
|
||||||
|
|
||||||
|
## 全量构建
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd rag
|
||||||
|
python batch_chunker.py jrxml_source
|
||||||
|
python embed_chunks.py jrxml_source_chunks/all_chunks.json
|
||||||
|
python import_to_chroma.py --chroma_path ../db/chroma
|
||||||
|
```
|
||||||
|
|
||||||
|
## 增量更新
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. 将新的 .jrxml / .md 放入 rag/jrxml_source/
|
||||||
|
# 2. 增量运行
|
||||||
|
cd rag
|
||||||
|
python batch_chunker.py jrxml_source --incremental
|
||||||
|
python embed_chunks.py --incremental
|
||||||
|
python import_to_chroma.py --chroma_path ../db/chroma --incremental
|
||||||
|
```
|
||||||
|
|
||||||
|
## 更新 rag 子项目
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git submodule update --remote rag
|
||||||
|
```
|
||||||
|
|
||||||
|
## 搜索接口
|
||||||
|
|
||||||
|
```python
|
||||||
|
from backend.rag_adapter import search_chunks
|
||||||
|
|
||||||
|
# 返回拼接好的上下文字符串,可直接注入 LLM prompt
|
||||||
|
context = search_chunks("如何创建饼图", k=5)
|
||||||
|
```
|
||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
- **多轮聊天**:通过对话优化报表 -- 添加列、更改标题、添加汇总
|
- **多轮聊天**:通过对话优化报表 -- 添加列、更改标题、添加汇总
|
||||||
- **自动验证**:每次生成或修改后都会验证 JRXML
|
- **自动验证**:每次生成或修改后都会验证 JRXML
|
||||||
- **自动修正**:如果验证失败,代理会分析错误并自动修正(最多 3 次)
|
- **自动修正**:如果验证失败,代理会分析错误并自动修正(最多 5 次)
|
||||||
- **模板检索**:使用 Chroma 向量数据库检索相关的 JRXML 示例以获得更好的生成效果
|
- **模板检索**:使用 Chroma 向量数据库检索相关的 JRXML 示例以获得更好的生成效果
|
||||||
- **文件上传**:支持图片(OCR识别)、PDF、Word、Excel、文本文件等
|
- **文件上传**:支持图片(OCR识别)、PDF、Word、Excel、文本文件等
|
||||||
- **聊天粘贴/拖拽**:支持直接在对话框中 Ctrl+V 粘贴或拖拽文件(图片/PDF/Excel/Word)
|
- **聊天粘贴/拖拽**:支持直接在对话框中 Ctrl+V 粘贴或拖拽文件(图片/PDF/Excel/Word)
|
||||||
@@ -182,7 +182,7 @@ jrxml-agent/
|
|||||||
| LOCAL_EMBED_MODEL | 嵌入模型 | Qwen/Qwen3-Embedding-0.6B |
|
| LOCAL_EMBED_MODEL | 嵌入模型 | Qwen/Qwen3-Embedding-0.6B |
|
||||||
| VALIDATION_SERVICE_URL | 验证端点 | http://localhost:8001/validate |
|
| VALIDATION_SERVICE_URL | 验证端点 | http://localhost:8001/validate |
|
||||||
| CHROMA_PERSIST_DIR | Chroma 存储位置 | ./db/chroma |
|
| CHROMA_PERSIST_DIR | Chroma 存储位置 | ./db/chroma |
|
||||||
| MAX_RETRY | 自动修正尝试次数 | 3 |
|
| MAX_RETRY | 自动修正尝试次数 | 5 |
|
||||||
| CONTEXT_MAX_TOKENS | 上下文压缩阈值 | 6000 |
|
| CONTEXT_MAX_TOKENS | 上下文压缩阈值 | 6000 |
|
||||||
| LOG_DIR | 日志目录 | ./logs |
|
| LOG_DIR | 日志目录 | ./logs |
|
||||||
| LOG_LEVEL | 日志级别 | DEBUG |
|
| LOG_LEVEL | 日志级别 | DEBUG |
|
||||||
|
|||||||
@@ -18,11 +18,11 @@ from backend.logger import get_logger, set_trace_id
|
|||||||
from backend.validation import validate_jrxml
|
from backend.validation import validate_jrxml
|
||||||
from prompts.loader import load_prompt
|
from prompts.loader import load_prompt
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv(override=True)
|
||||||
|
|
||||||
_node_log = get_logger("agent")
|
_node_log = get_logger("agent")
|
||||||
|
|
||||||
MAX_RETRY = int(os.getenv("MAX_RETRY", "3"))
|
MAX_RETRY = int(os.getenv("MAX_RETRY", "5"))
|
||||||
CONTEXT_MAX_TOKENS = int(os.getenv("CONTEXT_MAX_TOKENS", "6000"))
|
CONTEXT_MAX_TOKENS = int(os.getenv("CONTEXT_MAX_TOKENS", "6000"))
|
||||||
CONTEXT_KEEP_RECENT = int(os.getenv("CONTEXT_KEEP_RECENT", "4"))
|
CONTEXT_KEEP_RECENT = int(os.getenv("CONTEXT_KEEP_RECENT", "4"))
|
||||||
HISTORY_MAX_SNAPSHOTS = int(os.getenv("HISTORY_MAX_SNAPSHOTS", "10"))
|
HISTORY_MAX_SNAPSHOTS = int(os.getenv("HISTORY_MAX_SNAPSHOTS", "10"))
|
||||||
@@ -815,6 +815,14 @@ def validate(state: AgentState) -> Dict:
|
|||||||
state["error_msg"] = f"JRXML 内容过短({len(jrxml.strip())} 字符),可能为不完整或空内容。"
|
state["error_msg"] = f"JRXML 内容过短({len(jrxml.strip())} 字符),可能为不完整或空内容。"
|
||||||
return state
|
return state
|
||||||
|
|
||||||
|
# 自动规范化 JRXML 元素顺序(符合 XSD sequence 要求)
|
||||||
|
try:
|
||||||
|
from backend.jrxml_reorder import normalize_jrxml
|
||||||
|
jrxml = normalize_jrxml(jrxml)
|
||||||
|
state["current_jrxml"] = jrxml
|
||||||
|
except Exception:
|
||||||
|
pass # 规范化失败不影响后续流程
|
||||||
|
|
||||||
result = validate_jrxml(jrxml)
|
result = validate_jrxml(jrxml)
|
||||||
state["status"] = "pass" if result.get("valid") else "fail"
|
state["status"] = "pass" if result.get("valid") else "fail"
|
||||||
state["error_msg"] = result.get("error", "")
|
state["error_msg"] = result.get("error", "")
|
||||||
@@ -923,6 +931,20 @@ def finalize(state: AgentState) -> Dict:
|
|||||||
# 验证未通过:不覆盖 final_jrxml,保留上一次成功的版本
|
# 验证未通过:不覆盖 final_jrxml,保留上一次成功的版本
|
||||||
retries = state.get("retry_count", 0)
|
retries = state.get("retry_count", 0)
|
||||||
error_msg = state.get("error_msg", "未知错误")
|
error_msg = state.get("error_msg", "未知错误")
|
||||||
|
# 保存失败版本到 jrxml_versions(用户可以选择下载)
|
||||||
|
if jrxml.strip():
|
||||||
|
versions = state.get("jrxml_versions", [])
|
||||||
|
if not isinstance(versions, list):
|
||||||
|
versions = []
|
||||||
|
versions.append({
|
||||||
|
"ts": _now_iso(),
|
||||||
|
"jrxml": jrxml,
|
||||||
|
"intent": state.get("intent", ""),
|
||||||
|
"label": f"失败版本 (第{retries}次重试)",
|
||||||
|
"status": "fail",
|
||||||
|
"error_msg": error_msg,
|
||||||
|
})
|
||||||
|
state["jrxml_versions"] = versions
|
||||||
# 记录失败上下文,下次用户输入时自动注入
|
# 记录失败上下文,下次用户输入时自动注入
|
||||||
state["pending_failure_context"] = {
|
state["pending_failure_context"] = {
|
||||||
"error_msg": error_msg,
|
"error_msg": error_msg,
|
||||||
@@ -934,8 +956,8 @@ def finalize(state: AgentState) -> Dict:
|
|||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"content": (
|
"content": (
|
||||||
f"❌ 经过 {retries} 次重试后仍无法生成有效的 JRXML。\n"
|
f"❌ 经过 {retries} 次重试后仍无法生成有效的 JRXML。\n"
|
||||||
f"错误: {error_msg}\n"
|
f"错误: {error_msg}\n\n"
|
||||||
f"请描述您想要的修改,系统会自动加载失败上下文继续修复。"
|
f"您可以:\n1. 继续描述修改要求,系统将自动重试修复\n2. 点击下载按钮获取当前版本(虽未通过 XSD 验证,但可能可在 Studio 中手动修复)"
|
||||||
),
|
),
|
||||||
})
|
})
|
||||||
return state
|
return state
|
||||||
|
|||||||
@@ -181,6 +181,13 @@ def _run_graph_sync(agent_state: AgentState, event_q: queue.Queue):
|
|||||||
for node_state in data.values():
|
for node_state in data.values():
|
||||||
if isinstance(node_state, dict):
|
if isinstance(node_state, dict):
|
||||||
agent_state.update(node_state)
|
agent_state.update(node_state)
|
||||||
|
# 在 graph 完成后立即保存 session,防止 SSE 流中断导致数据丢失
|
||||||
|
sid = agent_state.get("session_id", "")
|
||||||
|
if sid:
|
||||||
|
try:
|
||||||
|
save_session(sid, agent_state)
|
||||||
|
except Exception:
|
||||||
|
pass # 静默失败,SSE 流中还有一次保存机会
|
||||||
event_q.put(("done", {"reason": "graph_completed"}))
|
event_q.put(("done", {"reason": "graph_completed"}))
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
event_q.put(("error", {
|
event_q.put(("error", {
|
||||||
@@ -218,6 +225,8 @@ async def _sse_generator(agent_state: AgentState, session_id: str = "") -> str:
|
|||||||
total_ms = round((time.time() - t_start) * 1000)
|
total_ms = round((time.time() - t_start) * 1000)
|
||||||
if session_id:
|
if session_id:
|
||||||
save_session(session_id, agent_state)
|
save_session(session_id, agent_state)
|
||||||
|
versions = agent_state.get("jrxml_versions", [])
|
||||||
|
last_ver = versions[-1] if versions else {}
|
||||||
yield _sse_line("agent_complete", {
|
yield _sse_line("agent_complete", {
|
||||||
"reason": "done",
|
"reason": "done",
|
||||||
"intent": agent_state.get("intent", ""),
|
"intent": agent_state.get("intent", ""),
|
||||||
@@ -228,6 +237,9 @@ async def _sse_generator(agent_state: AgentState, session_id: str = "") -> str:
|
|||||||
"retry_count": agent_state.get("retry_count", 0),
|
"retry_count": agent_state.get("retry_count", 0),
|
||||||
"total_duration_ms": total_ms,
|
"total_duration_ms": total_ms,
|
||||||
"ocr_extraction_result": agent_state.get("ocr_extraction_result", {}),
|
"ocr_extraction_result": agent_state.get("ocr_extraction_result", {}),
|
||||||
|
"versions": len(versions),
|
||||||
|
"has_failed_version": last_ver.get("status") == "fail" if last_ver else False,
|
||||||
|
"failed_version_index": len(versions) - 1 if last_ver.get("status") == "fail" else -1,
|
||||||
})
|
})
|
||||||
await future
|
await future
|
||||||
return
|
return
|
||||||
@@ -532,7 +544,17 @@ async def chat(session_id: str, payload: dict):
|
|||||||
|
|
||||||
# ── 返回 SSE 流 ──
|
# ── 返回 SSE 流 ──
|
||||||
async def stream_and_save():
|
async def stream_and_save():
|
||||||
final_state = None
|
# 如果上传了附件,先发送处理状态
|
||||||
|
if file_ids:
|
||||||
|
yield _sse_line("node_start", {
|
||||||
|
"node": "process_attachments",
|
||||||
|
"label": "正在处理附件",
|
||||||
|
})
|
||||||
|
yield _sse_line("node_complete", {
|
||||||
|
"node": "process_attachments",
|
||||||
|
"label": "正在处理附件",
|
||||||
|
"detail": f"已解析 {len(file_ids)} 个文件",
|
||||||
|
})
|
||||||
async for sse_chunk in _sse_generator(agent_state, session_id):
|
async for sse_chunk in _sse_generator(agent_state, session_id):
|
||||||
yield sse_chunk
|
yield sse_chunk
|
||||||
|
|
||||||
@@ -622,4 +644,4 @@ async def download_file(file_id: str):
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import uvicorn
|
import uvicorn
|
||||||
port = int(os.getenv("API_PORT", "8000"))
|
port = int(os.getenv("API_PORT", "8000"))
|
||||||
uvicorn.run("api_server:app", host="0.0.0.0", port=port, reload=True)
|
uvicorn.run("api_server:app", host="0.0.0.0", port=port, reload=False)
|
||||||
@@ -17,13 +17,16 @@ try:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
import streamlit.components.v1 as components
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
load_dotenv()
|
load_dotenv(override=True)
|
||||||
|
|
||||||
from agent.graph import build_graph, create_initial_state
|
from agent.graph import build_graph, create_initial_state
|
||||||
from backend.session import (
|
from backend.session import (
|
||||||
@@ -109,86 +112,6 @@ def _render_jrxml(jrxml: str, max_lines: int = 30):
|
|||||||
st.code(preview, language="xml")
|
st.code(preview, language="xml")
|
||||||
|
|
||||||
|
|
||||||
# ---- 共享文件上传处理 ----
|
|
||||||
def _process_uploaded_file(uploaded_file, suffix: str) -> dict:
|
|
||||||
"""处理单个上传文件:保存临时文件、解析、布局分析。
|
|
||||||
|
|
||||||
返回: {"name": str, "text": str, "type": str, "tmp_path": str|None}
|
|
||||||
"""
|
|
||||||
import tempfile
|
|
||||||
from backend.file_parser import parse_file
|
|
||||||
from backend.layout_analyzer import analyze_layout
|
|
||||||
|
|
||||||
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
|
|
||||||
tmp.write(uploaded_file.getvalue())
|
|
||||||
tmp_path = tmp.name
|
|
||||||
|
|
||||||
result = parse_file(tmp_path, suffix)
|
|
||||||
parsed_text = result["text"]
|
|
||||||
parsed_type = result["file_type"]
|
|
||||||
|
|
||||||
# 对图片/PDF 进行 A4 模板布局分析
|
|
||||||
if suffix in (".png", ".jpg", ".jpeg", ".bmp", ".webp", ".pdf"):
|
|
||||||
layout = analyze_layout(tmp_path)
|
|
||||||
tt = layout.get("template_type", "unknown")
|
|
||||||
current_jrxml = st.session_state.agent_state.get("current_jrxml", "")
|
|
||||||
|
|
||||||
if tt == "full_a4":
|
|
||||||
parsed_text = layout["description"]
|
|
||||||
parsed_type = "a4_template"
|
|
||||||
# 存储布局 schema 供分层精确生成使用
|
|
||||||
from backend.layout_analyzer import extract_layout_schema
|
|
||||||
schema = extract_layout_schema(layout)
|
|
||||||
st.session_state.agent_state["layout_schema"] = schema
|
|
||||||
st.session_state.agent_state["ocr_elements"] = layout.get("rows", [])
|
|
||||||
elif tt == "partial_rows":
|
|
||||||
parsed_type = "a4_partial"
|
|
||||||
if current_jrxml.strip():
|
|
||||||
from backend.layout_analyzer import match_rows_to_jrxml
|
|
||||||
match = match_rows_to_jrxml(layout, current_jrxml)
|
|
||||||
parsed_text = (
|
|
||||||
f"[行片段修改] 上传图片包含 {layout['total_rows']} 行,"
|
|
||||||
f"视为 A4 报表的一部分。\n\n"
|
|
||||||
f"{match['description']}\n\n"
|
|
||||||
f"--- 行结构 ---\n{layout['description']}"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
parsed_text = layout["description"]
|
|
||||||
else:
|
|
||||||
has_ocr = result.get("method") not in ("metadata_only", None)
|
|
||||||
img_w, img_h = layout["image_size"]
|
|
||||||
ratio = layout["aspect_ratio"]
|
|
||||||
if has_ocr:
|
|
||||||
parsed_text = (
|
|
||||||
f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}。"
|
|
||||||
f"未检测到 A4 报表结构,图片将被视为参考样式。\n"
|
|
||||||
f"请根据用户的文字描述生成报表。"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
parsed_text = (
|
|
||||||
f"[图片上传] 尺寸 {img_w}x{img_h}px, 比例 {ratio}。\n"
|
|
||||||
f"⚠ OCR 引擎未安装,无法识别图片中的文字内容。\n"
|
|
||||||
f"请严格根据用户的文字描述来推断图片中的报表需求。\n"
|
|
||||||
f"(提示:如需图片文字识别,请运行 pip install paddleocr)"
|
|
||||||
)
|
|
||||||
parsed_type = "image_reference"
|
|
||||||
|
|
||||||
elif suffix in (".pdf", ".docx", ".xlsx", ".xls", ".doc"):
|
|
||||||
parsed_type = suffix.lstrip(".")
|
|
||||||
|
|
||||||
keep_temp = (
|
|
||||||
suffix in (".png", ".jpg", ".jpeg", ".bmp", ".webp")
|
|
||||||
and result.get("method") not in ("metadata_only", None)
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"name": uploaded_file.name,
|
|
||||||
"text": parsed_text,
|
|
||||||
"type": parsed_type,
|
|
||||||
"tmp_path": tmp_path if keep_temp else None,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# ---- URL 参数 ----
|
# ---- URL 参数 ----
|
||||||
query_params = st.query_params
|
query_params = st.query_params
|
||||||
url_session_id = query_params.get("session_id", "")
|
url_session_id = query_params.get("session_id", "")
|
||||||
@@ -344,14 +267,6 @@ def run_agent(user_input: str):
|
|||||||
if stream_active:
|
if stream_active:
|
||||||
streaming_placeholder.empty()
|
streaming_placeholder.empty()
|
||||||
|
|
||||||
# 清理已处理的临时文件
|
|
||||||
for p in st.session_state.get("uploaded_temp_paths", []):
|
|
||||||
try:
|
|
||||||
Path(p).unlink(missing_ok=True)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
st.session_state.uploaded_temp_paths = []
|
|
||||||
|
|
||||||
# ---- 总结卡片 ----
|
# ---- 总结卡片 ----
|
||||||
# 注:node_state 只含变更字段,用 agent_state(被所有节点就地修改)获取完整状态
|
# 注:node_state 只含变更字段,用 agent_state(被所有节点就地修改)获取完整状态
|
||||||
final_state = agent_state
|
final_state = agent_state
|
||||||
@@ -557,62 +472,12 @@ with st.sidebar:
|
|||||||
run_agent("重新来,清空当前报表")
|
run_agent("重新来,清空当前报表")
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
st.divider()
|
|
||||||
st.markdown("### 上传文件")
|
|
||||||
st.caption("支持图片 (OCR)、PDF、Word、文本文件。内容将附加到您的下一条消息中。")
|
|
||||||
|
|
||||||
if "uploaded_files" not in st.session_state:
|
|
||||||
st.session_state.uploaded_files = [] # [{name, text, type}]
|
|
||||||
|
|
||||||
if "uploaded_temp_paths" not in st.session_state:
|
|
||||||
st.session_state.uploaded_temp_paths = [] # 待清理的临时文件路径
|
|
||||||
|
|
||||||
uploaded = st.file_uploader(
|
|
||||||
"选择文件",
|
|
||||||
type=["png", "jpg", "jpeg", "bmp", "webp", "pdf", "docx", "xlsx", "xls", "doc",
|
|
||||||
"txt", "csv", "json", "xml"],
|
|
||||||
accept_multiple_files=True,
|
|
||||||
key="file_uploader",
|
|
||||||
label_visibility="collapsed",
|
|
||||||
)
|
|
||||||
|
|
||||||
if uploaded:
|
|
||||||
for uf in uploaded:
|
|
||||||
# 去重
|
|
||||||
if any(f["name"] == uf.name for f in st.session_state.uploaded_files):
|
|
||||||
continue
|
|
||||||
|
|
||||||
suffix = Path(uf.name).suffix.lower()
|
|
||||||
result = _process_uploaded_file(uf, suffix)
|
|
||||||
|
|
||||||
if result["text"]:
|
|
||||||
st.session_state.uploaded_files.append({
|
|
||||||
"name": result["name"],
|
|
||||||
"text": result["text"],
|
|
||||||
"type": result["type"],
|
|
||||||
})
|
|
||||||
|
|
||||||
tmp_path = result["tmp_path"]
|
|
||||||
if tmp_path:
|
|
||||||
st.session_state.agent_state["uploaded_file_path"] = tmp_path
|
|
||||||
st.session_state.uploaded_temp_paths.append(tmp_path)
|
|
||||||
|
|
||||||
if st.session_state.uploaded_files:
|
|
||||||
for i, f in enumerate(st.session_state.uploaded_files):
|
|
||||||
cols = st.columns([5, 1])
|
|
||||||
with cols[0]:
|
|
||||||
st.caption(f"📎 {f['name']} ({f['type']}, {len(f['text'])} 字符)")
|
|
||||||
with cols[1]:
|
|
||||||
if st.button("✕", key=f"rm_uf_{i}", help="移除"):
|
|
||||||
st.session_state.uploaded_files.pop(i)
|
|
||||||
st.rerun()
|
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
st.markdown("### 配置")
|
st.markdown("### 配置")
|
||||||
llm_backend = os.getenv("LLM_BACKEND", "cloud")
|
llm_backend = os.getenv("LLM_BACKEND", "cloud")
|
||||||
llm_model = os.getenv("LLM_MODEL", os.getenv("LOCAL_LLM_MODEL", "gpt-4o"))
|
llm_model = os.getenv("LLM_MODEL", os.getenv("LOCAL_LLM_MODEL", "gpt-4o"))
|
||||||
st.caption(f"大语言模型: {llm_backend} / {llm_model}")
|
st.caption(f"大语言模型: {llm_backend} / {llm_model}")
|
||||||
st.caption(f"最大重试次数: {os.getenv('MAX_RETRY', '3')}")
|
st.caption(f"最大重试次数: {os.getenv('MAX_RETRY', '5')}")
|
||||||
st.caption(f"验证服务: {os.getenv('VALIDATION_SERVICE_URL', 'http://localhost:8001/validate')}")
|
st.caption(f"验证服务: {os.getenv('VALIDATION_SERVICE_URL', 'http://localhost:8001/validate')}")
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
@@ -666,106 +531,396 @@ for msg in st.session_state.messages:
|
|||||||
else:
|
else:
|
||||||
st.markdown(msg["content"])
|
st.markdown(msg["content"])
|
||||||
|
|
||||||
# ---- 聊天输入(支持粘贴/拖拽文件) ----
|
# ---- 统一聊天输入组件 ----
|
||||||
from st_multimodal_chatinput import multimodal_chatinput
|
UNIFIED_CHAT_HTML = r"""
|
||||||
import base64
|
<!DOCTYPE html>
|
||||||
import io
|
<html lang="zh-CN">
|
||||||
from pathlib import Path as _Path
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
# MIME type → 文件扩展名映射(用于剪贴板粘贴无扩展名的文件)
|
<style>
|
||||||
MIME_TO_EXT = {
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
"image/png": ".png",
|
body {
|
||||||
"image/jpeg": ".jpg",
|
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
|
||||||
"image/bmp": ".bmp",
|
background: transparent;
|
||||||
"image/webp": ".webp",
|
padding: 4px 0;
|
||||||
"application/pdf": ".pdf",
|
}
|
||||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
|
.chat-container {
|
||||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
position: relative;
|
||||||
"application/vnd.ms-excel": ".xls",
|
border: 1px solid #d1d5db;
|
||||||
"application/msword": ".doc",
|
border-radius: 12px;
|
||||||
"text/plain": ".txt",
|
padding: 8px 12px;
|
||||||
"text/csv": ".csv",
|
background: #ffffff;
|
||||||
"application/json": ".json",
|
transition: border-color 0.2s, box-shadow 0.2s;
|
||||||
"text/xml": ".xml",
|
}
|
||||||
|
.chat-container:focus-within {
|
||||||
|
border-color: #3b82f6;
|
||||||
|
box-shadow: 0 0 0 2px rgba(59,130,246,0.15);
|
||||||
|
}
|
||||||
|
.chat-container.drag-active {
|
||||||
|
border-color: #3b82f6;
|
||||||
|
background: rgba(59,130,246,0.04);
|
||||||
|
}
|
||||||
|
.file-chips {
|
||||||
|
display: flex;
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 6px;
|
||||||
|
margin-bottom: 6px;
|
||||||
|
}
|
||||||
|
.file-chips:empty { display: none; }
|
||||||
|
.file-chip {
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 4px;
|
||||||
|
padding: 2px 8px;
|
||||||
|
background: #f3f4f6;
|
||||||
|
border-radius: 14px;
|
||||||
|
font-size: 12px;
|
||||||
|
color: #374151;
|
||||||
|
max-width: 200px;
|
||||||
|
}
|
||||||
|
.file-chip .chip-icon { font-size: 13px; }
|
||||||
|
.file-chip .chip-name {
|
||||||
|
overflow: hidden;
|
||||||
|
text-overflow: ellipsis;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
.file-chip .chip-remove {
|
||||||
|
border: none;
|
||||||
|
background: none;
|
||||||
|
cursor: pointer;
|
||||||
|
color: #9ca3af;
|
||||||
|
font-size: 14px;
|
||||||
|
line-height: 1;
|
||||||
|
padding: 0 2px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
.file-chip .chip-remove:hover { color: #ef4444; }
|
||||||
|
.input-row {
|
||||||
|
display: flex;
|
||||||
|
align-items: flex-end;
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
.attach-btn {
|
||||||
|
border: none;
|
||||||
|
background: none;
|
||||||
|
cursor: pointer;
|
||||||
|
padding: 4px 6px;
|
||||||
|
font-size: 20px;
|
||||||
|
line-height: 1;
|
||||||
|
color: #6b7280;
|
||||||
|
border-radius: 6px;
|
||||||
|
transition: background 0.15s, color 0.15s;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
.attach-btn:hover { background: #f3f4f6; color: #374151; }
|
||||||
|
textarea {
|
||||||
|
flex: 1;
|
||||||
|
border: none;
|
||||||
|
outline: none;
|
||||||
|
resize: none;
|
||||||
|
font-size: 15px;
|
||||||
|
line-height: 1.5;
|
||||||
|
font-family: inherit;
|
||||||
|
color: #111827;
|
||||||
|
background: transparent;
|
||||||
|
padding: 4px 0;
|
||||||
|
min-height: 24px;
|
||||||
|
max-height: 120px;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
textarea::placeholder { color: #9ca3af; }
|
||||||
|
.send-btn {
|
||||||
|
border: none;
|
||||||
|
cursor: pointer;
|
||||||
|
padding: 4px 10px;
|
||||||
|
font-size: 16px;
|
||||||
|
background: #e5e7eb;
|
||||||
|
color: #9ca3af;
|
||||||
|
border-radius: 8px;
|
||||||
|
transition: all 0.15s;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
.send-btn.active { background: #3b82f6; color: #fff; }
|
||||||
|
.send-btn.active:hover { background: #2563eb; }
|
||||||
|
.send-btn:disabled { opacity: 0.5; cursor: default; }
|
||||||
|
.error-toast {
|
||||||
|
position: fixed;
|
||||||
|
bottom: 12px;
|
||||||
|
left: 50%;
|
||||||
|
transform: translateX(-50%);
|
||||||
|
background: #ef4444;
|
||||||
|
color: #fff;
|
||||||
|
padding: 6px 16px;
|
||||||
|
border-radius: 8px;
|
||||||
|
font-size: 13px;
|
||||||
|
z-index: 9999;
|
||||||
|
animation: toastOut 2.5s forwards;
|
||||||
|
pointer-events: none;
|
||||||
|
}
|
||||||
|
@keyframes toastOut {
|
||||||
|
0%, 70% { opacity: 1; }
|
||||||
|
100% { opacity: 0; }
|
||||||
}
|
}
|
||||||
|
|
||||||
chat_result = multimodal_chatinput()
|
@media (prefers-color-scheme: dark) {
|
||||||
if chat_result:
|
.chat-container { background: #1f2937; border-color: #374151; }
|
||||||
prompt = (chat_result.get("textInput") or "").strip()
|
.chat-container:focus-within { border-color: #3b82f6; }
|
||||||
chat_files = chat_result.get("uploadedFiles") or []
|
.file-chip { background: #374151; color: #e5e7eb; }
|
||||||
|
.file-chip .chip-remove { color: #6b7280; }
|
||||||
|
.attach-btn { color: #9ca3af; }
|
||||||
|
.attach-btn:hover { background: #374151; color: #e5e7eb; }
|
||||||
|
textarea { color: #f9fafb; }
|
||||||
|
textarea::placeholder { color: #6b7280; }
|
||||||
|
.send-btn { background: #374151; }
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="chat-container" id="container">
|
||||||
|
<div class="file-chips" id="chips"></div>
|
||||||
|
<div class="input-row">
|
||||||
|
<button class="attach-btn" id="attachBtn" title="附加文件">📎</button>
|
||||||
|
<textarea id="textInput" placeholder="描述您的报表需求..." rows="1"></textarea>
|
||||||
|
<button class="send-btn" id="sendBtn" title="发送">➤</button>
|
||||||
|
</div>
|
||||||
|
<input type="file" id="fileInput" multiple hidden
|
||||||
|
accept=".png,.jpg,.jpeg,.bmp,.webp,.pdf,.docx,.xlsx,.xls,.doc,.txt">
|
||||||
|
</div>
|
||||||
|
<script>
|
||||||
|
const container = document.getElementById('container');
|
||||||
|
const chipsEl = document.getElementById('chips');
|
||||||
|
const textInput = document.getElementById('textInput');
|
||||||
|
const sendBtn = document.getElementById('sendBtn');
|
||||||
|
const attachBtn = document.getElementById('attachBtn');
|
||||||
|
const fileInput = document.getElementById('fileInput');
|
||||||
|
|
||||||
# 处理聊天中上传/粘贴的文件
|
let attachedFiles = [];
|
||||||
uploaded_texts = []
|
const MAX_FILES = 10;
|
||||||
uploaded_files_info = []
|
const MAX_SIZE = 20 * 1024 * 1024;
|
||||||
|
|
||||||
# 先收集侧边栏已上传的文件
|
function getIcon(type) {
|
||||||
if st.session_state.get("uploaded_files"):
|
if (type.startsWith('image/')) return '🖼';
|
||||||
for f in st.session_state.uploaded_files:
|
if (type.includes('pdf')) return '📄';
|
||||||
uploaded_texts.append(f"[上传文件: {f['name']}]\n{f['text']}")
|
if (type.includes('document')) return '📝';
|
||||||
uploaded_files_info.append({"name": f["name"], "type": f["type"], "length": len(f["text"])})
|
if (type.includes('spreadsheet') || type.includes('excel')) return '📊';
|
||||||
st.session_state.uploaded_files = []
|
return '📎';
|
||||||
|
}
|
||||||
|
|
||||||
# 处理聊天中的文件
|
function updateSendBtn() {
|
||||||
class _Base64File:
|
var canSend = textInput.value.trim() || attachedFiles.length > 0;
|
||||||
"""包装 base64 文件为类 UploadedFile 接口。"""
|
sendBtn.classList.toggle('active', canSend);
|
||||||
def __init__(self, name, data_bytes):
|
}
|
||||||
self.name = name
|
|
||||||
self._data = data_bytes
|
|
||||||
|
|
||||||
def getvalue(self):
|
function renderChips() {
|
||||||
return self._data
|
chipsEl.innerHTML = '';
|
||||||
|
attachedFiles.forEach(function(f, i) {
|
||||||
|
var chip = document.createElement('span');
|
||||||
|
chip.className = 'file-chip';
|
||||||
|
var name = f.name.length > 16 ? f.name.slice(0,14)+'..' : f.name;
|
||||||
|
chip.innerHTML = '<span class="chip-icon">'+getIcon(f.type)+'</span>' +
|
||||||
|
'<span class="chip-name">'+name+'</span>' +
|
||||||
|
'<button class="chip-remove">×</button>';
|
||||||
|
chip.querySelector('.chip-remove').onclick = (function(idx) {
|
||||||
|
return function() {
|
||||||
|
attachedFiles.splice(idx, 1);
|
||||||
|
renderChips();
|
||||||
|
updateSendBtn();
|
||||||
|
};
|
||||||
|
})(i);
|
||||||
|
chipsEl.appendChild(chip);
|
||||||
|
});
|
||||||
|
updateSendBtn();
|
||||||
|
}
|
||||||
|
|
||||||
for cf in chat_files:
|
function addFiles(fileList) {
|
||||||
name = cf.get("name", "clipboard_file")
|
for (var i = 0; i < fileList.length; i++) {
|
||||||
mime = cf.get("type", "")
|
var file = fileList[i];
|
||||||
content_b64 = cf.get("content", "")
|
if (attachedFiles.length >= MAX_FILES) { showToast('最多附加 '+MAX_FILES+' 个文件'); break; }
|
||||||
if not content_b64:
|
if (file.size > MAX_SIZE) { showToast(file.name+' 超过 20MB 限制'); continue; }
|
||||||
continue
|
if (attachedFiles.some(function(f) { return f.name === file.name && f.size === file.size; })) continue;
|
||||||
|
attachedFiles.push({name: file.name, type: file.type, file: file});
|
||||||
|
}
|
||||||
|
renderChips();
|
||||||
|
}
|
||||||
|
|
||||||
|
function showToast(msg) {
|
||||||
|
var t = document.createElement('div');
|
||||||
|
t.className = 'error-toast';
|
||||||
|
t.textContent = msg;
|
||||||
|
document.body.appendChild(t);
|
||||||
|
setTimeout(function() { t.remove(); }, 2600);
|
||||||
|
}
|
||||||
|
|
||||||
|
function readFile(file) {
|
||||||
|
return new Promise(function(resolve, reject) {
|
||||||
|
var reader = new FileReader();
|
||||||
|
reader.onload = function() { resolve(reader.result); };
|
||||||
|
reader.onerror = reject;
|
||||||
|
reader.readAsDataURL(file);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async function handleSend() {
|
||||||
|
var text = textInput.value.trim();
|
||||||
|
if (!text && attachedFiles.length === 0) return;
|
||||||
|
|
||||||
|
sendBtn.disabled = true;
|
||||||
|
var files = [];
|
||||||
|
for (var i = 0; i < attachedFiles.length; i++) {
|
||||||
|
var f = attachedFiles[i];
|
||||||
|
try {
|
||||||
|
var dataUrl = await readFile(f.file);
|
||||||
|
files.push({name: f.name, type: f.type, data: dataUrl, size: f.file.size});
|
||||||
|
} catch(e) {
|
||||||
|
showToast(f.name+' 读取失败');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Streamlit.setComponentValue({text: text, files: files});
|
||||||
|
|
||||||
|
textInput.value = '';
|
||||||
|
attachedFiles = [];
|
||||||
|
renderChips();
|
||||||
|
sendBtn.disabled = false;
|
||||||
|
textInput.style.height = 'auto';
|
||||||
|
}
|
||||||
|
|
||||||
|
attachBtn.onclick = function() { fileInput.click(); };
|
||||||
|
fileInput.onchange = function() { addFiles(fileInput.files); fileInput.value = ''; };
|
||||||
|
|
||||||
|
textInput.oninput = function() {
|
||||||
|
updateSendBtn();
|
||||||
|
textInput.style.height = 'auto';
|
||||||
|
textInput.style.height = Math.min(textInput.scrollHeight, 120) + 'px';
|
||||||
|
};
|
||||||
|
|
||||||
|
textInput.onkeydown = function(e) {
|
||||||
|
if (e.key === 'Enter' && !e.shiftKey) {
|
||||||
|
e.preventDefault();
|
||||||
|
handleSend();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
sendBtn.onclick = handleSend;
|
||||||
|
|
||||||
|
document.addEventListener('paste', function(e) {
|
||||||
|
var items = e.clipboardData && e.clipboardData.items;
|
||||||
|
if (!items) return;
|
||||||
|
var files = [];
|
||||||
|
for (var i = 0; i < items.length; i++) {
|
||||||
|
if (items[i].kind === 'file') files.push(items[i].getAsFile());
|
||||||
|
}
|
||||||
|
if (files.length) { e.preventDefault(); addFiles(files); }
|
||||||
|
});
|
||||||
|
|
||||||
|
var containerDiv = document.getElementById('container');
|
||||||
|
containerDiv.addEventListener('dragover', function(e) {
|
||||||
|
e.preventDefault();
|
||||||
|
containerDiv.classList.add('drag-active');
|
||||||
|
});
|
||||||
|
containerDiv.addEventListener('dragleave', function() {
|
||||||
|
containerDiv.classList.remove('drag-active');
|
||||||
|
});
|
||||||
|
containerDiv.addEventListener('drop', function(e) {
|
||||||
|
e.preventDefault();
|
||||||
|
containerDiv.classList.remove('drag-active');
|
||||||
|
addFiles(e.dataTransfer.files);
|
||||||
|
});
|
||||||
|
|
||||||
|
updateSendBtn();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
chat_result = components.html(UNIFIED_CHAT_HTML, height=180)
|
||||||
|
|
||||||
|
if chat_result and isinstance(chat_result, dict):
|
||||||
|
prompt = chat_result.get("text", "")
|
||||||
|
files = chat_result.get("files", [])
|
||||||
|
|
||||||
|
from backend.file_parser import parse_file
|
||||||
|
from backend.layout_analyzer import analyze_layout, extract_layout_schema
|
||||||
|
|
||||||
|
file_texts = []
|
||||||
|
attached_info = []
|
||||||
|
first_image_path = None
|
||||||
|
temp_paths = []
|
||||||
|
|
||||||
|
for f in files:
|
||||||
|
header, b64data = f.get("data", ",").split(",", 1)
|
||||||
|
raw = base64.b64decode(b64data)
|
||||||
|
|
||||||
|
mime = f.get("type", "")
|
||||||
|
mime_to_suffix = {
|
||||||
|
"image/png": ".png", "image/jpeg": ".jpg", "image/bmp": ".bmp",
|
||||||
|
"image/webp": ".webp", "application/pdf": ".pdf",
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
|
||||||
|
"application/vnd.ms-excel": ".xls", "application/msword": ".doc",
|
||||||
|
"text/plain": ".txt",
|
||||||
|
}
|
||||||
|
suffix = mime_to_suffix.get(mime, Path(f["name"]).suffix.lower())
|
||||||
|
|
||||||
|
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
|
||||||
|
tmp.write(raw)
|
||||||
|
tmp_path = tmp.name
|
||||||
|
temp_paths.append(tmp_path)
|
||||||
|
|
||||||
|
result = parse_file(tmp_path, suffix)
|
||||||
|
text = result["text"]
|
||||||
|
file_type = result["file_type"]
|
||||||
|
|
||||||
|
img_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".webp")
|
||||||
|
if suffix in img_suffixes and result.get("method") not in ("metadata_only", None):
|
||||||
try:
|
try:
|
||||||
data = base64.b64decode(content_b64)
|
layout = analyze_layout(tmp_path)
|
||||||
|
tt = layout.get("template_type", "unknown")
|
||||||
|
if tt == "full_a4":
|
||||||
|
text = layout["description"]
|
||||||
|
file_type = "a4_template"
|
||||||
|
schema = extract_layout_schema(layout)
|
||||||
|
st.session_state.agent_state["layout_schema"] = schema
|
||||||
|
st.session_state.agent_state["ocr_elements"] = layout.get("rows", [])
|
||||||
|
elif tt == "partial_rows":
|
||||||
|
file_type = "a4_partial"
|
||||||
except Exception:
|
except Exception:
|
||||||
continue
|
pass
|
||||||
|
|
||||||
suffix = _Path(name).suffix.lower()
|
file_texts.append(f"[附加文件: {f['name']} ({file_type})]\n{text}")
|
||||||
if not suffix and mime in MIME_TO_EXT:
|
attached_info.append({"name": f["name"], "type": file_type, "length": len(text)})
|
||||||
suffix = MIME_TO_EXT[mime]
|
|
||||||
name = f"{_Path(name).stem}{suffix}"
|
|
||||||
|
|
||||||
wrapper = _Base64File(name, data)
|
if not first_image_path and file_type in ("image", "a4_template", "a4_partial"):
|
||||||
result = _process_uploaded_file(wrapper, suffix)
|
first_image_path = tmp_path
|
||||||
|
|
||||||
if result["text"]:
|
if file_texts:
|
||||||
uploaded_texts.append(f"[上传文件: {result['name']}]\n{result['text']}")
|
full_prompt = "\n\n".join(file_texts) + "\n\n---\n用户需求:\n" + prompt
|
||||||
uploaded_files_info.append({"name": result["name"], "type": result["type"], "length": len(result["text"])})
|
|
||||||
|
|
||||||
tmp_path = result["tmp_path"]
|
|
||||||
if tmp_path:
|
|
||||||
st.session_state.agent_state["uploaded_file_path"] = tmp_path
|
|
||||||
st.session_state.uploaded_temp_paths.append(tmp_path)
|
|
||||||
|
|
||||||
if prompt or uploaded_texts:
|
|
||||||
if uploaded_texts:
|
|
||||||
full_prompt = "\n\n".join(uploaded_texts)
|
|
||||||
if prompt:
|
|
||||||
full_prompt += "\n\n---\n用户需求:\n" + prompt
|
|
||||||
else:
|
else:
|
||||||
full_prompt = prompt
|
full_prompt = prompt
|
||||||
|
|
||||||
displayed_prompt = prompt or "(已上传文件,未输入文字)"
|
if first_image_path:
|
||||||
|
st.session_state.agent_state["uploaded_file_path"] = first_image_path
|
||||||
|
|
||||||
_app_log.info(
|
_app_log.info(
|
||||||
"收到用户输入",
|
"收到用户输入",
|
||||||
extra={
|
extra={
|
||||||
"session_id": current_session_id,
|
"session_id": current_session_id,
|
||||||
"prompt_preview": displayed_prompt[:200],
|
"prompt_preview": prompt[:200],
|
||||||
"prompt_length": len(full_prompt),
|
"prompt_length": len(prompt),
|
||||||
"has_uploaded_files": bool(uploaded_files_info),
|
"has_uploaded_files": bool(attached_info),
|
||||||
"uploaded_files": uploaded_files_info,
|
"uploaded_files": attached_info,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
st.session_state.messages.append({"role": "user", "content": displayed_prompt})
|
st.session_state.messages.append({"role": "user", "content": prompt})
|
||||||
with st.chat_message("user"):
|
with st.chat_message("user"):
|
||||||
st.markdown(displayed_prompt)
|
st.markdown(prompt)
|
||||||
run_agent(full_prompt)
|
run_agent(full_prompt)
|
||||||
|
|
||||||
|
for p in temp_paths:
|
||||||
|
try:
|
||||||
|
Path(p).unlink(missing_ok=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|||||||
@@ -0,0 +1,201 @@
|
|||||||
|
"""
|
||||||
|
JRXML 元素自动排序 — 按 JasperReports XSD <xs:sequence> 要求重排子元素。
|
||||||
|
|
||||||
|
XSD 要求 jasperReport 子元素严格按以下顺序:
|
||||||
|
property, propertyExpression, import, template, reportFont,
|
||||||
|
style, subDataset, scriptlet, parameter, queryString, field,
|
||||||
|
sortField, variable, filterExpression, group, background, title,
|
||||||
|
pageHeader, columnHeader, detail, columnFooter, pageFooter,
|
||||||
|
lastPageFooter, summary, noData
|
||||||
|
|
||||||
|
以及 band 内部的 reportElement 必须在其他元素之前。
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# Rank of each <jasperReport> child element: the value is the element's
# position in the sequence below, and a smaller value sorts earlier.
JASPERREPORT_ORDER = {
    name: index
    for index, name in enumerate((
        "property",
        "propertyExpression",
        "import",
        "template",
        "reportFont",
        "style",
        "subDataset",
        "scriptlet",
        "parameter",
        "queryString",
        "field",
        "sortField",
        "variable",
        "filterExpression",
        "group",
        "background",
        "title",
        "pageHeader",
        "columnHeader",
        "detail",
        "columnFooter",
        "pageFooter",
        "lastPageFooter",
        "summary",
        "noData",
    ))
}
|
||||||
|
|
||||||
|
# 带命名空间的标签映射(去掉 ns 前缀后匹配)
|
||||||
|
NS = "http://jasperreports.sourceforge.net/jasperreports"
|
||||||
|
|
||||||
|
|
||||||
|
def _tag_local(tag: str) -> str:
|
||||||
|
"""提取标签本地名(去掉命名空间前缀)。"""
|
||||||
|
return tag.split("}")[-1] if "}" in tag else tag
|
||||||
|
|
||||||
|
|
||||||
|
def _sort_key(elem: ET.Element) -> int:
    """Return the sort rank of *elem* from JASPERREPORT_ORDER.

    Elements whose local tag name is not listed get rank 999 so they sort
    after every known element.
    """
    rank = JASPERREPORT_ORDER.get(_tag_local(elem.tag))
    return 999 if rank is None else rank
|
||||||
|
|
||||||
|
|
||||||
|
def reorder_jrxml_elements(xml_string: str) -> str:
    """Reorder the child elements of a JRXML document.

    The document is parsed, its elements are reordered by
    ``_reorder_children`` and ``_reorder_bands``, and the tree is serialized
    back to text. ``_restore_formatting`` then re-applies the XML declaration
    and the queryString CDATA section that ElementTree drops.

    Args:
        xml_string: JRXML document text.

    Returns:
        The reordered document, or ``xml_string`` unchanged when it cannot be
        parsed as XML.
    """
    try:
        root = ET.fromstring(xml_string)
    except ET.ParseError:
        return xml_string  # not well-formed: hand the input back untouched

    # Without a registered prefix ElementTree serializes namespaced tags as
    # "ns0:jasperReport", "ns0:queryString", ... Registering the root's
    # namespace as the default keeps the tags unprefixed, as in the input.
    # NOTE: register_namespace updates a process-wide registry.
    if root.tag.startswith("{"):
        ET.register_namespace("", root.tag[1:].partition("}")[0])

    _reorder_children(root)
    _reorder_bands(root)

    serialized = ET.tostring(root, encoding="unicode")

    # Restore the XML declaration and CDATA that serialization discarded.
    return _restore_formatting(xml_string, serialized)
|
||||||
|
|
||||||
|
|
||||||
|
def _reorder_children(parent: ET.Element):
    """Sort the direct children of *parent* by their ``_sort_key`` rank.

    The sort is stable, so elements with the same rank (including all
    unlisted elements) keep their relative order. The whitespace that
    followed each child position is kept in place, so the document's
    indentation layout survives the reordering.

    The ordering is only applied to *parent* itself and, recursively, to any
    ``subDataset`` child. Applying it to every descendant would move
    elements whose names happen to appear in the table (for example a
    ``style`` nested after a ``conditionExpression``) ahead of their
    siblings.
    NOTE(review): assumes ``subDataset`` uses the same child order as
    ``jasperReport`` - confirm against the JasperReports XSD.

    Args:
        parent: Element whose children are reordered in place.
    """
    children = list(parent)
    if not children:
        return

    ordered = sorted(children, key=_sort_key)
    if ordered != children:
        # Tails hold the whitespace after each child; keep them positional
        # so line breaks and indentation stay where they were.
        tails = [child.tail for child in children]
        parent[:] = ordered
        for child, tail in zip(ordered, tails):
            child.tail = tail

    for child in ordered:
        if _tag_local(child.tag) == "subDataset":
            _reorder_children(child)
|
||||||
|
|
||||||
|
|
||||||
|
def _reorder_bands(root: ET.Element):
    """Run ``_ensure_reportelement_first`` on every ``band`` element under *root*."""
    # Tags are compared by local name so namespaced bands are matched too.
    bands = (node for node in root.iter() if _tag_local(node.tag) == "band")
    for band in bands:
        _ensure_reportelement_first(band)
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_reportelement_first(band: ET.Element):
    """Move the ``reportElement`` children of *band* ahead of its other children.

    Relative order inside each of the two groups is preserved. The
    whitespace that followed each child position is kept in place, so the
    indentation layout is unchanged. Nothing happens when *band* has no
    direct ``reportElement`` child.

    NOTE(review): in JRXML ``reportElement`` usually appears inside a band's
    elements (textField, staticText, ...) rather than directly under
    ``band`` - confirm this is the intended level.

    Args:
        band: Element whose children are reordered in place.
    """
    children = list(band)
    report_elements = [c for c in children if _tag_local(c.tag) == "reportElement"]
    if not report_elements:
        return

    other_elements = [c for c in children if _tag_local(c.tag) != "reportElement"]
    reordered = report_elements + other_elements
    if reordered == children:
        return  # already in the required order

    # Capture tails before moving anything; they are reassigned by position
    # so the text after the last child (the closing indent) stays last.
    tails = [child.tail for child in children]
    band[:] = reordered
    for child, tail in zip(reordered, tails):
        child.tail = tail
|
||||||
|
|
||||||
|
|
||||||
|
def _restore_formatting(original: str, reordered: str) -> str:
|
||||||
|
"""恢复 XML 声明和 CDATA 段。"""
|
||||||
|
# 保留原始声明
|
||||||
|
decl = ""
|
||||||
|
if original.strip().startswith("<?xml"):
|
||||||
|
m = re.match(r'<\?xml[^?]*\?>', original)
|
||||||
|
if m:
|
||||||
|
decl = m.group()
|
||||||
|
if decl and not reordered.strip().startswith("<?xml"):
|
||||||
|
reordered = decl + "\n" + reordered
|
||||||
|
|
||||||
|
# 恢复 CDATA(ET 会把 CDATA 转成普通文本)
|
||||||
|
# 从原始 XML 提取所有 CDATA 块
|
||||||
|
cdata_pattern = re.compile(r'<!\[CDATA\[(.*?)\]\]>', re.DOTALL)
|
||||||
|
cdata_blocks = cdata_pattern.findall(original)
|
||||||
|
|
||||||
|
if cdata_blocks:
|
||||||
|
# 在重排后的 XML 中,对应位置的文本用 CDATA 包裹
|
||||||
|
def _restore_cdata(match):
|
||||||
|
nonlocal cdata_blocks
|
||||||
|
text = match.group(1)
|
||||||
|
for cdata in cdata_blocks:
|
||||||
|
if cdata.strip() == text.strip():
|
||||||
|
return f"<![CDATA[{cdata}]]>"
|
||||||
|
return match.group(0)
|
||||||
|
|
||||||
|
# 替换已转义的文本为 CDATA
|
||||||
|
reordered = re.sub(
|
||||||
|
r'(<queryString[^>]*>)\s*(.*?)\s*(</queryString>)',
|
||||||
|
lambda m: m.group(1) + f"\n <![CDATA[{m.group(2).strip()}]]>\n " + m.group(3),
|
||||||
|
reordered,
|
||||||
|
flags=re.DOTALL
|
||||||
|
)
|
||||||
|
|
||||||
|
return reordered
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_jrxml(jrxml_text: str) -> str:
    """Normalize JRXML text by passing it through ``reorder_jrxml_elements``.

    Empty, ``None`` or whitespace-only input is returned as-is.
    """
    if jrxml_text and jrxml_text.strip():
        return reorder_jrxml_elements(jrxml_text)
    return jrxml_text
|
||||||
@@ -179,7 +179,8 @@ def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
|
|||||||
messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
|
messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
|
||||||
)
|
)
|
||||||
for block in resp.content:
|
for block in resp.content:
|
||||||
if block.type == "text":
|
block_type = getattr(block, "type", "")
|
||||||
|
if block_type == "text":
|
||||||
return type("Response", (), {"content": block.text})()
|
return type("Response", (), {"content": block.text})()
|
||||||
return type("Response", (), {"content": ""})()
|
return type("Response", (), {"content": ""})()
|
||||||
|
|
||||||
|
|||||||
@@ -90,6 +90,8 @@ def save_session(session_id: str, agent_state: dict, session_name: str = ""):
|
|||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
json.dump(data, tmp, ensure_ascii=False, indent=2)
|
json.dump(data, tmp, ensure_ascii=False, indent=2)
|
||||||
|
tmp.flush()
|
||||||
|
os.fsync(tmp.fileno())
|
||||||
tmp.close()
|
tmp.close()
|
||||||
os.replace(tmp.name, str(fp))
|
os.replace(tmp.name, str(fp))
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from dotenv import load_dotenv
|
|||||||
|
|
||||||
from backend.logger import get_logger
|
from backend.logger import get_logger
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv(override=True)
|
||||||
|
|
||||||
_val_log = get_logger("validation")
|
_val_log = get_logger("validation")
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,114 @@
|
|||||||
|
"""
|
||||||
|
JRXML Agent E2E test — Playwright automation.
|
||||||
|
Tests: page load, upload image, send message, wait for response.
|
||||||
|
Usage: python test_e2e.py
|
||||||
|
Prerequisites: Servers must be running (start.bat or with_server.py)
|
||||||
|
"""
|
||||||
|
import os, sys, time, base64, tempfile
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
# URL the browser navigates to in step 1 of run().
FRONTEND = "http://localhost:5173"
# Presumably the backend API base URL; not referenced by run() in this script.
API = "http://localhost:8000"
# Image uploaded in step 3 of run() (absolute, machine-specific Windows path).
TEST_IMAGE = r"D:\Idea Project\agent_jrxml\test_invoice_e2e.png"
|
||||||
|
|
||||||
|
def run() -> None:
    """Drive the frontend through one upload-and-generate round trip.

    Runs in headless Chromium.  Steps: load the page and check the sidebar,
    click the new-session button, upload ``TEST_IMAGE``, type a prompt and
    submit it, wait for an assistant message, then report on the download
    button and on any browser console errors.  Progress is printed to stdout
    and screenshots are written along the way.

    Only the sidebar and file-chip checks are hard assertions.  Any exception
    raised while waiting for / reading the response is caught, screenshotted
    and printed as a timeout warning, and the download-button and console
    checks only print their findings.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page(viewport={"width": 1280, "height": 900})

        # Capture console errors
        errors = []
        # Only console messages of type "error" are kept; others are ignored.
        page.on("console", lambda msg: errors.append(msg.text) if msg.type == "error" else None)

        # 1. Navigate and wait
        print("[1] Loading frontend...")
        page.goto(FRONTEND, timeout=15000)
        page.wait_for_load_state("networkidle")
        # Extra fixed 1 s pause after the network goes idle.
        page.wait_for_timeout(1000)

        # Screenshot initial state
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_01_initial.png", full_page=True)
        print(" Screenshot: e2e_01_initial.png")

        # Verify sidebar loads
        sidebar = page.locator(".sidebar")
        assert sidebar.is_visible(), "Sidebar not visible"
        print(" OK: Sidebar visible")

        # 2. Create new session (click +)
        print("[2] Creating new session...")
        page.locator(".btn-icon").click()
        page.wait_for_timeout(500)
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_02_session.png")
        print(" OK: New session created")

        # 3. Upload test image
        print("[3] Uploading test image...")
        upload_input = page.locator('input[type="file"]')
        upload_input.set_input_files(TEST_IMAGE)
        page.wait_for_timeout(500)
        # Verify file chip appears
        chip = page.locator(".chip").first
        assert chip.is_visible(), "File chip not visible after upload"
        print(f" OK: File chip visible — {chip.inner_text()}")

        # 4. Type message and send
        print('[4] Sending message...')
        textarea = page.locator("textarea").first
        textarea.fill("根据这张图片生成车历卡报表模板")
        page.wait_for_timeout(200)
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_03_input.png")

        # Click send button or press Enter
        page.locator('button[type="submit"]').click()
        print(" Sent!")

        # 5. Wait for streaming response
        print("[5] Waiting for AI response...")
        try:
            # Wait up to 3 minutes for a success or error message
            page.wait_for_selector('.message.assistant', timeout=180000)
            # Fixed 2 s pause before the screenshot and before reading the text.
            page.wait_for_timeout(2000)
            page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_04_response.png", full_page=True)

            # Check for success/error
            messages = page.locator('.message.assistant').all()
            for m in messages:
                text = m.inner_text()
                # Classification is by keyword in the message text; a message
                # matching none of the keywords is not reported.
                if "成功" in text:
                    print(f" ✅ SUCCESS: {text[:100]}")
                elif "失败" in text or "错误" in text:
                    print(f" ❌ ERROR: {text[:100]}")
                elif "JRXML" in text:
                    print(f" 📄 JRXML generated ({len(text)} chars)")
        except Exception as e:
            # NOTE(review): every exception from the block above lands here and
            # is reported as a timeout, not just Playwright timeouts.
            page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_04_timeout.png", full_page=True)
            print(f" ⚠️ Timeout waiting for response: {e}")

        # 6. Check download button
        print("[6] Checking download button...")
        download_btn = page.locator(".btn-download").first
        if download_btn.is_visible():
            text = download_btn.inner_text()
            print(f" Download button: '{text}'")
            # The button text containing "暂无" is treated as "nothing to download".
            if "暂无" not in text:
                print(" ✅ Download link available!")
            else:
                print(" ⚠️ Download shows '暂无下载文件'")
        else:
            print(" ⚠️ Download button not found")

        # Console errors
        if errors:
            print(f"\n[!] Console errors ({len(errors)}):")
            # At most five errors are shown, each cut to 200 characters.
            for e in errors[:5]:
                print(f" {e[:200]}")
        else:
            print("\n ✅ No console errors")

        print("\n=== E2E test complete ===")
        browser.close()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Create the directory that run() writes its screenshots into, if missing.
    os.makedirs(r"D:\Idea Project\agent_jrxml", exist_ok=True)
    run()
|
||||||
@@ -33,8 +33,6 @@ openpyxl>=3.1.0
|
|||||||
paddleocr>=2.9.0,<3.0.0
|
paddleocr>=2.9.0,<3.0.0
|
||||||
paddlepaddle>=2.6.0,<3.0.0
|
paddlepaddle>=2.6.0,<3.0.0
|
||||||
easyocr>=1.7.0
|
easyocr>=1.7.0
|
||||||
# 聊天输入增强(粘贴/拖拽上传)
|
|
||||||
st-multimodal-chatinput>=0.2.1
|
|
||||||
|
|
||||||
# 多格式文件解析
|
# 多格式文件解析
|
||||||
openpyxl>=3.1.0
|
openpyxl>=3.1.0
|
||||||
|
|||||||
@@ -1,53 +1,47 @@
|
|||||||
@echo off
|
@echo off
|
||||||
setlocal enabledelayedexpansion
|
setlocal enabledelayedexpansion
|
||||||
|
echo ================================================
|
||||||
|
echo agent_jrxml 启动 (API + 验证)
|
||||||
|
echo ================================================
|
||||||
|
cd /d "%~dp0"
|
||||||
|
|
||||||
echo ============================================
|
:: 清理残留进程
|
||||||
echo JRXML Agent - One-Click Start
|
echo [清理] 检查残留进程...
|
||||||
echo ============================================
|
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8000.*LISTENING"') do (
|
||||||
echo.
|
taskkill /F /PID %%a >nul 2>&1 && echo 已清理 PID %%a
|
||||||
|
|
||||||
REM ========== Kill processes on ports ==========
|
|
||||||
echo [Pre-check] Cleaning up occupied ports...
|
|
||||||
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING" 2^>nul') do (
|
|
||||||
echo Killing PID %%a on port 8001...
|
|
||||||
taskkill /PID %%a /F 2>nul
|
|
||||||
)
|
)
|
||||||
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8000.*LISTENING" 2^>nul') do (
|
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do (
|
||||||
echo Killing PID %%a on port 8000...
|
taskkill /F /PID %%a >nul 2>&1 && echo 已清理 PID %%a
|
||||||
taskkill /PID %%a /F 2>nul
|
|
||||||
)
|
|
||||||
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":5173.*LISTENING" 2^>nul') do (
|
|
||||||
echo Killing PID %%a on port 5173...
|
|
||||||
taskkill /PID %%a /F 2>nul
|
|
||||||
)
|
)
|
||||||
echo.
|
echo.
|
||||||
|
|
||||||
REM ========== Detect Python ==========
|
:: 启动验证服务 (后台最小化)
|
||||||
set PYTHON=python
|
echo [启动] 验证服务 :8001
|
||||||
if exist "%~dp0.venv\Scripts\python.exe" set "PYTHON=%~dp0.venv\Scripts\python.exe"
|
start "jrxml-validator" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('validation_service.main:app',host='0.0.0.0',port=8001,reload=False)"
|
||||||
echo Using Python: %PYTHON%
|
|
||||||
|
:: 等待验证服务就绪 (用 PowerShell 检测)
|
||||||
|
echo [等待] 验证服务就绪...
|
||||||
|
:wait_val
|
||||||
|
ping -n 2 127.0.0.1 >nul
|
||||||
|
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8001/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
|
||||||
|
if errorlevel 1 goto wait_val
|
||||||
|
echo :8001 就绪
|
||||||
|
|
||||||
|
:: 启动 API 服务 (前台,Ctrl+C 退出)
|
||||||
|
echo [启动] API 服务 :8000
|
||||||
|
echo ================================================
|
||||||
|
echo 服务已就绪:
|
||||||
|
echo API: http://localhost:8000/docs
|
||||||
|
echo 验证: http://localhost:8001/health
|
||||||
|
echo 按 Ctrl+C 停止 API 服务
|
||||||
|
echo 关闭窗口后会自动清理验证服务
|
||||||
|
echo ================================================
|
||||||
|
.venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('api_server:app',host='0.0.0.0',port=8000,reload=False)"
|
||||||
|
|
||||||
|
:: API 进程退出后自动清理
|
||||||
echo.
|
echo.
|
||||||
|
echo [清理] 停止验证服务...
|
||||||
REM ========== Start services ==========
|
taskkill /F /FI "WINDOWTITLE eq jrxml-validator*" >nul 2>&1
|
||||||
echo [1/3] Starting validation service on port 8001...
|
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
|
||||||
start "JRXML-Validator" cmd /k cd /d "%~dp0" ^&^& "%PYTHON%" -m uvicorn validation_service.main:app --port 8001 --host 0.0.0.0
|
echo 已停止所有服务
|
||||||
timeout /t 3 /nobreak >nul
|
|
||||||
|
|
||||||
echo [2/3] Starting backend API on port 8000...
|
|
||||||
start "JRXML-API" cmd /k cd /d "%~dp0" ^&^& "%PYTHON%" -m uvicorn api_server:app --port 8000 --host 0.0.0.0
|
|
||||||
timeout /t 3 /nobreak >nul
|
|
||||||
|
|
||||||
echo [3/3] Starting frontend dev server on port 5173...
|
|
||||||
start "JRXML-Frontend" cmd /k cd /d "%~dp0frontend" ^&^& npm run dev
|
|
||||||
timeout /t 3 /nobreak >nul
|
|
||||||
|
|
||||||
echo.
|
|
||||||
echo ============================================
|
|
||||||
echo All services started!
|
|
||||||
echo Frontend : http://localhost:5173
|
|
||||||
echo Backend : http://localhost:8000
|
|
||||||
echo Validator : http://localhost:8001
|
|
||||||
echo ============================================
|
|
||||||
echo.
|
|
||||||
echo Close the service windows or run stop.bat to stop.
|
|
||||||
pause
|
pause
|
||||||
|
|||||||
@@ -0,0 +1,144 @@
|
|||||||
|
"""
|
||||||
|
agent_jrxml 统一启动/停止脚本
|
||||||
|
用法: python start.py [--frontend]
|
||||||
|
"""
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import signal
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
|
||||||
|
# (port, subprocess.Popen) pairs for every child process this script launched.
PROCESSES = []
|
||||||
|
|
||||||
|
def kill_port(port):
    """Force-kill every process that is listening on TCP *port*.

    Listeners are discovered with ``netstat -ano`` and terminated with
    ``taskkill /F /PID`` (both Windows tools).  The operation is best-effort:
    if either tool is missing, fails or times out, the affected step is
    skipped instead of raising.

    Args:
        port: Local port number to free.

    Returns:
        Number of distinct PIDs for which ``taskkill`` reported success.
    """
    try:
        result = subprocess.run(
            ['netstat', '-ano'], capture_output=True, text=True, timeout=10
        )
    except (OSError, subprocess.SubprocessError, UnicodeError):
        # netstat unavailable / timed out / undecodable output: nothing to do.
        return 0

    killed = []
    for pid in _listening_pids(result.stdout, port):
        try:
            done = subprocess.run(['taskkill', '/F', '/PID', pid],
                                  capture_output=True, timeout=5)
        except (OSError, subprocess.SubprocessError):
            continue
        # Only count processes taskkill actually terminated.
        if done.returncode == 0:
            killed.append(pid)

    if killed:
        print(f"[清理] 端口 {port} 已清理 {len(killed)} 个进程: {', '.join(killed)}")
    return len(killed)


def _listening_pids(netstat_output, port):
    """Return the unique PIDs, in first-seen order, that listen on *port*."""
    wanted = str(port)
    pids = []
    for line in netstat_output.splitlines():
        parts = line.split()
        # Expected columns: proto, local address, foreign address, state, PID.
        if len(parts) < 5 or 'LISTENING' not in parts:
            continue
        # Compare the port of the LOCAL address exactly.  A substring test on
        # the whole line would also hit longer ports (":80" in ":8000") and
        # the foreign-address column ("0.0.0.0:0").
        local_port = parts[1].rpartition(':')[2]
        pid = parts[-1]
        # The same process usually shows up twice (IPv4 and IPv6 listener).
        if local_port == wanted and pid.isdigit() and pid not in pids:
            pids.append(pid)
    return pids
|
||||||
|
|
||||||
|
|
||||||
|
def wait_port(port, timeout=30):
    """Wait until a TCP connection to ``127.0.0.1:port`` can be established.

    Args:
        port: TCP port to probe on the loopback interface.
        timeout: Maximum time to keep trying, in seconds (int or float).
            A value of zero or less returns ``False`` without probing.

    Returns:
        ``True`` as soon as one connection attempt succeeds, ``False`` if the
        deadline passes first.
    """
    # A monotonic deadline keeps the total wait bounded by ``timeout`` no
    # matter how long individual connection attempts take.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # The context manager closes the socket on every path.
            with socket.create_connection(('127.0.0.1', port), timeout=1):
                return True
        except OSError:
            # Refused / timed out: pause briefly, but never past the deadline.
            remaining = deadline - time.monotonic()
            if remaining > 0:
                time.sleep(min(0.5, remaining))
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def start(port, module, cwd=None):
    """Launch ``uvicorn.run(module, ...)`` in a child Python process.

    The child is started with the current interpreter, bound to 0.0.0.0 on
    *port* with reload disabled, and registered in ``PROCESSES``.

    Args:
        port: Port passed to ``uvicorn.run``.
        module: Application import string, e.g. ``'api_server:app'``.
        cwd: Working directory for the child process (optional).

    Returns:
        The ``subprocess.Popen`` handle of the child.
    """
    launcher = f"import uvicorn; uvicorn.run('{module}', host='0.0.0.0', port={port}, reload=False)"
    child = subprocess.Popen([sys.executable, '-c', launcher], cwd=cwd)
    PROCESSES.append((port, child))
    print(f"[启动] {module} -> :{port} (PID: {child.pid})")
    return child
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup():
    """Stop every child process recorded in ``PROCESSES`` and free its port.

    All children are asked to terminate first and then share one 2-second
    grace period.  A child still running after that is killed, and
    ``kill_port`` is called for each recorded port to catch processes that
    are not direct children (for example ones spawned through a shell).
    """
    print("\n[清理] 正在停止所有服务...")
    for _port, proc in PROCESSES:
        try:
            proc.terminate()
        except OSError:
            # Process already gone or not accessible; kill_port covers the rest.
            pass

    # Shared deadline: the grace period is 2 s in total, not per process, and
    # nothing is waited for when the children have already exited.
    deadline = time.monotonic() + 2
    for port, proc in PROCESSES:
        try:
            # wait() also reaps the child so it does not linger as a zombie.
            proc.wait(timeout=max(0.0, deadline - time.monotonic()))
        except subprocess.TimeoutExpired:
            try:
                proc.kill()
                proc.wait(timeout=5)
            except (OSError, subprocess.TimeoutExpired):
                pass
        except OSError:
            pass
        kill_port(port)
    print("[清理] 完成")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Start the API and validation services (optionally the frontend) and supervise them.

    Command line: pass ``--frontend`` to also run ``npm run dev`` in the
    ``frontend`` directory next to this script.

    Ports 8000 and 8001 (and 5173 with ``--frontend``) are cleared first, the
    services are started, and the function then blocks until Ctrl+C.
    ``cleanup()`` runs on every exit path once services may have been
    started, including Ctrl+C or an error during startup.  If a service does
    not become reachable the process exits with status 1.
    """
    frontend = '--frontend' in sys.argv

    # 1. Clear anything still bound to our ports from a previous run.
    print("=" * 50)
    print("agent_jrxml 启动脚本")
    print("=" * 50)
    kill_port(8000)
    kill_port(8001)
    if frontend:
        kill_port(5173)

    # Everything from the first launch onwards is inside try/finally so that
    # children are never left running if startup is interrupted.
    try:
        # 2. Start the services; the project root is this script's directory.
        project = os.path.dirname(os.path.abspath(__file__))
        start(8000, 'api_server:app', cwd=project)
        start(8001, 'validation_service.main:app', cwd=project)

        if frontend:
            # The frontend is started through npm.
            frontend_dir = os.path.join(project, 'frontend')
            proc = subprocess.Popen(
                ['npm', 'run', 'dev'], cwd=frontend_dir,
                shell=True
            )
            PROCESSES.append((5173, proc))
            print("[启动] frontend (Vite) -> :5173")

        # 3. Wait until every started service accepts connections.
        print("\n[等待] 等待服务就绪...")
        ok = True
        for port, _ in PROCESSES:
            if wait_port(port):
                print(f" :{port} ✓")
            else:
                print(f" :{port} ✗ 超时!")
                ok = False

        if not ok:
            print("\n[错误] 部分服务启动失败")
            # The finally clause below performs the cleanup before exiting.
            sys.exit(1)

        print(f"\n{'='*50}")
        print("服务就绪:")
        print(" API: http://localhost:8000/docs")
        print(" 验证: http://localhost:8001/health")
        if frontend:
            print(" 前端: http://localhost:5173")
        print("\n按 Ctrl+C 停止所有服务")
        print(f"{'='*50}")

        # 4. Block until the user interrupts.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
# Run the launcher only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
@echo off
setlocal enabledelayedexpansion
:: Switch the console to UTF-8 so the non-ASCII status messages render correctly.
chcp 65001 >nul
echo ================================================
echo agent_jrxml 启动 (全栈)
echo ================================================
:: Run from the directory that contains this script.
cd /d "%~dp0"

:: Upper bound on health-check attempts per service, so that a service which
:: never comes up cannot hang this script forever.
set "MAX_TRIES=60"

:: Kill leftover processes still listening on the three service ports.
echo [清理] 检查残留进程...
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8000.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":5173.*LISTENING"') do taskkill /F /PID %%a >nul 2>&1
echo.

:: 1. Validation service (minimized window), then poll /health until it answers.
echo [1/3] 验证服务 :8001
start "jrxml-validator" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('validation_service.main:app',host='0.0.0.0',port=8001,reload=False)"
set /a tries=0
:wait_val
set /a tries+=1
if !tries! gtr %MAX_TRIES% (
    echo [错误] 验证服务 :8001 启动超时
    goto fail
)
:: ping is used as a roughly one-second sleep between attempts.
ping -n 2 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8001/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_val
echo :8001 就绪

:: 2. API service (minimized window), then poll /api/health until it answers.
echo [2/3] API 服务 :8000
start "jrxml-api" /MIN .venv\Scripts\python.exe -c "import uvicorn; uvicorn.run('api_server:app',host='0.0.0.0',port=8000,reload=False)"
set /a tries=0
:wait_api
set /a tries+=1
if !tries! gtr %MAX_TRIES% (
    echo [错误] API 服务 :8000 启动超时
    goto fail
)
ping -n 2 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:8000/api/health -TimeoutSec 2 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_api
echo :8000 就绪

:: 3. Frontend dev server (minimized window), then poll its root URL.
echo [3/3] 前端 :5173
start "jrxml-frontend" /MIN cmd /c "cd /d "%~dp0frontend" && npm run dev"
set /a tries=0
:wait_fe
set /a tries+=1
if !tries! gtr %MAX_TRIES% (
    echo [错误] 前端 :5173 启动超时
    goto fail
)
ping -n 3 127.0.0.1 >nul
powershell -Command "try{$r=Invoke-WebRequest -Uri http://localhost:5173 -TimeoutSec 3 -UseBasicParsing;exit 0}catch{exit 1}" >nul 2>&1
if errorlevel 1 goto wait_fe
echo :5173 就绪

echo.
echo ================================================
echo 全部就绪:
echo 前端: http://localhost:5173
echo API: http://localhost:8000/docs
echo 验证: http://localhost:8001/health
echo 运行 stop.bat 停止所有服务
echo ================================================
pause
exit /b 0

:: Reached when a service did not answer within MAX_TRIES attempts.
:fail
echo 运行 stop.bat 停止所有服务
pause
exit /b 1
|
||||||
@@ -1,9 +1,8 @@
|
|||||||
@echo off
|
@echo off
|
||||||
echo 正在停止 JRXML 代理服务...
|
chcp 65001 >nul
|
||||||
|
echo [清理] 停止所有 agent_jrxml 服务...
|
||||||
taskkill /fi "WINDOWTITLE eq JRXML 验证服务*" /f 2>nul
|
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8000.*LISTENING"') do taskkill /F /PID %%a 2>nul
|
||||||
taskkill /fi "WINDOWTITLE eq JRXML API*" /f 2>nul
|
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":8001.*LISTENING"') do taskkill /F /PID %%a 2>nul
|
||||||
taskkill /fi "WINDOWTITLE eq JRXML Frontend*" /f 2>nul
|
for /f "tokens=5" %%a in ('netstat -ano ^| findstr ":5173.*LISTENING"') do taskkill /F /PID %%a 2>nul
|
||||||
|
echo 已停止
|
||||||
echo 已停止。
|
|
||||||
pause
|
pause
|
||||||
|
|||||||
|
After Width: | Height: | Size: 10 KiB |
@@ -0,0 +1,29 @@
|
|||||||
|
# Manual smoke check for backend.jrxml_reorder.normalize_jrxml: prints the
# root's child element order before and after normalisation, then posts the
# normalised document to the validation endpoint and prints the JSON reply.
import sys, io
# Re-wrap stdout so that printed text is encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
import xml.etree.ElementTree as ET
from backend.jrxml_reorder import normalize_jrxml

# Sample report used as input; presumably its top-level children are in an
# order the JRXML schema rejects (hence the name) — verify against the schema.
bad = '''<?xml version="1.0" encoding="UTF-8"?>
<jasperReport xmlns="http://jasperreports.sourceforge.net/jasperreports" name="Test" pageWidth="595" pageHeight="842">
<queryString><![CDATA[SELECT 1]]></queryString>
<style name="s1"/>
<field name="f1" class="java.lang.String"/>
<property name="p1" value="v1"/>
<parameter name="param1" class="java.lang.String"/>
<title><band height="50"><textField><reportElement x="0" y="0" width="100" height="20"/></textField></band></title>
<detail><band height="30"><staticText><reportElement x="0" y="0" width="100" height="20"/><text>Hi</text></staticText></band></detail>
</jasperReport>'''

fixed = normalize_jrxml(bad)
print('=== Before ===')
root = ET.fromstring(bad)
# split('}')[-1] drops the "{namespace}" prefix ElementTree puts on tag names.
print('Children:', [c.tag.split('}')[-1] for c in root])

print('\n=== After ===')
root2 = ET.fromstring(fixed)
print('Children:', [c.tag.split('}')[-1] for c in root2])

# Validate: send the normalised JRXML to the service on localhost:8001
# (the service must already be running) and print its JSON response.
import requests
r = requests.post('http://localhost:8001/validate', json={'jrxml': fixed}, timeout=10)
print(f'\nValidation: {r.json()}')
|
||||||
@@ -57,7 +57,7 @@ class TestAcceptanceScenarios:
|
|||||||
state["stage"] = "initial_generation"
|
state["stage"] = "initial_generation"
|
||||||
|
|
||||||
final = run_graph(graph, state)
|
final = run_graph(graph, state)
|
||||||
assert final.get("retry_count", 0) <= 3, "不应超过最大重试次数"
|
assert final.get("retry_count", 0) <= 5, "不应超过最大重试次数"
|
||||||
print(f"场景 2 状态: {final.get('status')}, 重试次数: {final.get('retry_count', 0)}")
|
print(f"场景 2 状态: {final.get('status')}, 重试次数: {final.get('retry_count', 0)}")
|
||||||
|
|
||||||
def test_scenario3_multi_turn_modification(self, graph):
|
def test_scenario3_multi_turn_modification(self, graph):
|
||||||
@@ -128,8 +128,8 @@ class TestAcceptanceScenarios:
|
|||||||
state = create_initial_state()
|
state = create_initial_state()
|
||||||
state["current_jrxml"] = "<invalid>xml<<<"
|
state["current_jrxml"] = "<invalid>xml<<<"
|
||||||
state["user_input"] = "Fix this"
|
state["user_input"] = "Fix this"
|
||||||
state["retry_count"] = 3 # 已达到最大重试次数
|
state["retry_count"] = 5 # 已达到最大重试次数
|
||||||
state["status"] = "fail"
|
state["status"] = "fail"
|
||||||
|
|
||||||
final = run_graph(graph, state)
|
final = run_graph(graph, state)
|
||||||
assert final.get("retry_count", 0) >= 3 or final.get("status") == "pass"
|
assert final.get("retry_count", 0) >= 5 or final.get("status") == "pass"
|
||||||
|
|||||||
|
After Width: | Height: | Size: 1.7 MiB |
|
After Width: | Height: | Size: 1.7 MiB |
|
After Width: | Height: | Size: 1.7 MiB |
|
After Width: | Height: | Size: 1.7 MiB |
|
After Width: | Height: | Size: 1.7 MiB |
|
After Width: | Height: | Size: 1.7 MiB |
@@ -0,0 +1 @@
|
|||||||
|
测试文件内容
|
||||||
|
After Width: | Height: | Size: 1.7 MiB |