chore: remove 13 stale files and clean up project structure
Removed:
- app.py (deprecated Streamlit UI, replaced by api_server.py + frontend/)
- start_agent_jrxml.py (old launcher, replaced by start.py)
- test_reorder.py, e2e_test.py (ad-hoc/outdated test scripts)
- ocr_raw_positions.json (debug output)
- ARCHITECTURE.md, CODE_GUIDE.md, RAG_INTEGRATION.md, ROADMAP.md (superseded by CLAUDE.md)
- EVALUATION_REPORT.md (auto-generated)
- scripts/init_kb.py (replaced by init_default_kb.py)
- validation_service/validate.bat (redundant, start.py covers it)
- sessions/*.json (34 test session files, already gitignored)

Updated:
- CLAUDE.md: removed stale file entries from key mapping table
- README.md: updated init script reference and removed validate.bat
- .gitignore: removed EVALUATION_REPORT.md entry
This commit is contained in:
@@ -15,7 +15,6 @@ logs/
|
|||||||
db/
|
db/
|
||||||
# 自动评测 (Mavis AI)
|
# 自动评测 (Mavis AI)
|
||||||
.mavis/
|
.mavis/
|
||||||
EVALUATION_REPORT.md
|
|
||||||
|
|
||||||
# 上传文件
|
# 上传文件
|
||||||
uploads/
|
uploads/
|
||||||
|
|||||||
-341
@@ -1,341 +0,0 @@
|
|||||||
# JRXML 生成代理 — 架构文档
|
|
||||||
|
|
||||||
## 概览
|
|
||||||
|
|
||||||
一个三层架构的桌面应用,通过自然语言多轮对话帮助非技术用户创建 JasperReports 模板(JRXML)。核心流程:用户输入 → 意图识别 → 模板检索 → LLM 生成/修改 → 自动验证修正 → 输出可编译的 JRXML。
|
|
||||||
|
|
||||||
```
|
|
||||||
┌──────────────────────────────────────────────────────────────┐
|
|
||||||
│ Vue 3 + Vite 前端 (:5173) │
|
|
||||||
│ frontend/ (聊天界面 + SSE 流式) │
|
|
||||||
│ 聊天界面 / 会话管理 / JRXML 预览 / 下载 / 快捷操作 │
|
|
||||||
└─────────────────────┬────────────────────────────────────────┘
|
|
||||||
│ HTTP + SSE (/api/*)
|
|
||||||
▼
|
|
||||||
┌──────────────────────────────────────────────────────────────┐
|
|
||||||
│ FastAPI SSE 后端 (:8000) │
|
|
||||||
│ api_server.py │
|
|
||||||
│ REST: /api/sessions, /api/upload, /api/.../download/latest │
|
|
||||||
│ SSE: /api/sessions/{id}/chat (流式推送) │
|
|
||||||
│ 事件: node_start | node_complete | stream_token │
|
|
||||||
│ agent_complete | agent_error │
|
|
||||||
└─────────────────────┬────────────────────────────────────────┘
|
|
||||||
│ run_agent(user_input)
|
|
||||||
▼
|
|
||||||
┌──────────────────────────────────────────────────────────────┐
|
|
||||||
│ LangGraph 状态机 (agent/) │
|
|
||||||
│ │
|
|
||||||
│ load_session → process_input → manage_context │
|
|
||||||
│ → save_state_snapshot → classify_intent │
|
|
||||||
│ │ │ │ │ │ │
|
|
||||||
│ ▼ ▼ ▼ ▼ ▼ │
|
|
||||||
│ retrieve modify_jrxml preview consult undo/reset │
|
|
||||||
│ │ │ /export │
|
|
||||||
│ ▼ ▼ │
|
|
||||||
│ generate save_session │
|
|
||||||
│ │ │ │
|
|
||||||
│ └────┬─────┘ │
|
|
||||||
│ ▼ │
|
|
||||||
│ (jrxml_reorder 自动规范化元素顺序) │
|
|
||||||
│ ▼ │
|
|
||||||
│ validate ──(fail)──► explain_error ──► correct_jrxml │
|
|
||||||
│ │ ▲ │ │
|
|
||||||
│ (pass) └──(retry<N)───┘ │
|
|
||||||
│ ▼ │
|
|
||||||
│ finalize (失败版本 → jrxml_versions, 提示下载) │
|
|
||||||
└──────────┬──────────────┬─────────────────────┬──────────────┘
|
|
||||||
│ │ │
|
|
||||||
▼ ▼ ▼
|
|
||||||
┌──────────────┐ ┌──────────────┐ ┌──────────────────────────┐
|
|
||||||
│ LLM 后端 │ │ 向量知识库 │ │ 验证服务 (:8001) │
|
|
||||||
│ backend/llm │ │ ChromaDB + │ │ FastAPI │
|
|
||||||
│ │ │ RAGSearcher │ │ 结构检查 + 严格 XSD 校验 │
|
|
||||||
│ Anthropic SDK│ │ │ │ │
|
|
||||||
│ OpenAI SDK │ │ Sentence- │ │ /validate │
|
|
||||||
│ Ollama │ │ Transformer │ │ /health │
|
|
||||||
└──────────────┘ └──────────────┘ └──────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
## 目录结构
|
|
||||||
|
|
||||||
```
|
|
||||||
agent_jrxml/
|
|
||||||
├── api_server.py # FastAPI SSE 后端(REST + 流式推送)
|
|
||||||
│
|
|
||||||
├── frontend/ # Vue 3 + Vite 前端
|
|
||||||
│ └── src/
|
|
||||||
│ ├── api/client.ts # SSE 客户端 + fetch 封装
|
|
||||||
│ ├── stores/ # Pinia 状态管理(chat + session)
|
|
||||||
│ └── components/ # 聊天界面组件
|
|
||||||
│
|
|
||||||
├── agent/ # LangGraph 工作流层
|
|
||||||
│ ├── __init__.py
|
|
||||||
│ ├── state.py # AgentState TypedDict 定义(~28 字段)
|
|
||||||
│ ├── nodes.py # 18 个工作流节点(生成/修改/验证/修正/意图识别...)
|
|
||||||
│ └── graph.py # 状态图编译 + 路由逻辑 + 初始状态工厂
|
|
||||||
│
|
|
||||||
├── backend/ # 基础设施层
|
|
||||||
│ ├── __init__.py
|
|
||||||
│ ├── llm.py # LLM 工厂:Anthropic(MiniMax) / OpenAI / Ollama
|
|
||||||
│ ├── embeddings.py # 嵌入模型工厂:HuggingFace / OpenAI
|
|
||||||
│ ├── validation.py # 验证服务 HTTP 客户端
|
|
||||||
│ ├── session.py # 会话持久化(JSON CRUD + flush/fsync)
|
|
||||||
│ ├── jrxml_reorder.py # JRXML 元素自动排序(匹配 XSD sequence)
|
|
||||||
│ └── rag_adapter.py # RAG 适配层:连接 ChromaDB 做语义搜索
|
|
||||||
│
|
|
||||||
├── validation_service/ # 独立验证微服务
|
|
||||||
│ ├── main.py # FastAPI 服务:结构检查 + 严格 XSD 校验
|
|
||||||
│ └── schemas/
|
|
||||||
│ └── jasperreport_7_0_6.xsd # JasperReports 7.0.6 XSD(286KB)
|
|
||||||
│
|
|
||||||
├── scripts/
|
|
||||||
│ └── init_kb.py # 知识库初始化(预下载嵌入模型)
|
|
||||||
│
|
|
||||||
├── tests/
|
|
||||||
│ ├── __init__.py
|
|
||||||
│ ├── test_validation.py # 验证服务单元测试
|
|
||||||
│ └── test_agent.py # 代理集成测试
|
|
||||||
│
|
|
||||||
├── data/ # 数据目录
|
|
||||||
│ ├── sample_templates/ # 示例 JRXML 模板
|
|
||||||
│ └── corrections/ # 错误修正案例
|
|
||||||
│
|
|
||||||
├── db/chroma/ # ChromaDB 持久化存储
|
|
||||||
├── sessions/ # 会话 JSON 文件存储
|
|
||||||
├── jrxml_versions/ # 失败版本归档存储
|
|
||||||
├── rag/ # RAG 子模块(独立管线)
|
|
||||||
├── requirements.txt # Python 依赖
|
|
||||||
├── start_all.bat # 一键启动全部服务
|
|
||||||
├── start.bat # 启动脚本
|
|
||||||
├── stop.bat # 一键停止全部服务
|
|
||||||
├── .env.example # 环境变量模板
|
|
||||||
└── README.md # 使用说明
|
|
||||||
```
|
|
||||||
|
|
||||||
## 数据流详解
|
|
||||||
|
|
||||||
### 1. 请求生命周期
|
|
||||||
|
|
||||||
```
|
|
||||||
用户输入 "创建员工名册,包含 id、name、department"
|
|
||||||
│
|
|
||||||
├─ load_session 从 sessions/{id}.json 恢复历史状态
|
|
||||||
├─ process_input 记录用户消息到 conversation_history
|
|
||||||
├─ manage_context 检查 token 数,超阈值则 LLM 压缩早期对话
|
|
||||||
├─ save_state_snapshot 保存当前状态快照(用于撤销)
|
|
||||||
├─ classify_intent LLM 分类 → initial_generation
|
|
||||||
├─ retrieve RAGSearcher.search_as_context() → 注入 prompt
|
|
||||||
├─ generate LLM 生成初始 JRXML
|
|
||||||
├─ save_session 持久化到磁盘
|
|
||||||
├─ validate 调用 FastAPI 验证服务
|
|
||||||
│ ├─ pass → finalize
|
|
||||||
│ └─ fail → explain_error → correct_jrxml → validate (最多 5 次)
|
|
||||||
└─ finalize 保存最终 JRXML,UI 展示结果
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. 意图路由(8 种意图)
|
|
||||||
|
|
||||||
| 意图 | 条件 | 路由目标 |
|
|
||||||
|------|------|---------|
|
|
||||||
| `initial_generation` | 无现有报表 | retrieve → generate |
|
|
||||||
| `modify_report` | 有现有报表 | modify_jrxml |
|
|
||||||
| `preview_report` | — | 直接展示 current_jrxml |
|
|
||||||
| `export_jrxml` | — | 触发下载 |
|
|
||||||
| `export_pdf` | — | 触发下载 |
|
|
||||||
| `consult_question` | — | handle_consult(独立回答) |
|
|
||||||
| `undo_modification` | history_states 非空 | 恢复上一个快照 |
|
|
||||||
| `reset_session` | — | 清空所有报表状态 |
|
|
||||||
|
|
||||||
### 3. 自动修正循环
|
|
||||||
|
|
||||||
```
|
|
||||||
validate ──fail──► explain_error ──► correct_jrxml ──► validate
|
|
||||||
▲ │
|
|
||||||
└──────────── retry_count < MAX_RETRY (5) ──────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
每次修正都会递增 `retry_count`,达到上限后直接 `finalize`(即使仍有错误),在 UI 上展示错误信息。
|
|
||||||
|
|
||||||
## 核心组件
|
|
||||||
|
|
||||||
### AgentState(agent/state.py)
|
|
||||||
|
|
||||||
```python
|
|
||||||
class AgentState(TypedDict, total=False):
|
|
||||||
# 工作流核心
|
|
||||||
conversation_history: List[dict] # 当前上下文的对话(可能被压缩裁剪)
|
|
||||||
current_jrxml: str # 当前 JRXML 文本
|
|
||||||
user_input: str # 本轮用户输入
|
|
||||||
status: str # "pass" | "fail"
|
|
||||||
error_msg: str # 验证错误信息
|
|
||||||
natural_explanation: str # 错误的人类可读解释
|
|
||||||
retry_count: int # 当前修正尝试次数
|
|
||||||
user_modification_request: str # 修改请求文本
|
|
||||||
final_jrxml: str # 最终验证通过的 JRXML
|
|
||||||
stage: str # 当前阶段标识
|
|
||||||
retrieved_context: str # RAG 检索到的模板上下文
|
|
||||||
|
|
||||||
# 上下文压缩
|
|
||||||
full_conversation_history: List[dict] # 完整对话(含时间戳)
|
|
||||||
compressed_history: str # 早期对话的压缩摘要
|
|
||||||
current_token_count: int # 当前估算 token 数
|
|
||||||
|
|
||||||
# 会话持久化
|
|
||||||
session_id: str
|
|
||||||
session_name: str
|
|
||||||
created_at: str
|
|
||||||
updated_at: str
|
|
||||||
|
|
||||||
# 意图识别 + 撤销
|
|
||||||
intent: str # 8 种意图之一
|
|
||||||
history_states: List[dict] # 状态快照栈(最多 10 个)
|
|
||||||
```
|
|
||||||
|
|
||||||
### 工作流节点(agent/nodes.py)
|
|
||||||
|
|
||||||
| 节点 | 职责 | 调用外部 |
|
|
||||||
|------|------|---------|
|
|
||||||
| `load_session_node` | 从磁盘恢复会话状态 | `backend.session.load_session` |
|
|
||||||
| `process_input` | 记录用户输入到对话历史 | — |
|
|
||||||
| `manage_context` | token 超阈值时 LLM 压缩早期对话 | `get_llm()` |
|
|
||||||
| `save_state_snapshot` | 保存快照到 history_states | — |
|
|
||||||
| `classify_intent` | LLM 分类用户意图(8 类) | `get_llm()` |
|
|
||||||
| `retrieve` | 从 ChromaDB 搜索相关模板 | `backend.rag_adapter.search_chunks` |
|
|
||||||
| `generate` | 首次生成 JRXML | `get_llm()` |
|
|
||||||
| `modify_jrxml` | 根据用户需求修改现有 JRXML | `get_llm()` |
|
|
||||||
| `validate` | 调用验证服务检查 JRXML | `backend.validation.validate_jrxml` |
|
|
||||||
| `explain_error` | LLM 将编译错误翻译为人话 | `get_llm()` |
|
|
||||||
| `correct_jrxml` | LLM 自动修正验证失败 | `get_llm()` |
|
|
||||||
| `finalize` | 保存最终 JRXML,标记完成 | — |
|
|
||||||
| `handle_consult` | 回答 JasperReports 咨询 | `get_llm()` |
|
|
||||||
| `handle_undo` | 从 history_states 恢复上一状态 | — |
|
|
||||||
| `handle_reset` | 清空报表,重置会话 | — |
|
|
||||||
| `save_session_node` | 持久化当前状态到磁盘 | `backend.session.save_session` |
|
|
||||||
|
|
||||||
### LLM 工厂(backend/llm.py)
|
|
||||||
|
|
||||||
```
|
|
||||||
get_llm()
|
|
||||||
├─ LLM_BACKEND=local → langchain_ollama.ChatOllama
|
|
||||||
└─ LLM_BACKEND=cloud
|
|
||||||
├─ LLM_PROVIDER=anthropic → raw anthropic.Anthropic SDK
|
|
||||||
│ 适配 MiniMax Anthropic 兼容 API
|
|
||||||
│ 包装为 MiniMaxLLM(提供 .invoke() 接口)
|
|
||||||
└─ LLM_PROVIDER=openai → langchain_openai.ChatOpenAI
|
|
||||||
```
|
|
||||||
|
|
||||||
**MiniMaxLLM 适配器**:将 Anthropic SDK 的 `client.messages.create()` 包装成与 LangChain 兼容的 `.invoke(prompt) → Response.content` 接口,供所有节点统一调用。
|
|
||||||
|
|
||||||
### RAG 适配层(backend/rag_adapter.py)
|
|
||||||
|
|
||||||
```
|
|
||||||
search_chunks(query, k=5)
|
|
||||||
└─ RAGSearcher(单例)
|
|
||||||
├─ 懒加载 SentenceTransformer 模型
|
|
||||||
├─ 懒连接 ChromaDB PersistentClient
|
|
||||||
├─ query → 向量编码 → collection.query() → top-k 结果
|
|
||||||
└─ search_as_context() → 拼接带元数据标签的上下文字符串
|
|
||||||
```
|
|
||||||
|
|
||||||
### 验证服务(validation_service/main.py)
|
|
||||||
|
|
||||||
独立的 FastAPI 进程(端口 8001),提供两级验证:
|
|
||||||
|
|
||||||
1. **结构检查**(始终执行):
|
|
||||||
- XML 语法正确性
|
|
||||||
- `$F{field}` 引用一致性(表达式 vs `<field>` 声明)
|
|
||||||
- `<queryString>` 是否含有效 SQL SELECT
|
|
||||||
- `<jasperReport>` 必需属性(pageWidth, pageHeight, name)
|
|
||||||
|
|
||||||
2. **XSD Schema 校验**(可选):
|
|
||||||
- 需要 `validation_service/schemas/jasperreport_7_0_6.xsd` 文件
|
|
||||||
- 使用 `lxml.etree.XMLSchema` 进行完整 schema 校验
|
|
||||||
|
|
||||||
### 会话持久化(backend/session.py)
|
|
||||||
|
|
||||||
```
|
|
||||||
sessions/{session_id}.json
|
|
||||||
{
|
|
||||||
"session_id": "abc123def456",
|
|
||||||
"session_name": "员工名册报表",
|
|
||||||
"created_at": "2026-05-19T09:00:00+00:00",
|
|
||||||
"updated_at": "2026-05-19T09:30:00+00:00",
|
|
||||||
"agent_state": { ... } // 完整的 AgentState 字段
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## 关键 Prompt 设计
|
|
||||||
|
|
||||||
| Prompt | 用途 | 输出约束 |
|
|
||||||
|--------|------|---------|
|
|
||||||
| `INTENT_CLASSIFY_PROMPT` | 8 分类意图识别 | 只输出意图名称 |
|
|
||||||
| `INITIAL_GENERATION_PROMPT` | 首次生成 JRXML | 只输出 JRXML,无 markdown |
|
|
||||||
| `MODIFICATION_PROMPT` | 修改现有 JRXML | 只输出完整 JRXML |
|
|
||||||
| `CORRECTION_PROMPT` | 自动修正错误 | 只输出修复后 JRXML |
|
|
||||||
| `EXPLAIN_PROMPT` | 错误转人话 | 2-3 句话 |
|
|
||||||
| `COMPRESSION_PROMPT` | 对话压缩 | ≤200 字摘要 |
|
|
||||||
| `CONSULT_PROMPT` | 咨询解答 | 简洁中文 |
|
|
||||||
|
|
||||||
## 配置参数(.env)
|
|
||||||
|
|
||||||
| 参数 | 默认值 | 说明 |
|
|
||||||
|------|--------|------|
|
|
||||||
| `LLM_BACKEND` | cloud | cloud / local |
|
|
||||||
| `LLM_PROVIDER` | openai | openai / anthropic |
|
|
||||||
| `OPENAI_API_KEY` | — | API 密钥 |
|
|
||||||
| `OPENAI_BASE_URL` | https://api.openai.com/v1 | API 端点 |
|
|
||||||
| `LLM_MODEL` | gpt-4o | 模型名称 |
|
|
||||||
| `LOCAL_LLM_MODEL` | qwen2.5-coder:7b | Ollama 模型 |
|
|
||||||
| `EMBED_BACKEND` | local | local / cloud |
|
|
||||||
| `LOCAL_EMBED_MODEL` | Qwen/Qwen3-Embedding-0.6B | 本地嵌入模型 |
|
|
||||||
| `VALIDATION_SERVICE_URL` | http://localhost:8001/validate | 验证端点 |
|
|
||||||
| `CHROMA_PERSIST_DIR` | ./db/chroma | ChromaDB 路径 |
|
|
||||||
| `MAX_RETRY` | 5 | 自动修正最大尝试次数 |
|
|
||||||
| `CONTEXT_MAX_TOKENS` | 6000 | 触发压缩的 token 阈值 |
|
|
||||||
| `CONTEXT_KEEP_RECENT` | 4 | 保留最近 N 轮完整对话 |
|
|
||||||
| `SESSIONS_DIR` | ./sessions | 会话 JSON 存储目录 |
|
|
||||||
| `HISTORY_MAX_SNAPSHOTS` | 10 | 撤销快照保留数量 |
|
|
||||||
|
|
||||||
## 启动流程
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 1. 安装依赖
|
|
||||||
pip install -r requirements.txt
|
|
||||||
|
|
||||||
# 2. 配置环境
|
|
||||||
cp .env.example .env
|
|
||||||
# 编辑 .env 填入 API 密钥
|
|
||||||
|
|
||||||
# 3. 初始化知识库(预下载嵌入模型)
|
|
||||||
python scripts/init_kb.py --download-model
|
|
||||||
|
|
||||||
# 4. 启动验证服务(终端 1)
|
|
||||||
python -m uvicorn validation_service.main:app --port 8001 --host 0.0.0.0
|
|
||||||
|
|
||||||
# 5. 启动 Streamlit 界面(终端 2)
|
|
||||||
STREAMLIT_SERVER_HEADLESS=true streamlit run app.py --server.port 8501
|
|
||||||
|
|
||||||
# 6. 访问 http://localhost:8501
|
|
||||||
```
|
|
||||||
|
|
||||||
## 测试
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pytest tests/test_validation.py -v # 验证服务单元测试
|
|
||||||
pytest tests/test_agent.py -v # 代理集成测试
|
|
||||||
pytest tests/ -v # 全部测试
|
|
||||||
```
|
|
||||||
|
|
||||||
## 技术栈
|
|
||||||
|
|
||||||
| 层 | 技术 |
|
|
||||||
|----|------|
|
|
||||||
| UI | Streamlit 1.57 |
|
|
||||||
| 工作流引擎 | LangGraph 1.2 |
|
|
||||||
| LLM 接入 | Anthropic SDK / LangChain-OpenAI / LangChain-Ollama |
|
|
||||||
| 向量数据库 | ChromaDB 1.5 |
|
|
||||||
| 嵌入模型 | Sentence-Transformers (HuggingFace) |
|
|
||||||
| 验证服务 | FastAPI + lxml XMLSchema |
|
|
||||||
| HTTP 客户端 | httpx |
|
|
||||||
| Token 计算 | tiktoken |
|
|
||||||
| 持久化 | JSON 文件 + ChromaDB PersistentClient |
|
|
||||||
@@ -98,9 +98,7 @@ validation_service/ (FastAPI, 端口 8001) — 不变
|
|||||||
| `agent/datasource.py` | 数据源模式解析:$P{{xxx}} 参数 vs JDBC 直连 | 低 |
|
| `agent/datasource.py` | 数据源模式解析:$P{{xxx}} 参数 vs JDBC 直连 | 低 |
|
||||||
| `agent/jrxml_windower.py` | JRXML Band 级窗口化引擎:拆解/切分/重组/元素计数校验 | 中 |
|
| `agent/jrxml_windower.py` | JRXML Band 级窗口化引擎:拆解/切分/重组/元素计数校验 | 中 |
|
||||||
| `validation_service/main.py` | FastAPI 验证服务 | 低 |
|
| `validation_service/main.py` | FastAPI 验证服务 | 低 |
|
||||||
| `scripts/init_kb.py` | 旧 RAG 知识库初始化/模型下载 | 低 |
|
|
||||||
| `scripts/init_default_kb.py` | 多租户默认 KB 初始化(默认用户 + 预置 KB) | 低 |
|
| `scripts/init_default_kb.py` | 多租户默认 KB 初始化(默认用户 + 预置 KB) | 低 |
|
||||||
| `app.py` | ~~旧 Streamlit UI~~(已由 api_server.py + frontend/ 替代) | 废弃 |
|
|
||||||
|
|
||||||
## 关键约定
|
## 关键约定
|
||||||
|
|
||||||
|
|||||||
-1327
File diff suppressed because it is too large
Load Diff
@@ -1,91 +0,0 @@
|
|||||||
# RAG 知识库集成说明
|
|
||||||
|
|
||||||
## 概述
|
|
||||||
|
|
||||||
使用 `rag_jrxml` 子项目的语义分块管线替换原有的简单向量知识库。`rag_jrxml` 独立运行产出 ChromaDB,主项目通过 `backend/rag_adapter.py` 查询。
|
|
||||||
|
|
||||||
## 架构
|
|
||||||
|
|
||||||
```
|
|
||||||
rag/ ← git submodule (rag_jrxml)
|
|
||||||
├── jrxml_source/ ← 源数据目录 (242 .jrxml + 16 .md)
|
|
||||||
├── models/ ← 嵌入模型本地存放
|
|
||||||
│ └── paraphrase-multilingual-MiniLM-L12-v2/ (449MB, 384维)
|
|
||||||
├── jrxml_source_chunks/ ← 分块产物 (all_chunks.json, 15,510 chunks)
|
|
||||||
├── embeddings/ ← 向量产物 (embeddings.npy, 23MB)
|
|
||||||
|
|
||||||
db/chroma/ ← ChromaDB 持久化 (主项目查询端读取)
|
|
||||||
│ 集合: jrxml_chunks (15,510 条记录, cosine 距离)
|
|
||||||
|
|
||||||
backend/rag_adapter.py ← RAGSearcher: 加载模型 + 连接 ChromaDB + 搜索
|
|
||||||
agent/nodes.py ← retrieve() 调用 search_chunks()
|
|
||||||
```
|
|
||||||
|
|
||||||
## 管线流程
|
|
||||||
|
|
||||||
```
|
|
||||||
源文件 (.jrxml + .md)
|
|
||||||
→ batch_chunker.py 语义分块 (按 XML 元素/标题层级切分)
|
|
||||||
→ embed_chunks.py 向量化 (Sentence-Transformers, CPU)
|
|
||||||
→ import_to_chroma.py 导入 ChromaDB
|
|
||||||
→ rag_adapter.py 主项目查询
|
|
||||||
```
|
|
||||||
|
|
||||||
## 当前数据
|
|
||||||
|
|
||||||
| 指标 | 数值 |
|
|
||||||
|---|---|
|
|
||||||
| 源文件 | 258 (242 JRXML + 16 MD) |
|
|
||||||
| Chunks 总数 | 15,510 |
|
|
||||||
| 嵌入维度 | 384 |
|
|
||||||
| 嵌入模型 | sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 |
|
|
||||||
| 分块类型 | query, field, parameter, variable, band_*, chart, crosstab, element_*, section_* 等 |
|
|
||||||
| 知识库大小 | embeddings.npy 23MB, ChromaDB ~50MB |
|
|
||||||
|
|
||||||
## 主项目配置
|
|
||||||
|
|
||||||
`.env` 中相关变量:
|
|
||||||
|
|
||||||
```env
|
|
||||||
# 嵌入模型 (本地路径优先)
|
|
||||||
RAG_EMBED_MODEL=./rag/models/paraphrase-multilingual-MiniLM-L12-v2
|
|
||||||
# ChromaDB 路径
|
|
||||||
RAG_CHROMA_PATH=./db/chroma
|
|
||||||
# 集合名称 (与 rag 子项目一致)
|
|
||||||
RAG_COLLECTION_NAME=jrxml_chunks
|
|
||||||
```
|
|
||||||
|
|
||||||
## 全量构建
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd rag
|
|
||||||
python batch_chunker.py jrxml_source
|
|
||||||
python embed_chunks.py jrxml_source_chunks/all_chunks.json
|
|
||||||
python import_to_chroma.py --chroma_path ../db/chroma
|
|
||||||
```
|
|
||||||
|
|
||||||
## 增量更新
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 1. 将新的 .jrxml / .md 放入 rag/jrxml_source/
|
|
||||||
# 2. 增量运行
|
|
||||||
cd rag
|
|
||||||
python batch_chunker.py jrxml_source --incremental
|
|
||||||
python embed_chunks.py --incremental
|
|
||||||
python import_to_chroma.py --chroma_path ../db/chroma --incremental
|
|
||||||
```
|
|
||||||
|
|
||||||
## 更新 rag 子项目
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git submodule update --remote rag
|
|
||||||
```
|
|
||||||
|
|
||||||
## 搜索接口
|
|
||||||
|
|
||||||
```python
|
|
||||||
from backend.rag_adapter import search_chunks
|
|
||||||
|
|
||||||
# 返回拼接好的上下文字符串,可直接注入 LLM prompt
|
|
||||||
context = search_chunks("如何创建饼图", k=5)
|
|
||||||
```
|
|
||||||
@@ -144,7 +144,6 @@ jrxml-agent/
|
|||||||
*.md 10 个 Prompt 模板文件
|
*.md 10 个 Prompt 模板文件
|
||||||
validation_service/
|
validation_service/
|
||||||
main.py FastAPI 验证服务器
|
main.py FastAPI 验证服务器
|
||||||
validate.bat Windows 启动器
|
|
||||||
data/
|
data/
|
||||||
sample_templates/ 知识库的 JRXML 模板
|
sample_templates/ 知识库的 JRXML 模板
|
||||||
corrections/ 错误修正案例
|
corrections/ 错误修正案例
|
||||||
@@ -152,7 +151,7 @@ jrxml-agent/
|
|||||||
app.log 应用日志(节点流转、路由、用户交互)
|
app.log 应用日志(节点流转、路由、用户交互)
|
||||||
llm.log LLM 调用日志(完整 prompt / response)
|
llm.log LLM 调用日志(完整 prompt / response)
|
||||||
scripts/
|
scripts/
|
||||||
init_kb.py Chroma 知识库初始化脚本
|
init_default_kb.py 多租户默认知识库初始化脚本
|
||||||
tests/
|
tests/
|
||||||
test_validation.py 验证服务测试
|
test_validation.py 验证服务测试
|
||||||
test_agent.py 代理集成测试
|
test_agent.py 代理集成测试
|
||||||
|
|||||||
-202
@@ -1,202 +0,0 @@
|
|||||||
# 改进路线图
|
|
||||||
|
|
||||||
## 阶段一:代码质量(低风险,快速交付)
|
|
||||||
|
|
||||||
### 1. Prompt 拆分 ✓
|
|
||||||
- [x] 创建 `prompts/` 目录
|
|
||||||
- [x] 7 个 prompt 各拆为独立 `.md` 文件
|
|
||||||
- [x] `nodes.py` 改为从文件加载
|
|
||||||
- [x] 支持热重载(文件变更无需重启)
|
|
||||||
|
|
||||||
### 2. 修复无效代码 ✓
|
|
||||||
- [x] `backend/llm.py` — `get_num_tokens()` 修复为正确 API
|
|
||||||
- [x] `backend/embeddings.py` — 修复 docstring 函数名不一致
|
|
||||||
- [x] `backend/llm.py` — 统一 LLM 接口基类 `_BaseLLM`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 阶段二:用户体验(核心改造)
|
|
||||||
|
|
||||||
### 3. 流式输出 + 节点平铺 ✓
|
|
||||||
- [x] `backend/llm.py` — LLM 工厂支持 `stream()` 统一接口
|
|
||||||
- [x] `agent/nodes.py` — generate/modify/correct 节点使用流式 + `get_stream_writer()`
|
|
||||||
- [x] `app.py` — 使用 `stream_mode=["updates", "custom"]` 捕获流式事件
|
|
||||||
- [x] 节点状态平铺(处理过程 expander 逐节点展示)
|
|
||||||
- [x] 流式完成后节点自动折叠
|
|
||||||
- [x] 完成后单独展示「总结卡片」
|
|
||||||
|
|
||||||
### 4. 错误自增长知识库 ✓
|
|
||||||
- [x] `backend/error_kb.py` — ErrorKB 类(ChromaDB 持久化)
|
|
||||||
- [x] 错误指纹去重(标准化 + MD5)
|
|
||||||
- [x] `correct_jrxml` — 保存修正前状态到 `last_error_case`
|
|
||||||
- [x] `validate` — 修正成功时自动记录(仅新错误,自动去重)
|
|
||||||
- [x] `retrieve` — 搜索错误知识库,注入历史修正案例
|
|
||||||
- [x] 记录内容:错误 + 修正前后 JRXML + prompt + 工具链 + 模型
|
|
||||||
|
|
||||||
### 5. 文件上传支持 ✓
|
|
||||||
- [x] `backend/file_parser.py` — 统一解析接口
|
|
||||||
- [x] 图片 → PIL 元信息 + PaddleOCR(可选安装后自动识别)
|
|
||||||
- [x] PDF → pdfplumber / PyMuPDF 文本提取
|
|
||||||
- [x] DOCX → python-docx 文本提取
|
|
||||||
- [x] 纯文本 (.txt/.csv/.json/.xml) → 直接读取
|
|
||||||
- [x] `can_use_vision()` — 根据模型名判断是否支持原生多模态
|
|
||||||
- [x] `app.py` — 侧边栏文件上传组件(多文件,可移除)
|
|
||||||
- [x] 上传文本自动注入下一条消息前缀
|
|
||||||
|
|
||||||
### 6. A4 图片模板识别 ✓
|
|
||||||
- [x] `backend/layout_analyzer.py` — 完整布局分析模块
|
|
||||||
- [x] A4 比例判定:exact(±3%) / close(±8%) / not_a4 三档
|
|
||||||
- [x] PaddleOCR 布局分析:逐元素提取坐标(x,y,w,h)、字号、文本
|
|
||||||
- [x] 行分组:Y 轴容差自动聚类
|
|
||||||
- [x] 结构化输出:`图片模板共 X 行,第 1 行有 Y 个元素,其中元素 a 长...高...字体...内容是...`
|
|
||||||
- [x] 检测门槛:≥2 个 OCR 元素 + A4 比例 → 标记为模板
|
|
||||||
- [x] `app.py` — 上传图片/PDF 时自动触发布局分析,替换为布局描述
|
|
||||||
|
|
||||||
### 7. 会话历史 JRXML 下载 ✓
|
|
||||||
- [x] `agent/state.py` — 新增 `jrxml_versions` 字段
|
|
||||||
- [x] `agent/nodes.py` — `finalize` 节点追加版本记录
|
|
||||||
- [x] `app.py` — 侧边栏"历史版本"折叠区,每版本独立下载按钮
|
|
||||||
|
|
||||||
### 8. 预览功能修复 ✓
|
|
||||||
- [x] 根因:`preview_report` 路由到 `save_session` → `validate` 触发不必要的验证修正循环
|
|
||||||
- [x] 修复:`route_after_save` — 预览/导出意图跳过验证直接 `finalize`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 阶段三:细节修复
|
|
||||||
|
|
||||||
### 9. Ctrl+C 修复 ✓
|
|
||||||
- [x] `app.py` — 注入 JS 拦截裸 `c` 键,保留 Ctrl+C 复制行为
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 阶段四:可观测性
|
|
||||||
|
|
||||||
### 10. 结构化日志系统 ✓
|
|
||||||
- [x] `backend/logger.py` — 集中日志配置模块
|
|
||||||
- [x] JSON 格式化(每行一条记录,便于 jq/pandas 分析)
|
|
||||||
- [x] 请求级 trace_id(contextvars 自动传播,一次用户请求贯穿全链路)
|
|
||||||
- [x] 独立 LLM 日志文件 `logs/llm.log`(记录完整 prompt 和 response)
|
|
||||||
- [x] 时区:UTC+8(中国时区)
|
|
||||||
- [x] 日志轮转(单文件 10MB,保留 5 备份)
|
|
||||||
- [x] `backend/llm.py` — `_LLMLoggingWrapper` 包装所有 LLM 后端
|
|
||||||
- [x] 记录每次 invoke/stream 的请求 prompt、响应内容、耗时、模型、调用来源
|
|
||||||
- [x] 异常时也记录完整 prompt
|
|
||||||
- [x] `agent/nodes.py` — `@log_node` 装饰器覆盖 18 个节点
|
|
||||||
- [x] 入口/出口/异常三个阶段的日志
|
|
||||||
- [x] 自动记录 state 关键字段摘要(session_id、intent、status、jrxml_length 等)
|
|
||||||
- [x] 每个节点耗时(duration_ms)
|
|
||||||
- [x] `agent/graph.py` — `@_log_route` 装饰器覆盖 9 个路由函数
|
|
||||||
- [x] 记录每次路由决策(来源 → 目标)
|
|
||||||
- [x] `app.py` — 用户交互日志
|
|
||||||
- [x] 收到用户输入(含上传文件信息)
|
|
||||||
- [x] 代理执行开始/完成(含最终 intent、status、jrxml_length)
|
|
||||||
- [x] 异常时记录错误详情
|
|
||||||
- [x] 会话新建/切换/删除操作日志
|
|
||||||
- [x] `backend/session.py` — 会话创建/删除日志
|
|
||||||
- [x] `backend/validation.py` — 验证完成/连接失败日志
|
|
||||||
- [x] `.env.example` — 新增 `LOG_DIR`、`LOG_LEVEL` 配置项
|
|
||||||
- [x] `.gitignore` — 新增 `logs/` 忽略规则
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 执行顺序建议
|
|
||||||
|
|
||||||
```
|
|
||||||
1. Prompt 拆分 ──► 2. 无效代码修复
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
3. 流式输出 + 节点平铺
|
|
||||||
│
|
|
||||||
┌─────────────┼─────────────┐
|
|
||||||
▼ ▼ ▼
|
|
||||||
4. 错误自增长 5. 文件上传 7. 下载历史
|
|
||||||
│ │
|
|
||||||
▼ ▼
|
|
||||||
6. A4 模板识别 8. 预览修复
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
9. Ctrl+C 修复
|
|
||||||
│
|
|
||||||
▼
|
|
||||||
10. 结构化日志系统
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 阶段五:OCR 与智能上传 (v3/v4) ✓
|
|
||||||
|
|
||||||
### 11. OCR 单据字段精确提取 ✓
|
|
||||||
- [x] `backend/ocr_extractor.py` — 4 策略优先级提取 (exact_match → kv_pair → regex → table_match)
|
|
||||||
- [x] PaddleOCR 首次识别后将原始结果(含所有文本元素 + bbox坐标)持久化
|
|
||||||
- [x] `_format_ocr_context()` — OCR 结果格式化为 LLM prompt 注入
|
|
||||||
- [x] `process_input` 节点在上传图片时自动触发 OCR 字段提取
|
|
||||||
- [x] OCR 结果持久化到会话文件
|
|
||||||
|
|
||||||
### 12. 多模态聊天输入 ✓
|
|
||||||
- [x] `app.py` — `st.chat_input` 替换为 `st_multimodal_chatinput`
|
|
||||||
- [x] 支持 Ctrl+V 粘贴文件 + 拖拽 + 文件按钮
|
|
||||||
- [x] `_process_uploaded_file()` — 提取共享文件处理逻辑(消除 ~70 行重复代码)
|
|
||||||
- [x] 剪贴板文件 base64 解码 + MIME type → 扩展名推断
|
|
||||||
|
|
||||||
### 13. 多格式文件支持 ✓
|
|
||||||
- [x] `backend/file_parser.py` — 新增 XLSX (openpyxl)、XLS (xlrd)、DOC (olefile)
|
|
||||||
- [x] 侧边栏上传器类型列表中新增 xlsx/xls/doc
|
|
||||||
- [x] 单元测试: `tests/test_file_parser_formats.py` (4 tests)
|
|
||||||
|
|
||||||
### 14. 批注检测 ✓
|
|
||||||
- [x] `backend/annotation_detector.py` — 圈选 + 箭头 + OCR 关联
|
|
||||||
- [x] 圆圈检测: 红色通道增强 → HoughCircles
|
|
||||||
- [x] 箭头检测: Canny → HoughLinesP → 线段聚类 → 端点方向判定
|
|
||||||
- [x] `format_annotation_context()` — 批注结果格式化为中文提示
|
|
||||||
- [x] `process_input` 节点在 OCR 提取后自动运行批注检测
|
|
||||||
- [x] `annotation_result` 字段持久化到 AgentState + 会话文件
|
|
||||||
- [x] 单元测试: `tests/test_annotation_detector.py` (7 tests)
|
|
||||||
|
|
||||||
### 15. OCR 上下文 LLM 注入 ✓
|
|
||||||
- [x] `prompts/modification.md` — 新增 `{ocr_context}` 占位符
|
|
||||||
- [x] `modify_jrxml` + `generate` 节点注入 OCR 上下文
|
|
||||||
- [x] OCR 上下文包含: 结构化字段、全部文本元素(含坐标)、批注检测结果
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 阶段六:分层精确生成 (v5) ✓
|
|
||||||
|
|
||||||
### 16. 布局 Schema 提取 ✓
|
|
||||||
- [x] `backend/layout_analyzer.py` — 新增 `extract_layout_schema()` 函数(+107 行)
|
|
||||||
- [x] X 坐标聚类列检测(avg_width * 0.5 阈值)
|
|
||||||
- [x] 区域分类:标题/表头/数据/表尾(启发式算法)
|
|
||||||
- [x] `schema_text` 紧凑中文描述(列定义 + 区域 + 宽度分类)
|
|
||||||
- [x] 空行/单行/双行边界情况处理
|
|
||||||
- [x] 单元测试: `tests/test_layered_generation.py::TestExtractLayoutSchema` (9 tests)
|
|
||||||
|
|
||||||
### 17. 3 阶段生成管线 ✓
|
|
||||||
- [x] Phase 1: `generate_skeleton` — 压缩布局 schema → 骨架 JRXML (`$F{field_N}` 占位)
|
|
||||||
- [x] Phase 2: `refine_layout` — 采样坐标(表头+首行数据+末行)→ 像素级位置精调
|
|
||||||
- [x] Phase 3: `map_fields` — OCR 字段名 → 替换占位符为真实字段名
|
|
||||||
- [x] 中间阶段跳过验证(仅最终 mapped 结果进入 validate 循环)
|
|
||||||
- [x] 流式输出支持(每阶段逐字生成)
|
|
||||||
- [x] 单元测试: `tests/test_layered_generation.py::TestIntegration` (4 tests)
|
|
||||||
|
|
||||||
### 18. 路由与状态 ✓
|
|
||||||
- [x] `agent/graph.py` — 新增 `route_after_retrieve()` 条件路由
|
|
||||||
- [x] `layout_schema.total_rows > 0` → 3 阶段,否则 → 原有 1-shot
|
|
||||||
- [x] `agent/state.py` — 新增 `layout_schema: dict` 和 `ocr_elements: list`
|
|
||||||
- [x] 会话持久化支持(`save_session_node` / `load_session_node`)
|
|
||||||
- [x] 文本请求和其他意图零行为变更
|
|
||||||
- [x] 单元测试: `tests/test_layered_generation.py::TestRouting` (4 tests)
|
|
||||||
|
|
||||||
### 19. Prompt 模板 ✓
|
|
||||||
- [x] `prompts/skeleton_generation.md` — 骨架生成 prompt
|
|
||||||
- [x] `prompts/refine_layout.md` — 布局精调 prompt
|
|
||||||
- [x] `prompts/field_mapping.md` — 字段映射 prompt
|
|
||||||
- [x] `prompts/loader.py` — 注册 3 个新模板(热重载)
|
|
||||||
|
|
||||||
### 20. UI 集成 ✓
|
|
||||||
- [x] `app.py` — 上传 A4 图片时自动调用 `extract_layout_schema()`
|
|
||||||
- [x] 新增节点标签:`🏗 生成骨架` / `📐 精调布局` / `🏷 映射字段`
|
|
||||||
- [x] 3 个新节点的详情渲染
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
阶段一立即可做,无外部依赖。阶段二是主要工作量。阶段三是收尾。阶段四是可观测性基础。阶段五是 OCR 智能增强和用户体验改进。阶段六解决 A4 报表图片 OCR 元素过多(数百个)导致 LLM prompt 超长的问题。
|
|
||||||
@@ -1,926 +0,0 @@
|
|||||||
"""Streamlit 多轮对话 UI,用于 JRXML 生成代理。
|
|
||||||
|
|
||||||
支持:
|
|
||||||
- 流式输出(LLM 逐字展示)
|
|
||||||
- 节点平铺展开(每个处理阶段独立展示)
|
|
||||||
- 完成后自动折叠节点区
|
|
||||||
- 过程总结卡片
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
|
|
||||||
|
|
||||||
try:
|
|
||||||
import torchvision
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
import base64
|
|
||||||
import tempfile
|
|
||||||
import time
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import streamlit as st
|
|
||||||
import streamlit.components.v1 as components
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
load_dotenv(override=True)
|
|
||||||
|
|
||||||
from agent.graph import build_graph, create_initial_state
|
|
||||||
from backend.session import (
|
|
||||||
create_session,
|
|
||||||
load_session,
|
|
||||||
delete_session,
|
|
||||||
list_all_sessions,
|
|
||||||
)
|
|
||||||
from backend.logger import get_logger, set_trace_id, generate_trace_id
|
|
||||||
|
|
||||||
_app_log = get_logger("app")
|
|
||||||
|
|
||||||
st.set_page_config(
|
|
||||||
page_title="JRXML 代理",
|
|
||||||
page_icon="📊",
|
|
||||||
layout="wide",
|
|
||||||
initial_sidebar_state="expanded",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Swallow the bare 'c' key (which clears Streamlit's cache) while leaving
# Ctrl+C / Cmd+C copy untouched. The listener is registered on the parent
# document with useCapture=true, presumably so it runs before Streamlit's own
# key handler.
#
# Fix: the original guarded `parent.activeElement` against null when reading
# `tagName` but then dereferenced `parent.activeElement.isContentEditable`
# unconditionally, which throws a TypeError when no element is focused.
# Both reads now go through the same null-checked reference.
st.html("""
<script>
(function() {
    const parent = window.parent.document;
    parent.addEventListener('keydown', function(e) {
        // 仅拦截裸 'c' 键(非 Ctrl/Cmd 组合)
        if (e.key === 'c' && !e.ctrlKey && !e.metaKey && !e.altKey) {
            const active = parent.activeElement;
            const tag = active ? active.tagName : '';
            const editable = active ? active.isContentEditable : false;
            if (tag !== 'INPUT' && tag !== 'TEXTAREA' && !editable) {
                e.stopImmediatePropagation();
                e.preventDefault();
            }
        }
    }, true);
})();
</script>
""")
|
|
||||||
|
|
||||||
# ---- 节点名称 → 中文标签 ----
|
|
||||||
NODE_LABELS = {
|
|
||||||
"load_session": "📂 加载会话",
|
|
||||||
"process_input": "📝 记录输入",
|
|
||||||
"manage_context": "🧠 管理上下文",
|
|
||||||
"save_state_snapshot": "💾 保存快照",
|
|
||||||
"classify_intent": "🔍 识别意图",
|
|
||||||
"retrieve": "📚 检索模板",
|
|
||||||
"generate": "⚙️ 生成 JRXML",
|
|
||||||
"modify_jrxml": "🔧 修改 JRXML",
|
|
||||||
"validate": "✅ 验证",
|
|
||||||
"explain_error": "🔎 分析错误",
|
|
||||||
"correct_jrxml": "🛠 自动修正",
|
|
||||||
"finalize": "📋 完成",
|
|
||||||
"handle_consult": "💬 咨询回答",
|
|
||||||
"handle_undo": "↩ 撤销操作",
|
|
||||||
"handle_reset": "🔄 重置会话",
|
|
||||||
"save_session": "💾 保存会话",
|
|
||||||
"generate_skeleton": "🏗 生成骨架",
|
|
||||||
"refine_layout": "📐 精调布局",
|
|
||||||
"map_fields": "🏷 映射字段",
|
|
||||||
}
|
|
||||||
|
|
||||||
INTENT_LABELS = {
|
|
||||||
"initial_generation": "新建报表",
|
|
||||||
"modify_report": "修改报表",
|
|
||||||
"preview_report": "预览报表",
|
|
||||||
"export_pdf": "导出 PDF",
|
|
||||||
"export_jrxml": "下载 JRXML",
|
|
||||||
"undo_modification": "撤销修改",
|
|
||||||
"consult_question": "咨询问题",
|
|
||||||
"reset_session": "重置会话",
|
|
||||||
}
|
|
||||||
|
|
||||||
SKIP_NODES = {"load_session", "process_input", "manage_context",
|
|
||||||
"save_state_snapshot", "save_session"}
|
|
||||||
|
|
||||||
|
|
||||||
def _render_jrxml(jrxml: str, max_lines: int = 30):
    """Render a JRXML document as an XML code block, truncated for display.

    Args:
        jrxml: JRXML text; surrounding whitespace is stripped before splitting.
        max_lines: Maximum number of lines to show. When the document is
            longer, a trailer line reporting the total line count is appended.
    """
    all_lines = jrxml.strip().split("\n")
    total = len(all_lines)
    shown = "\n".join(all_lines[:max_lines])
    if total > max_lines:
        # Tell the reader the snippet was cut and how long the full text is.
        shown = shown + f"\n... (共 {total} 行)"
    st.code(shown, language="xml")
|
|
||||||
|
|
||||||
|
|
||||||
# ---- URL 参数 ----
|
|
||||||
query_params = st.query_params
|
|
||||||
url_session_id = query_params.get("session_id", "")
|
|
||||||
|
|
||||||
# ---- 会话状态初始化 ----
|
|
||||||
if "messages" not in st.session_state:
|
|
||||||
st.session_state.messages = []
|
|
||||||
if "graph" not in st.session_state:
|
|
||||||
st.session_state.graph = build_graph()
|
|
||||||
if "pending_action" not in st.session_state:
|
|
||||||
st.session_state.pending_action = None
|
|
||||||
if "agent_state" not in st.session_state:
    # Resume the session named in the URL when it exists and carries a saved
    # agent state; in every other case start a brand-new persisted session.
    restored = load_session(url_session_id) if url_session_id else None
    if restored and restored.get("agent_state"):
        st.session_state.agent_state = restored["agent_state"]
        st.session_state.agent_state["session_id"] = url_session_id
    else:
        st.session_state.agent_state = create_initial_state()
        created = create_session(name="", agent_state=st.session_state.agent_state)
        # Copy the identity fields assigned by the session store back onto
        # the in-memory state.
        for field in ("session_id", "session_name", "created_at"):
            st.session_state.agent_state[field] = created[field]
|
|
||||||
|
|
||||||
current_session_id = st.session_state.agent_state.get("session_id", "")
|
|
||||||
|
|
||||||
|
|
||||||
def _node_detail(node_name: str, node_state: dict) -> str:
    """Return the short progress text shown next to a finished node ("" if none)."""
    if node_name == "classify_intent":
        intent = node_state.get("intent", "")
        return f"意图: {INTENT_LABELS.get(intent, intent)}"
    if node_name == "retrieve":
        ctx = node_state.get("retrieved_context", "")
        return f"找到 {len(ctx)} 字符参考模板" if ctx else "未匹配到模板"
    if node_name in ("generate", "modify_jrxml", "correct_jrxml",
                     "generate_skeleton", "refine_layout", "map_fields"):
        jrxml = node_state.get("current_jrxml", "")
        return f"生成 {len(jrxml)} 字符 JRXML"
    if node_name == "validate":
        if node_state.get("status", "") == "pass":
            return "验证通过 ✓"
        err = node_state.get("error_msg", "")
        return f"验证失败: {err[:80]}"
    if node_name == "explain_error":
        return node_state.get("natural_explanation", "")[:120]
    if node_name == "handle_consult":
        return node_state.get("consult_answer", "")[:150]
    return ""


def _render_ocr_fields(ocr_result: dict) -> None:
    """Render the collapsible OCR field-extraction summary, if there is one."""
    if not (ocr_result and ocr_result.get("ocr_available") and ocr_result.get("fields")):
        return
    with st.expander("🔍 OCR 单据字段提取结果", expanded=False):
        fields = ocr_result.get("fields", [])
        non_empty = [f for f in fields if f.get("field_value")]
        empty = [f for f in fields if not f.get("field_value")]
        if non_empty:
            st.markdown("**已提取字段:**")
            for f in non_empty:
                method = f.get("extraction_method", "")
                conf = f.get("confidence", 0)
                st.markdown(
                    f"- **{f['field_name']}**: `{f['field_value']}` "
                    f"(置信度: {conf:.0%}, 方法: {method})"
                )
        if empty:
            st.caption(
                f"未提取到值的字段: {', '.join(f['field_name'] for f in empty)}"
            )
        st.caption(
            f"共检测到 {ocr_result.get('total_elements', 0)} 个文本元素"
        )


def run_agent(user_input: str):
    """Run the agent graph once for *user_input* and render the outcome.

    While the graph streams, per-node progress and streamed LLM text are drawn
    into temporary placeholders; afterwards a summary card is rendered and the
    corresponding assistant messages are appended to
    ``st.session_state.messages``.

    Args:
        user_input: The prompt passed to the graph (stored as ``user_input``
            and, when a passing JRXML already exists, also as
            ``user_modification_request``).

    Returns:
        None. Any exception raised while streaming is logged, shown with
        ``st.error`` and swallowed.
    """
    trace_id = generate_trace_id()
    set_trace_id(trace_id)
    agent_state = st.session_state.agent_state
    session_id = agent_state.get("session_id", "")

    _app_log.info(
        "代理执行开始",
        extra={
            "session_id": session_id,
            "trace_id": trace_id,
            "user_input_preview": user_input[:200],
            "user_input_length": len(user_input),
            "has_jrxml": bool(agent_state.get("current_jrxml", "").strip()),
            "intent": agent_state.get("intent", ""),
        },
    )

    # A report that already validated means this turn is a modification request.
    if agent_state.get("current_jrxml") and agent_state.get("status") == "pass":
        agent_state["user_modification_request"] = user_input

    agent_state["user_input"] = user_input
    agent_state["retry_count"] = 0

    # ---- UI placeholders ----
    progress_placeholder = st.empty()   # live node progress
    streaming_placeholder = st.empty()  # streamed text
    summary_placeholder = st.empty()    # summary card

    # Initial status hint
    progress_placeholder.info("⏳ 正在分析您的需求...")

    executed_nodes: list[dict] = []
    stream_text = ""
    stream_active = False

    def _render_progress(nodes: list[dict]):
        """Draw the node list; the last entry is marked as the current one."""
        if not nodes:
            return
        lines = []
        for i, node in enumerate(nodes):
            icon = "●" if i == len(nodes) - 1 else "✓"
            detail = f" — {node['detail']}" if node.get("detail") else ""
            lines.append(f"{icon} {node['label']}{detail}")
        progress_placeholder.markdown("\n\n".join(lines))

    try:
        for mode, data in st.session_state.graph.stream(
            agent_state, stream_mode=["updates", "custom"]
        ):
            if mode == "updates":
                for node_name, node_state in data.items():
                    # Skipped nodes are not listed, so they must not attach a
                    # detail to whichever node happens to be last in the list
                    # (or index into an empty list).
                    if node_name in SKIP_NODES:
                        continue
                    # Guard against updates that are not a dict (e.g. None).
                    if not isinstance(node_state, dict):
                        node_state = {}
                    entry = {
                        "name": node_name,
                        "label": NODE_LABELS.get(node_name, node_name),
                    }
                    detail = _node_detail(node_name, node_state)
                    if detail:
                        entry["detail"] = detail
                    executed_nodes.append(entry)

                # Refresh the progress view as soon as nodes complete.
                _render_progress(executed_nodes)

            elif mode == "custom":
                if data.get("type") == "stream":
                    stream_text += data.get("text", "")
                    stream_active = True
                    streaming_placeholder.code(stream_text, language="xml")

    except Exception as e:
        progress_placeholder.empty()
        _app_log.error(
            f"代理执行异常: {e}",
            extra={"session_id": session_id, "error": str(e)},
        )
        st.error(f"工作流异常: {e}")
        return

    # ---- Clear the temporary placeholders ----
    progress_placeholder.empty()
    if stream_active:
        streaming_placeholder.empty()

    # ---- Summary card ----
    # Node updates only carry changed fields, so the full state is read from
    # agent_state. NOTE(review): this relies on the graph nodes mutating the
    # input dict in place, as the original comment states — confirm.
    final_state = agent_state
    if final_state:
        st.session_state.agent_state = final_state
        intent = final_state.get("intent", "")
        status = final_state.get("status", "")

        with summary_placeholder.container(border=True):
            if intent == "consult_question":
                answer = final_state.get("consult_answer", "")
                st.info(answer)
                st.session_state.messages.append({
                    "role": "assistant", "content": answer, "type": "consult",
                })

            elif intent in ("undo_modification", "reset_session"):
                st.success("操作已完成")

            elif intent in ("preview_report", "export_pdf", "export_jrxml"):
                jrxml = final_state.get("current_jrxml", "")
                if jrxml:
                    st.success("✅ 当前报表")
                    _render_jrxml(jrxml)
                    st.session_state.messages.append({
                        "role": "assistant", "content": jrxml, "type": "jrxml",
                    })
                else:
                    st.warning("⚠ 当前没有报表可以展示。")

            elif status == "pass":
                jrxml = final_state.get("current_jrxml", "")
                st.success("✅ JRXML 生成成功")
                st.markdown("**生成结果:**")
                _render_jrxml(jrxml)
                st.caption("您可以从侧边栏下载文件,或继续对话进行修改。")
                st.session_state.messages.append({
                    "role": "assistant", "content": jrxml, "type": "jrxml",
                })
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": "✅ JRXML 生成成功!您可以从侧边栏下载文件,或继续修改。",
                    "type": "success",
                })

            else:
                jrxml = final_state.get("current_jrxml", "")
                error_msg = final_state.get("error_msg", "未知错误")
                explanation = final_state.get("natural_explanation", "")
                retries = final_state.get("retry_count", 0)
                st.error(f"❌ 经过 {retries} 次重试后仍无法生成有效的 JRXML")
                st.markdown(f"**错误:** {error_msg}")
                if explanation:
                    st.markdown(f"**原因:** {explanation}")
                if jrxml:
                    with st.expander("查看当前 JRXML"):
                        _render_jrxml(jrxml, max_lines=80)
                st.caption("💡 下次输入修改需求时,系统会自动加载失败上下文继续修复。")
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": f"❌ 经过 {retries} 次重试后仍无法生成有效的 JRXML。\n\n**错误:** {error_msg}\n\n💡 请直接描述修改需求,系统会自动加载失败上下文。",
                    "type": "error_explanation",
                })

            # OCR field-extraction results
            _render_ocr_fields(agent_state.get("ocr_extraction_result", {}))
    else:
        st.error("未产生结果,请重试。")

    _app_log.info(
        "代理执行完成",
        extra={
            "session_id": session_id,
            "intent": final_state.get("intent", ""),
            "status": final_state.get("status", ""),
            "jrxml_length": len(final_state.get("current_jrxml", "")),
            "retry_count": final_state.get("retry_count", 0),
        },
    )
|
|
||||||
|
|
||||||
|
|
||||||
# ---- Sidebar ----
with st.sidebar:
    st.title("📊 JRXML 代理")
    st.markdown("通过自然语言生成 JasperReports 模板。")
    st.divider()

    # Session management
    st.markdown("### 会话管理")
    sessions = list_all_sessions()

    # Options are keyed by session id, not by display label: two sessions with
    # the same name and "updated" minute would otherwise share one label and
    # one of them could never be selected.
    session_labels = {}
    for s in sessions:
        sid = s["session_id"]
        name = s.get("session_name", sid)
        updated = s.get("updated_at", "")[:16]
        session_labels[sid] = f"{name} ({updated})"
    session_ids = list(session_labels)

    selected = st.selectbox(
        "切换会话",
        options=session_ids,
        index=session_ids.index(current_session_id) if current_session_id in session_labels else 0,
        format_func=lambda sid: session_labels.get(sid, sid),
        key="session_selector",
    )

    if selected and selected != current_session_id:
        new_sid = selected
        if st.session_state.get("_last_switched_to") == new_sid:
            # Guard against an endless rerun loop when the same session is
            # "switched to" repeatedly.
            st.session_state._last_switched_to = ""
        else:
            data = load_session(new_sid)
            if data and data.get("agent_state"):
                _app_log.info(
                    "切换会话",
                    extra={"from_session": current_session_id, "to_session": new_sid},
                )
                data["agent_state"]["session_id"] = new_sid
                st.session_state.agent_state = data["agent_state"]
                st.session_state.messages = []
                st.session_state._last_switched_to = new_sid
                st.rerun()

    col1, col2 = st.columns(2)
    with col1:
        if st.button("➕ 新建", use_container_width=True):
            new_data = create_session(name="", agent_state=create_initial_state())
            _app_log.info(
                "新建会话",
                extra={"session_id": new_data["session_id"]},
            )
            st.session_state.agent_state = create_initial_state()
            st.session_state.agent_state["session_id"] = new_data["session_id"]
            st.session_state.agent_state["session_name"] = new_data["session_name"]
            st.session_state.agent_state["created_at"] = new_data["created_at"]
            st.session_state.messages = []
            st.rerun()
    with col2:
        if st.button("🗑 删除", use_container_width=True):
            if current_session_id:
                _app_log.info(
                    "删除会话",
                    extra={"session_id": current_session_id},
                )
                delete_session(current_session_id)
                # Deleting the active session immediately starts a fresh one.
                st.session_state.agent_state = create_initial_state()
                new_data = create_session(name="", agent_state=st.session_state.agent_state)
                st.session_state.agent_state["session_id"] = new_data["session_id"]
                st.session_state.agent_state["session_name"] = new_data["session_name"]
                st.session_state.agent_state["created_at"] = new_data["created_at"]
                st.session_state.messages = []
                st.rerun()

    current_name = st.session_state.agent_state.get("session_name", "")
    st.caption(f"当前: {current_name} (`{current_session_id}`)")

    st.divider()
    st.markdown("### 快捷操作")

    has_jrxml = bool(st.session_state.agent_state.get("current_jrxml", "").strip())
    has_history = bool(st.session_state.agent_state.get("history_states", []))

    # Each shortcut sends a canned prompt through the normal agent pipeline.
    qcol1, qcol2 = st.columns(2)
    with qcol1:
        if st.button("👁 预览", use_container_width=True, disabled=not has_jrxml):
            with st.spinner("正在准备预览..."):
                run_agent("预览报表")
            st.rerun()
    with qcol2:
        if st.button("↩ 撤销", use_container_width=True, disabled=not has_history):
            with st.spinner("正在撤销..."):
                run_agent("撤销上一步修改")
            st.rerun()

    if st.button("🔄 重置会话", use_container_width=True):
        with st.spinner("正在重置..."):
            run_agent("重新来,清空当前报表")
        st.rerun()

    st.divider()
    st.markdown("### 配置")
    llm_backend = os.getenv("LLM_BACKEND", "cloud")
    llm_model = os.getenv("LLM_MODEL", os.getenv("LOCAL_LLM_MODEL", "gpt-4o"))
    st.caption(f"大语言模型: {llm_backend} / {llm_model}")
    st.caption(f"最大重试次数: {os.getenv('MAX_RETRY', '5')}")
    st.caption(f"验证服务: {os.getenv('VALIDATION_SERVICE_URL', 'http://localhost:8001/validate')}")

    st.divider()
    st.markdown("### 下载")

    final = st.session_state.agent_state.get("final_jrxml", "")
    versions = st.session_state.agent_state.get("jrxml_versions", [])

    if final:
        st.download_button(
            label="📥 下载最新 JRXML",
            data=final,
            file_name="report.jrxml",
            mime="application/xml",
            use_container_width=True,
        )

    if versions:
        with st.expander("📋 历史版本", expanded=False):
            # Newest first; the displayed version number counts down from len(versions).
            for i, v in enumerate(reversed(versions)):
                ts = v.get("ts", "")[:16]
                label = v.get("label", "版本")
                status = v.get("status", "")
                icon = "✅" if status == "pass" else "❌"
                dl_label = f"{icon} v{len(versions)-i} — {label} ({ts})"
                st.download_button(
                    label=dl_label,
                    data=v.get("jrxml", ""),
                    file_name=f"report_v{len(versions)-i}.jrxml",
                    mime="application/xml",
                    use_container_width=True,
                    key=f"dl_v{i}",
                )
|
|
||||||
|
|
||||||
# ---- Page title ----
st.title("📝 JRXML 报表生成器")
st.caption("用自然语言描述您的报表需求,我将逐步生成可用的 JRXML 模板。")

# ---- Chat history ----
# Message "type" -> Streamlit call used to display its content; any other
# type (including a missing one) falls back to plain markdown.
_MESSAGE_RENDERERS = {
    "error_explanation": st.warning,
    "success": st.success,
    "consult": st.info,
}

for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        msg_type = msg.get("type")
        if msg_type == "jrxml":
            # Generated JRXML is long, so it is shown collapsed.
            with st.expander("查看生成的 JRXML", expanded=False):
                st.code(msg["content"], language="xml")
        else:
            _MESSAGE_RENDERERS.get(msg_type, st.markdown)(msg["content"])
|
|
||||||
|
|
||||||
# ---- 统一聊天输入组件 ----
|
|
||||||
UNIFIED_CHAT_HTML = r"""
|
|
||||||
<!DOCTYPE html>
|
|
||||||
<html lang="zh-CN">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8">
|
|
||||||
<style>
|
|
||||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
|
||||||
body {
|
|
||||||
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
|
|
||||||
background: transparent;
|
|
||||||
padding: 4px 0;
|
|
||||||
}
|
|
||||||
.chat-container {
|
|
||||||
position: relative;
|
|
||||||
border: 1px solid #d1d5db;
|
|
||||||
border-radius: 12px;
|
|
||||||
padding: 8px 12px;
|
|
||||||
background: #ffffff;
|
|
||||||
transition: border-color 0.2s, box-shadow 0.2s;
|
|
||||||
}
|
|
||||||
.chat-container:focus-within {
|
|
||||||
border-color: #3b82f6;
|
|
||||||
box-shadow: 0 0 0 2px rgba(59,130,246,0.15);
|
|
||||||
}
|
|
||||||
.chat-container.drag-active {
|
|
||||||
border-color: #3b82f6;
|
|
||||||
background: rgba(59,130,246,0.04);
|
|
||||||
}
|
|
||||||
.file-chips {
|
|
||||||
display: flex;
|
|
||||||
flex-wrap: wrap;
|
|
||||||
gap: 6px;
|
|
||||||
margin-bottom: 6px;
|
|
||||||
}
|
|
||||||
.file-chips:empty { display: none; }
|
|
||||||
.file-chip {
|
|
||||||
display: inline-flex;
|
|
||||||
align-items: center;
|
|
||||||
gap: 4px;
|
|
||||||
padding: 2px 8px;
|
|
||||||
background: #f3f4f6;
|
|
||||||
border-radius: 14px;
|
|
||||||
font-size: 12px;
|
|
||||||
color: #374151;
|
|
||||||
max-width: 200px;
|
|
||||||
}
|
|
||||||
.file-chip .chip-icon { font-size: 13px; }
|
|
||||||
.file-chip .chip-name {
|
|
||||||
overflow: hidden;
|
|
||||||
text-overflow: ellipsis;
|
|
||||||
white-space: nowrap;
|
|
||||||
}
|
|
||||||
.file-chip .chip-remove {
|
|
||||||
border: none;
|
|
||||||
background: none;
|
|
||||||
cursor: pointer;
|
|
||||||
color: #9ca3af;
|
|
||||||
font-size: 14px;
|
|
||||||
line-height: 1;
|
|
||||||
padding: 0 2px;
|
|
||||||
flex-shrink: 0;
|
|
||||||
}
|
|
||||||
.file-chip .chip-remove:hover { color: #ef4444; }
|
|
||||||
.input-row {
|
|
||||||
display: flex;
|
|
||||||
align-items: flex-end;
|
|
||||||
gap: 8px;
|
|
||||||
}
|
|
||||||
.attach-btn {
|
|
||||||
border: none;
|
|
||||||
background: none;
|
|
||||||
cursor: pointer;
|
|
||||||
padding: 4px 6px;
|
|
||||||
font-size: 20px;
|
|
||||||
line-height: 1;
|
|
||||||
color: #6b7280;
|
|
||||||
border-radius: 6px;
|
|
||||||
transition: background 0.15s, color 0.15s;
|
|
||||||
flex-shrink: 0;
|
|
||||||
}
|
|
||||||
.attach-btn:hover { background: #f3f4f6; color: #374151; }
|
|
||||||
textarea {
|
|
||||||
flex: 1;
|
|
||||||
border: none;
|
|
||||||
outline: none;
|
|
||||||
resize: none;
|
|
||||||
font-size: 15px;
|
|
||||||
line-height: 1.5;
|
|
||||||
font-family: inherit;
|
|
||||||
color: #111827;
|
|
||||||
background: transparent;
|
|
||||||
padding: 4px 0;
|
|
||||||
min-height: 24px;
|
|
||||||
max-height: 120px;
|
|
||||||
overflow-y: auto;
|
|
||||||
}
|
|
||||||
textarea::placeholder { color: #9ca3af; }
|
|
||||||
.send-btn {
|
|
||||||
border: none;
|
|
||||||
cursor: pointer;
|
|
||||||
padding: 4px 10px;
|
|
||||||
font-size: 16px;
|
|
||||||
background: #e5e7eb;
|
|
||||||
color: #9ca3af;
|
|
||||||
border-radius: 8px;
|
|
||||||
transition: all 0.15s;
|
|
||||||
flex-shrink: 0;
|
|
||||||
}
|
|
||||||
.send-btn.active { background: #3b82f6; color: #fff; }
|
|
||||||
.send-btn.active:hover { background: #2563eb; }
|
|
||||||
.send-btn:disabled { opacity: 0.5; cursor: default; }
|
|
||||||
.error-toast {
|
|
||||||
position: fixed;
|
|
||||||
bottom: 12px;
|
|
||||||
left: 50%;
|
|
||||||
transform: translateX(-50%);
|
|
||||||
background: #ef4444;
|
|
||||||
color: #fff;
|
|
||||||
padding: 6px 16px;
|
|
||||||
border-radius: 8px;
|
|
||||||
font-size: 13px;
|
|
||||||
z-index: 9999;
|
|
||||||
animation: toastOut 2.5s forwards;
|
|
||||||
pointer-events: none;
|
|
||||||
}
|
|
||||||
@keyframes toastOut {
|
|
||||||
0%, 70% { opacity: 1; }
|
|
||||||
100% { opacity: 0; }
|
|
||||||
}
|
|
||||||
|
|
||||||
@media (prefers-color-scheme: dark) {
|
|
||||||
.chat-container { background: #1f2937; border-color: #374151; }
|
|
||||||
.chat-container:focus-within { border-color: #3b82f6; }
|
|
||||||
.file-chip { background: #374151; color: #e5e7eb; }
|
|
||||||
.file-chip .chip-remove { color: #6b7280; }
|
|
||||||
.attach-btn { color: #9ca3af; }
|
|
||||||
.attach-btn:hover { background: #374151; color: #e5e7eb; }
|
|
||||||
textarea { color: #f9fafb; }
|
|
||||||
textarea::placeholder { color: #6b7280; }
|
|
||||||
.send-btn { background: #374151; }
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<div class="chat-container" id="container">
|
|
||||||
<div class="file-chips" id="chips"></div>
|
|
||||||
<div class="input-row">
|
|
||||||
<button class="attach-btn" id="attachBtn" title="附加文件">📎</button>
|
|
||||||
<textarea id="textInput" placeholder="描述您的报表需求..." rows="1"></textarea>
|
|
||||||
<button class="send-btn" id="sendBtn" title="发送">➤</button>
|
|
||||||
</div>
|
|
||||||
<input type="file" id="fileInput" multiple hidden
|
|
||||||
accept=".png,.jpg,.jpeg,.bmp,.webp,.pdf,.docx,.xlsx,.xls,.doc,.txt">
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
const container = document.getElementById('container');
|
|
||||||
const chipsEl = document.getElementById('chips');
|
|
||||||
const textInput = document.getElementById('textInput');
|
|
||||||
const sendBtn = document.getElementById('sendBtn');
|
|
||||||
const attachBtn = document.getElementById('attachBtn');
|
|
||||||
const fileInput = document.getElementById('fileInput');
|
|
||||||
|
|
||||||
let attachedFiles = [];
|
|
||||||
const MAX_FILES = 10;
|
|
||||||
const MAX_SIZE = 20 * 1024 * 1024;
|
|
||||||
|
|
||||||
// Map a MIME type to the emoji shown on a file chip.
// The spreadsheet test must run before the generic "document" test: OOXML
// spreadsheet types (".../vnd.openxmlformats-officedocument.spreadsheetml.sheet")
// also contain the substring "document" and would otherwise get the Word icon.
function getIcon(type) {
    var mime = type || '';
    if (mime.startsWith('image/')) return '🖼';
    if (mime.includes('pdf')) return '📄';
    if (mime.includes('spreadsheet') || mime.includes('excel')) return '📊';
    // "application/msword" (.doc) has no "document" in it, so match it explicitly.
    if (mime.includes('document') || mime.includes('msword')) return '📝';
    return '📎';
}
|
|
||||||
|
|
||||||
// Highlight the send button whenever there is something to send:
// non-blank text or at least one attached file.
function updateSendBtn() {
    const hasText = textInput.value.trim().length > 0;
    const hasFiles = attachedFiles.length > 0;
    sendBtn.classList.toggle('active', hasText || hasFiles);
}
|
|
||||||
|
|
||||||
// Rebuild the chip row from attachedFiles and refresh the send button.
// Chips are assembled with DOM nodes and textContent rather than an innerHTML
// string, so a file name containing markup is displayed literally instead of
// being parsed as HTML.
function renderChips() {
    chipsEl.innerHTML = '';
    attachedFiles.forEach(function(f, i) {
        var chip = document.createElement('span');
        chip.className = 'file-chip';

        var icon = document.createElement('span');
        icon.className = 'chip-icon';
        icon.textContent = getIcon(f.type);

        // Long names are shortened to 14 characters plus "..".
        var label = document.createElement('span');
        label.className = 'chip-name';
        label.textContent = f.name.length > 16 ? f.name.slice(0, 14) + '..' : f.name;

        var remove = document.createElement('button');
        remove.className = 'chip-remove';
        remove.textContent = '×';
        remove.onclick = function() {
            attachedFiles.splice(i, 1);
            renderChips();  // also refreshes the send button
        };

        chip.appendChild(icon);
        chip.appendChild(label);
        chip.appendChild(remove);
        chipsEl.appendChild(chip);
    });
    updateSendBtn();
}
|
|
||||||
|
|
||||||
// Append files from a FileList/array to attachedFiles, enforcing the count
// limit (MAX_FILES), the per-file size limit (MAX_SIZE) and de-duplication by
// name + size, then redraw the chips.
function addFiles(fileList) {
    for (var i = 0; i < fileList.length; i++) {
        var file = fileList[i];
        // The paste handler passes getAsFile() results, which can be null.
        if (!file) continue;
        if (attachedFiles.length >= MAX_FILES) { showToast('最多附加 '+MAX_FILES+' 个文件'); break; }
        if (file.size > MAX_SIZE) { showToast(file.name+' 超过 20MB 限制'); continue; }
        // Stored entries are {name, type, file}; the size lives on entry.file.
        // (Comparing entry.size, which does not exist, never detected duplicates.)
        var isDuplicate = attachedFiles.some(function(f) {
            return f.name === file.name && f.file.size === file.size;
        });
        if (isDuplicate) continue;
        attachedFiles.push({name: file.name, type: file.type, file: file});
    }
    renderChips();
}
|
|
||||||
|
|
||||||
// Show a transient error toast; the element is removed after 2.6 s
// (the CSS fade-out animation on .error-toast lasts 2.5 s).
function showToast(msg) {
    const toast = Object.assign(document.createElement('div'), {
        className: 'error-toast',
        textContent: msg,
    });
    document.body.appendChild(toast);
    setTimeout(() => toast.remove(), 2600);
}
|
|
||||||
|
|
||||||
// Read a File as a data URL. Resolves with the data-URL string and rejects
// with the FileReader error event.
function readFile(file) {
    return new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onload = () => resolve(reader.result);
        reader.onerror = reject;
        reader.readAsDataURL(file);
    });
}
|
|
||||||
|
|
||||||
// Collect the text and the attached files (as data URLs), hand them to
// Streamlit, then reset the input.
// The send button is re-enabled in a finally block so that a failure while
// sending does not leave it disabled for good, and a call that arrives while
// a send is still in progress (e.g. Enter pressed twice) is ignored.
async function handleSend() {
    var text = textInput.value.trim();
    if (!text && attachedFiles.length === 0) return;
    if (sendBtn.disabled) return;

    sendBtn.disabled = true;
    try {
        var files = [];
        for (var i = 0; i < attachedFiles.length; i++) {
            var f = attachedFiles[i];
            try {
                var dataUrl = await readFile(f.file);
                files.push({name: f.name, type: f.type, data: dataUrl, size: f.file.size});
            } catch(e) {
                // An unreadable file is reported and left out; the rest is still sent.
                showToast(f.name+' 读取失败');
            }
        }

        Streamlit.setComponentValue({text: text, files: files});

        // Only reached when the value was handed over successfully, so the
        // user's input is kept if sending throws.
        textInput.value = '';
        attachedFiles = [];
        renderChips();
        textInput.style.height = 'auto';
    } finally {
        sendBtn.disabled = false;
    }
}
|
|
||||||
|
|
||||||
// ---- Event wiring ----

// The paperclip button opens the hidden file picker; the picker is cleared
// after each selection.
attachBtn.onclick = () => { fileInput.click(); };
fileInput.onchange = () => {
    addFiles(fileInput.files);
    fileInput.value = '';
};

// Auto-grow the textarea up to 120px.
textInput.oninput = () => {
    updateSendBtn();
    textInput.style.height = 'auto';
    textInput.style.height = Math.min(textInput.scrollHeight, 120) + 'px';
};

// Enter sends; Shift+Enter inserts a newline.
textInput.onkeydown = (e) => {
    if (e.key !== 'Enter' || e.shiftKey) return;
    e.preventDefault();
    handleSend();
};

sendBtn.onclick = handleSend;

// Files pasted from the clipboard are attached instead of inserted as text.
document.addEventListener('paste', (e) => {
    const items = e.clipboardData && e.clipboardData.items;
    if (!items) return;
    const files = Array.from(items)
        .filter((item) => item.kind === 'file')
        .map((item) => item.getAsFile());
    if (files.length) {
        e.preventDefault();
        addFiles(files);
    }
});

// Drag-and-drop onto the input container.
var containerDiv = document.getElementById('container');
containerDiv.addEventListener('dragover', (e) => {
    e.preventDefault();
    containerDiv.classList.add('drag-active');
});
containerDiv.addEventListener('dragleave', () => {
    containerDiv.classList.remove('drag-active');
});
containerDiv.addEventListener('drop', (e) => {
    e.preventDefault();
    containerDiv.classList.remove('drag-active');
    addFiles(e.dataTransfer.files);
});

updateSendBtn();
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Render the unified chat input and process whatever it submitted.
# NOTE(review): st.components.v1.html is documented as a static HTML embed;
# confirm that it really returns the value posted via
# Streamlit.setComponentValue, otherwise this branch never runs.
chat_result = components.html(UNIFIED_CHAT_HTML, height=180)

# MIME type -> temp-file suffix; unknown types fall back to the upload's own extension.
_MIME_TO_SUFFIX = {
    "image/png": ".png", "image/jpeg": ".jpg", "image/bmp": ".bmp",
    "image/webp": ".webp", "application/pdf": ".pdf",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
    "application/vnd.ms-excel": ".xls", "application/msword": ".doc",
    "text/plain": ".txt",
}
_IMAGE_SUFFIXES = (".png", ".jpg", ".jpeg", ".bmp", ".webp")

if chat_result and isinstance(chat_result, dict):
    prompt = chat_result.get("text", "")
    files = chat_result.get("files", [])

    from backend.file_parser import parse_file
    from backend.layout_analyzer import analyze_layout, extract_layout_schema

    file_texts = []
    attached_info = []
    first_image_path = None
    temp_paths = []

    # Everything that touches the temp files runs inside try/finally so they
    # are removed even when parsing or the agent run raises.
    try:
        for f in files:
            # Data URLs look like "data:<mime>;base64,<payload>". partition()
            # tolerates a value without a comma (empty payload) instead of
            # raising on tuple unpacking.
            _, _, b64data = f.get("data", "").partition(",")
            raw = base64.b64decode(b64data)

            mime = f.get("type", "")
            suffix = _MIME_TO_SUFFIX.get(mime, Path(f["name"]).suffix.lower())

            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                tmp.write(raw)
                tmp_path = tmp.name
            temp_paths.append(tmp_path)

            result = parse_file(tmp_path, suffix)
            text = result["text"]
            file_type = result["file_type"]

            # Layout analysis is only attempted for images whose parse
            # produced more than metadata.
            if suffix in _IMAGE_SUFFIXES and result.get("method") not in ("metadata_only", None):
                try:
                    layout = analyze_layout(tmp_path)
                    tt = layout.get("template_type", "unknown")
                    if tt == "full_a4":
                        text = layout["description"]
                        file_type = "a4_template"
                        schema = extract_layout_schema(layout)
                        st.session_state.agent_state["layout_schema"] = schema
                        st.session_state.agent_state["ocr_elements"] = layout.get("rows", [])
                    elif tt == "partial_rows":
                        file_type = "a4_partial"
                except Exception as e:
                    # Best effort: keep the plain parse result, but record the
                    # failure instead of hiding it.
                    _app_log.error(
                        f"版面分析失败: {e}",
                        extra={"session_id": current_session_id, "error": str(e)},
                    )

            file_texts.append(f"[附加文件: {f['name']} ({file_type})]\n{text}")
            attached_info.append({"name": f["name"], "type": file_type, "length": len(text)})

            if not first_image_path and file_type in ("image", "a4_template", "a4_partial"):
                first_image_path = tmp_path

        if file_texts:
            full_prompt = "\n\n".join(file_texts) + "\n\n---\n用户需求:\n" + prompt
        else:
            full_prompt = prompt

        # NOTE(review): this path points at a temp file that is deleted right
        # after the run, so the value left in agent_state is stale for later
        # turns — confirm nothing reads it afterwards.
        if first_image_path:
            st.session_state.agent_state["uploaded_file_path"] = first_image_path

        _app_log.info(
            "收到用户输入",
            extra={
                "session_id": current_session_id,
                "prompt_preview": prompt[:200],
                "prompt_length": len(prompt),
                "has_uploaded_files": bool(attached_info),
                "uploaded_files": attached_info,
            },
        )

        # The chat shows only what the user typed; extracted file text is
        # passed to the agent but not displayed.
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        run_agent(full_prompt)
    finally:
        for p in temp_paths:
            try:
                Path(p).unlink(missing_ok=True)
            except OSError:
                # A leftover temp file is not worth failing the request for.
                pass

    st.rerun()
|
|
||||||
@@ -0,0 +1,586 @@
|
|||||||
|
# 对话场景遍历文档
|
||||||
|
|
||||||
|
> 从 `agent/graph.py` 状态图递归遍历生成,覆盖所有用户意图 → 节点路径 → 退出条件。
|
||||||
|
> 最后更新: 2026-05-24
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 状态图总览
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────────────────────────────────────────┐
|
||||||
|
│ 修正循环 (最多 MAX_RETRY=5 次) │
|
||||||
|
│ ┌─────────┐ ┌──────────────┐ ┌────────┐ │
|
||||||
|
│ │ validate │───→│ explain_error│───→│correct │ │
|
||||||
|
│ └────┬─────┘ └──────────────┘ │_jrxml │ │
|
||||||
|
│ │ pass └───┬────┘ │
|
||||||
|
│ ▼ │ │
|
||||||
|
│ ┌─────────┐ retry<5 │
|
||||||
|
│ │finalize │◄────────────────────────────────┘ │
|
||||||
|
│ └─────────┘ retry>=5 │
|
||||||
|
└──────────────────────────────────────────────────┘
|
||||||
|
|
||||||
|
load_session ──→ process_input ──→ manage_context ──→ save_state_snapshot
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
classify_intent
|
||||||
|
│
|
||||||
|
┌────────────┬──────────┬────────┬───────────┼───────────┬──────────┐
|
||||||
|
▼ ▼ ▼ ▼ ▼ ▼ ▼
|
||||||
|
retrieve modify_jrxml save_ handle_ handle_ handle_ (兜底)
|
||||||
|
(新建报表) (修改报表) session consult undo reset
|
||||||
|
│ │ (预览) (咨询) (撤销) (重置)
|
||||||
|
┌────────┴────┐ │ │ │ │ │
|
||||||
|
▼ ▼ │ │ │ │ │
|
||||||
|
generate generate_ │ │ │ │ │
|
||||||
|
(1-shot) skeleton │ │ │ │ │
|
||||||
|
│ │ │ │ │ │ │
|
||||||
|
│ refine_ │ │ │ │ │
|
||||||
|
│ layout │ │ │ │ │
|
||||||
|
│ │ │ │ │ │ │
|
||||||
|
│ map_fields │ │ │ │ │
|
||||||
|
│ │ │ │ │ │ │
|
||||||
|
└──────┬──────┘ │ │ │ │ │
|
||||||
|
▼ ▼ ▼ ▼ ▼ ▼
|
||||||
|
save_session ◄─────┴──────────┘ finalize ◄─── finalize ◄── finalize
|
||||||
|
│ ▲
|
||||||
|
│ (预览/导出跳过验证) │
|
||||||
|
├───────────────────────────────────────┘
|
||||||
|
│ (其他意图走验证)
|
||||||
|
▼
|
||||||
|
validate ──→ explain_error ──→ correct_jrxml ──→ validate (循环)
|
||||||
|
│ pass │ retry>=MAX
|
||||||
|
▼ ▼
|
||||||
|
finalize ────────────────────────────────→ finalize
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 节点详细清单
|
||||||
|
|
||||||
|
每个节点标注了 **代码行号** (`agent/nodes.py` 或 `agent/graph.py`)、**前驱节点** (predecessors)、**后继节点** (successors)。
|
||||||
|
|
||||||
|
### 1. load_session — 加载会话
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:77` |
|
||||||
|
| 前驱 | (入口节点, graph entry_point) |
|
||||||
|
| 后继 | `process_input` (固定边 graph.py:198) |
|
||||||
|
| 功能 | 从 `sessions/{session_id}.json` 磁盘加载状态,注入 agent_state。不从磁盘覆盖 `session_id`。 |
|
||||||
|
| LLM | 否 |
|
||||||
|
|
||||||
|
### 2. process_input — 处理用户输入
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:98` |
|
||||||
|
| 前驱 | `load_session` (graph.py:198) |
|
||||||
|
| 后继 | `manage_context` (graph.py:199) |
|
||||||
|
| 功能 | 文件解析(PDF/DOCX/XLSX/图片/文本)→ OCR 字段提取 → 批注检测 → 模板 JRXML 解析。注入 `ocr_extraction_result`、`layout_schema`、`ocr_elements`、`uploaded_template_jrxml`。 |
|
||||||
|
| LLM | 否(OCR 用 PaddleOCR/EasyOCR) |
|
||||||
|
|
||||||
|
### 3. manage_context — 上下文管理
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:143` |
|
||||||
|
| 前驱 | `process_input` (graph.py:199) |
|
||||||
|
| 后继 | `save_state_snapshot` (graph.py:200) |
|
||||||
|
| 功能 | Token 计数 → 对话压缩(超限时 LLM 压缩为摘要)→ `compressed_history`。 |
|
||||||
|
| LLM | 是(压缩时调 LLM) |
|
||||||
|
|
||||||
|
### 4. save_state_snapshot — 状态快照
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:191` |
|
||||||
|
| 前驱 | `manage_context` (graph.py:200) |
|
||||||
|
| 后继 | `classify_intent` (graph.py:201) |
|
||||||
|
| 功能 | 深拷贝当前状态 → 推入 `history_states` 列表。最多保留 5 个快照。撤销时恢复到最新快照。 |
|
||||||
|
| LLM | 否 |
|
||||||
|
|
||||||
|
### 5. classify_intent — 意图分类
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:200` |
|
||||||
|
| 前驱 | `save_state_snapshot` (graph.py:201) |
|
||||||
|
| 后继 | 6 路条件分发 (graph.py:204-215) |
|
||||||
|
| 功能 | LLM 分类用户意图为 8 种之一。prompt: `prompts/intent_classify.md`。 |
|
||||||
|
| LLM | 是 |
|
||||||
|
| 路由函数 | `route_by_intent` (graph.py:67) |
|
||||||
|
|
||||||
|
**分类逻辑与路由目标**:
|
||||||
|
|
||||||
|
| 意图值 | 路由目标 | 说明 |
|
||||||
|
|--------|---------|------|
|
||||||
|
| `initial_generation` | → `retrieve` | 新建报表 |
|
||||||
|
| `modify_report` | → `modify_jrxml` | 修改现有报表 |
|
||||||
|
| `preview_report` | → `save_session` | 预览(跳过生成) |
|
||||||
|
| `export_pdf` | → `save_session` | 导出 PDF(跳过生成) |
|
||||||
|
| `export_jrxml` | → `save_session` | 下载 JRXML(跳过生成) |
|
||||||
|
| `consult_question` | → `handle_consult` | 咨询问答 |
|
||||||
|
| `undo_modification` | → `handle_undo` | 撤销 |
|
||||||
|
| `reset_session` | → `handle_reset` | 重置 |
|
||||||
|
| 未知/兜底 | 有 `current_jrxml` → `modify_jrxml`; 无 → `retrieve` | |
|
||||||
|
|
||||||
|
### 6. retrieve — RAG/知识库检索
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:442` |
|
||||||
|
| 前驱 | `classify_intent` (graph.py:204-215, intent=initial_generation) |
|
||||||
|
| 后继 | 条件分发: `generate_skeleton` 或 `generate` (graph.py:218-224) |
|
||||||
|
| 功能 | ① ErrorKB 检索历史修正案例 → ② KB 模板检索 → ③ KB 字段定义检索。注入 `retrieved_context`、`kb_template_jrxml`、`kb_fields`。 |
|
||||||
|
| LLM | 否(向量搜索 + 字段匹配) |
|
||||||
|
| 路由函数 | `route_after_retrieve` (graph.py:94) |
|
||||||
|
|
||||||
|
**路由逻辑** (`route_after_retrieve`, graph.py:94-99):
|
||||||
|
- `layout_schema.total_rows > 0` → `generate_skeleton` (3 阶段)
|
||||||
|
- 否则 → `generate` (1-shot)
|
||||||
|
|
||||||
|
### 7. generate — 1-shot 生成
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:578` |
|
||||||
|
| 前驱 | `retrieve` (graph.py:218-224, 无 layout_schema 时) |
|
||||||
|
| 后继 | `save_session` (graph.py:227-231) |
|
||||||
|
| 功能 | LLM 一次生成完整 JRXML。注入 OCR 上下文 + 模板上下文。流式输出。截断时续写(最多 3 轮)。 |
|
||||||
|
| LLM | 是 |
|
||||||
|
| Prompt | `prompts/initial_generation.md` |
|
||||||
|
|
||||||
|
### 8. generate_skeleton — 骨架生成(3 阶段-1)
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:657` |
|
||||||
|
| 前驱 | `retrieve` (graph.py:218-224, 有 layout_schema 时) |
|
||||||
|
| 后继 | `refine_layout` (固定边 graph.py:233) |
|
||||||
|
| 功能 | 压缩布局 schema → LLM 生成骨架 JRXML。字段用 `$F{field_N}` 占位。流式输出 + 续写。 |
|
||||||
|
| LLM | 是 |
|
||||||
|
| Prompt | `prompts/skeleton_generation.md` |
|
||||||
|
|
||||||
|
### 9. refine_layout — 坐标精调(3 阶段-2)
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:879` |
|
||||||
|
| 前驱 | `generate_skeleton` (graph.py:233) |
|
||||||
|
| 后继 | `map_fields` (固定边 graph.py:234) |
|
||||||
|
| 功能 | ① `decompose_jrxml()` 拆解为 header + bands → ② 每个 band 窗口化(>4000 字符切分)→ ③ 逐窗口 LLM 精调坐标 → ④ `reassemble_jrxml()` 重组 → ⑤ `validate_element_count()` 校验(>10% 回退)。header 完全不发给 LLM。 |
|
||||||
|
| LLM | 是(N 次,N = band 窗口数) |
|
||||||
|
| Prompt | `prompts/refine_layout.md` |
|
||||||
|
|
||||||
|
### 10. map_fields — 字段映射(3 阶段-3)
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:978` |
|
||||||
|
| 前驱 | `refine_layout` (graph.py:234) |
|
||||||
|
| 后继 | `save_session` (graph.py:235-239) |
|
||||||
|
| 功能 | 纯程序化正则替换 `$F{field_N}` → OCR 真实字段名。`_sanitize_field_name()` 净化非 ASCII 字符。零 LLM 调用。 |
|
||||||
|
| LLM | 否 |
|
||||||
|
|
||||||
|
### 11. modify_jrxml — 修改报表
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:1022` |
|
||||||
|
| 前驱 | `classify_intent` (graph.py:204-215, intent=modify_report) |
|
||||||
|
| 后继 | `save_session` (graph.py:242-246) |
|
||||||
|
| 功能 | 基于现有 JRXML + 用户修改描述 + OCR 上下文 + 模板上下文 → LLM 修改。流式输出 + 续写。空响应守卫。 |
|
||||||
|
| LLM | 是 |
|
||||||
|
| Prompt | `prompts/modification.md` |
|
||||||
|
|
||||||
|
### 12. handle_consult — 咨询解答
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:261` |
|
||||||
|
| 前驱 | `classify_intent` (graph.py:204-215, intent=consult_question) |
|
||||||
|
| 后继 | `finalize` (固定边 graph.py:280) |
|
||||||
|
| 功能 | LLM 回答 JasperReports 相关知识问题。回答写入 `conversation_history`。 |
|
||||||
|
| LLM | 是 |
|
||||||
|
| Prompt | `prompts/consult.md` |
|
||||||
|
|
||||||
|
### 13. handle_undo — 撤销
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:281` |
|
||||||
|
| 前驱 | `classify_intent` (graph.py:204-215, intent=undo_modification) |
|
||||||
|
| 后继 | `save_session` (graph.py:249-253) |
|
||||||
|
| 功能 | 从 `history_states` 弹出最近快照,恢复 `current_jrxml`、`conversation_history`、`status`。无快照时提示"无可撤销状态"。 |
|
||||||
|
| LLM | 否 |
|
||||||
|
|
||||||
|
### 14. handle_reset — 重置
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:309` |
|
||||||
|
| 前驱 | `classify_intent` (graph.py:204-215, intent=reset_session) |
|
||||||
|
| 后继 | `finalize` (固定边 graph.py:281) |
|
||||||
|
| 功能 | 清空所有状态到 `create_initial_state()` 默认值(保留 `session_id`、`session_name`)。 |
|
||||||
|
| LLM | 否 |
|
||||||
|
|
||||||
|
### 15. save_session — 保存会话
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:325` |
|
||||||
|
| 前驱 | `generate`、`map_fields`、`modify_jrxml`、`handle_undo`、`classify_intent`(预览/导出) |
|
||||||
|
| 后继 | 条件分发: `validate` 或 `finalize` (graph.py:256-260) |
|
||||||
|
| 功能 | 原子持久化会话 JSON (`tempfile + os.replace`)。序列化 `agent_state` 到 `sessions/{session_id}.json`。 |
|
||||||
|
| LLM | 否 |
|
||||||
|
| 路由函数 | `route_after_save` (graph.py:118) |
|
||||||
|
|
||||||
|
**路由逻辑** (`route_after_save`, graph.py:118-123):
|
||||||
|
- `intent in (preview_report, export_pdf, export_jrxml)` → `finalize` (跳过验证)
|
||||||
|
- 其他 → `validate`
|
||||||
|
|
||||||
|
### 16. validate — 验证
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:1235` |
|
||||||
|
| 前驱 | `save_session` (graph.py:256-260)、`correct_jrxml` (graph.py:273-277) |
|
||||||
|
| 后继 | 条件分发: `finalize` 或 `explain_error` (graph.py:263-267) |
|
||||||
|
| 功能 | ① 结构检查(字段引用一致性/SQL 存在/pageWidth/pageHeight/name)→ ② XSD 校验(可选)→ ③ 像素对比(有上传图片时 Java 渲染 JRXML→PNG + OpenCV SSIM)。 |
|
||||||
|
| LLM | 否 |
|
||||||
|
| 路由函数 | `route_after_validate` (graph.py:127) |
|
||||||
|
|
||||||
|
**路由逻辑** (`route_after_validate`, graph.py:127-131):
|
||||||
|
- `status == "pass"` → `finalize`
|
||||||
|
- `status == "fail"` → `explain_error`
|
||||||
|
|
||||||
|
### 17. explain_error — 错误解释
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:1310` |
|
||||||
|
| 前驱 | `validate` (graph.py:263-267, status=fail) |
|
||||||
|
| 后继 | `correct_jrxml` (graph.py:268-272) |
|
||||||
|
| 功能 | LLM 将编译错误翻译为自然语言解释。注入 `natural_explanation`。 |
|
||||||
|
| LLM | 是 |
|
||||||
|
| Prompt | `prompts/explain_error.md` |
|
||||||
|
|
||||||
|
### 18. correct_jrxml — 自动修正
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:1355` |
|
||||||
|
| 前驱 | `explain_error` (graph.py:268-272) |
|
||||||
|
| 后继 | 条件分发: `validate` 或 `finalize` (graph.py:273-277) |
|
||||||
|
| 功能 | 基于错误解释 + OCR 上下文 + 模板上下文 → LLM 修正 JRXML。注入 `last_error_case`。去重检测(输入输出相同则 `retry_count+=2`)。 |
|
||||||
|
| LLM | 是 |
|
||||||
|
| Prompt | `prompts/correction.md` |
|
||||||
|
| 路由函数 | `route_after_correct` (graph.py:139) |
|
||||||
|
|
||||||
|
**路由逻辑** (`route_after_correct`, graph.py:139-143):
|
||||||
|
- `retry_count >= MAX_RETRY` (默认5) → `finalize` (放弃修正)
|
||||||
|
- `retry_count < MAX_RETRY` → `validate` (重新验证)
|
||||||
|
|
||||||
|
### 19. finalize — 最终处理
|
||||||
|
|
||||||
|
| 属性 | 值 |
|
||||||
|
|------|-----|
|
||||||
|
| 代码位置 | `agent/nodes.py:1452` |
|
||||||
|
| 前驱 | `validate`(pass)、`correct_jrxml`(retry>=MAX)、`handle_consult`、`handle_reset`、`save_session`(预览/导出) |
|
||||||
|
| 后继 | `END` (graph.py:284) |
|
||||||
|
| 功能 | 记录 `jrxml_versions` 版本历史。验证通过时设置 `final_jrxml`。失败时记录 `pending_failure_context` 供下次输入自动注入。 |
|
||||||
|
| LLM | 否 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 路由函数索引
|
||||||
|
|
||||||
|
| # | 路由函数 | 代码位置 | 条件 | 分支 |
|
||||||
|
|---|---------|---------|------|------|
|
||||||
|
| R1 | `route_by_intent` | `graph.py:67` | `state.intent` | 6 路: retrieve / modify_jrxml / save_session / handle_consult / handle_undo / handle_reset |
|
||||||
|
| R2 | `route_after_retrieve` | `graph.py:94` | `layout_schema.total_rows > 0` | 2 路: generate_skeleton / generate |
|
||||||
|
| R3 | `route_after_generate` | `graph.py:103` | 无条件 | save_session |
|
||||||
|
| R4 | `route_after_modify` | `graph.py:108` | 无条件 | save_session |
|
||||||
|
| R5 | `route_after_undo` | `graph.py:113` | 无条件 | save_session |
|
||||||
|
| R6 | `route_after_save` | `graph.py:118` | `intent in (preview, export)` | 2 路: finalize / validate |
|
||||||
|
| R7 | `route_after_validate` | `graph.py:127` | `status == "pass"` | 2 路: finalize / explain_error |
|
||||||
|
| R8 | `route_after_explain` | `graph.py:133` | 无条件 | correct_jrxml |
|
||||||
|
| R9 | `route_after_correct` | `graph.py:139` | `retry_count >= MAX_RETRY` | 2 路: finalize / validate |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 完整对话场景
|
||||||
|
|
||||||
|
### 场景 1: 新建报表 — 1-shot(无布局 schema)
|
||||||
|
|
||||||
|
**触发**: `intent=initial_generation` + 无图片/无结构化布局
|
||||||
|
|
||||||
|
**用户示例**: "帮我生成一个销售报表"、"生成一个包含客户名和金额的表格"
|
||||||
|
|
||||||
|
```
|
||||||
|
load_session nodes.py:77
|
||||||
|
→ process_input nodes.py:98
|
||||||
|
→ manage_context nodes.py:143
|
||||||
|
→ save_state_snapshot nodes.py:191
|
||||||
|
→ classify_intent nodes.py:200 意图=initial_generation
|
||||||
|
└─ R1: route_by_intent graph.py:67 → retrieve
|
||||||
|
→ retrieve nodes.py:442
|
||||||
|
└─ R2: route_after_retrieve graph.py:94 layout_schema 为空 → generate
|
||||||
|
→ generate nodes.py:578 LLM 1-shot 生成完整 JRXML
|
||||||
|
└─ R3: route_after_generate graph.py:103 → save_session
|
||||||
|
→ save_session nodes.py:325 持久化到磁盘
|
||||||
|
└─ R6: route_after_save graph.py:118 intent=initial_generation → validate
|
||||||
|
→ validate nodes.py:1235 结构检查 + XSD + 像素对比
|
||||||
|
└─ R7: route_after_validate graph.py:127
|
||||||
|
├─ status=pass → finalize nodes.py:1452 → END ✓
|
||||||
|
└─ status=fail → explain_error nodes.py:1310
|
||||||
|
└─ R8 → correct_jrxml nodes.py:1355
|
||||||
|
└─ R9:
|
||||||
|
retry<5 → validate (循环)
|
||||||
|
retry>=5 → finalize → END ✗
|
||||||
|
```
|
||||||
|
|
||||||
|
**LLM 调用**: `classify_intent` + `generate` + 最多 5× (`explain_error` + `correct_jrxml`)
|
||||||
|
**退出好结局**: `final_jrxml` 有值, `status=pass`
|
||||||
|
**退出坏结局**: `pending_failure_context` 有值, `retry_count>=5`(去重检测命中时一次 `+2`,因此可能超过 5)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 2: 新建报表 — 3 阶段分层生成(有布局 schema)
|
||||||
|
|
||||||
|
**触发**: `intent=initial_generation` + 上传图片 + OCR 提取到 `layout_schema.total_rows > 0`
|
||||||
|
|
||||||
|
**用户示例**: 上传销售单图片 → "根据这个模板生成报表"
|
||||||
|
|
||||||
|
```
|
||||||
|
load_session nodes.py:77
|
||||||
|
→ process_input nodes.py:98 OCR提取 + 布局分析
|
||||||
|
→ manage_context nodes.py:143
|
||||||
|
→ save_state_snapshot nodes.py:191
|
||||||
|
→ classify_intent nodes.py:200 意图=initial_generation
|
||||||
|
└─ R1: route_by_intent graph.py:67 → retrieve
|
||||||
|
→ retrieve nodes.py:442 KB检索模板+字段
|
||||||
|
└─ R2: route_after_retrieve graph.py:94 layout_schema.total_rows>0 → generate_skeleton
|
||||||
|
→ generate_skeleton nodes.py:657 阶段1: 骨架JRXML ($F{field_N}占位)
|
||||||
|
→ refine_layout nodes.py:879 阶段2: Band级窗口化坐标精调
|
||||||
|
→ map_fields nodes.py:978 阶段3: 程序化字段映射
|
||||||
|
└─ R3: route_after_generate graph.py:103 → save_session
|
||||||
|
→ save_session nodes.py:325
|
||||||
|
└─ R6: route_after_save graph.py:118 → validate
|
||||||
|
→ validate nodes.py:1235
|
||||||
|
└─ R7 同场景1的验证循环
|
||||||
|
```
|
||||||
|
|
||||||
|
**内容保护**:
|
||||||
|
- `refine_layout`: header (field/param/queryString) 完全不发给 LLM
|
||||||
|
- `refine_layout`: 每窗口 ~4000 字符, LLM 无法重写整个报表
|
||||||
|
- `map_fields`: 纯正则替换, 零 LLM, 100% 确定性
|
||||||
|
- `validate_element_count()`: 每阶段后校验, >10% 变化回退
|
||||||
|
|
||||||
|
**LLM 调用**: `classify_intent` + `generate_skeleton` + N×`refine_layout`(N=band窗口数) + 可能的修正循环
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 3: 修改已有报表
|
||||||
|
|
||||||
|
**触发**: `intent=modify_report`(已有 `current_jrxml`)
|
||||||
|
|
||||||
|
**用户示例**: "把标题字体改大"、"在底部加合计行"、"删除第三列"
|
||||||
|
|
||||||
|
```
|
||||||
|
load_session → process_input → manage_context → save_state_snapshot
|
||||||
|
→ classify_intent nodes.py:200 意图=modify_report
|
||||||
|
└─ R1: route_by_intent graph.py:67 → modify_jrxml
|
||||||
|
→ modify_jrxml nodes.py:1022 LLM修改现有JRXML
|
||||||
|
└─ R4: route_after_modify graph.py:108 → save_session
|
||||||
|
→ save_session nodes.py:325
|
||||||
|
└─ R6: route_after_save graph.py:118 → validate
|
||||||
|
→ (同场景1的验证循环)
|
||||||
|
```
|
||||||
|
|
||||||
|
**特殊逻辑**: `correct_jrxml` 去重检测: 输入输出相同 → `retry_count += 2`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 4: 预览 / 导出(跳过验证)
|
||||||
|
|
||||||
|
**触发**: `intent in (preview_report, export_pdf, export_jrxml)`
|
||||||
|
|
||||||
|
**用户示例**: "预览报表"、"导出 PDF"、"下载 JRXML"
|
||||||
|
|
||||||
|
```
|
||||||
|
load_session → process_input → manage_context → save_state_snapshot
|
||||||
|
→ classify_intent nodes.py:200 意图=preview/export
|
||||||
|
└─ R1: route_by_intent graph.py:67 → save_session
|
||||||
|
→ save_session nodes.py:325
|
||||||
|
└─ R6: route_after_save graph.py:118 intent=preview/export → finalize
|
||||||
|
→ finalize nodes.py:1452 → END ✓
|
||||||
|
```
|
||||||
|
|
||||||
|
**LLM 调用**: 仅 `classify_intent` (1次)
|
||||||
|
**跳过**: generate / modify_jrxml / validate / correct_jrxml
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 5: 咨询问答
|
||||||
|
|
||||||
|
**触发**: `intent=consult_question`
|
||||||
|
|
||||||
|
**用户示例**: "JasperReports 里 $F 和 $P 有什么区别?"、"怎么设置页脚?"
|
||||||
|
|
||||||
|
```
|
||||||
|
load_session → process_input → manage_context → save_state_snapshot
|
||||||
|
→ classify_intent nodes.py:200 意图=consult_question
|
||||||
|
└─ R1: route_by_intent graph.py:67 → handle_consult
|
||||||
|
→ handle_consult nodes.py:261 LLM回答
|
||||||
|
→ finalize nodes.py:1452 → END ✓
|
||||||
|
```
|
||||||
|
|
||||||
|
**LLM 调用**: `classify_intent` + `handle_consult` (2次)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 6: 撤销
|
||||||
|
|
||||||
|
**触发**: `intent=undo_modification`
|
||||||
|
|
||||||
|
**用户示例**: "撤销"、"回退"、"恢复到修改前"
|
||||||
|
|
||||||
|
```
|
||||||
|
load_session → process_input → manage_context → save_state_snapshot
|
||||||
|
→ classify_intent nodes.py:200 意图=undo_modification
|
||||||
|
└─ R1: route_by_intent graph.py:67 → handle_undo
|
||||||
|
→ handle_undo nodes.py:281 恢复history_states快照
|
||||||
|
└─ R5: route_after_undo graph.py:113 → save_session
|
||||||
|
→ save_session nodes.py:325
|
||||||
|
└─ R6 → validate → (验证循环)
|
||||||
|
```
|
||||||
|
|
||||||
|
**LLM 调用**: 撤销本身仅 `classify_intent` (1次);若恢复后的 JRXML 验证失败,还会进入修正循环(每轮 `explain_error` + `correct_jrxml`)
|
||||||
|
**特殊**: 无快照时提示"无可撤销状态",不改变当前状态
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 7: 重置
|
||||||
|
|
||||||
|
**触发**: `intent=reset_session`
|
||||||
|
|
||||||
|
**用户示例**: "重置"、"重新开始"、"清空对话"
|
||||||
|
|
||||||
|
```
|
||||||
|
load_session → process_input → manage_context → save_state_snapshot
|
||||||
|
→ classify_intent nodes.py:200 意图=reset_session
|
||||||
|
└─ R1: route_by_intent graph.py:67 → handle_reset
|
||||||
|
→ handle_reset nodes.py:309 清空到初始状态
|
||||||
|
→ finalize nodes.py:1452 → END ✓
|
||||||
|
```
|
||||||
|
|
||||||
|
**LLM 调用**: 仅 `classify_intent` (1次)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 8: 兜底路由(未知意图)
|
||||||
|
|
||||||
|
**触发**: LLM 分类返回非标准意图
|
||||||
|
|
||||||
|
```
|
||||||
|
load_session → ... → classify_intent → [未知意图]
|
||||||
|
└─ R1 fallback (graph.py:87-90):
|
||||||
|
├─ state有current_jrxml → modify_jrxml (走修改路径, →场景3)
|
||||||
|
└─ state无current_jrxml → retrieve (走生成路径, →场景1/2)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## AgentState 字段速查
|
||||||
|
|
||||||
|
| 字段 | 类型 | 写节点 | 读节点 |
|
||||||
|
|------|------|--------|--------|
|
||||||
|
| `intent` | `str` | classify_intent | R1 route_by_intent, R6 route_after_save |
|
||||||
|
| `current_jrxml` | `str` | generate, generate_skeleton, refine_layout, map_fields, modify_jrxml, correct_jrxml, handle_undo | validate, save_session, finalize |
|
||||||
|
| `user_input` | `str` | process_input | classify_intent, manage_context |
|
||||||
|
| `user_modification_request` | `str` | process_input | modify_jrxml |
|
||||||
|
| `conversation_history` | `list` | process_input, finalize, handle_consult | manage_context, classify_intent, modify_jrxml |
|
||||||
|
| `full_conversation_history` | `list` | process_input | manage_context |
|
||||||
|
| `compressed_history` | `str` | manage_context | modify_jrxml, handle_consult |
|
||||||
|
| `retry_count` | `int` | correct_jrxml, validate | R9 route_after_correct |
|
||||||
|
| `status` | `str` | validate | R7 route_after_validate, finalize |
|
||||||
|
| `error_msg` | `str` | validate | explain_error, finalize |
|
||||||
|
| `natural_explanation` | `str` | explain_error | correct_jrxml |
|
||||||
|
| `final_jrxml` | `str` | finalize | (用户下载) |
|
||||||
|
| `jrxml_versions` | `list` | finalize | (前端展示) |
|
||||||
|
| `last_error_case` | `dict` | correct_jrxml | retrieve |
|
||||||
|
| `pending_failure_context` | `dict` | finalize | process_input (下次) |
|
||||||
|
| `layout_schema` | `dict` | process_input | R2 route_after_retrieve, generate_skeleton |
|
||||||
|
| `ocr_elements` | `list` | process_input | refine_layout, generate_skeleton |
|
||||||
|
| `ocr_extraction_result` | `dict` | process_input | map_fields, modify_jrxml, correct_jrxml |
|
||||||
|
| `history_states` | `list` | save_state_snapshot | handle_undo |
|
||||||
|
| `kb_id` | `str` | process_input | retrieve |
|
||||||
|
| `kb_fields` | `list` | retrieve | generate_skeleton |
|
||||||
|
| `uploaded_template_jrxml` | `str` | process_input | generate, generate_skeleton, modify_jrxml, correct_jrxml |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## LLM 调用统计
|
||||||
|
|
||||||
|
| 场景 | classify | 生成节点 | 窗口数 | 修正循环 | 总计(最小~最大) |
|
||||||
|
|------|----------|---------|--------|---------|----------------|
|
||||||
|
| 1-shot 生成 | 1 | generate=1 | - | 0~5×2 | 2 ~ 12 |
|
||||||
|
| 3 阶段生成 | 1 | skeleton+refine×N | N | 0~5×2 | 2+N ~ 12+N |
|
||||||
|
| 修改报表 | 1 | modify=1 | - | 0~5×2 | 2 ~ 12 |
|
||||||
|
| 预览/导出 | 1 | - | - | - | 1 |
|
||||||
|
| 咨询 | 1 | consult=1 | - | - | 2 |
|
||||||
|
| 撤销 | 1 | - | - | 0~5×2 | 1 ~ 11 |
|
||||||
|
| 重置 | 1 | - | - | - | 1 |
|
||||||
|
|
||||||
|
> N = band 窗口数。`销售单.jrxml` (73k 字符) 拆解后 N≈17。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 修正循环流程
|
||||||
|
|
||||||
|
```
|
||||||
|
validate ──fail──→ explain_error ──→ correct_jrxml
|
||||||
|
▲ │
|
||||||
|
│ retry_count < MAX_RETRY(5) │
|
||||||
|
└──────────────────────────────────────┘
|
||||||
|
│
|
||||||
|
│ retry_count >= 5
|
||||||
|
▼
|
||||||
|
finalize (放弃, 记录pending_failure_context)
|
||||||
|
```
|
||||||
|
|
||||||
|
**修正轮次推进**:
|
||||||
|
1. `validate` 失败 → `status="fail"`, `error_msg` 有值
|
||||||
|
2. `explain_error` → LLM 翻译错误 → `natural_explanation` 有值
|
||||||
|
3. `correct_jrxml` → LLM 修正 → `retry_count += 1`。去重检测:输入输出相同 → `retry_count += 2`
|
||||||
|
4. `route_after_correct` → retry<5 → 回到 `validate`; retry>=5 → `finalize`
|
||||||
|
|
||||||
|
**失败上下文** (`pending_failure_context`): 重试耗尽后记录 `{error_msg, bad_jrxml, retry_count, ts}`,下次用户消息时 `process_input` 自动注入到 prompt。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 边定义索引(graph.py 全部边)
|
||||||
|
|
||||||
|
| 类型 | 源节点 | 目标节点 | 位置 |
|
||||||
|
|------|--------|---------|------|
|
||||||
|
| 固定边 | load_session | process_input | line 198 |
|
||||||
|
| 固定边 | process_input | manage_context | line 199 |
|
||||||
|
| 固定边 | manage_context | save_state_snapshot | line 200 |
|
||||||
|
| 固定边 | save_state_snapshot | classify_intent | line 201 |
|
||||||
|
| 条件边 | classify_intent | retrieve / modify_jrxml / save_session / handle_consult / handle_undo / handle_reset | lines 204-215 |
|
||||||
|
| 条件边 | retrieve | generate / generate_skeleton | lines 218-224 |
|
||||||
|
| 条件边 | generate | save_session | lines 227-231 |
|
||||||
|
| 固定边 | generate_skeleton | refine_layout | line 233 |
|
||||||
|
| 固定边 | refine_layout | map_fields | line 234 |
|
||||||
|
| 条件边 | map_fields | save_session | lines 235-239 |
|
||||||
|
| 条件边 | modify_jrxml | save_session | lines 242-246 |
|
||||||
|
| 条件边 | handle_undo | save_session | lines 249-253 |
|
||||||
|
| 条件边 | save_session | validate / finalize | lines 256-260 |
|
||||||
|
| 条件边 | validate | finalize / explain_error | lines 263-267 |
|
||||||
|
| 条件边 | explain_error | correct_jrxml | lines 268-272 |
|
||||||
|
| 条件边 | correct_jrxml | validate / finalize | lines 273-277 |
|
||||||
|
| 固定边 | handle_consult | finalize | line 280 |
|
||||||
|
| 固定边 | handle_reset | finalize | line 281 |
|
||||||
|
| 固定边 | finalize | END | line 284 |
|
||||||
-114
@@ -1,114 +0,0 @@
|
|||||||
"""
|
|
||||||
JRXML Agent E2E test — Playwright automation.
|
|
||||||
Tests: page load, upload image, send message, wait for response.
|
|
||||||
Usage: python e2e_test.py
|
|
||||||
Prerequisites: Servers must be running (start.bat or with_server.py)
|
|
||||||
"""
|
|
||||||
import os, sys, time, base64, tempfile
|
|
||||||
from playwright.sync_api import sync_playwright
|
|
||||||
|
|
||||||
# Base URL of the frontend under test; run() navigates here first.
FRONTEND = "http://localhost:5173"
# Base URL of the backend API server (not referenced by run() itself).
API = "http://localhost:8000"
# Image uploaded in step 3 of run(); a hard-coded absolute Windows path.
TEST_IMAGE = r"D:\Idea Project\agent_jrxml\test_invoice_e2e.png"
|
|
||||||
|
|
||||||
def run():
    """Drive the frontend through one upload-and-generate round trip.

    Runs headless Chromium through six steps: load the page, create a
    session, upload ``TEST_IMAGE``, send a prompt, wait for an assistant
    message, and inspect the download button. Progress is printed to stdout
    and a screenshot is written at each stage. Browser console errors are
    collected throughout and summarised at the end.

    Raises:
        AssertionError: If the sidebar or the uploaded-file chip is not
            visible.
    """
    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=True)
        page = browser.new_page(viewport={"width": 1280, "height": 900})

        # Collect browser console messages of type "error" for the final report.
        console_errors = []

        def _record_console_error(msg):
            if msg.type == "error":
                console_errors.append(msg.text)

        page.on("console", _record_console_error)

        # Step 1: open the frontend and let the network settle.
        print("[1] Loading frontend...")
        page.goto(FRONTEND, timeout=15000)
        page.wait_for_load_state("networkidle")
        page.wait_for_timeout(1000)

        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_01_initial.png", full_page=True)
        print(" Screenshot: e2e_01_initial.png")

        assert page.locator(".sidebar").is_visible(), "Sidebar not visible"
        print(" OK: Sidebar visible")

        # Step 2: create a new session via the "+" icon button.
        print("[2] Creating new session...")
        page.locator(".btn-icon").click()
        page.wait_for_timeout(500)
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_02_session.png")
        print(" OK: New session created")

        # Step 3: attach the test image and confirm a file chip shows up.
        print("[3] Uploading test image...")
        file_input = page.locator('input[type="file"]')
        file_input.set_input_files(TEST_IMAGE)
        page.wait_for_timeout(500)
        file_chip = page.locator(".chip").first
        assert file_chip.is_visible(), "File chip not visible after upload"
        print(f" OK: File chip visible — {file_chip.inner_text()}")

        # Step 4: type the prompt and submit it.
        print('[4] Sending message...')
        prompt_box = page.locator("textarea").first
        prompt_box.fill("根据这张图片生成车历卡报表模板")
        page.wait_for_timeout(200)
        page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_03_input.png")

        page.locator('button[type="submit"]').click()
        print(" Sent!")

        # Step 5: wait (up to 3 minutes) for an assistant message, then
        # classify each assistant message by the keywords it contains.
        print("[5] Waiting for AI response...")
        try:
            page.wait_for_selector('.message.assistant', timeout=180000)
            page.wait_for_timeout(2000)
            page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_04_response.png", full_page=True)

            for reply in page.locator('.message.assistant').all():
                reply_text = reply.inner_text()
                if "成功" in reply_text:
                    print(f" ✅ SUCCESS: {reply_text[:100]}")
                elif "失败" in reply_text or "错误" in reply_text:
                    print(f" ❌ ERROR: {reply_text[:100]}")
                elif "JRXML" in reply_text:
                    print(f" 📄 JRXML generated ({len(reply_text)} chars)")
        except Exception as exc:
            # Any failure in this step is reported as a timeout and the
            # script carries on to the remaining checks.
            page.screenshot(path=r"D:\Idea Project\agent_jrxml\e2e_04_timeout.png", full_page=True)
            print(f" ⚠️ Timeout waiting for response: {exc}")

        # Step 6: report whether a download is offered.
        print("[6] Checking download button...")
        download_btn = page.locator(".btn-download").first
        if not download_btn.is_visible():
            print(" ⚠️ Download button not found")
        else:
            label = download_btn.inner_text()
            print(f" Download button: '{label}'")
            if "暂无" in label:
                print(" ⚠️ Download shows '暂无下载文件'")
            else:
                print(" ✅ Download link available!")

        # Summarise console errors (first five, each truncated to 200 chars).
        if not console_errors:
            print("\n ✅ No console errors")
        else:
            print(f"\n[!] Console errors ({len(console_errors)}):")
            for entry in console_errors[:5]:
                print(f" {entry[:200]}")

        print("\n=== E2E test complete ===")
        browser.close()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Make sure the directory run() writes its screenshots into exists.
    os.makedirs(r"D:\Idea Project\agent_jrxml", exist_ok=True)
    run()
|
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
"""初始化 JRXML 向量知识库。
|
|
||||||
|
|
||||||
rag_jrxml 子项目独立运行管线(分块→向量化→导入),本脚本仅用于预下载嵌入模型。
|
|
||||||
|
|
||||||
用法:
|
|
||||||
python scripts/init_kb.py --download-model # 预下载嵌入模型
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
# Put the parent of this script's directory first on sys.path.
sys.path.insert(0, str(Path(__file__).parent.parent))
# Load environment variables (e.g. RAG_EMBED_MODEL) from a .env file if present.
load_dotenv()
|
|
||||||
|
|
||||||
|
|
||||||
def download_model():
    """Pre-download the embedding model by loading it once.

    The model name comes from the ``RAG_EMBED_MODEL`` environment variable,
    defaulting to ``Qwen/Qwen3-Embedding-0.6B``. Progress is printed to
    stdout.
    """
    model_name = os.environ.get("RAG_EMBED_MODEL", "Qwen/Qwen3-Embedding-0.6B")
    for banner_line in (
        f"正在下载嵌入模型: {model_name}",
        "如遇网络超时,可设置环境变量 HF_ENDPOINT=https://hf-mirror.com 使用镜像",
        "",
    ):
        print(banner_line)

    # Imported here, after the banner, so the hint is shown before the
    # library is loaded.
    from sentence_transformers import SentenceTransformer

    # Instantiate the model and run one encode call on a short test string.
    embedder = SentenceTransformer(model_name)
    embedder.encode("测试下载")
    print(f"嵌入模型下载完成: {model_name}")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Parse the command line and either download the model or print usage.

    With ``--download-model`` the embedding model is pre-downloaded via
    ``download_model()``. Without it, usage help and the knowledge-base
    build commands are printed.
    """
    cli = argparse.ArgumentParser(description="JRXML 向量知识库工具")
    cli.add_argument(
        "--download-model",
        action="store_true",
        help="预下载嵌入模型到本地",
    )
    options = cli.parse_args()

    if options.download_model:
        download_model()
        return

    # No action requested: show usage and the knowledge-base build commands.
    usage_lines = (
        "用法: python scripts/init_kb.py --download-model",
        "",
        "知识库构建请在 rag/ 子项目中独立运行:",
        " cd rag",
        " python batch_chunker.py jrxml_source",
        " python embed_chunks.py",
        " python import_to_chroma.py",
    )
    for usage_line in usage_lines:
        print(usage_line)
|
|
||||||
|
|
||||||
|
|
||||||
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
||||||
@@ -1,144 +0,0 @@
|
|||||||
"""
|
|
||||||
agent_jrxml 统一启动/停止脚本
|
|
||||||
用法: python start.py [--frontend]
|
|
||||||
"""
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import signal
|
|
||||||
import os
|
|
||||||
import socket
|
|
||||||
|
|
||||||
# Registry of child services as (port, Popen) tuples; start() appends to it
# and cleanup() iterates over it.
PROCESSES = []
|
|
||||||
|
|
||||||
def _listening_pids(netstat_output, port):
    """Return the PIDs (strings, first-seen order, no duplicates) listening on *port*."""
    wanted = str(port)
    pids = []
    for row in netstat_output.splitlines():
        fields = row.split()
        # Windows `netstat -ano` TCP rows are expected to look like:
        #   Proto  Local-Address  Foreign-Address  State  PID
        if len(fields) < 5 or fields[-2] != 'LISTENING':
            continue
        # Compare the exact port of the *local* address, so that port 8000
        # neither matches ":80001" nor a foreign-address column.
        if fields[1].rpartition(':')[2] != wanted:
            continue
        pid = fields[-1]
        # The same PID usually shows up once per address family (IPv4/IPv6).
        if pid.isdigit() and pid not in pids:
            pids.append(pid)
    return pids


def kill_port(port):
    """Kill every process that is listening on the given TCP port.

    Best-effort helper built on the Windows tools ``netstat`` and
    ``taskkill``. If either tool is missing, fails to start, or times out,
    the failure is ignored and the affected step is skipped.

    Args:
        port: TCP port number whose listeners should be terminated.

    Returns:
        The number of distinct PIDs for which ``taskkill`` reported success
        (0 when nothing was listening or the tools were unavailable).
    """
    try:
        listing = subprocess.run(
            ['netstat', '-ano'],
            capture_output=True,
            text=True,
            errors='replace',  # never fail on oddly encoded console output
            timeout=10,
        )
    except (OSError, subprocess.SubprocessError):
        # netstat unavailable or hung: nothing we can clean up.
        return 0

    killed = []
    for pid in _listening_pids(listing.stdout, port):
        try:
            outcome = subprocess.run(
                ['taskkill', '/F', '/PID', pid],
                capture_output=True,
                timeout=5,
            )
        except (OSError, subprocess.SubprocessError):
            continue
        # Only report PIDs that taskkill actually managed to terminate.
        if outcome.returncode == 0:
            killed.append(pid)

    if killed:
        print(f"[清理] 端口 {port} 已清理 {len(killed)} 个进程: {', '.join(killed)}")
    return len(killed)
|
|
||||||
|
|
||||||
|
|
||||||
def wait_port(port, timeout=30):
    """Wait until something accepts TCP connections on 127.0.0.1:*port*.

    A connection is attempted roughly every 0.5 seconds until one succeeds
    or *timeout* seconds have elapsed. At least one attempt is always made,
    even when *timeout* is 0.

    Args:
        port: TCP port on the loopback interface to probe.
        timeout: Maximum number of seconds to keep trying (int or float).

    Returns:
        True as soon as a connection succeeds, False if the deadline passes
        without a successful connection.
    """
    # A monotonic deadline keeps the total wait bounded by `timeout` even
    # when individual connection attempts block for their full 1 s timeout.
    deadline = time.monotonic() + timeout
    while True:
        try:
            # The context manager closes the probe socket on success; on
            # failure create_connection closes it before raising.
            with socket.create_connection(('127.0.0.1', port), timeout=1):
                return True
        except OSError:
            # Refused / timed out: the service is not up yet.
            pass

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        time.sleep(min(0.5, remaining))
|
|
||||||
|
|
||||||
|
|
||||||
def start(port, module, cwd=None):
    """Launch one uvicorn server as a child process.

    Args:
        port: Port the server binds to (host is ``0.0.0.0``).
        module: Application import string handed to ``uvicorn.run``.
        cwd: Working directory for the child process, or None to inherit.

    Returns:
        The ``subprocess.Popen`` handle of the child. The ``(port, handle)``
        pair is also appended to the module-level ``PROCESSES`` list.
    """
    # The child runs a one-line Python program under the current interpreter.
    launch_snippet = (
        f"import uvicorn; uvicorn.run('{module}', host='0.0.0.0', port={port}, reload=False)"
    )
    child = subprocess.Popen([sys.executable, '-c', launch_snippet], cwd=cwd)
    PROCESSES.append((port, child))
    print(f"[启动] {module} -> :{port} (PID: {child.pid})")
    return child
|
|
||||||
|
|
||||||
|
|
||||||
def cleanup():
    """Stop every child process recorded in ``PROCESSES``.

    All children are first asked to terminate and then share a grace period
    of about two seconds to exit. Any child still running after that is
    killed. Finally ``kill_port`` is run for each recorded port, and the
    ``PROCESSES`` list is emptied so that a second call is a no-op.
    """
    print("\n[清理] 正在停止所有服务...")

    # Phase 1: polite termination request for every child.
    for _port, proc in PROCESSES:
        try:
            proc.terminate()
        except OSError:
            # Process already gone or not accessible; nothing more to do here.
            pass

    # Phase 2: wait (up to ~2 s in total) and force-kill stragglers.
    deadline = time.monotonic() + 2
    for port, proc in PROCESSES:
        try:
            proc.wait(timeout=max(0.0, deadline - time.monotonic()))
        except subprocess.TimeoutExpired:
            try:
                proc.kill()
                proc.wait(timeout=2)  # reap the child so it does not linger
            except (OSError, subprocess.TimeoutExpired):
                pass
        except OSError:
            pass
        # Final sweep by port. NOTE(review): presumably needed because a child
        # started through a shell may leave its own children listening.
        kill_port(port)

    PROCESSES.clear()
    print("[清理] 完成")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Start the services, wait for them, and stop them on exit.

    Steps:
      1. Kill whatever is still listening on ports 8000 and 8001 (and 5173
         when ``--frontend`` is on the command line).
      2. Start ``api_server:app`` on 8000 and ``validation_service.main:app``
         on 8001, plus ``npm run dev`` in ``frontend/`` when requested.
      3. Wait for each recorded port to accept connections; exit with
         status 1 if any of them does not come up.
      4. Block until Ctrl+C.

    Everything after the first child is spawned runs inside ``try/finally``,
    so ``cleanup()`` is called on normal exit, on startup failure, and on
    Ctrl+C at any stage.
    """
    frontend = '--frontend' in sys.argv

    # 1. Remove leftover processes from a previous run.
    print("=" * 50)
    print("agent_jrxml 启动脚本")
    print("=" * 50)
    kill_port(8000)
    kill_port(8001)
    if frontend:
        kill_port(5173)

    # 2. Start the services, locating the project from this script's path.
    project = os.path.dirname(os.path.abspath(__file__))
    try:
        start(8000, 'api_server:app', cwd=project)
        start(8001, 'validation_service.main:app', cwd=project)

        if frontend:
            # The frontend is started through npm. With shell=True the
            # command is passed as one string: a list would hand only "npm"
            # to the shell on POSIX, while on Windows both forms are equal.
            frontend_dir = os.path.join(project, 'frontend')
            proc = subprocess.Popen('npm run dev', cwd=frontend_dir, shell=True)
            PROCESSES.append((5173, proc))
            print("[启动] frontend (Vite) -> :5173")

        # 3. Wait until every service accepts connections.
        print("\n[等待] 等待服务就绪...")
        ok = True
        for port, _ in PROCESSES:
            if wait_port(port):
                print(f" :{port} ✓")
            else:
                print(f" :{port} ✗ 超时!")
                ok = False

        if not ok:
            print("\n[错误] 部分服务启动失败")
            # SystemExit propagates through the finally block below, which
            # performs the cleanup exactly once.
            sys.exit(1)

        print(f"\n{'='*50}")
        print("服务就绪:")
        print(" API: http://localhost:8000/docs")
        print(" 验证: http://localhost:8001/health")
        if frontend:
            print(" 前端: http://localhost:5173")
        print("\n按 Ctrl+C 停止所有服务")
        print(f"{'='*50}")

        # 4. Idle until the user interrupts.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        cleanup()


if __name__ == '__main__':
    main()
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
# Ad-hoc check for backend.jrxml_reorder.normalize_jrxml: feed it a report
# whose top-level children are listed in a scrambled order, print the child
# tag names before and after, then post the result to the validation
# endpoint on localhost:8001.
import io
import sys

# Re-wrap stdout as UTF-8 before anything is printed.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
import xml.etree.ElementTree as ET
from backend.jrxml_reorder import normalize_jrxml


def _child_tags(xml_text):
    """Return the namespace-stripped tag names of the root's direct children."""
    document_root = ET.fromstring(xml_text)
    return [child.tag.split('}')[-1] for child in document_root]


unordered_jrxml = '''<?xml version="1.0" encoding="UTF-8"?>
<jasperReport xmlns="http://jasperreports.sourceforge.net/jasperreports" name="Test" pageWidth="595" pageHeight="842">
<queryString><![CDATA[SELECT 1]]></queryString>
<style name="s1"/>
<field name="f1" class="java.lang.String"/>
<property name="p1" value="v1"/>
<parameter name="param1" class="java.lang.String"/>
<title><band height="50"><textField><reportElement x="0" y="0" width="100" height="20"/></textField></band></title>
<detail><band height="30"><staticText><reportElement x="0" y="0" width="100" height="20"/><text>Hi</text></staticText></band></detail>
</jasperReport>'''

normalized_jrxml = normalize_jrxml(unordered_jrxml)

print('=== Before ===')
print('Children:', _child_tags(unordered_jrxml))

print('\n=== After ===')
print('Children:', _child_tags(normalized_jrxml))

# Validate the normalized document against the running validation service.
import requests

response = requests.post(
    'http://localhost:8001/validate',
    json={'jrxml': normalized_jrxml},
    timeout=10,
)
print(f'\nValidation: {response.json()}')
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
@echo off
rem Launches the JRXML validation service with uvicorn on port 8001
rem (auto-reload enabled) and keeps the window open afterwards.
echo 正在启动 JRXML 验证服务...
echo.
rem %~dp0 is the directory containing this script. The app is addressed as
rem the package path "validation_service.main", so the working directory must
rem be the PARENT of the validation_service folder for the import to resolve.
rem NOTE(review): assumes this file lives in validation_service\ - confirm.
cd /d "%~dp0.."
python -m uvicorn validation_service.main:app --host 0.0.0.0 --port 8001 --reload
pause
|
|
||||||
Reference in New Issue
Block a user