feat: LangGraph工作流核心 — Agent状态/节点/图 + 验证服务 + 知识库

agent/
  state.py: AgentState TypedDict(20字段含意图/压缩/会话/撤销)
  nodes.py: 17个节点函数(生成/修改/验证/纠错/意图分类/压缩/撤销/重置)
  graph.py: 16节点状态图,8意图路由分发

验证服务 validation_service/
  main.py: FastAPI服务,lxml XSD验证 + 结构化检查(字段引用/SQL/尺寸)

数据 data/
  sample_templates/: 4个JRXML示例模板
  corrections/: 3个错误修正案例

脚本 scripts/
  init_kb.py: Chroma知识库初始化
This commit is contained in:
2026-05-14 23:21:10 +08:00
parent 21a5fdf930
commit 4b43c5d3e4
14 changed files with 1375 additions and 0 deletions
View File
+225
View File
@@ -0,0 +1,225 @@
"""LangGraph JRXML 生成代理的状态图定义。"""
import os
from typing import Literal
from dotenv import load_dotenv
from langgraph.graph import StateGraph, END
from agent.state import AgentState
from agent.nodes import (
load_session_node,
process_input,
manage_context,
save_state_snapshot,
classify_intent,
retrieve,
generate,
modify_jrxml,
handle_consult,
handle_undo,
handle_reset,
save_session_node,
validate,
explain_error,
correct_jrxml,
finalize,
)
# Load settings from a .env file (python-dotenv) before reading the environment.
load_dotenv()
# Retry ceiling consulted by route_after_correct; taken from the MAX_RETRY
# environment variable and defaulting to 3.
MAX_RETRY = int(os.getenv("MAX_RETRY", "3"))
# ============================================================
# Routing functions
# ============================================================
def route_by_intent(state: AgentState) -> Literal[
    "retrieve", "modify_jrxml", "save_session",
    "handle_consult", "handle_undo", "handle_reset"
]:
    """Choose the node that follows ``classify_intent``.

    Reads ``state["intent"]`` (treated as ``"initial_generation"`` when the
    key is absent) and maps it to a node name. Any value that is not one of
    the eight known intents falls back to ``"modify_jrxml"`` when the state
    holds a non-empty ``current_jrxml`` and to ``"retrieve"`` otherwise.
    """
    intent = state.get("intent", "initial_generation")
    # Each entry: (intents handled, destination node), checked in order.
    routes = (
        (("initial_generation",), "retrieve"),
        (("modify_report",), "modify_jrxml"),
        (("preview_report", "export_pdf", "export_jrxml"), "save_session"),
        (("consult_question",), "handle_consult"),
        (("undo_modification",), "handle_undo"),
        (("reset_session",), "handle_reset"),
    )
    for handled, destination in routes:
        if intent in handled:
            return destination
    # Fallback: decide by whether a report already exists.
    return "modify_jrxml" if state.get("current_jrxml") else "retrieve"
def route_after_generate(state: AgentState) -> Literal["save_session"]:
    """Route unconditionally from ``generate`` to ``save_session``."""
    next_node = "save_session"
    return next_node
def route_after_modify(state: AgentState) -> Literal["save_session"]:
    """Route unconditionally from ``modify_jrxml`` to ``save_session``."""
    next_node = "save_session"
    return next_node
def route_after_undo(state: AgentState) -> Literal["save_session"]:
    """Route unconditionally from ``handle_undo`` to ``save_session``."""
    next_node = "save_session"
    return next_node
def route_after_save(state: AgentState) -> Literal["validate"]:
    """Route unconditionally from ``save_session`` to ``validate``."""
    next_node = "validate"
    return next_node
def route_after_validate(state: AgentState) -> Literal["finalize", "explain_error"]:
    """Finish when validation passed; otherwise start the correction loop.

    Returns ``"finalize"`` only when ``state["status"]`` equals ``"pass"``;
    every other value (including a missing key) yields ``"explain_error"``.
    """
    passed = state.get("status") == "pass"
    return "finalize" if passed else "explain_error"
def route_after_explain(state: AgentState) -> Literal["correct_jrxml"]:
    """Route unconditionally from ``explain_error`` to ``correct_jrxml``."""
    next_node = "correct_jrxml"
    return next_node
def route_after_correct(state: AgentState) -> Literal["validate", "finalize"]:
    """Re-validate a corrected report unless the retry budget is used up.

    Compares ``state["retry_count"]`` (0 when absent) with the module-level
    ``MAX_RETRY``: once the count reaches the limit the graph goes to
    ``"finalize"``, otherwise back to ``"validate"``.
    """
    attempts = state.get("retry_count", 0)
    budget_exhausted = attempts >= MAX_RETRY
    return "finalize" if budget_exhausted else "validate"
# ============================================================
# Graph construction
# ============================================================
def build_graph() -> StateGraph:
    """Assemble the JRXML agent workflow and return the compiled graph.

    Flow: ``load_session`` -> ``process_input`` -> ``manage_context`` ->
    ``save_state_snapshot`` -> ``classify_intent``, then a conditional fan-out
    by intent. Generation, modification and undo all pass through
    ``save_session`` into the ``validate`` / ``explain_error`` /
    ``correct_jrxml`` loop; consult and reset go straight to ``finalize``.

    Returns:
        The result of ``StateGraph.compile()``.
    """
    workflow = StateGraph(AgentState)

    # Node registration, in the same order as the original hand-written calls.
    node_table = (
        ("load_session", load_session_node),
        ("process_input", process_input),
        ("manage_context", manage_context),
        ("save_session", save_session_node),
        ("retrieve", retrieve),
        ("generate", generate),
        ("modify_jrxml", modify_jrxml),
        ("validate", validate),
        ("explain_error", explain_error),
        ("correct_jrxml", correct_jrxml),
        ("finalize", finalize),
        # Intent-recognition additions.
        ("save_state_snapshot", save_state_snapshot),
        ("classify_intent", classify_intent),
        ("handle_consult", handle_consult),
        ("handle_undo", handle_undo),
        ("handle_reset", handle_reset),
    )
    for node_name, node_fn in node_table:
        workflow.add_node(node_name, node_fn)

    # ---- Entry point and fixed pre-processing chain ----
    workflow.set_entry_point("load_session")
    preamble = (
        "load_session",
        "process_input",
        "manage_context",
        "save_state_snapshot",
        "classify_intent",
    )
    for source, target in zip(preamble, preamble[1:]):
        workflow.add_edge(source, target)

    # ---- Intent dispatch ----
    intent_targets = (
        "retrieve",
        "modify_jrxml",
        "save_session",
        "handle_consult",
        "handle_undo",
        "handle_reset",
    )
    workflow.add_conditional_edges(
        "classify_intent",
        route_by_intent,
        {name: name for name in intent_targets},
    )

    # ---- Initial-generation branch ----
    workflow.add_edge("retrieve", "generate")

    # ---- Generate / modify / undo -> save -> validate -> correction loop ----
    conditional_routes = (
        ("generate", route_after_generate, ("save_session",)),
        ("modify_jrxml", route_after_modify, ("save_session",)),
        ("handle_undo", route_after_undo, ("save_session",)),
        ("save_session", route_after_save, ("validate",)),
        ("validate", route_after_validate, ("finalize", "explain_error")),
        ("explain_error", route_after_explain, ("correct_jrxml",)),
        ("correct_jrxml", route_after_correct, ("validate", "finalize")),
    )
    for source, router, targets in conditional_routes:
        workflow.add_conditional_edges(
            source, router, {name: name for name in targets}
        )

    # ---- Consult / reset finish immediately ----
    for source in ("handle_consult", "handle_reset"):
        workflow.add_edge(source, "finalize")

    # ---- Termination ----
    workflow.add_edge("finalize", END)
    return workflow.compile()
# ============================================================
# Initial state
# ============================================================
def create_initial_state() -> AgentState:
    """Return a fresh ``AgentState`` with every declared field at its default.

    Text fields start empty, counters at 0 and list fields as new empty
    lists; ``stage`` starts as ``"initial_generation"``.
    """
    defaults = {
        "conversation_history": [],
        "current_jrxml": "",
        "user_input": "",
        "status": "",
        "error_msg": "",
        "natural_explanation": "",
        "retry_count": 0,
        "user_modification_request": "",
        "final_jrxml": "",
        "stage": "initial_generation",
        "retrieved_context": "",
        "full_conversation_history": [],
        "compressed_history": "",
        "current_token_count": 0,
        "session_id": "",
        "session_name": "",
        "created_at": "",
        "updated_at": "",
        "intent": "",
        "history_states": [],
    }
    return AgentState(**defaults)
+571
View File
@@ -0,0 +1,571 @@
"""LangGraph JRXML 生成工作流的节点函数。"""
import copy
import json
import os
import re
from datetime import datetime, timezone
from typing import Dict
from dotenv import load_dotenv
from agent.state import AgentState
from backend.embeddings import get_embeddings
from backend.llm import get_llm
from backend.validation import validate_jrxml
# Load settings from a .env file (python-dotenv) before reading the environment.
load_dotenv()
# Retry limit (env MAX_RETRY, default 3). Not referenced by the node functions
# visible in this module.
MAX_RETRY = int(os.getenv("MAX_RETRY", "3"))
# Token count above which manage_context compresses older turns (default 6000).
CONTEXT_MAX_TOKENS = int(os.getenv("CONTEXT_MAX_TOKENS", "6000"))
# Number of most recent history entries kept verbatim by count_tokens and
# manage_context (default 4).
CONTEXT_KEEP_RECENT = int(os.getenv("CONTEXT_KEEP_RECENT", "4"))
# Maximum number of undo snapshots kept by save_state_snapshot (default 10).
HISTORY_MAX_SNAPSHOTS = int(os.getenv("HISTORY_MAX_SNAPSHOTS", "10"))
# ============================================================
# Intent-classification prompt (about 180 tokens; kept under 200 tokens).
# Placeholders filled by classify_intent: {has_report}, {user_input}.
# ============================================================
INTENT_CLASSIFY_PROMPT = """你是意图分类器。根据用户输入判断意图,只输出意图名称。
当前有报表:{has_report}
用户输入:{user_input}
可选意图:
- initial_generation(新建报表,或无报表时的任何需求)
- modify_report(修改当前已有报表)
- preview_report(预览/查看当前报表)
- export_pdf(导出PDF文件)
- export_jrxml(下载/导出/保存JRXML文件)
- undo_modification(撤销/回退上一步修改)
- consult_question(咨询JasperReports相关知识或使用问题)
- reset_session(清空/重置/重新开始)
意图名称:"""
# ============================================================
# Consultation-answer prompt.
# Placeholder filled by handle_consult: {question}.
# ============================================================
CONSULT_PROMPT = """你是 JasperReports 专家。用简洁清晰的中文回答用户关于 JasperReports 的问题。
用户问题:{question}
直接回答:"""
# ============================================================
# Pre-existing prompts (unchanged).
# ============================================================
# Prompt for first-time report generation.
# Placeholders filled by generate: {context}, {user_request}.
INITIAL_GENERATION_PROMPT = """你是一位资深 JasperReports 工程师。根据以下参考模板和用户需求,生成一个完整、可编译的 JRXML 文件。
JRXML 必须兼容 JasperReports 7.0.6 schema。
关键规则:
- 只输出 JRXML 代码,不要解释,不要 markdown 标记。
- 报表正文中使用的每个字段必须在 <field name="..."> 部分中声明。
- 根元素为 <jasperReport>,包含正确的 xmlns 属性。
- 包含 <queryString>,在 <![CDATA[...]]> 中包含 SQL 查询。
- 确保所有交叉引用(字段名称、band 元素)保持一致。
参考模板和组件:
{context}
用户需求:
{user_request}
"""
# Prompt for editing an existing report.
# Placeholders filled by modify_jrxml: {current_jrxml}, {conversation_history},
# {modification_request}.
MODIFICATION_PROMPT = """你是一位资深 JasperReports 工程师。用户想要修改一个现有的、可编译的 JRXML 报表。精确应用请求的更改到当前 JRXML 并输出完整修改后的 JRXML。
关键规则:
- 只输出完整修改后的 JRXML 代码,不要解释,不要 markdown 标记。
- 保留所有未被更改的现有结构。
- 结果必须继续与 JasperReports 7.0.6 兼容。
- 报表正文中使用的每个字段必须在 <field> 部分中声明。
- 如果添加新字段,正确声明它们。
- 确保 <queryString> 是 <![CDATA[...]]> 中有效的 SQL。
当前 JRXML
{current_jrxml}
对话历史:
{conversation_history}
用户的修改请求:
{modification_request}
"""
# Prompt for automatically repairing a report that failed validation.
# Placeholders filled by correct_jrxml: {current_jrxml}, {error_msg}, {explanation}.
CORRECTION_PROMPT = """你是一位资深 JasperReports 工程师。你生成的 JRXML 文件编译失败。分析错误并修复 JRXML。
关键规则:
- 只输出完整修复后的 JRXML 代码,不要解释,不要 markdown 标记。
- JRXML 必须与 JasperReports 7.0.6 兼容。
- 解决下面列出的特定错误。
当前 JRXML(带错误):
{current_jrxml}
编译错误:
{error_msg}
错误的自然语言解释:
{explanation}
立即生成修正后的 JRXML
"""
# Prompt that turns a validation error into a plain-language explanation.
# Placeholders filled by explain_error: {error_msg}, {jrxml_snippet}
# (explain_error passes the first 80 lines of the current JRXML).
EXPLAIN_PROMPT = """你是一位 JasperReports 专家。用普通非技术语言解释以下 JRXML 编译错误,让业务用户能够理解。
错误消息:
{error_msg}
当前 JRXML 片段(前 80 行):
{jrxml_snippet}
用 2-3 句话解释哪里出了问题以及如何修复:
"""
# Prompt that summarises older conversation turns.
# Placeholder filled by manage_context: {conversation_text} (a JSON dump of the
# older messages).
COMPRESSION_PROMPT = """你是一个信息压缩助手。以下是用户与报表生成助手之间的历史对话记录,请将其压缩为一份简洁的摘要(不超过200字)。
摘要必须保留以下关键信息:
- 用户提出的所有报表需求点(字段、标题、分组、汇总等)
- 用户提出的所有修改要求及其顺序
- 当前报表的核心结构(字段列表、标题、分组方式)
- 任何特殊要求或约束条件
只输出摘要文本,不要添加任何解释或标记。
对话记录:
{conversation_text}
"""
# ============================================================
# Core workflow nodes
# ============================================================
def process_input(state: AgentState) -> Dict:
    """Record the user's message in both histories and reset per-turn fields.

    The message is appended to ``full_conversation_history`` (with a ``ts``
    timestamp) and to ``conversation_history`` (without one). ``retry_count``
    is set to 0 and ``user_modification_request`` to the message. The state
    is mutated in place and returned.
    """
    message = state.get("user_input", "")

    # Complete transcript.
    transcript = state.get("full_conversation_history", [])
    transcript.append({"role": "user", "content": message, "ts": _now_iso()})
    state["full_conversation_history"] = transcript

    # Working history.
    working = state.get("conversation_history", [])
    working.append({"role": "user", "content": message})
    state["conversation_history"] = working

    # Per-turn request fields.
    state.update(retry_count=0, user_modification_request=message)
    return state
def save_state_snapshot(state: AgentState) -> Dict:
    """Append a snapshot of the report-related state to ``history_states``.

    The snapshot holds ``current_jrxml``, ``final_jrxml``, ``status``, a deep
    copy of ``conversation_history``, ``user_modification_request`` and
    ``intent``. When the list grows beyond ``HISTORY_MAX_SNAPSHOTS`` only the
    newest entries are kept. A non-list ``history_states`` value is replaced
    by a new list.
    """
    history = state.get("history_states", [])
    if not isinstance(history, list):
        history = []

    entry = {
        key: state.get(key, "")
        for key in ("current_jrxml", "final_jrxml", "status")
    }
    # Deep copy so later in-place appends do not change the stored snapshot.
    entry["conversation_history"] = copy.deepcopy(
        state.get("conversation_history", [])
    )
    entry["user_modification_request"] = state.get("user_modification_request", "")
    entry["intent"] = state.get("intent", "")
    history.append(entry)

    limit = HISTORY_MAX_SNAPSHOTS
    overflow = len(history) - limit
    if overflow > 0:
        history = history[-limit:]

    state["history_states"] = history
    return state
def classify_intent(state: AgentState) -> Dict:
    """Classify the user's input into one of eight intents using the LLM.

    The prompt tells the model whether a report currently exists. The first
    known intent name found in the lower-cased reply is stored in
    ``state["intent"]``. If the reply contains none of them, or the LLM call
    raises, the fallback is ``"modify_report"`` when a report exists and
    ``"initial_generation"`` otherwise.

    Fix: the "has report" flag used to be an empty string in both branches,
    so the prompt never carried the information and the fallback always
    chose ``"modify_report"``.

    Args:
        state: Agent state; reads ``user_input`` and ``current_jrxml``.

    Returns:
        The same state object with ``intent`` set.
    """
    user_input = state.get("user_input", "")
    has_report = bool((state.get("current_jrxml") or "").strip())
    # Used when the LLM is unavailable or its reply is unrecognised.
    fallback = "modify_report" if has_report else "initial_generation"
    valid_intents = (
        "initial_generation", "modify_report", "preview_report",
        "export_pdf", "export_jrxml", "undo_modification",
        "consult_question", "reset_session",
    )
    try:
        llm = get_llm()
        prompt = INTENT_CLASSIFY_PROMPT.format(
            has_report="是" if has_report else "否",
            # Only the first 500 characters are sent, to keep the prompt small.
            user_input=user_input[:500],
        )
        raw = llm.invoke(prompt).content.strip().lower()
        intent = next((name for name in valid_intents if name in raw), fallback)
    except Exception:
        # Best effort: classification must never abort the turn.
        intent = fallback
    state["intent"] = intent
    return state
def handle_consult(state: AgentState) -> Dict:
    """Answer a consultation question directly with the LLM.

    The answer (or a fixed apology when the LLM call raises) is stored in
    ``state["consult_answer"]`` and appended as an assistant turn to both
    ``conversation_history`` and ``full_conversation_history``.
    """
    question = state.get("user_input", "")
    try:
        llm = get_llm()
        reply = llm.invoke(CONSULT_PROMPT.format(question=question)).content.strip()
    except Exception:
        reply = "抱歉,暂时无法处理您的问题,请稍后再试。"

    state["consult_answer"] = reply
    working_turn = {"role": "assistant", "content": reply}
    state["conversation_history"].append(working_turn)
    transcript_turn = {"role": "assistant", "content": reply, "ts": _now_iso()}
    state["full_conversation_history"].append(transcript_turn)
    return state
def handle_undo(state: AgentState) -> Dict:
    """Undo the last change by restoring an earlier snapshot.

    Fix: the graph runs ``save_state_snapshot`` before ``classify_intent`` on
    every turn, so the newest snapshot is a copy of the state being undone.
    Popping it restored the report that was already current, making undo a
    no-op. Snapshots whose ``current_jrxml`` equals the live one are now
    skipped and the newest snapshot holding a different report is restored.
    That snapshot and everything newer are removed from ``history_states``.

    When no such snapshot exists, an assistant message saying there is
    nothing to undo is appended and nothing else changes.

    Args:
        state: Agent state; reads ``history_states`` and ``current_jrxml``.

    Returns:
        The same state object, mutated in place.
    """
    snapshots = state.get("history_states", [])
    if not isinstance(snapshots, list):
        snapshots = []
    live_jrxml = state.get("current_jrxml", "")

    # Newest snapshot whose report differs from the one currently held.
    target = next(
        (
            idx
            for idx in range(len(snapshots) - 1, -1, -1)
            if snapshots[idx].get("current_jrxml", "") != live_jrxml
        ),
        None,
    )

    if target is None:
        state.setdefault("conversation_history", []).append(
            {"role": "assistant", "content": "没有可撤销的操作。"}
        )
        return state

    prev = snapshots[target]
    state["history_states"] = snapshots[:target]
    state["current_jrxml"] = prev.get("current_jrxml", "")
    state["final_jrxml"] = prev.get("final_jrxml", "")
    state["status"] = prev.get("status", "")
    state["conversation_history"] = prev.get("conversation_history", [])
    state["user_modification_request"] = prev.get("user_modification_request", "")
    state["conversation_history"].append(
        {"role": "assistant", "content": "已撤销上一步修改,恢复到之前的状态。"}
    )
    state.setdefault("full_conversation_history", []).append(
        {"role": "assistant", "content": "已撤销上一步修改。", "ts": _now_iso()}
    )
    return state
def handle_reset(state: AgentState) -> Dict:
    """Reset the session's report state while keeping session identity.

    Clears the report, validation, retrieval, compression and undo fields,
    sets ``intent`` to ``"initial_generation"``, replaces
    ``conversation_history`` with a single assistant notice and appends a
    timestamped notice to ``full_conversation_history``. Session fields such
    as ``session_id`` are not touched.
    """
    state.update(
        {
            "current_jrxml": "",
            "final_jrxml": "",
            "status": "",
            "error_msg": "",
            "natural_explanation": "",
            "user_modification_request": "",
            "retrieved_context": "",
            "retry_count": 0,
            "compressed_history": "",
            "history_states": [],
            "intent": "initial_generation",
        }
    )
    # The working history restarts with just the reset notice.
    state["conversation_history"] = [
        {"role": "assistant", "content": "会话已重置,请描述您要创建的新报表。"}
    ]
    notice = {"role": "assistant", "content": "会话已重置。", "ts": _now_iso()}
    state["full_conversation_history"].append(notice)
    return state
def count_tokens(state: AgentState) -> int:
    """Estimate the token size of the current working context.

    The measured text is a JSON dump of the last ``CONTEXT_KEEP_RECENT``
    entries of ``conversation_history``, ``current_jrxml`` and
    ``compressed_history``. When ``tiktoken`` can be imported and provides
    the ``gpt-4o`` encoding, the exact encoded length is returned. Otherwise
    the size is approximated as one token per 2.5 characters.

    Fix: the fallback used ``len(text) // 2.5``, which yields a float and
    broke the declared ``int`` return type. The payload is now built once
    instead of separately in each branch.

    Args:
        state: Agent state to measure.

    Returns:
        Token count as an ``int``.
    """
    text = json.dumps(
        {
            "history": state.get("conversation_history", [])[-CONTEXT_KEEP_RECENT:],
            "jrxml": state.get("current_jrxml", ""),
            "compressed": state.get("compressed_history", ""),
        },
        ensure_ascii=False,
    )
    try:
        import tiktoken

        enc = tiktoken.encoding_for_model("gpt-4o")
    except Exception:
        # Fallback for mixed Chinese/English text: roughly 1 token per 2.5 chars.
        return int(len(text) // 2.5)
    return len(enc.encode(text))
def manage_context(state: AgentState) -> Dict:
    """Compress older conversation turns once the token budget is exceeded.

    Always records the measured size in ``current_token_count``. When it is
    above ``CONTEXT_MAX_TOKENS`` and ``full_conversation_history`` has more
    than ``CONTEXT_KEEP_RECENT`` entries, the older entries are summarised by
    the LLM (truncated to 300 characters) or, if that raises, by
    ``_simple_compress``. The summary is appended to any existing
    ``compressed_history``, ``conversation_history`` is replaced by the
    recent entries and the token count is measured again.
    """
    used = count_tokens(state)
    state["current_token_count"] = used
    if used <= CONTEXT_MAX_TOKENS:
        return state

    transcript = state.get("full_conversation_history", [])
    if len(transcript) <= CONTEXT_KEEP_RECENT:
        return state

    # Keep the newest entries verbatim; everything before them is summarised.
    tail = transcript[-CONTEXT_KEEP_RECENT:]
    head = transcript[:-CONTEXT_KEEP_RECENT]
    if not head:
        return state

    serialized = json.dumps(head, ensure_ascii=False, indent=2)
    try:
        llm = get_llm()
        reply = llm.invoke(COMPRESSION_PROMPT.format(conversation_text=serialized))
        summary = reply.content.strip()[:300]
    except Exception:
        summary = _simple_compress(head)

    # Merge with any earlier summary.
    previous = state.get("compressed_history", "")
    state["compressed_history"] = (
        f"{previous}\n---\n{summary}" if previous else summary
    )
    state["conversation_history"] = list(tail)
    state["current_token_count"] = count_tokens(state)
    return state
def load_session_node(state: AgentState) -> Dict:
    """Restore persisted session fields at the start of a request.

    Does nothing when ``session_id`` is empty. Otherwise loads the session
    through ``backend.session.load_session`` and, when it contains an
    ``agent_state``, copies the persisted history, report, summary and undo
    fields into the state. ``user_input`` and ``stage`` are never restored, so
    the current request's values are kept. ``session_name`` and ``created_at``
    are then taken from the session record. Any exception is swallowed and the
    state is returned as-is.
    """
    sid = state.get("session_id", "")
    if not sid:
        return state

    restorable = (
        "conversation_history", "full_conversation_history",
        "current_jrxml", "final_jrxml", "compressed_history",
        "session_name", "created_at", "history_states",
    )
    try:
        from backend.session import load_session

        record = load_session(sid)
        if record and record.get("agent_state"):
            persisted = record["agent_state"]
            state.update({key: persisted[key] for key in restorable if key in persisted})
            state["session_name"] = record.get("session_name", "")
            state["created_at"] = record.get("created_at", "")
    except Exception:
        pass
    return state
def save_session_node(state: AgentState) -> Dict:
    """Persist the agent state through ``backend.session.save_session``.

    Does nothing when ``session_id`` is empty. The saved payload holds the
    histories, report, summary, status, error and undo fields present in the
    state, plus a new ``updated_at`` timestamp. When the session has no name
    yet, the first 50 characters of the first user message become the name.
    Any exception is swallowed and the state is returned as-is.
    """
    sid = state.get("session_id", "")
    if not sid:
        return state

    persisted_keys = (
        "conversation_history", "full_conversation_history",
        "current_jrxml", "final_jrxml", "compressed_history",
        "status", "error_msg", "history_states",
    )
    try:
        from backend.session import save_session

        payload = {key: state[key] for key in persisted_keys if key in state}
        payload["updated_at"] = _now_iso()

        title = state.get("session_name", "")
        if not title and state.get("conversation_history"):
            # Derive a title from the first user turn.
            for message in state["conversation_history"]:
                if message.get("role") == "user":
                    candidate = message["content"][:50]
                    if candidate:
                        title = candidate
                    break

        save_session(sid, payload, title)
        if not state.get("session_name"):
            state["session_name"] = title
        state["updated_at"] = payload["updated_at"]
    except Exception:
        pass
    return state
def _simple_compress(messages: list[dict]) -> str:
"""当 LLM 不可用时,基于简单规则的压缩回退方案。"""
points = []
for m in messages:
if m.get("role") == "user":
points.append(f"用户提问:{m['content'][:100]}")
return "; ".join(points[-10:])
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def retrieve(state: AgentState) -> Dict:
    """Search the Chroma store for JRXML templates relevant to the request.

    Stores the five most similar documents, joined by a ``---`` separator, in
    ``state["retrieved_context"]``. The context is an empty string when the
    persist directory (``CHROMA_PERSIST_DIR``, default ``./db/chroma``) is
    missing or empty, or when any step raises.
    """
    try:
        embeddings = get_embeddings()
        from langchain_chroma import Chroma

        store_dir = os.getenv("CHROMA_PERSIST_DIR", "./db/chroma")
        store_ready = os.path.exists(store_dir) and os.listdir(store_dir)
        if not store_ready:
            state["retrieved_context"] = ""
            return state

        store = Chroma(
            embedding_function=embeddings,
            persist_directory=store_dir,
        )
        query = state.get("user_input", "")
        hits = store.similarity_search(query, k=5)
        context = "\n\n---\n\n".join([hit.page_content for hit in hits])
    except Exception:
        context = ""
    state["retrieved_context"] = context
    return state
def generate(state: AgentState) -> Dict:
    """Generate the first JRXML from the request and the retrieved context.

    Fills ``INITIAL_GENERATION_PROMPT`` with ``retrieved_context`` and
    ``user_input``, extracts the JRXML from the LLM reply, stores it in
    ``current_jrxml`` and appends it to ``conversation_history`` as an
    assistant turn. Exceptions from the LLM call are not caught here.
    """
    llm = get_llm()
    filled_prompt = INITIAL_GENERATION_PROMPT.format(
        context=state.get("retrieved_context", ""),
        user_request=state.get("user_input", ""),
    )
    report = _extract_jrxml(llm.invoke(filled_prompt).content)
    state["current_jrxml"] = report
    assistant_turn = {"role": "assistant", "content": report}
    state["conversation_history"].append(assistant_turn)
    return state
def modify_jrxml(state: AgentState) -> Dict:
    """Apply the user's modification request to the current JRXML.

    The prompt contains the current report, the compressed summary (if any)
    plus the last six working-history entries, and the request. The modified
    JRXML replaces ``current_jrxml``, an assistant turn is appended to both
    histories and ``retry_count`` is reset to 0.

    Fix: ``process_input`` already records the user's message in both
    histories at the start of the turn, and this node appended the same
    request again, producing duplicate user turns. The user turn is now only
    added when the most recent history entry is not already that request, so
    calling the node without ``process_input`` still records it.

    Args:
        state: Agent state; reads ``current_jrxml``, ``compressed_history``,
            ``conversation_history`` and ``user_modification_request``.

    Returns:
        The same state object, mutated in place.
    """
    llm = get_llm()
    request = state.get("user_modification_request", "")
    history = state.setdefault("conversation_history", [])

    # Conversation context: compressed summary followed by recent turns.
    compressed = state.get("compressed_history", "")
    conv_parts = []
    if compressed:
        conv_parts.append(f"[早期对话摘要]\n{compressed}")
    conv_parts.append(json.dumps(history[-6:], ensure_ascii=False, indent=2))
    conv_text = "\n\n---\n\n".join(conv_parts)

    prompt = MODIFICATION_PROMPT.format(
        current_jrxml=state.get("current_jrxml", ""),
        conversation_history=conv_text,
        modification_request=request,
    )
    jrxml = _extract_jrxml(llm.invoke(prompt).content)
    state["current_jrxml"] = jrxml

    def _request_is_last(log):
        # True when the newest entry is already this user request.
        return (
            bool(log)
            and log[-1].get("role") == "user"
            and log[-1].get("content") == request
        )

    if not _request_is_last(history):
        history.append({"role": "user", "content": request})
    history.append({"role": "assistant", "content": jrxml})

    # Work on a copy so the previous transcript list is not mutated.
    transcript = list(state.get("full_conversation_history", []))
    if not _request_is_last(transcript):
        transcript.append({"role": "user", "content": request, "ts": _now_iso()})
    transcript.append({"role": "assistant", "content": jrxml, "ts": _now_iso()})
    state["full_conversation_history"] = transcript

    state["retry_count"] = 0
    return state
def validate(state: AgentState) -> Dict:
    """Validate the current JRXML and record the outcome.

    With no JRXML, ``status`` becomes ``"fail"`` with a fixed message.
    Otherwise ``validate_jrxml`` is called: ``status`` is ``"pass"`` when its
    result has a truthy ``valid`` entry and ``"fail"`` otherwise, and
    ``error_msg`` takes the result's ``error`` entry (empty when absent).
    """
    candidate = state.get("current_jrxml", "")
    if candidate:
        outcome = validate_jrxml(candidate)
        verdict = "pass" if outcome.get("valid") else "fail"
        detail = outcome.get("error", "")
    else:
        verdict = "fail"
        detail = "没有 JRXML 内容可供验证。"
    state["status"] = verdict
    state["error_msg"] = detail
    return state
def explain_error(state: AgentState) -> Dict:
    """Ask the LLM for a readable explanation of the validation error.

    Sends ``error_msg`` and the first 80 lines of ``current_jrxml`` through
    ``EXPLAIN_PROMPT`` and stores the stripped reply in
    ``natural_explanation``. Exceptions from the LLM call are not caught here.
    """
    llm = get_llm()
    report = state.get("current_jrxml", "")
    head_lines = report.split("\n")[:80]
    filled_prompt = EXPLAIN_PROMPT.format(
        error_msg=state.get("error_msg", "未知错误"),
        jrxml_snippet="\n".join(head_lines),
    )
    state["natural_explanation"] = llm.invoke(filled_prompt).content.strip()
    return state
def correct_jrxml(state: AgentState) -> Dict:
    """Ask the LLM to repair a JRXML that failed validation.

    Fills ``CORRECTION_PROMPT`` with the current report, the error and its
    explanation, replaces ``current_jrxml`` with the extracted reply,
    increments ``retry_count`` and logs the attempt as an assistant turn in
    ``conversation_history``. Exceptions from the LLM call are not caught here.
    """
    llm = get_llm()
    filled_prompt = CORRECTION_PROMPT.format(
        current_jrxml=state.get("current_jrxml", ""),
        error_msg=state.get("error_msg", ""),
        explanation=state.get("natural_explanation", ""),
    )
    repaired = _extract_jrxml(llm.invoke(filled_prompt).content)
    state["current_jrxml"] = repaired

    attempt = state.get("retry_count", 0) + 1
    state["retry_count"] = attempt
    state["conversation_history"].append(
        {"role": "assistant", "content": f"[自动修正,第 {attempt} 次尝试]\n{repaired}"}
    )
    return state
def finalize(state: AgentState) -> Dict:
    """Copy ``current_jrxml`` (empty string when absent) into ``final_jrxml``.

    Whether the report passed validation is not checked here.
    """
    result = state.get("current_jrxml", "")
    state.update(final_jrxml=result)
    return state
def _extract_jrxml(text: str) -> str:
"""从 LLM 响应中提取 JRXML 内容,如有 markdown 标记则去除。"""
text = text.strip()
xml_pattern = re.compile(r"```(?:xml|jrxml)?\s*([\s\S]*?)```", re.IGNORECASE)
m = xml_pattern.search(text)
if m:
return m.group(1).strip()
jasper_tag = re.search(r"(<\?xml[\s\S]*?</jasperReport>)", text, re.IGNORECASE)
if jasper_tag:
return jasper_tag.group(1).strip()
if text.startswith("<?xml") or text.startswith("<jasperReport"):
return text
return text
+33
View File
@@ -0,0 +1,33 @@
"""LangGraph JRXML 生成代理工作流的状态定义。"""
from typing import TypedDict, List
class AgentState(TypedDict, total=False):
    """Shared state passed between the nodes of the JRXML agent graph.

    Every key is optional (``total=False``).

    Fix: ``consult_answer`` is now declared. ``handle_consult`` writes that
    key, but it was missing from the schema.
    """

    # Core workflow fields
    conversation_history: List[dict]  # working history of role/content messages
    current_jrxml: str  # report being generated, modified or corrected
    user_input: str  # raw text of the current request
    status: str  # validation outcome: "pass" or "fail"
    error_msg: str  # validation error text
    natural_explanation: str  # plain-language explanation of error_msg
    retry_count: int  # automatic correction attempts in this turn
    user_modification_request: str  # request text used by the modify node
    final_jrxml: str  # report copied from current_jrxml by finalize
    stage: str
    retrieved_context: str  # reference templates found by retrieve
    # Requirement 1: smart context compression
    full_conversation_history: List[dict]  # complete, timestamped transcript
    compressed_history: str  # summary of older turns
    current_token_count: int  # last value measured by count_tokens
    # Requirement 2: multi-session persistence
    session_id: str
    session_name: str
    created_at: str
    updated_at: str
    # Requirement 3: intent recognition
    intent: str  # intent chosen by classify_intent
    history_states: List[dict]  # undo snapshots, newest last
    consult_answer: str  # answer text produced by handle_consult
@@ -0,0 +1,6 @@
# Error case: queryString is empty or missing
# Correction: add SQL query in CDATA
ERROR: <queryString></queryString> is empty.
FIX: Add a SQL query inside CDATA: <queryString><![CDATA[SELECT col1, col2 FROM table_name]]></queryString>
+6
View File
@@ -0,0 +1,6 @@
# Error case: field used in expression but not declared
# Correction: add field declaration
ERROR: textFieldExpression uses $F{total_amount} but no <field name="total_amount"> declared.
FIX: Add <field name="total_amount" class="java.math.BigDecimal"/> to the field declarations section.
+6
View File
@@ -0,0 +1,6 @@
# Error case: jasperReport missing pageWidth and pageHeight
# Correction: add page dimensions
ERROR: <jasperReport name="Report" ...> has no pageWidth/pageHeight attributes.
FIX: Add pageWidth="595" pageHeight="842" to the <jasperReport> root element.
@@ -0,0 +1,69 @@
<?xml version="1.0" encoding="UTF-8"?>
<jasperReport xmlns="http://jasperreports.sourceforge.net/jasperreports"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://jasperreports.sourceforge.net/jasperreports http://jasperreports.sourceforge.net/xsd/jasperreport.xsd"
name="EmployeeRoster" pageWidth="595" pageHeight="842" columnWidth="555"
leftMargin="20" rightMargin="20" topMargin="20" bottomMargin="20">
<queryString>
<![CDATA[SELECT emp_id, emp_name, department, hire_date FROM employees ORDER BY department, emp_name]]>
</queryString>
<field name="emp_id" class="java.lang.Integer"/>
<field name="emp_name" class="java.lang.String"/>
<field name="department" class="java.lang.String"/>
<field name="hire_date" class="java.sql.Date"/>
<title>
<band height="50">
<staticText>
<reportElement x="0" y="10" width="555" height="30"/>
<textElement textAlignment="Center">
<font size="16" isBold="true"/>
</textElement>
<text><![CDATA[Employee Roster]]></text>
</staticText>
</band>
</title>
<columnHeader>
<band height="25">
<staticText>
<reportElement x="0" y="0" width="80" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[ID]]></text>
</staticText>
<staticText>
<reportElement x="90" y="0" width="180" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Name]]></text>
</staticText>
<staticText>
<reportElement x="280" y="0" width="150" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Department]]></text>
</staticText>
<staticText>
<reportElement x="440" y="0" width="115" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Hire Date]]></text>
</staticText>
</band>
</columnHeader>
<detail>
<band height="20">
<textField>
<reportElement x="0" y="0" width="80" height="20"/>
<textFieldExpression><![CDATA[$F{emp_id}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="90" y="0" width="180" height="20"/>
<textFieldExpression><![CDATA[$F{emp_name}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="280" y="0" width="150" height="20"/>
<textFieldExpression><![CDATA[$F{department}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="440" y="0" width="115" height="20"/>
<textFieldExpression><![CDATA[$F{hire_date}]]></textFieldExpression>
</textField>
</band>
</detail>
</jasperReport>
@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<jasperReport xmlns="http://jasperreports.sourceforge.net/jasperreports"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://jasperreports.sourceforge.net/jasperreports http://jasperreports.sourceforge.net/xsd/jasperreport.xsd"
name="InventoryList" pageWidth="595" pageHeight="842" columnWidth="555"
leftMargin="20" rightMargin="20" topMargin="20" bottomMargin="20">
<queryString>
<![CDATA[SELECT item_code, item_name, category, quantity, unit_price FROM inventory ORDER BY category, item_name]]>
</queryString>
<field name="item_code" class="java.lang.String"/>
<field name="item_name" class="java.lang.String"/>
<field name="category" class="java.lang.String"/>
<field name="quantity" class="java.lang.Integer"/>
<field name="unit_price" class="java.math.BigDecimal"/>
<title>
<band height="50">
<staticText>
<reportElement x="0" y="10" width="555" height="30"/>
<textElement textAlignment="Center">
<font size="16" isBold="true"/>
</textElement>
<text><![CDATA[Inventory List]]></text>
</staticText>
</band>
</title>
<columnHeader>
<band height="25">
<staticText>
<reportElement x="0" y="0" width="100" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Code]]></text>
</staticText>
<staticText>
<reportElement x="110" y="0" width="180" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Item Name]]></text>
</staticText>
<staticText>
<reportElement x="300" y="0" width="100" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Category]]></text>
</staticText>
<staticText>
<reportElement x="410" y="0" width="70" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Qty]]></text>
</staticText>
<staticText>
<reportElement x="485" y="0" width="70" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Price]]></text>
</staticText>
</band>
</columnHeader>
<detail>
<band height="20">
<textField>
<reportElement x="0" y="0" width="100" height="20"/>
<textFieldExpression><![CDATA[$F{item_code}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="110" y="0" width="180" height="20"/>
<textFieldExpression><![CDATA[$F{item_name}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="300" y="0" width="100" height="20"/>
<textFieldExpression><![CDATA[$F{category}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="410" y="0" width="70" height="20"/>
<textFieldExpression><![CDATA[$F{quantity}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="485" y="0" width="70" height="20"/>
<textFieldExpression><![CDATA[$F{unit_price}]]></textFieldExpression>
</textField>
</band>
</detail>
</jasperReport>
@@ -0,0 +1,89 @@
<?xml version="1.0" encoding="UTF-8"?>
<jasperReport xmlns="http://jasperreports.sourceforge.net/jasperreports"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://jasperreports.sourceforge.net/jasperreports http://jasperreports.sourceforge.net/xsd/jasperreport.xsd"
name="SalesSummary" pageWidth="595" pageHeight="842" columnWidth="555"
leftMargin="20" rightMargin="20" topMargin="20" bottomMargin="20">
<queryString>
<![CDATA[SELECT department, total_sales, employee_count FROM dept_summary ORDER BY total_sales DESC]]>
</queryString>
<field name="department" class="java.lang.String"/>
<field name="total_sales" class="java.math.BigDecimal"/>
<field name="employee_count" class="java.lang.Integer"/>
<variable name="grand_total" class="java.math.BigDecimal" calculation="Sum">
<variableExpression><![CDATA[$F{total_sales}]]></variableExpression>
</variable>
<variable name="total_employees" class="java.lang.Integer" calculation="Sum">
<variableExpression><![CDATA[$F{employee_count}]]></variableExpression>
</variable>
<title>
<band height="60">
<staticText>
<reportElement x="0" y="10" width="555" height="30"/>
<textElement textAlignment="Center">
<font size="16" isBold="true"/>
</textElement>
<text><![CDATA[Department Sales Summary]]></text>
</staticText>
</band>
</title>
<columnHeader>
<band height="25">
<staticText>
<reportElement x="0" y="0" width="200" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Department]]></text>
</staticText>
<staticText>
<reportElement x="210" y="0" width="170" height="20"/>
<textElement textAlignment="Right"><font isBold="true"/></textElement>
<text><![CDATA[Total Sales]]></text>
</staticText>
<staticText>
<reportElement x="390" y="0" width="165" height="20"/>
<textElement textAlignment="Right"><font isBold="true"/></textElement>
<text><![CDATA[Employee Count]]></text>
</staticText>
</band>
</columnHeader>
<detail>
<band height="20">
<textField>
<reportElement x="0" y="0" width="200" height="20"/>
<textFieldExpression><![CDATA[$F{department}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="210" y="0" width="170" height="20"/>
<textElement textAlignment="Right"/>
<textFieldExpression><![CDATA[$F{total_sales}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="390" y="0" width="165" height="20"/>
<textElement textAlignment="Right"/>
<textFieldExpression><![CDATA[$F{employee_count}]]></textFieldExpression>
</textField>
</band>
</detail>
<summary>
<band height="40">
<line>
<reportElement x="0" y="0" width="555" height="1"/>
</line>
<staticText>
<reportElement x="0" y="5" width="200" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Grand Total]]></text>
</staticText>
<textField>
<reportElement x="210" y="5" width="170" height="20"/>
<textElement textAlignment="Right"><font isBold="true"/></textElement>
<textFieldExpression><![CDATA[$V{grand_total}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="390" y="5" width="165" height="20"/>
<textElement textAlignment="Right"><font isBold="true"/></textElement>
<textFieldExpression><![CDATA[$V{total_employees}]]></textFieldExpression>
</textField>
</band>
</summary>
</jasperReport>
+69
View File
@@ -0,0 +1,69 @@
<?xml version="1.0" encoding="UTF-8"?>
<jasperReport xmlns="http://jasperreports.sourceforge.net/jasperreports"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://jasperreports.sourceforge.net/jasperreports http://jasperreports.sourceforge.net/xsd/jasperreport.xsd"
name="SalesOrder" pageWidth="595" pageHeight="842" columnWidth="555"
leftMargin="20" rightMargin="20" topMargin="20" bottomMargin="20">
<queryString>
<![CDATA[SELECT order_id, customer_name, amount, order_date FROM sales_orders ORDER BY order_date DESC]]>
</queryString>
<field name="order_id" class="java.lang.String"/>
<field name="customer_name" class="java.lang.String"/>
<field name="amount" class="java.math.BigDecimal"/>
<field name="order_date" class="java.sql.Date"/>
<title>
<band height="50">
<staticText>
<reportElement x="0" y="10" width="555" height="30"/>
<textElement textAlignment="Center">
<font size="16" isBold="true"/>
</textElement>
<text><![CDATA[Sales Orders Report]]></text>
</staticText>
</band>
</title>
<columnHeader>
<band height="25">
<staticText>
<reportElement x="0" y="0" width="120" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Order ID]]></text>
</staticText>
<staticText>
<reportElement x="130" y="0" width="180" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Customer]]></text>
</staticText>
<staticText>
<reportElement x="320" y="0" width="120" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Amount]]></text>
</staticText>
<staticText>
<reportElement x="450" y="0" width="105" height="20"/>
<textElement><font isBold="true"/></textElement>
<text><![CDATA[Order Date]]></text>
</staticText>
</band>
</columnHeader>
<detail>
<band height="20">
<textField>
<reportElement x="0" y="0" width="120" height="20"/>
<textFieldExpression><![CDATA[$F{order_id}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="130" y="0" width="180" height="20"/>
<textFieldExpression><![CDATA[$F{customer_name}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="320" y="0" width="120" height="20"/>
<textFieldExpression><![CDATA[$F{amount}]]></textFieldExpression>
</textField>
<textField>
<reportElement x="450" y="0" width="105" height="20"/>
<textFieldExpression><![CDATA[$F{order_date}]]></textFieldExpression>
</textField>
</band>
</detail>
</jasperReport>
+87
View File
@@ -0,0 +1,87 @@
"""初始化 Chroma 知识库,加载示例 JRXML 模板和错误修正案例。
用法: python scripts/init_kb.py
"""
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
sys.path.insert(0, str(Path(__file__).parent.parent))
load_dotenv()
from backend.embeddings import get_embeddings
def load_templates(template_dir: Path) -> list[dict]:
    """Read every ``*.jrxml`` file located directly inside *template_dir*.

    Args:
        template_dir: Directory that holds the sample report templates.

    Returns:
        One dict per file, in the order ``Path.glob`` yields them. Each dict
        has the file text under ``'content'`` and a ``'metadata'`` dict with
        the file path (``'source'``), the fixed type ``'full_report'`` and
        the file name without extension (``'name'``).
    """
    return [
        {
            'content': template_path.read_text(encoding='utf-8'),
            'metadata': {
                'source': str(template_path),
                'type': 'full_report',
                'name': template_path.stem,
            },
        }
        for template_path in template_dir.glob('*.jrxml')
    ]
def load_corrections(corrections_dir: Path) -> list[dict]:
    """Read every ``*.jrxml`` correction case directly inside *corrections_dir*.

    Args:
        corrections_dir: Directory that holds the error-correction cases.

    Returns:
        One dict per file, in the order ``Path.glob`` yields them. Each dict
        has the file text under ``'content'`` and a ``'metadata'`` dict with
        the file path (``'source'``), the fixed type ``'correction_case'``
        and the file name without extension (``'name'``).
    """
    def as_document(case_path: Path) -> dict:
        text = case_path.read_text(encoding='utf-8')
        meta = {
            'source': str(case_path),
            'type': 'correction_case',
            'name': case_path.stem,
        }
        return {'content': text, 'metadata': meta}

    return list(map(as_document, corrections_dir.glob('*.jrxml')))
def main():
    """Index the sample templates and correction cases into Chroma.

    Reads ``data/sample_templates`` and ``data/corrections`` (relative to the
    repository root, i.e. the parent of this script's directory), embeds the
    file contents with ``get_embeddings()`` and stores them in the Chroma
    directory named by ``CHROMA_PERSIST_DIR`` (default ``./db/chroma``).

    Every document is stored under a stable id of the form ``<type>:<name>``.
    Without explicit ids the vector store assigns a fresh random id to each
    text, so running this script twice would store every document twice;
    with stable ids a re-run overwrites the existing entries instead.
    """
    persist_dir = os.getenv('CHROMA_PERSIST_DIR', './db/chroma')
    data_dir = Path(__file__).parent.parent / 'data'
    template_dir = data_dir / 'sample_templates'
    corrections_dir = data_dir / 'corrections'

    docs = []
    if template_dir.exists():
        docs.extend(load_templates(template_dir))
        print(f'{template_dir} 加载了 {len(docs)} 个模板')
    if corrections_dir.exists():
        corr = load_corrections(corrections_dir)
        docs.extend(corr)
        print(f'{corrections_dir} 加载了 {len(corr)} 个修正案例')

    if not docs:
        print('未找到文档,无需索引。')
        return

    embeddings = get_embeddings()
    # Imported lazily so the "nothing to index" path does not need Chroma.
    from langchain_chroma import Chroma

    texts = [d['content'] for d in docs]
    metadatas = [d['metadata'] for d in docs]
    # File stems are unique within each directory and the type differs
    # between the two directories, so these ids never collide in one batch.
    ids = [f"{d['metadata']['type']}:{d['metadata']['name']}" for d in docs]
    Chroma.from_texts(
        texts=texts,
        embedding=embeddings,
        metadatas=metadatas,
        ids=ids,
        persist_directory=persist_dir,
    )
    print(f'已将 {len(docs)} 个文档索引到 Chroma,存储位置: {persist_dir}')


if __name__ == '__main__':
    main()
+129
View File
@@ -0,0 +1,129 @@
"""JRXML 文件验证服务(FastAPI)。
使用 lxml XML Schema 验证作为 JasperReports 7.0.6 编译验证的第一阶段后备方案。
要进行完整的编译验证,需要基于 Java 的验证器以及 JasperReports 7.0.6 + JDK 21。
启动: uvicorn validation_service.main:app --port 8001
"""
import re
import xml.etree.ElementTree as ET
from pathlib import Path
from fastapi import FastAPI
from lxml import etree
from pydantic import BaseModel
# ASGI application object; the module docstring starts it with
# `uvicorn validation_service.main:app`.
app = FastAPI(title="JRXML 验证服务")
# Location of the optional JasperReports XSD, resolved relative to this module.
# The code below checks SCHEMA_FILE.exists() before using it.
SCHEMA_DIR = Path(__file__).parent / "schemas"
SCHEMA_FILE = SCHEMA_DIR / "jasperreport_7_0_6.xsd"
class ValidationRequest(BaseModel):
    """Request body accepted by ``POST /validate``."""

    # Raw JRXML document text to validate.
    jrxml: str
class ValidationResponse(BaseModel):
    """Response body returned by ``POST /validate``."""

    # True when neither the structural checks nor the XSD check reported a problem.
    valid: bool
    # Description of the problem(s) found; empty string when ``valid`` is True.
    error: str
def _check_structural_issues(jrxml: str) -> list[str]:
"""检查 JRXML 中常见的结构性问题。"""
issues = []
root = None
try:
root = ET.fromstring(jrxml)
except ET.ParseError as e:
issues.append(f"XML 解析错误:{e}")
return issues
# 同时处理带命名空间和不带命名空间的元素名
ns = "http://jasperreports.sourceforge.net/jasperreports"
declared_fields = set()
for elem in root.iter():
tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
if tag == "field":
name = elem.get("name")
if name:
declared_fields.add(name)
field_expr_pattern = re.compile(r'\$F\{(\w+)\}')
for m in field_expr_pattern.finditer(jrxml):
field_name = m.group(1)
if field_name not in declared_fields:
issues.append(
f"字段 '{field_name}' 在表达式中使用但未在 <field> 部分声明"
)
query = None
for elem in root.iter():
tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
if tag == "queryString":
query = elem
break
if query is not None:
query_text = "".join(query.itertext()).strip()
if not query_text:
issues.append("<queryString> 为空 - 请在 CDATA 中添加 SQL 查询")
elif not any(kw in query_text.upper() for kw in ["SELECT"]):
issues.append("<queryString> 似乎不包含 SQL SELECT 查询")
if not root.get("pageWidth"):
issues.append("缺少 <jasperReport> 上的 pageWidth 属性")
if not root.get("pageHeight"):
issues.append("缺少 <jasperReport> 上的 pageHeight 属性")
if not root.get("name"):
issues.append("缺少 <jasperReport> 上的 'name' 属性")
return issues
# Compiled XSD, built on first use and then reused by every request.
_XSD_SCHEMA = None


def _get_xsd_schema():
    """Return the compiled XSD from SCHEMA_FILE, compiling it on first use.

    If parsing or compiling raises, nothing is cached, so the next call
    tries again.
    """
    global _XSD_SCHEMA
    if _XSD_SCHEMA is None:
        _XSD_SCHEMA = etree.XMLSchema(etree.parse(str(SCHEMA_FILE)))
    return _XSD_SCHEMA


def _validate_xsd(jrxml: str) -> tuple[bool, str]:
    """Validate a JRXML document against the JasperReports XSD.

    The schema is compiled once and reused rather than being re-read from
    disk for every request.

    Args:
        jrxml: The JRXML document text.

    Returns:
        ``(True, "")`` when the document is valid, or when SCHEMA_FILE does
        not exist (in which case no XSD check is performed). Otherwise
        ``(False, message)`` describing the validation, schema or parsing
        failure.
    """
    if not SCHEMA_FILE.exists():
        return True, ""
    try:
        xmlschema = _get_xsd_schema()
        # Parse from bytes: lxml rejects str input that carries an XML
        # encoding declaration.
        doc = etree.fromstring(jrxml.encode("utf-8"))
        xmlschema.assertValid(doc)
        return True, ""
    except etree.DocumentInvalid as e:
        return False, str(e)
    except etree.XMLSchemaError as e:
        return False, f"Schema 错误:{e}"
    except Exception as e:
        # Anything else (for example malformed XML) is reported as a
        # validation failure rather than surfacing as a server error.
        return False, f"XML 验证错误:{e}"
@app.post("/validate", response_model=ValidationResponse)
async def validate_jrxml(req: ValidationRequest):
    """Validate the submitted JRXML and report the first failing stage.

    Stages run in order: empty-input check, structural checks, then XSD
    validation. Structural issues are joined with ``"; "`` into one error
    string. A later stage only runs when the earlier ones passed.
    """
    def rejected(reason: str) -> ValidationResponse:
        return ValidationResponse(valid=False, error=reason)

    document = req.jrxml.strip()
    if not document:
        return rejected("JRXML 内容为空")

    problems = _check_structural_issues(document)
    if problems:
        return rejected("; ".join(problems))

    schema_ok, schema_error = _validate_xsd(document)
    if schema_ok:
        return ValidationResponse(valid=True, error="")
    return rejected(schema_error)
@app.get("/health")
async def health():
    """Report service status and whether the XSD file is present on disk."""
    has_schema = SCHEMA_FILE.exists()
    if has_schema:
        mode = "XSD"
    else:
        mode = "仅结构检查"

    payload = {"status": "ok"}
    payload["schema_available"] = has_schema
    payload["validation_type"] = mode
    payload["note"] = "如需完整的 JasperReports 7.0.6 编译验证,请使用基于 Java 的验证器"
    return payload
+6
View File
@@ -0,0 +1,6 @@
@echo off
rem Launches the JRXML validation service (validation_service.main:app) with uvicorn.
echo 正在启动 JRXML 验证服务...
echo.
rem Switch to the folder that contains this script, changing drive as well (/d).
rem Presumably this is so the module path below resolves from any start location.
cd /d "%~dp0"
rem Listens on all interfaces, port 8001. --reload restarts the server on code changes.
python -m uvicorn validation_service.main:app --host 0.0.0.0 --port 8001 --reload
rem Wait for a key press after the server exits so the console output stays visible.
pause