feat: LangGraph工作流核心 — Agent状态/节点/图 + 验证服务 + 知识库

agent/ — state.py: AgentState TypedDict（20字段，含意图/压缩/会话/撤销）；nodes.py: 17个节点函数（生成/修改/验证/纠错/意图分类/压缩/撤销/重置）；graph.py: 17节点状态图，8意图路由分发。验证服务 validation_service/ — main.py: FastAPI服务，lxml XSD验证 + 结构化检查（字段引用/SQL/尺寸）。数据 data/ — sample_templates/: 4个JRXML示例模板；corrections/: 3个错误修正案例。脚本 scripts/ — init_kb.py: Chroma知识库初始化。
2026-05-14 23:21:10 +08:00
parent 21a5fdf930
commit 4b43c5d3e4
14 changed files with 1375 additions and 0 deletions
@@ -0,0 +1,87 @@
+"""Initialize the Chroma knowledge base with sample JRXML templates and error-correction cases.
+
+Usage: python scripts/init_kb.py
+"""
+
+import os
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+# Put the parent of this script's directory at the front of sys.path;
+# presumably that is the repository root, so that `backend` resolves when the
+# script is run directly -- TODO confirm.
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+# NOTE(review): .env is loaded before the backend import below, presumably so
+# that backend modules can read environment variables at import time -- confirm.
+load_dotenv()
+
+from backend.embeddings import get_embeddings
+
+
+def load_templates(template_dir: Path) -> list[dict]:
+    """Read every ``*.jrxml`` file directly inside *template_dir* as a document.
+
+    Args:
+        template_dir: Directory scanned (non-recursively) for ``*.jrxml`` files.
+
+    Returns:
+        One dict per matching file, in the order ``Path.glob`` yields them.
+        Each dict holds the UTF-8 decoded file text under ``'content'`` and a
+        ``'metadata'`` dict with ``'source'`` (the path as a string),
+        ``'type'`` (always ``'full_report'``) and ``'name'`` (the file name
+        without its suffix).
+    """
+    return [
+        {
+            'content': jrxml_path.read_text(encoding='utf-8'),
+            'metadata': {
+                'source': str(jrxml_path),
+                'type': 'full_report',
+                'name': jrxml_path.stem,
+            },
+        }
+        for jrxml_path in template_dir.glob('*.jrxml')
+    ]
+
+
+def load_corrections(corrections_dir: Path) -> list[dict]:
+    """Read every ``*.jrxml`` file directly inside *corrections_dir* as a correction case.
+
+    Args:
+        corrections_dir: Directory scanned (non-recursively) for ``*.jrxml`` files.
+
+    Returns:
+        One dict per matching file, in the order ``Path.glob`` yields them.
+        Each dict holds the UTF-8 decoded file text under ``'content'`` and a
+        ``'metadata'`` dict with ``'source'`` (the path as a string),
+        ``'type'`` (always ``'correction_case'``) and ``'name'`` (the file
+        name without its suffix).
+    """
+
+    def as_document(case_path: Path) -> dict:
+        # Build the content/metadata record for a single correction file.
+        text = case_path.read_text(encoding='utf-8')
+        meta = {
+            'source': str(case_path),
+            'type': 'correction_case',
+            'name': case_path.stem,
+        }
+        return {'content': text, 'metadata': meta}
+
+    return [as_document(case_path) for case_path in corrections_dir.glob('*.jrxml')]
+
+
+def main():
+    """Index the sample templates and correction cases into a persistent Chroma store.
+
+    Documents are read from ``data/sample_templates`` and ``data/corrections``
+    under the parent of this script's directory, embedded with
+    ``get_embeddings()`` and written to the directory named by the
+    ``CHROMA_PERSIST_DIR`` environment variable (default ``./db/chroma``).
+    A directory that does not exist is skipped; when no documents are found
+    the function reports that and returns without touching the store.
+
+    Each document is stored under a stable id of the form ``<type>:<name>``.
+    Without explicit ids a fresh random id is generated per document, so every
+    re-run of this script would append another copy of the whole corpus; with
+    stable ids a re-run overwrites the existing entries instead.
+    """
+    persist_dir = os.getenv('CHROMA_PERSIST_DIR', './db/chroma')
+    data_dir = Path(__file__).parent.parent / 'data'
+
+    template_dir = data_dir / 'sample_templates'
+    corrections_dir = data_dir / 'corrections'
+
+    docs = []
+    if template_dir.exists():
+        docs.extend(load_templates(template_dir))
+        # Only templates are in `docs` at this point, so len(docs) is the template count.
+        print(f'从 {template_dir} 加载了 {len(docs)} 个模板')
+
+    if corrections_dir.exists():
+        corr = load_corrections(corrections_dir)
+        docs.extend(corr)
+        print(f'从 {corrections_dir} 加载了 {len(corr)} 个修正案例')
+
+    if not docs:
+        print('未找到文档，无需索引。')
+        return
+
+    embeddings = get_embeddings()
+    # Imported here rather than at module top, so the "no documents" path above
+    # returns without importing langchain_chroma.
+    from langchain_chroma import Chroma
+
+    texts = [d['content'] for d in docs]
+    metadatas = [d['metadata'] for d in docs]
+    # The type prefix keeps a template and a correction case with the same
+    # file stem from colliding; stems are unique within each directory.
+    ids = [f'{meta["type"]}:{meta["name"]}' for meta in metadatas]
+
+    Chroma.from_texts(
+        texts=texts,
+        embedding=embeddings,
+        metadatas=metadatas,
+        ids=ids,
+        persist_directory=persist_dir,
+    )
+    print(f'已将 {len(docs)} 个文档索引到 Chroma，存储位置: {persist_dir}')
+
+
+# Run the indexing only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+    main()