Initial commit: jaspersoft-agent-learn teaching project

2026-05-29 23:22:18 +08:00
commit 05bb511aab
20 changed files with 4476 additions and 0 deletions
@@ -0,0 +1,235 @@
+# Step 05-07: RAG / Self-Correction / Multi-Agent
+
+> 这些步骤是进阶内容，包含核心概念和实现代码。
+
+## Step 05: RAG - 知识检索
+
+### 核心概念
+
+RAG = Retrieval-Augmented Generation（检索增强生成）
+
+```
+┌─────────────────────────────────────────────────────┐
+│                    RAG 流程                          │
+├─────────────────────────────────────────────────────┤
+│                                                      │
+│  用户问题 ──▶ 编码为向量 ──▶ 向量数据库检索           │
+│                   │                                  │
+│                   ▼                                  │
+│           找到最相关的文档                            │
+│                   │                                  │
+│                   ▼                                  │
+│      把文档和问题一起发送给 LLM                       │
+│                   │                                  │
+│                   ▼                                  │
+│              生成答案                                 │
+│                                                      │
+└─────────────────────────────────────────────────────┘
+```
+
+### 为什么需要 RAG？
+
+1. LLM 的知识有截止日期
+2. LLM 不知道你私有的数据
+3. RAG 让 LLM 能"查阅"外部知识
+
+### 关键组件
+
+| 组件 | 作用 |
+|------|------|
+| Embedding Model | 把文本变成向量 |
+| Vector Database | 存储和检索向量 |
+| Retrieval | 找到最相关的文档 |
+| Generation | 用检索结果生成答案 |
+
+### 简化实现
+
+```python
+class SimpleRAG:
+    """Minimal RAG pipeline: keyword retrieval plus a prompt handed to an LLM."""
+
+    def __init__(self):
+        # Stored documents, each a dict with "text" and "metadata" keys.
+        self.documents = []
+        # Placeholder for a vector store; this simplified version scores by
+        # keyword overlap and never fills it.
+        self.vectors = {}
+
+    def add_document(self, text: str, metadata: dict = None):
+        """Store a document together with optional metadata.
+
+        Args:
+            text: Document body that ``retrieve`` searches.
+            metadata: Optional extra information; an empty dict is stored
+                when omitted.
+        """
+        self.documents.append({
+            "text": text,
+            "metadata": metadata or {}
+        })
+
+    def retrieve(self, query: str, top_k: int = 3) -> list:
+        """Return up to ``top_k`` documents that mention words of ``query``.
+
+        The query is lower-cased and split on whitespace; a document scores
+        one point for every query word found in its lower-cased text.
+        Documents scoring zero are left out. Text written without spaces
+        (for example Chinese) is treated as a single word, so it only matches
+        as a whole phrase.
+
+        Args:
+            query: Free-text question.
+            top_k: Maximum number of documents to return.
+
+        Returns:
+            The matching document dicts, best score first.
+        """
+        # Compare whole lower-cased words. Iterating the raw query string
+        # would walk it character by character, and upper-case letters could
+        # never match the lower-cased document text.
+        keywords = query.lower().split()
+        results = []
+        for doc in self.documents:
+            haystack = doc["text"].lower()
+            score = sum(1 for word in keywords if word in haystack)
+            if score > 0:
+                results.append((score, doc))
+        # Highest score first; the sort is stable, so ties keep insertion order.
+        results.sort(key=lambda x: x[0], reverse=True)
+        return [doc for _, doc in results[:top_k]]
+
+    def generate(self, query: str, llm) -> str:
+        """Answer ``query`` by sending the retrieved context to ``llm``.
+
+        Args:
+            query: The user's question.
+            llm: Any object exposing ``invoke(prompt)``.
+
+        Returns:
+            Whatever ``llm.invoke`` returns for the assembled prompt.
+        """
+        docs = self.retrieve(query)
+        context = "\n".join(d["text"] for d in docs)
+
+        prompt = f"""
+根据以下上下文回答问题：
+
+上下文：
+{context}
+
+问题：{query}
+
+答案：
+"""
+        return llm.invoke(prompt)
+```
+
+---
+
+## Step 06: Self-Correction - 自我修正
+
+### 核心概念
+
+Self-Correction = 让 Agent 能够自我发现并修复错误
+
+```
+┌─────────────────────────────────────────────────────┐
+│                Self-Correction 流程                  │
+├─────────────────────────────────────────────────────┤
+│                                                      │
+│  生成结果 ──▶ 验证 ──▶ 有问题？                       │
+│                   │                                  │
+│              ┌────┴────┐                            │
+│              │         │                            │
+│             是         否                            │
+│              │         │                            │
+│              ▼         ▼                            │
+│        分析错误    返回结果                          │
+│              │                                        │
+│              ▼                                        │
+│        生成修复方案                                   │
+│              │                                        │
+│              ▼                                        │
+│        重新生成 ──▶ 再次验证                          │
+│                                                      │
+└─────────────────────────────────────────────────────┘
+```
+
+### 实现要点
+
+```python
+class SelfCorrectingAgent:
+    """Agent that regenerates its output until validation passes.
+
+    The ``generate``, ``validate`` and ``prepare_fix`` hooks are not defined
+    in this sketch and have to be supplied by the concrete agent.
+    """
+
+    def __init__(self):
+        # Upper bound on generate/validate rounds.
+        self.max_retries = 3
+
+    def try_generate(self, requirement: str) -> str:
+        """Generate a result, retrying with a repaired requirement on failure.
+
+        Returns:
+            The first result whose validation reports ``passed``; otherwise a
+            message stating that every attempt failed.
+        """
+        for round_no in range(1, self.max_retries + 1):
+            candidate = self.generate(requirement)
+            verdict = self.validate(candidate)
+
+            # A passing validation ends the loop immediately.
+            if verdict["passed"]:
+                return candidate
+
+            problem = verdict["error"]
+            print(f"尝试 {round_no} 失败: {problem}")
+
+            # Feed the error and the rejected result into the next requirement.
+            requirement = self.prepare_fix(requirement, problem, candidate)
+
+        return f"经过 {self.max_retries} 次尝试仍失败"
+```
+
+---
+
+## Step 07: Multi-Agent - 多 Agent 协作
+
+### 核心概念
+
+Multi-Agent = 多个专门的 Agent 协同工作
+
+```
+┌─────────────────────────────────────────────────────┐
+│                  Multi-Agent 架构                   │
+├─────────────────────────────────────────────────────┤
+│                                                      │
+│                  ┌──────────────┐                    │
+│                  │ Orchestrator │                    │
+│                  │  (协调者)    │                    │
+│                  └──────┬───────┘                    │
+│                         │                            │
+│     ┌───────────────────┼───────────────────┐         │
+│     │                   │                   │         │
+│     ▼                   ▼                   ▼         │
+│ ┌──────────┐      ┌──────────┐      ┌──────────┐   │
+│ │ Generator│      │ Validator│      │ Searcher │   │
+│ │ (生成者) │      │ (验证者) │      │ (搜索者) │   │
+│ └──────────┘      └──────────┘      └──────────┘   │
+│                                                      │
+└─────────────────────────────────────────────────────┘
+```
+
+### 协作模式
+
+| 模式 | 说明 | 适用场景 |
+|------|------|---------|
+| 串行 | A → B → C 依次执行 | 步骤有依赖 |
+| 并行 | A / B / C 同时执行 | 步骤独立 |
+| 循环 | A → B → A → B 循环 | 需要反复验证 |
+
+### 简化实现
+
+```python
+class MultiAgentSystem:
+    """Coordinates a searcher, a generator and a validator agent."""
+
+    def __init__(self):
+        # Register the specialised agents under fixed role names.
+        self.agents = {
+            "generator": GeneratorAgent(),
+            "validator": ValidatorAgent(),
+            "searcher": SearcherAgent(),
+        }
+        # Coordinator that is handed the registered agents.
+        self.orchestrator = Orchestrator(self.agents)
+
+    def process(self, requirement: str, max_attempts: int = 3) -> str:
+        """Search once, then generate and validate until a draft passes.
+
+        Args:
+            requirement: The user's request.
+            max_attempts: Maximum number of generate/validate rounds.
+
+        Returns:
+            The accepted output, or "处理失败" when no draft passes.
+        """
+        # 1. Look up supporting knowledge once; every attempt reuses it.
+        context = self.agents["searcher"].search(requirement)
+
+        # 2. Generate and 3. validate, for at most max_attempts rounds.
+        for _ in range(max_attempts):
+            draft = self.agents["generator"].generate(requirement, context)
+            validation = self.agents["validator"].validate(draft)
+
+            if validation["passed"]:
+                # The validators shown in this tutorial report only
+                # "passed"/"error", so fall back to the draft when the
+                # validator does not hand back a "result" of its own.
+                return validation.get("result", draft)
+
+        return "处理失败"
+```
+
+---
+
+## 📚 学习资源
+
+- [LangGraph 文档](https://langchain-ai.github.io/langgraph/)
+- [RAG 最佳实践](https://www.pinecone.io/learn/rag/)
+- [AgentBench：评估 LLM 作为 Agent 的能力（arXiv 2308.03688）](https://arxiv.org/abs/2308.03688)
@@ -0,0 +1,286 @@
+"""
+Step 05-07: RAG / Self-Correction / Multi-Agent
+
+进阶内容代码示例
+"""
+
+# ═══════════════════════════════════════════════════════════════════════════════════════
+# RAG 实现
+# ═══════════════════════════════════════════════════════════════════════════════════════
+
+class SimpleRAG:
+    """
+    Simplified RAG system that ranks documents by keyword overlap.
+
+    For real applications use:
+    - ChromaDB / Pinecone / Weaviate (vector database)
+    - sentence-transformers / OpenAI Embeddings (embedding model)
+    """
+
+    def __init__(self):
+        # Stored documents, each a dict with "text", "metadata" and "id".
+        self.documents = []
+
+    def add_document(self, text: str, metadata: dict = None):
+        """Store a document; its id is its position in insertion order.
+
+        Args:
+            text: Document body that ``retrieve`` searches.
+            metadata: Optional extra information such as a "source" label;
+                an empty dict is stored when omitted.
+        """
+        self.documents.append({
+            "text": text,
+            "metadata": metadata or {},
+            "id": len(self.documents),
+        })
+
+    def retrieve(self, query: str, top_k: int = 3) -> list:
+        """Return up to ``top_k`` documents sharing words with ``query``.
+
+        Both sides are lower-cased and split on whitespace, then ranked by
+        Jaccard similarity (shared words divided by all distinct words).
+        Documents that share no word with the query are left out instead of
+        being returned with a score of zero.
+
+        Args:
+            query: Free-text question.
+            top_k: Maximum number of documents; values <= 0 give an empty list.
+
+        Returns:
+            The matching document dicts, most similar first.
+        """
+        if top_k <= 0:
+            return []
+
+        query_words = set(query.lower().split())
+        if not query_words:
+            return []
+
+        scored = []
+        for doc in self.documents:
+            doc_words = set(doc["text"].lower().split())
+            shared = query_words & doc_words
+            if not shared:
+                # Nothing in common: not relevant, so do not return it.
+                continue
+            score = len(shared) / len(query_words | doc_words)
+            scored.append((score, doc))
+
+        # Highest score first; the sort is stable, so ties keep insertion order.
+        scored.sort(key=lambda pair: pair[0], reverse=True)
+        return [doc for _, doc in scored[:top_k]]
+
+    def generate(self, query: str, context_only: bool = False, llm=None):
+        """
+        Build the retrieval context for ``query`` and optionally answer it.
+
+        Args:
+            query: The user's question.
+            context_only: When True, return the retrieved context even if an
+                ``llm`` is supplied.
+            llm: Optional object exposing ``invoke(prompt)``. Without one
+                there is nothing to generate with, so the context is returned.
+
+        Returns:
+            The formatted context string, or the value of ``llm.invoke`` when
+            an ``llm`` is given and ``context_only`` is False.
+        """
+        docs = self.retrieve(query)
+        context = "\n\n".join(
+            f"[来源: {d['metadata'].get('source', '未知')}]\n{d['text']}"
+            for d in docs
+        )
+        if context_only or llm is None:
+            return context
+
+        prompt = (
+            "\n根据以下上下文回答问题：\n\n"
+            f"上下文：\n{context}\n\n"
+            f"问题：{query}\n\n"
+            "答案：\n"
+        )
+        return llm.invoke(prompt)
+
+
+# ═══════════════════════════════════════════════════════════════════════════════════════
+# Self-Correction 实现
+# ═══════════════════════════════════════════════════════════════════════════════════════
+
+# Imported here because this class is defined above the file's later import
+# block; without these lines the decorator and the List annotation raise
+# NameError as soon as the module is loaded.
+from dataclasses import dataclass
+from typing import List
+
+
+@dataclass
+class ValidationResult:
+    """Outcome of validating one generated result."""
+    # True when the result is acceptable as-is.
+    passed: bool
+    # Numeric score assigned by the validator.
+    score: float
+    # Problems found by the validator, one entry per problem.
+    issues: List[str]
+    # Optional hint on how to repair the result.
+    suggestion: str = ""
+
+
+class SelfCorrectingAgent:
+    """
+    Agent that repairs its own output.
+
+    Workflow:
+    1. Generate an initial result.
+    2. Validate the result.
+    3. On failure, turn the reported issues into a repair prompt.
+    4. Repeat until validation passes or the retry budget is used up.
+    """
+
+    def __init__(self, generator, validator, max_retries: int = 3):
+        """Wire up the collaborators.
+
+        Args:
+            generator: Object exposing ``generate(requirement)``.
+            validator: Object exposing ``validate(result)`` whose return value
+                has ``passed``, ``issues`` and ``suggestion`` attributes.
+            max_retries: Maximum number of generate/validate rounds.
+
+        Raises:
+            ValueError: If ``max_retries`` is smaller than 1.
+        """
+        if max_retries < 1:
+            raise ValueError("max_retries must be at least 1")
+        self.generator = generator
+        self.validator = validator
+        self.max_retries = max_retries
+
+    def generate_with_correction(self, requirement: str) -> dict:
+        """Generate a result, retrying with repair prompts on failure.
+
+        Args:
+            requirement: The original user requirement.
+
+        Returns:
+            On success: ``success`` (True), ``result``, ``attempts`` and
+            ``history``. On failure: ``success`` (False), ``error``,
+            ``attempts`` and ``history``. Each history entry records the
+            attempt number, the prompt used, the result and its validation.
+        """
+        history = []
+        current_requirement = requirement
+
+        for attempt in range(1, self.max_retries + 1):
+            # 1. Generate, then 2. validate.
+            result = self.generator.generate(current_requirement)
+            validation = self.validator.validate(result)
+            history.append({
+                "attempt": attempt,
+                "requirement": current_requirement,
+                "result": result,
+                "validation": validation,
+            })
+
+            if validation.passed:
+                return {
+                    "success": True,
+                    "result": result,
+                    "attempts": attempt,
+                    "history": history,
+                }
+
+            # 3. Report the failure and prepare the next prompt.
+            print(f"尝试 {attempt} 失败: {validation.issues}")
+            if attempt < self.max_retries:
+                # Always rebuild from the ORIGINAL requirement so repair
+                # prompts do not nest inside each other; skipped after the
+                # last attempt because the prompt would never be used.
+                current_requirement = self._prepare_fix(
+                    requirement,
+                    validation,
+                    result,
+                )
+
+        return {
+            "success": False,
+            "error": "达到最大重试次数",
+            # Same key as the success case so callers can read it either way.
+            "attempts": len(history),
+            "history": history,
+        }
+
+    def _prepare_fix(self, original: str, validation: "ValidationResult", result) -> str:
+        """Build the repair prompt for the next attempt.
+
+        Args:
+            original: The original requirement.
+            validation: The failed validation; its ``issues`` and
+                ``suggestion`` are embedded in the prompt.
+            result: The rejected result, embedded verbatim.
+
+        Returns:
+            The prompt text handed to the generator on the next attempt.
+        """
+        issues_text = "\n".join(f"- {issue}" for issue in validation.issues)
+
+        return f"""
+原始需求：{original}
+
+上次生成结果：
+{result}
+
+验证发现的问题：
+{issues_text}
+
+验证建议：{validation.suggestion}
+
+请根据以上信息，修正生成结果。
+"""
+
+
+# ═══════════════════════════════════════════════════════════════════════════════════════
+# Multi-Agent 实现
+# ═══════════════════════════════════════════════════════════════════════════════════════
+
+from dataclasses import dataclass, field
+from typing import Any, Callable, Dict, List
+
+
+@dataclass
+class AgentMessage:
+    """A message exchanged between agents."""
+    # Name of the agent that sent the message.
+    from_agent: str
+    # Name of the agent the message is addressed to.
+    to_agent: str
+    # Payload; no particular type is enforced.
+    content: Any
+    message_type: str  # "request" / "response" / "broadcast"
+
+
+class Agent:
+    """Base class for agents."""
+    # Annotation only, no value is assigned here; the demo subclasses in this
+    # file set it as a class attribute, and MultiAgentSystem.register uses it
+    # as the registry key.
+    name: str
+
+    def process(self, input_data: Any) -> Any:
+        """Handle the input and return a result.
+
+        Raises:
+            NotImplementedError: Always; subclasses are expected to override
+                this method.
+        """
+        raise NotImplementedError
+
+
+class MultiAgentSystem:
+    """
+    Registry and simple pipeline for cooperating agents.
+
+    Attributes:
+    - agents: registered agents, keyed by agent name
+    - message_queue: messages queued by send_message / broadcast
+    - history: message list created empty; no method in this class writes to it
+    """
+
+    def __init__(self):
+        self.agents: Dict[str, Agent] = {}
+        self.message_queue: List[AgentMessage] = []
+        self.history: List[AgentMessage] = []
+
+    def register(self, agent: Agent):
+        """Add an agent under its ``name``; the same name overwrites."""
+        self.agents[agent.name] = agent
+
+    def send_message(self, from_agent: str, to_agent: str, content: Any,
+                    msg_type: str = "request"):
+        """Queue one message from ``from_agent`` to ``to_agent``."""
+        self.message_queue.append(
+            AgentMessage(from_agent, to_agent, content, msg_type)
+        )
+
+    def broadcast(self, from_agent: str, content: Any):
+        """Queue a "broadcast" message for every agent except the sender."""
+        for recipient in self.agents:
+            if recipient == from_agent:
+                continue
+            self.send_message(from_agent, recipient, content, "broadcast")
+
+    def process(self, requirement: str) -> Any:
+        """
+        Run the searcher, generator and validator agents in that order.
+
+        A missing agent is skipped: no searcher gives an empty context, no
+        generator passes the requirement through unchanged, and no validator
+        counts as a pass.
+
+        Returns the generator's result, or an error dict holding the
+        validation when the validator reports a failure.
+        """
+        # 1. Search
+        searcher = self.agents.get("searcher")
+        if searcher:
+            context = searcher.process(requirement)
+        else:
+            context = ""
+
+        # 2. Generate
+        generator = self.agents.get("generator")
+        if generator:
+            result = generator.process({"requirement": requirement, "context": context})
+        else:
+            result = requirement
+
+        # 3. Validate
+        validator = self.agents.get("validator")
+        if validator:
+            validation = validator.process(result)
+        else:
+            validation = {"passed": True}
+
+        # A validation without a "passed" key is treated as a pass.
+        if validation.get("passed", True):
+            return result
+        return {"error": "验证失败", "validation": validation}
+
+
+def demo():
+    """Print a short walkthrough of the RAG and multi-agent examples."""
+    banner = "=" * 60
+    print(banner)
+    print("Step 05-07: 进阶功能演示")
+    print(banner)
+
+    # RAG walkthrough: load three documents, then run one query.
+    print("\n📚 RAG 演示")
+    rag = SimpleRAG()
+    samples = (
+        ("JasperReports 是一个 Java 报表库", "文档1"),
+        ("JRXML 是 JasperReports 的报表模板格式", "文档2"),
+        ("可以使用 LLM 生成 JRXML 代码", "文档3"),
+    )
+    for text, source in samples:
+        rag.add_document(text, {"source": source})
+
+    result = rag.retrieve("JasperReports 是什么")
+    print(f"  查询 'JasperReports 是什么'")
+    for hit in result:
+        print(f"    - {hit['text']} (来源: {hit['metadata']['source']})")
+
+    # Multi-agent walkthrough: three stub agents wired into one system.
+    print("\n\n🤖 Multi-Agent 演示")
+
+    class DemoSearcher(Agent):
+        name = "searcher"
+
+        def process(self, input_data):
+            print(f"    [{self.name}] 搜索相关资料...")
+            return "找到相关模板和文档"
+
+    class DemoGenerator(Agent):
+        name = "generator"
+
+        def process(self, input_data):
+            print(f"    [{self.name}] 生成报表...")
+            return "<jasperReport>生成的报表</jasperReport>"
+
+    class DemoValidator(Agent):
+        name = "validator"
+
+        def process(self, input_data):
+            print(f"    [{self.name}] 验证结果...")
+            return {"passed": True}
+
+    system = MultiAgentSystem()
+    for agent_cls in (DemoSearcher, DemoGenerator, DemoValidator):
+        system.register(agent_cls())
+
+    result = system.process("生成销售报表")
+    print(f"\n  最终结果: {result[:50]}...")
+
+
+# Run the demo only when this file is executed as a script.
+if __name__ == "__main__":
+    demo()