Initial commit: jaspersoft-agent-learn teaching project
This commit is contained in:
@@ -0,0 +1,235 @@
|
||||
# Step 05-07: RAG / Self-Correction / Multi-Agent
|
||||
|
||||
> 这些步骤是进阶内容,包含核心概念和实现代码。
|
||||
|
||||
## Step 05: RAG - 知识检索
|
||||
|
||||
### 核心概念
|
||||
|
||||
RAG = Retrieval-Augmented Generation(检索增强生成)
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────┐
│                      RAG 流程                       │
├─────────────────────────────────────────────────────┤
│                                                     │
│   用户问题 ──▶ 编码为向量 ──▶ 向量数据库检索        │
│                                     │               │
│                                     ▼               │
│                              找到最相关的文档       │
│                                     │               │
│                                     ▼               │
│                         把文档和问题一起发送给 LLM  │
│                                     │               │
│                                     ▼               │
│                                  生成答案           │
│                                                     │
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### 为什么需要 RAG?
|
||||
|
||||
1. LLM 的知识有截止日期
|
||||
2. LLM 不知道你私有的数据
|
||||
3. RAG 让 LLM 能"查阅"外部知识
|
||||
|
||||
### 关键组件
|
||||
|
||||
| 组件 | 作用 |
|
||||
|------|------|
|
||||
| Embedding Model | 把文本变成向量 |
|
||||
| Vector Database | 存储和检索向量 |
|
||||
| Retrieval | 找到最相关的文档 |
|
||||
| Generation | 用检索结果生成答案 |
|
||||
|
||||
### 简化实现
|
||||
|
||||
```python
|
||||
class SimpleRAG:
    """Minimal keyword-based RAG system, for teaching purposes only."""

    def __init__(self):
        # Document store: a list of {"text": ..., "metadata": ...} dicts.
        self.documents = []
        # Placeholder for a real vector index. The simplified keyword
        # matcher below never reads or writes it.
        self.vectors = {}

    def add_document(self, text: str, metadata: dict = None):
        """Store ``text`` together with optional ``metadata``.

        Args:
            text: Document body used for keyword matching.
            metadata: Arbitrary extra information; an empty dict is stored
                when omitted.
        """
        self.documents.append({
            "text": text,
            "metadata": metadata or {},
        })

    def retrieve(self, query: str, top_k: int = 3) -> list:
        """Return up to ``top_k`` documents ranked by keyword overlap.

        The score of a document is the number of distinct
        whitespace-separated query words that occur (case-insensitively,
        as substrings) in its text. Documents scoring zero are dropped.

        Args:
            query: Free-text question.
            top_k: Maximum number of documents to return; values below 1
                yield an empty list.

        Returns:
            The matching document dicts, best score first. Ties keep
            insertion order because ``list.sort`` is stable.
        """
        # Lowercase the query as well as the text and match whole words:
        # iterating the raw query string would score single characters
        # (including spaces) and never match uppercase letters.
        query_words = set(query.lower().split())

        results = []
        for doc in self.documents:
            text = doc["text"].lower()
            score = sum(1 for word in query_words if word in text)
            if score > 0:
                results.append((score, doc))

        results.sort(key=lambda item: item[0], reverse=True)
        # Clamp so a negative top_k cannot turn into an "all but last" slice.
        return [doc for _, doc in results[:max(top_k, 0)]]

    def generate(self, query: str, llm) -> str:
        """Answer ``query`` with ``llm`` using retrieved documents as context.

        Args:
            query: The user question.
            llm: Any object exposing ``invoke(prompt)``.

        Returns:
            Whatever ``llm.invoke`` returns for the assembled prompt.
        """
        docs = self.retrieve(query)
        context = "\n".join(d["text"] for d in docs)

        # Built from explicit pieces so the prompt text does not depend on
        # how this method happens to be indented.
        prompt = (
            "\n根据以下上下文回答问题:\n"
            "\n"
            "上下文:\n"
            f"{context}\n"
            "\n"
            f"问题:{query}\n"
            "\n"
            "答案:\n"
        )
        return llm.invoke(prompt)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 06: Self-Correction - 自我修正
|
||||
|
||||
### 核心概念
|
||||
|
||||
Self-Correction = 让 Agent 能够自我发现并修复错误
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────┐
│                Self-Correction 流程                 │
├─────────────────────────────────────────────────────┤
│                                                     │
│   生成结果 ──▶ 验证 ──▶ 有问题?                     │
│                            │                        │
│                       ┌────┴────┐                   │
│                       │         │                   │
│                      是        否                   │
│                       │         │                   │
│                       ▼         ▼                   │
│                    分析错误  返回结果               │
│                       │                             │
│                       ▼                             │
│                  生成修复方案                       │
│                       │                             │
│                       ▼                             │
│                    重新生成 ──▶ 再次验证            │
│                                                     │
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### 实现要点
|
||||
|
||||
```python
|
||||
class SelfCorrectingAgent:
    """Agent that regenerates its output until validation passes.

    This is a sketch of the control loop only: subclasses supply
    ``generate``, ``validate`` and ``prepare_fix``.
    """

    def __init__(self, max_retries: int = 3):
        # Upper bound on generate/validate rounds.
        self.max_retries = max_retries

    def generate(self, requirement: str) -> str:
        """Produce a result for ``requirement``; must be overridden."""
        raise NotImplementedError

    def validate(self, result: str) -> dict:
        """Return ``{"passed": bool, "error": ...}``; must be overridden."""
        raise NotImplementedError

    def prepare_fix(self, requirement: str, error, result: str) -> str:
        """Build the prompt for the next attempt; must be overridden."""
        raise NotImplementedError

    def try_generate(self, requirement: str) -> str:
        """Generate a result, retrying with a fix prompt on failure.

        Args:
            requirement: The original user requirement.

        Returns:
            The first result that passes validation, or a failure message
            once ``max_retries`` attempts have been used up.
        """
        prompt = requirement
        for attempt in range(1, self.max_retries + 1):
            # 1. Generate
            result = self.generate(prompt)

            # 2. Validate
            validation = self.validate(result)

            # 3. Done as soon as validation passes
            if validation["passed"]:
                return result

            # 4. Report the error
            error = validation["error"]
            print(f"尝试 {attempt} 失败: {error}")

            # 5. Always rebuild the fix prompt from the ORIGINAL
            # requirement; feeding the previous fix prompt back in would
            # nest prompts inside each other and grow on every retry.
            prompt = self.prepare_fix(requirement, error, result)

        return f"经过 {self.max_retries} 次尝试仍失败"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 07: Multi-Agent - 多 Agent 协作
|
||||
|
||||
### 核心概念
|
||||
|
||||
Multi-Agent = 多个专门的 Agent 协同工作
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────┐
│                  Multi-Agent 架构                   │
├─────────────────────────────────────────────────────┤
│                                                     │
│                   ┌──────────────┐                  │
│                   │ Orchestrator │                  │
│                   │   (协调者)   │                  │
│                   └──────┬───────┘                  │
│                          │                          │
│      ┌───────────────────┼───────────────────┐      │
│      │                   │                   │      │
│      ▼                   ▼                   ▼      │
│ ┌──────────┐       ┌──────────┐       ┌──────────┐  │
│ │ Generator│       │ Validator│       │ Searcher │  │
│ │ (生成者) │       │ (验证者) │       │ (搜索者) │  │
│ └──────────┘       └──────────┘       └──────────┘  │
│                                                     │
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### 协作模式
|
||||
|
||||
| 模式 | 说明 | 适用场景 |
|
||||
|------|------|---------|
|
||||
| 串行 | A → B → C 依次执行 | 步骤有依赖 |
|
||||
| 并行 | A / B / C 同时执行 | 步骤独立 |
|
||||
| 循环 | A → B → A → B 循环 | 需要反复验证 |
|
||||
|
||||
### 简化实现
|
||||
|
||||
```python
|
||||
class MultiAgentSystem:
    """Search -> generate -> validate pipeline over three cooperating agents.

    ``GeneratorAgent``, ``ValidatorAgent``, ``SearcherAgent`` and
    ``Orchestrator`` are not defined in this snippet and must be provided
    by the surrounding code.
    """

    def __init__(self):
        # Register the specialised agents under fixed role names.
        self.agents = {
            "generator": GeneratorAgent(),
            "validator": ValidatorAgent(),
            "searcher": SearcherAgent(),
        }
        # Coordinator; process() below does not use it.
        self.orchestrator = Orchestrator(self.agents)

    def process(self, requirement: str, max_attempts: int = 3) -> str:
        """Run the pipeline, regenerating until a draft validates.

        Args:
            requirement: The user requirement.
            max_attempts: Maximum number of generate/validate rounds.

        Returns:
            The validated draft (or the validator's own ``"result"`` entry
            when it supplies one), or ``"处理失败"`` if no attempt passes.
        """
        # 1. Look up related knowledge once; every attempt reuses it.
        context = self.agents["searcher"].search(requirement)

        # 2. Generate (possibly several rounds)
        for _ in range(max_attempts):
            draft = self.agents["generator"].generate(requirement, context)

            # 3. Validate
            validation = self.agents["validator"].validate(draft)

            if validation["passed"]:
                # Fall back to the draft: a validator that reports only
                # "passed" has no "result" key, and indexing it would
                # raise KeyError on the success path.
                return validation.get("result", draft)

        return "处理失败"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📚 学习资源
|
||||
|
||||
- [LangGraph 文档](https://langchain-ai.github.io/langgraph/)
|
||||
- [RAG 最佳实践](https://www.pinecone.io/learn/rag/)
|
||||
- [AgentBench:评估 LLM 作为 Agent 的能力](https://arxiv.org/abs/2308.03688)
|
||||
@@ -0,0 +1,286 @@
|
||||
"""
|
||||
Step 05-07: RAG / Self-Correction / Multi-Agent
|
||||
|
||||
进阶内容代码示例
|
||||
"""
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════════════
|
||||
# RAG 实现
|
||||
# ═══════════════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
class SimpleRAG:
    """Simplified keyword-overlap RAG system.

    For real applications use a vector database (ChromaDB / Pinecone /
    Weaviate) together with an embedding model (sentence-transformers /
    OpenAI Embeddings).
    """

    def __init__(self):
        # Stored documents: dicts with "text", "metadata" and "id" keys.
        self.documents = []

    def add_document(self, text: str, metadata: dict = None):
        """Store ``text`` with optional ``metadata``.

        The document's ``"id"`` is its position in ``self.documents``.
        An empty dict is stored when ``metadata`` is omitted.
        """
        self.documents.append({
            "text": text,
            "metadata": metadata or {},
            "id": len(self.documents),
        })

    def retrieve(self, query: str, top_k: int = 3) -> list:
        """Return up to ``top_k`` documents ranked by Jaccard similarity.

        Query and document are lowercased and split on whitespace; the
        score is ``|shared words| / |all words|``. Documents sharing no
        word with the query are excluded instead of being returned with
        a score of zero.

        Args:
            query: Free-text question.
            top_k: Maximum number of documents to return; values below 1
                yield an empty list.

        Returns:
            Matching document dicts, best score first. Ties keep
            insertion order because ``list.sort`` is stable.
        """
        query_words = set(query.lower().split())
        if not query_words or top_k <= 0:
            return []

        scored = []
        for doc in self.documents:
            doc_words = set(doc["text"].lower().split())
            shared = query_words & doc_words
            if not shared:
                # Nothing in common: not relevant, so never return it.
                continue
            score = len(shared) / len(query_words | doc_words)
            scored.append((score, doc))

        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [doc for _, doc in scored[:top_k]]

    def generate(self, query: str, context_only: bool = False, llm=None):
        """Retrieve context for ``query`` and optionally answer it.

        Args:
            query: The user question.
            context_only: When true, return only the retrieved context.
            llm: Optional object exposing ``invoke(prompt)``. Without it
                no generation is possible, so the context is returned.

        Returns:
            The formatted context string, or the value of
            ``llm.invoke(prompt)`` when an LLM is supplied and
            ``context_only`` is false.
        """
        docs = self.retrieve(query)
        context = "\n\n".join(
            f"[来源: {d['metadata'].get('source', '未知')}]\n{d['text']}"
            for d in docs
        )
        if context_only or llm is None:
            return context

        prompt = (
            "\n根据以下上下文回答问题:\n"
            "\n"
            "上下文:\n"
            f"{context}\n"
            "\n"
            f"问题:{query}\n"
            "\n"
            "答案:\n"
        )
        return llm.invoke(prompt)
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════════════
|
||||
# Self-Correction 实现
|
||||
# ═══════════════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
# Imported here because this class is defined above the module's main
# import block; without these names the decorator and the List annotation
# below raise NameError as soon as the module is loaded.
from dataclasses import dataclass
from typing import List


@dataclass
class ValidationResult:
    """Outcome of validating one generated result."""

    # True when the result is acceptable as-is.
    passed: bool
    # Numeric quality score; its scale is chosen by the validator.
    score: float
    # Human-readable descriptions of the problems found.
    issues: List[str]
    # Optional hint on how to fix the problems.
    suggestion: str = ""
|
||||
|
||||
|
||||
class SelfCorrectingAgent:
    """Agent that validates its own output and retries with a fix prompt.

    Workflow:
    1. Generate an initial result.
    2. Validate it.
    3. On failure, build a fix prompt from the reported issues.
    4. Repeat until validation passes or the retry budget is used up.
    """

    def __init__(self, generator, validator, max_retries: int = 3):
        """Wire up the collaborators.

        Args:
            generator: Object exposing ``generate(requirement)``.
            validator: Object exposing ``validate(result)`` and returning
                an object with ``passed``, ``issues`` and ``suggestion``
                attributes (e.g. ``ValidationResult``).
            max_retries: Maximum number of generate/validate rounds.
        """
        self.generator = generator
        self.validator = validator
        self.max_retries = max_retries

    def generate_with_correction(self, requirement: str) -> dict:
        """Generate a result for ``requirement``, self-correcting on failure.

        Returns:
            On success: ``{"success": True, "result", "attempts",
            "history"}``. On failure: ``{"success": False, "error",
            "attempts", "history"}``. ``history`` holds one dict per
            attempt with ``attempt``, ``requirement``, ``result`` and
            ``validation`` keys.
        """
        history = []
        current_requirement = requirement

        for attempt in range(1, self.max_retries + 1):
            # 1. Generate
            result = self.generator.generate(current_requirement)

            # 2. Validate
            validation = self.validator.validate(result)
            history.append({
                "attempt": attempt,
                "requirement": current_requirement,
                "result": result,
                "validation": validation,
            })

            if validation.passed:
                return {
                    "success": True,
                    "result": result,
                    "attempts": attempt,
                    "history": history,
                }

            # 3. Report the issues and prepare the next prompt. The fix
            # prompt is always built from the original requirement, so it
            # does not nest and grow across retries.
            print(f"尝试 {attempt} 失败: {validation.issues}")
            current_requirement = self._prepare_fix(
                requirement, validation, result
            )

        return {
            "success": False,
            "error": "达到最大重试次数",
            # Reported on failure too, so both outcomes share this key.
            "attempts": len(history),
            "history": history,
        }

    def _prepare_fix(self, original: str, validation: "ValidationResult", result) -> str:
        """Build the prompt asking the generator to repair ``result``.

        Args:
            original: The original, unmodified requirement.
            validation: Failed validation; ``issues`` and ``suggestion``
                are embedded in the prompt.
            result: The rejected output of the previous attempt.
        """
        issues_text = "\n".join(f"- {issue}" for issue in validation.issues)

        # Built from explicit pieces so the prompt text does not depend on
        # how this method happens to be indented.
        return (
            f"\n原始需求:{original}\n"
            "\n"
            "上次生成结果:\n"
            f"{result}\n"
            "\n"
            "验证发现的问题:\n"
            f"{issues_text}\n"
            "\n"
            f"验证建议:{validation.suggestion}\n"
            "\n"
            "请根据以上信息,修正生成结果。\n"
        )
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════════════════
|
||||
# Multi-Agent 实现
|
||||
# ═══════════════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List
|
||||
|
||||
|
||||
@dataclass
class AgentMessage:
    """Message passed between agents."""

    # Name of the sending agent.
    from_agent: str
    # Name of the receiving agent.
    to_agent: str
    # Arbitrary payload.
    content: Any
    # One of "request" / "response" / "broadcast". Defaults to "request",
    # the same default MultiAgentSystem.send_message uses for msg_type.
    message_type: str = "request"
|
||||
|
||||
|
||||
class Agent:
    """Base class for agents.

    Subclasses set ``name`` and override ``process``.
    """

    # Key under which MultiAgentSystem.register stores the agent. Only
    # annotated here, so every subclass has to assign a value.
    name: str

    def process(self, input_data: Any) -> Any:
        """Handle ``input_data`` and return a result.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        # Name the concrete class so a missing override is easy to locate.
        raise NotImplementedError(
            f"{type(self).__name__} must implement process()"
        )
|
||||
|
||||
|
||||
class MultiAgentSystem:
    """Registry of agents plus a simple sequential pipeline.

    Attributes:
        agents: Registered agents, keyed by ``agent.name``.
        message_queue: Messages appended by ``send_message``; nothing in
            this class consumes them.
        history: Message log; nothing in this class writes to it.
    """

    def __init__(self):
        self.agents: Dict[str, "Agent"] = {}
        self.message_queue: List["AgentMessage"] = []
        self.history: List["AgentMessage"] = []

    def register(self, agent: "Agent"):
        """Register ``agent`` under ``agent.name``, replacing any agent
        already registered under that name."""
        self.agents[agent.name] = agent

    def send_message(self, from_agent: str, to_agent: str, content: Any,
                     msg_type: str = "request"):
        """Append a message from ``from_agent`` to ``to_agent`` to the queue."""
        msg = AgentMessage(
            from_agent=from_agent,
            to_agent=to_agent,
            content=content,
            message_type=msg_type,
        )
        self.message_queue.append(msg)

    def broadcast(self, from_agent: str, content: Any):
        """Queue ``content`` for every registered agent except the sender."""
        for agent_name in self.agents:
            if agent_name != from_agent:
                self.send_message(from_agent, agent_name, content, "broadcast")

    @staticmethod
    def _validation_passed(validation: Any) -> bool:
        """Read the ``passed`` flag from a mapping or an attribute object.

        A missing flag counts as passed.
        """
        getter = getattr(validation, "get", None)
        if callable(getter):
            return bool(getter("passed", True))
        # Objects such as ValidationResult expose ``passed`` as an
        # attribute and have no ``get`` method.
        return bool(getattr(validation, "passed", True))

    def process(self, requirement: str) -> Any:
        """Run the searcher, generator and validator agents in order.

        Each stage is skipped when no agent is registered under that
        name: a missing searcher gives an empty context, a missing
        generator passes the requirement through, and a missing validator
        accepts the result.

        Returns:
            The generator's result, or ``{"error": "验证失败",
            "validation": ...}`` when the validator rejects it.
        """
        # 1. Search
        searcher = self.agents.get("searcher")
        context = searcher.process(requirement) if searcher else ""

        # 2. Generate
        generator = self.agents.get("generator")
        if generator:
            result = generator.process(
                {"requirement": requirement, "context": context}
            )
        else:
            result = requirement

        # 3. Validate
        validator = self.agents.get("validator")
        if not validator:
            return result

        validation = validator.process(result)
        if not self._validation_passed(validation):
            return {"error": "验证失败", "validation": validation}

        return result
|
||||
|
||||
|
||||
def demo():
    """Print a short walkthrough of SimpleRAG and MultiAgentSystem."""
    banner = "=" * 60
    print(banner)
    print("Step 05-07: 进阶功能演示")
    print(banner)

    # --- RAG: index three documents, then query them ---
    print("\n📚 RAG 演示")
    knowledge_base = SimpleRAG()
    corpus = (
        ("JasperReports 是一个 Java 报表库", "文档1"),
        ("JRXML 是 JasperReports 的报表模板格式", "文档2"),
        ("可以使用 LLM 生成 JRXML 代码", "文档3"),
    )
    for text, source in corpus:
        knowledge_base.add_document(text, {"source": source})

    hits = knowledge_base.retrieve("JasperReports 是什么")
    print(" 查询 'JasperReports 是什么'")
    for hit in hits:
        print(f" - {hit['text']} (来源: {hit['metadata']['source']})")

    # --- Multi-Agent: three stub agents wired into one pipeline ---
    print("\n\n🤖 Multi-Agent 演示")

    class DemoSearcher(Agent):
        """Stub searcher that returns a canned context string."""

        name = "searcher"

        def process(self, input_data):
            print(f" [{self.name}] 搜索相关资料...")
            return "找到相关模板和文档"

    class DemoGenerator(Agent):
        """Stub generator that returns a fixed report snippet."""

        name = "generator"

        def process(self, input_data):
            print(f" [{self.name}] 生成报表...")
            return "<jasperReport>生成的报表</jasperReport>"

    class DemoValidator(Agent):
        """Stub validator that accepts every result."""

        name = "validator"

        def process(self, input_data):
            print(f" [{self.name}] 验证结果...")
            return {"passed": True}

    system = MultiAgentSystem()
    for agent_cls in (DemoSearcher, DemoGenerator, DemoValidator):
        system.register(agent_cls())

    outcome = system.process("生成销售报表")
    print(f"\n 最终结果: {outcome[:50]}...")
|
||||
|
||||
|
||||
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo()
|
||||
Reference in New Issue
Block a user