fix: 修复 NameError/状态污染/类型标注/统计; 补全练习与 main; 新增 config/.gitignore/requirements; 文档统一

This commit is contained in:
agent
2026-06-02 13:44:46 +08:00
parent ef876a22d1
commit 908431e25f
23 changed files with 919 additions and 77 deletions
+3 -3
View File
@@ -5,9 +5,9 @@ OPENAI_API_KEY=your_openai_api_key_here
ANTHROPIC_API_KEY=your_anthropic_api_key_here
# LLM 配置
LLM_PROVIDER=anthropic # openai
LLM_MODEL=MiniMax-M2.7 # gpt-4o
LLM_MAX_TOKENS=8192
LLM_PROVIDER=openai # 可选: openai / anthropic
LLM_MODEL=gpt-4o-mini # 例如 gpt-4o / claude-3-5-sonnet-20241022
LLM_MAX_TOKENS=4096
# RAG 配置
RAG_CHROMA_PATH=./db/chroma
+11
View File
@@ -0,0 +1,11 @@
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
*.egg-info/
.venv/
venv/
.env
.idea/
.vscode/
+13 -5
View File
@@ -76,7 +76,7 @@ jaspersoft-agent-learn/
```bash
# 克隆项目
git clone https://www.1415243231.top/gitea/panda/jaspersoft-agent-learn.git
git clone https://gitea.1415243231.top/panda/jaspersoft-agent-learn.git
cd jaspersoft-agent-learn
# 创建虚拟环境(Python 3.9+)
@@ -86,7 +86,7 @@ source venv/bin/activate # Linux/Mac
.\venv\Scripts\activate # Windows
# 安装依赖
pip install python-dotenv openai
pip install -r requirements.txt
```
### 2. 配置环境变量
@@ -110,6 +110,14 @@ python main.py
# Step 03: 简单 Agent
cd ../step_03_simple_agent
python main.py
# Step 04: 记忆系统
cd ../step_04_memory
python main.py
# Step 05-07: 进阶(RAG / Self-Correction / Multi-Agent)
cd ../step_05_07_advanced
python main.py
```
---
@@ -153,7 +161,7 @@ Step 07: 协作 → 多Agent
### 3. 零依赖入门
- Step 01~03: 仅需 Python 标准库
- Step 04+: 只需 `python-dotenv`
- Step 04+:仅标准库(如需 LLM 调用,见 `config.py` 加载 `.env`)
- 无需 LangChain、LangGraph 等框架
---
@@ -250,8 +258,8 @@ class StateManager:
## 🔗 相关资源
### 内部项目
- **JasperSoft 主项目**: `D:\Idea Project\jaspersoft` - LangGraph 实现参考
- **日报系统**: `D:\Idea Project\daily_on_work` - 自动化工作流
- **JasperSoft 主项目**: 同组织下的 `jaspersoft` 仓库 - LangGraph 实现参考
- **日报系统**: 同组织下的 `daily_on_work` 仓库 - 自动化工作流
### 外部资源
- [LangGraph 文档](https://langchain-ai.github.io/langgraph/)
+21 -21
View File
@@ -9,17 +9,15 @@
## 🎯 这是什么?
一个渐进式的 AI Agent 开发学习项目,通过 **7 Step** 带你从零掌握 AI Agent 的核心概念:
一个渐进式的 AI Agent 开发学习项目,通过 **5阶段(7 个主题)** 带你从零掌握 AI Agent 的核心概念:
| Step | 主题 | 你将学会 |
| 阶段 | 主题 | 你将学会 |
|------|------|---------|
| 01 | Tool 工具系统 | 如何定义和注册工具 |
| 02 | State 状态管理 | 如何管理 Agent 的状态 |
| 03 | Simple Agent | 如何构建 Agent 循环 |
| 04 | Memory 记忆 | 如何让 Agent 记住对话 |
| 05 | RAG 知识检索 | 如何让 Agent 查阅知识库 |
| 06 | Self-Correction | 如何让 Agent 自我修正 |
| 07 | Multi-Agent | 如何构建多 Agent 协作 |
| Step 01 | Tool 工具系统 | 如何定义和注册工具 |
| Step 02 | State 状态管理 | 如何管理 Agent 的状态 |
| Step 03 | Simple Agent | 如何构建 Agent 循环 |
| Step 04 | Memory 记忆 | 如何让 Agent 记住对话 |
| Step 05~07 | RAG / Self-Correction / Multi-Agent | 进阶能力(同一目录) |
---
@@ -30,11 +28,11 @@
- Day 3-4: 掌握 State 管理
- Day 5-7: 构建简单 Agent
### 第 2 周:进阶(Step 04-07
### 第 2 周:进阶(Step 04 + 进阶包)
- Day 8-9: 多级记忆系统
- Day 10-11: RAG 与知识增强
- Day 12-13: 自我修正模式
- Day 14: 多 Agent 协作
- Day 10-11: RAG 与知识增强(Step 05)
- Day 12-13: 自我修正模式(Step 06)
- Day 14: 多 Agent 协作(Step 07)
---
@@ -42,7 +40,7 @@
### 1. 克隆项目
```bash
git clone https://www.1415243231.top/gitea/panda/jaspersoft-agent-learn.git
git clone https://gitea.1415243231.top/panda/jaspersoft-agent-learn.git
cd jaspersoft-agent-learn
```
@@ -51,7 +49,7 @@ cd jaspersoft-agent-learn
python -m venv venv
source venv/bin/activate # Linux/Mac
.\venv\Scripts\activate # Windows
pip install python-dotenv
pip install -r requirements.txt
```
### 3. 开始学习
@@ -115,9 +113,9 @@ while not done:
- ✅ 掌握 State 的设计模式
- ✅ 能够构建简单的 Agent 循环
- ✅ 实现多级记忆系统
- ✅ 理解 RAG 架构
- ✅ 掌握 Self-Correction 模式
- ✅ 设计 Multi-Agent 协作系统
- ✅ 理解 RAG 架构(Step 05)
- ✅ 掌握 Self-Correction 模式(Step 06)
- ✅ 设计 Multi-Agent 协作系统(Step 07)
---
@@ -133,7 +131,9 @@ jaspersoft-agent-learn/
├── step_02_state/ # 状态管理
├── step_03_simple_agent/ # 简单 Agent
├── step_04_memory/ # 记忆系统
── step_05_07_advanced/ # RAG/修正/多Agent
├── step_05_07_advanced/ # RAG(05)/ Self-Correction(06)/ Multi-Agent(07)
├── config.py # 集中读取 .env(LLM key、RAG 路径等)
└── requirements.txt # 可选依赖清单
```
---
@@ -141,8 +141,8 @@ jaspersoft-agent-learn/
## 🔗 相关链接
- 📂 **详细学习指南**: [LEARN_GUIDE.md](./LEARN_GUIDE.md)
- 🏠 **主项目**: [JasperSoft](https://www.1415243231.top/gitea/panda/jaspersoft)
- 📊 **日报系统**: [Daily On Work](https://www.1415243231.top)
- 🏠 **主项目**: [JasperSoft](https://gitea.1415243231.top/panda/jaspersoft)
- 📊 **日报系统**: [Daily On Work](https://gitea.1415243231.top)
---
+87
View File
@@ -0,0 +1,87 @@
"""
集中读取 .env / 环境变量。
使用方式:
from config import settings
print(settings.llm_model)
print(settings.has_openai_key)
"""
import os
from dataclasses import dataclass
from pathlib import Path
def _load_dotenv(env_path: Path) -> None:
    """Load ``KEY=VALUE`` lines from a ``.env`` file into ``os.environ``.

    A deliberately small parser that avoids a python-dotenv dependency.
    Supported syntax:

    * blank lines and lines starting with ``#`` are ignored;
    * lines without ``=`` are ignored;
    * an optional leading ``export `` in front of the key is dropped;
    * a value wrapped in matching single or double quotes is taken verbatim
      (anything after the closing quote is discarded);
    * for an unquoted value, a ``#`` preceded by whitespace starts an inline
      comment, so ``LLM_PROVIDER=openai # note`` yields ``openai``.

    Variables that already exist in the environment are never overwritten, so
    real environment variables take precedence over the file.

    Args:
        env_path: Path of the file to read. A missing file is a no-op.
    """
    if not env_path.is_file():
        return
    # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which would
    # otherwise become part of the first key.
    for raw_line in env_path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, raw_value = line.partition("=")
        key = key.strip()
        if key.startswith("export "):
            key = key[len("export "):].strip()
        if not key:
            # "=value" has no usable name; an empty name is rejected by the OS.
            continue
        value = raw_value.strip()
        # Position of the closing quote when the value opens with one.
        closing = value.find(value[0], 1) if value[:1] in ('"', "'") else -1
        if closing != -1:
            value = value[1:closing]
        else:
            # Unquoted: cut at the first '#' that follows whitespace.
            for index, char in enumerate(raw_value):
                if char == "#" and index > 0 and raw_value[index - 1].isspace():
                    raw_value = raw_value[:index]
                    break
            value = raw_value.strip().strip('"').strip("'")
        os.environ.setdefault(key, value)
_ROOT = Path(__file__).resolve().parent
_load_dotenv(_ROOT / ".env")
@dataclass(frozen=True)
class Settings:
    """Immutable bundle of the configuration values used by the project.

    The module-level ``settings`` instance fills every field from an
    environment variable of the same name in upper case.
    """

    # API credentials; an empty string means "not configured".
    openai_api_key: str
    anthropic_api_key: str

    # LLM selection.
    llm_provider: str
    llm_model: str
    llm_max_tokens: int

    # RAG settings.
    rag_chroma_path: str
    rag_collection_name: str
    rag_embed_model: str

    # Miscellaneous.
    validation_service_url: str
    log_level: str

    @property
    def has_openai_key(self) -> bool:
        """True when the OpenAI key is set and is not the template placeholder."""
        if not self.openai_api_key:
            return False
        return self.openai_api_key != "your_openai_api_key_here"

    @property
    def has_anthropic_key(self) -> bool:
        """True when the Anthropic key is set and is not the template placeholder."""
        if not self.anthropic_api_key:
            return False
        return self.anthropic_api_key != "your_anthropic_api_key_here"
def _int(name: str, default: int) -> int:
    """Read environment variable ``name`` as an integer.

    Returns ``default`` when the variable is unset, empty, or cannot be
    parsed by ``int()``.
    """
    text = os.environ.get(name)
    if not text:
        return default
    try:
        parsed = int(text)
    except ValueError:
        return default
    return parsed
# Module-level configuration object, built once at import time.
# Each string field is read from the listed environment variable and falls
# back to the listed default; the token limit goes through _int().
settings = Settings(
    **{
        field_name: os.environ.get(env_name, fallback)
        for field_name, env_name, fallback in (
            ("openai_api_key", "OPENAI_API_KEY", ""),
            ("anthropic_api_key", "ANTHROPIC_API_KEY", ""),
            ("llm_provider", "LLM_PROVIDER", "openai"),
            ("llm_model", "LLM_MODEL", "gpt-4o-mini"),
            ("rag_chroma_path", "RAG_CHROMA_PATH", "./db/chroma"),
            ("rag_collection_name", "RAG_COLLECTION_NAME", "jrxml_chunks"),
            ("rag_embed_model", "RAG_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2"),
            ("validation_service_url", "VALIDATION_SERVICE_URL", "http://localhost:8001"),
            ("log_level", "LOG_LEVEL", "INFO"),
        )
    },
    llm_max_tokens=_int("LLM_MAX_TOKENS", 4096),
)
if __name__ == "__main__":
    # Manual check: print the effective configuration, one "label = value" per line.
    for label, value in (
        ("provider", settings.llm_provider),
        ("model", settings.llm_model),
        ("max_tok", settings.llm_max_tokens),
        ("openai?", settings.has_openai_key),
        ("anthro?", settings.has_anthropic_key),
        ("rag_path", settings.rag_chroma_path),
    ):
        print(f"{label} = {value}")
+15
View File
@@ -0,0 +1,15 @@
# 基础依赖(Step 01-04 仅标准库,无需安装)
# 进阶依赖(按需安装)
#
# 复制 .env.example 为 .env 后填入真实 key
# 然后按需取消注释:
# LLM 客户端(Step 03+ 接 LLM 时需要)
# openai>=1.30.0
# anthropic>=0.30.0
# 向量数据库(Step 05 RAG 时需要)
# chromadb>=0.5.0
# Embedding 模型(Step 05 RAG 时需要)
# sentence-transformers>=2.7.0
-1
View File
@@ -14,7 +14,6 @@ Step 01: 理解 Tool - 工具系统基础
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
from dataclasses import dataclass
import json
# ═══════════════════════════════════════════════════════════════════════════════
+5
View File
@@ -0,0 +1,5 @@
# Jaspersoft Learn - Step 02: 状态管理
from .concept import AgentState, JaspersoftAgentState
__all__ = ["AgentState", "JaspersoftAgentState"]
+1 -2
View File
@@ -12,9 +12,8 @@ Step 02: 理解 State - 状态管理
"""
from typing import TypedDict, List, Dict, Any, Optional
from dataclasses import dataclass, field
from dataclasses import dataclass
from datetime import datetime
import json
# ═══════════════════════════════════════════════════════════════════════════════
-1
View File
@@ -12,7 +12,6 @@ Step 02 练习题:设计你的第一个 Agent State
"""
from typing import TypedDict, List, Dict, Any
import json
# ═══════════════════════════════════════════════════════════════════════════════
+5 -9
View File
@@ -100,15 +100,6 @@ class SimpleAgentState(TypedDict, total=False):
# 第三部分:定义 Tool 接口
# ═══════════════════════════════════════════════════════════════════════════════
@dataclass
class ToolCall:
"""工具调用的数据结构"""
name: str # 工具名称
arguments: dict # 传递给工具的参数
result: Any = None # 工具执行结果
error: str = None # 错误信息
class BaseTool(ABC):
"""
工具基类(简化版,来自 Step 01)
@@ -390,6 +381,9 @@ class SimpleAgent:
status="input"
)
# 对话轮次计数器
self.round_count = 0
def reset(self):
"""重置 Agent 状态"""
self.state = SimpleAgentState(
@@ -454,6 +448,8 @@ class SimpleAgent:
"content": user_input
})
self.state["status"] = "thinking"
self.state["tool_result"] = None
self.round_count += 1
# 2. 大脑决定行动
decision = self.brain.decide(self.state)
+87
View File
@@ -0,0 +1,87 @@
"""
Step 03 练习题扩展 SimpleAgent
🎯 练习目标
1. 巩固 Agent 循环的运行机制
2. 增强 Brain 的决策能力
3. 体验 Tool 的注册流程
"""
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 1:实现 DateTimeTool 并注册到 Agent
# ═══════════════════════════════════════════════════════════════════════════════
"""
任务
SimpleAgent 中添加一个 DateTimeTool提供现在几点 / 今天日期能力
要求
1. 继承 BaseTool
2. name = "datetime"description 描述清楚能做什么
3. execute(**kwargs) 接收 operation支持
- "now" -> 返回当前时间字符串"%Y-%m-%d %H:%M:%S"
- "today" -> 返回当前日期字符串"%Y-%m-%d"
- "weekday" -> 返回今天是星期几中文 "星期一"
4. 注册到 SimpleAgent.tools 字典中
5. 测试用户输入现在几点 Brain 能正确选择 datetime 工具
"""
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 2:改进 Brain 的工具匹配
# ═══════════════════════════════════════════════════════════════════════════════
"""
任务
当前 AgentBrain.decide() 用“关键词 + 表达式正则”匹配 calculator,
对 "iOS / Android 兼容性"、"产品 A+" 这种文本会误判。
要求
1. AgentBrain.decide() 中加入你新加的 DateTimeTool 的路由
2. 修复 calculator 匹配的脆弱性例如优先匹配明确的算式语法
3. Brain 在没有工具可调时返回 {"action": "respond", "response": "..."}
提示
- 可用正则在 user_input 中提取首个形如数字 运算符 数字的子串
- keyword in user_input 检测现在今天星期触发 datetime
"""
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 3:让 Agent 暴露对话快照
# ═══════════════════════════════════════════════════════════════════════════════
"""
任务
SimpleAgent 增加 snapshot()/restore(snap) 方法用于保存和恢复会话
要求
1. snapshot() 返回 dict包含 messagestool_resultcurrent_action
2. restore(snap) snap 覆盖对应字段
3. 验证snapshot -> 多轮对话 -> restore -> 状态回到 snapshot 时刻
提示
- copy.deepcopy() 避免引用共享
- 只恢复可序列化的字段不要把 self.brain / self.tools 一起覆盖
"""
# ═══════════════════════════════════════════════════════════════════════════════
# 测试
# ═══════════════════════════════════════════════════════════════════════════════
def test_exercises():
    """Smoke check: create a SimpleAgent and print the names of its tools."""
    from step_03_simple_agent.concept import SimpleAgent

    agent = SimpleAgent()
    registered = list(agent.tools)
    print("当前已注册工具:", registered)
    # TODO: add your own tests here


if __name__ == "__main__":
    test_exercises()
+160
View File
@@ -0,0 +1,160 @@
"""
Step 03 练习题答案
先自己思考再看答案
答案不是唯一的这里只是其中一种实现
"""
import copy
import re
from datetime import datetime
from step_03_simple_agent.concept import (
BaseTool,
SimpleAgent,
ToolResult,
)
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 1 答案:DateTimeTool
# ═══════════════════════════════════════════════════════════════════════════════
class DateTimeTool(BaseTool):
    """Tool that reports the current local time, date, or weekday."""

    @property
    def name(self) -> str:
        """Name under which the tool is registered."""
        return "datetime"

    @property
    def description(self) -> str:
        """Human-readable summary of the supported operations."""
        return "日期时间工具,支持 now(当前时间)/ today(今天日期)/ weekday(星期几)"

    def execute(self, **kwargs) -> ToolResult:
        """Run one operation.

        Keyword Args:
            operation: ``"now"`` (default), ``"today"`` or ``"weekday"``.

        Returns:
            A successful ``ToolResult`` carrying the formatted string, or a
            failed one with an error message for any other operation.
        """
        operation = kwargs.get("operation", "now")
        current = datetime.now()

        # Operations that are a plain strftime() call.
        for op_name, pattern in (
            ("now", "%Y-%m-%d %H:%M:%S"),
            ("today", "%Y-%m-%d"),
        ):
            if operation == op_name:
                return ToolResult(success=True, result=current.strftime(pattern))

        if operation == "weekday":
            # datetime.weekday() is 0 for Monday, matching this tuple's order.
            weekday_names = ("星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日")
            return ToolResult(success=True, result=weekday_names[current.weekday()])

        return ToolResult(success=False, error=f"不支持的操作: {operation}")
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 2 答案:稳健的 Brain 路由
# ═══════════════════════════════════════════════════════════════════════════════
# A number followed by one or more "<operator> <number>" groups,
# e.g. "1 + 2", "10*3" or "1 + 2 * 3". It matches only digits, dots,
# whitespace and the four operators, so the captured text needs no further
# sanitising before it is handed to the calculator.
EXPR_PATTERN = re.compile(
    r"-?\d+(?:\.\d+)?(?:\s*[+\-*/]\s*-?\d+(?:\.\d+)?)+"
)


def improved_brain_decide(self, state):
    """More robust replacement for ``AgentBrain.decide``.

    Routing order:

    1. A pending ``tool_result`` is turned straight into a response.
    2. An explicit arithmetic expression is sent to ``calculator``; the whole
       chained expression is captured, not only its first two operands.
    3. Date/time keywords are sent to ``datetime``.
    4. Report/template keywords, matched case-insensitively, are sent to
       ``template_search``.
    5. Anything else yields a plain response.

    Args:
        self: The brain instance the function is bound to (not read).
        state: Mapping with optional ``user_input`` and ``tool_result`` keys.

    Returns:
        A decision dict whose ``action`` is ``"respond"`` or ``"use_tool"``.
    """
    # A missing or None input is treated as an empty string.
    user_input = state.get("user_input") or ""
    tool_result = state.get("tool_result")

    # Feed a tool result back to the user instead of routing again.
    if tool_result is not None:
        if isinstance(tool_result, dict) and not tool_result.get("success", True):
            return {
                "action": "respond",
                "response": f"工具执行失败: {tool_result.get('error', '未知错误')}",
            }
        return {"action": "respond", "response": f"工具执行完成,结果:{tool_result}"}

    text = user_input.lower()

    # Only an explicit expression goes to the calculator, so text such as
    # "iOS / Android" is not misrouted.
    expr_match = EXPR_PATTERN.search(user_input)
    if expr_match:
        return {
            "action": "use_tool",
            "tool_name": "calculator",
            "tool_args": {"expression": expr_match.group().strip()},
        }

    # Date/time routing; "星期" wins over "今天" so "今天星期几" asks for the weekday.
    if any(kw in text for kw in ["现在几点", "现在时间", "今天", "日期", "星期"]):
        if "星期" in text:
            op = "weekday"
        elif "今天" in text or "日期" in text:
            op = "today"
        else:
            op = "now"
        return {"action": "use_tool", "tool_name": "datetime", "tool_args": {"operation": op}}

    # Template search; the lower-cased text lets "JRXML" / "Jasper" match too.
    if any(kw in text for kw in ["报表", "模板", "jrxml", "jasper"]):
        return {
            "action": "use_tool",
            "tool_name": "template_search",
            "tool_args": {"keyword": user_input},
        }

    return {
        "action": "respond",
        "response": f"我收到了你的输入:{user_input}(暂无可用工具直接处理)",
    }
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 3 答案:snapshot / restore
# ═══════════════════════════════════════════════════════════════════════════════
def install_snapshot_methods(agent: SimpleAgent) -> None:
    """Add ``snapshot()`` and ``restore(snap)`` to ``SimpleAgent``.

    Both functions are assigned on the ``SimpleAgent`` class itself, so every
    instance gains them; the ``agent`` argument is accepted but not read.
    """

    def snapshot(self):
        """Return copies of the conversational state plus the round counter."""
        messages = copy.deepcopy(self.state["messages"])
        tool_result = copy.deepcopy(self.state.get("tool_result"))
        return {
            "messages": messages,
            "tool_result": tool_result,
            "current_action": self.state.get("current_action"),
            "round_count": self.round_count,
        }

    def restore(self, snap):
        """Overwrite the captured fields with the values stored in ``snap``."""
        state = self.state
        # Deep copies keep the snapshot reusable after the agent mutates state.
        state["messages"] = copy.deepcopy(snap["messages"])
        state["tool_result"] = copy.deepcopy(snap.get("tool_result"))
        state["current_action"] = snap.get("current_action")
        self.round_count = snap.get("round_count", 0)

    for method in (snapshot, restore):
        setattr(SimpleAgent, method.__name__, method)
# ═══════════════════════════════════════════════════════════════════════════════
# 测试
# ═══════════════════════════════════════════════════════════════════════════════
def test_answers():
    """Run the three reference answers in order and print what they produce."""
    rule = "=" * 60
    print("\n" + rule)
    print("Step 03 练习答案测试")
    print(rule)

    agent = SimpleAgent()

    # Register the new tool and bind the improved decide() to this brain.
    agent.tools["datetime"] = DateTimeTool()
    agent.brain.decide = improved_brain_decide.__get__(agent.brain)

    print("\n📝 练习 1: DateTimeTool")
    print(" 工具列表:", list(agent.tools.keys()))
    for operation in ("now", "weekday"):
        outcome = agent.tools["datetime"].execute(operation=operation)
        print(f" {operation} ->", outcome.result)

    print("\n📝 练习 2: 改进的 Brain 路由")
    questions = ("1 + 2", "iOS / Android 兼容性", "现在几点", "今天星期几")
    for question in questions:
        decision = agent.brain.decide({"user_input": question, "tool_result": None})
        # Show the chosen tool, or the response text when no tool was chosen.
        target = decision.get("tool_name", decision.get("response"))
        print(f" '{question}' -> {decision['action']} / {target}")

    print("\n📝 练习 3: snapshot / restore")
    install_snapshot_methods(agent)
    agent.process("1 + 2")
    saved = agent.snapshot()
    print(" snapshot round_count =", saved["round_count"])
    agent.process("3 * 4")
    print(" after 2 rounds, round_count =", agent.round_count)
    agent.restore(saved)
    print(" after restore, round_count =", agent.round_count)


if __name__ == "__main__":
    test_answers()
+1 -1
View File
@@ -67,7 +67,7 @@ def main():
print("\n\n" + "=" * 70)
print("📊 会话统计")
print("=" * 70)
print(f" 对话轮次: {len(agent.get_history()) // 2}")
print(f" 对话轮次: {agent.round_count}")
print(f" 工具调用: {len(agent.state.get('tool_calls', []))}")
print("\n💡 继续学习:")
print(" Step 04: 添加 Memory - 记忆系统")
-13
View File
@@ -68,19 +68,6 @@ class Message:
}
@dataclass
class MemorySnapshot:
"""记忆快照 - 用于保存和恢复状态"""
state: Dict[str, Any] # 关键状态
messages: List[Message] # 消息历史
key_info: Dict[str, Any] # 关键信息摘要
timestamp: str = ""
def __post_init__(self):
if not self.timestamp:
self.timestamp = datetime.now().isoformat()
# ═══════════════════════════════════════════════════════════════════════════════
# 第三部分:Working Memory(工作记忆)
# ═══════════════════════════════════════════════════════════════════════════════
+82
View File
@@ -0,0 +1,82 @@
"""
Step 04 练习题Memory 实战
🎯 练习目标
1. 理解三层记忆的协作方式
2. 实现一个 Token 估算器
3. 体验摘要压缩的副作用
"""
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 1Token 估算
# ═══════════════════════════════════════════════════════════════════════════════
"""
任务
ShortTermMemory 上加一个 estimate_tokens() 方法粗略估计当前占用的 token
要求
1. 简单规则:1 个中文字符 ≈ 1.5 token,1 个英文单词 ≈ 1.3 token
2. 对所有消息求和
3. 返回 int向上取整
提示
- 正则分中英文re.findall(r'[\u4e00-\u9fff]', text) 取汉字剩下按空格分词
- import math; math.ceil(...)
"""
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 2:基于 Token 阈值的自动压缩
# ═══════════════════════════════════════════════════════════════════════════════
"""
任务
ShortTermMemory 加一个 maybe_compress(max_tokens: int) 方法
estimate_tokens() 超过 max_tokens 把较早的对话压缩成一行摘要
保留最近的 5
要求
1. 触发时调用 summarize_older(keep_recent=5)
2. 把摘要作为一个新的 Message(role="system", content=summary) 放回 messages 头部
3. 删除被摘要覆盖的旧消息避免 token 没降反升
"""
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 3:把 MemorySystem 接到 SimpleAgent
# ═══════════════════════════════════════════════════════════════════════════════
"""
任务
SimpleAgent process() 时把每一轮对话写入 MemorySystem
并在下次决策前把 memory.get_context() 注入到 state['context']
要求
1. SimpleAgent.__init__ new 一个 MemorySystem
2. process() 末尾self.memory.add_message('user' / 'assistant', ...)
3. process() 开头self.state['context'] = self.memory.get_context()
提示
- 直接修改 step_03/concept.py 是允许的学习项目不是发布包
- 可以通过 Monkey-patching 避免破坏 step_03 原有行为
"""
def test_exercises():
    """Smoke check: store two messages and print the start of the context."""
    from step_04_memory.concept import MemorySystem

    mem = MemorySystem()
    conversation = (
        ("user", "帮我生成销售月报"),
        ("assistant", "好的,请告诉我字段"),
    )
    for role, content in conversation:
        mem.add_message(role, content)

    print("上下文片段:")
    # Only the first 200 characters are shown.
    preview = mem.get_context()[:200]
    print(preview)


if __name__ == "__main__":
    test_exercises()
+118
View File
@@ -0,0 +1,118 @@
"""
Step 04 练习题答案
先自己思考再看答案
答案不是唯一的这里只是其中一种实现
"""
import math
import re
from step_04_memory.concept import MemorySystem, Message, ShortTermMemory
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 1 答案:Token 估算
# ═══════════════════════════════════════════════════════════════════════════════
# One CJK ideograph in the range U+4E00..U+9FFF.
_CN_PATTERN = re.compile(r"[\u4e00-\u9fff]")
# A run of ASCII letters, counted as one English word.
_EN_WORD = re.compile(r"[A-Za-z]+")


def estimate_tokens_for_text(text: str) -> int:
    """Roughly estimate the token count of ``text``.

    Each Chinese character counts as 1.5 tokens and each English word as
    1.3 tokens; everything else is ignored. The total is rounded up.
    """
    chinese_chars = sum(1 for _ in _CN_PATTERN.finditer(text))
    english_words = sum(1 for _ in _EN_WORD.finditer(text))
    weighted_total = chinese_chars * 1.5 + english_words * 1.3
    return math.ceil(weighted_total)
def install_estimate_tokens() -> None:
    """Attach an ``estimate_tokens()`` method to the ``ShortTermMemory`` class."""

    def estimate_tokens(self: ShortTermMemory) -> int:
        """Sum the estimated token count over every stored message."""
        total = 0
        for message in self.messages:
            total += estimate_tokens_for_text(message.content)
        return total

    ShortTermMemory.estimate_tokens = estimate_tokens
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 2 答案:基于 Token 阈值的自动压缩
# ═══════════════════════════════════════════════════════════════════════════════
def install_maybe_compress() -> None:
    """Attach a ``maybe_compress()`` method to the ``ShortTermMemory`` class."""

    def maybe_compress(
        self: ShortTermMemory,
        max_tokens: int = 800,
        keep_recent: int = 5,
    ) -> bool:
        """Compress older messages into one summary when over the budget.

        Args:
            max_tokens: Budget compared against ``estimate_tokens()``.
            keep_recent: Number of newest messages kept verbatim. It is also
                passed to ``summarize_older``.
                # NOTE(review): whether summarize_older accepts 0 is not
                # visible here - confirm before relying on keep_recent=0.

        Returns:
            True when the history was replaced by "summary + recent
            messages"; False when under budget or nothing was summarised.

        Raises:
            RuntimeError: If ``estimate_tokens`` has not been installed.
            ValueError: If ``keep_recent`` is negative.
        """
        if not hasattr(self, "estimate_tokens"):
            raise RuntimeError("请先调用 install_estimate_tokens()")
        if keep_recent < 0:
            raise ValueError("keep_recent must be >= 0")
        if self.estimate_tokens() <= max_tokens:
            return False

        summary = self.summarize_older(keep_recent=keep_recent)
        if not summary:
            return False

        # messages[-0:] would return the whole list, so 0 is handled apart.
        recent = self.messages[-keep_recent:] if keep_recent else []
        # The summary goes first as a system message; older messages are dropped.
        self.messages = [Message(role="system", content=f"[历史摘要]\n{summary}")] + recent
        return True

    ShortTermMemory.maybe_compress = maybe_compress
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 3 答案:把 MemorySystem 接到 SimpleAgent
# ═══════════════════════════════════════════════════════════════════════════════
def install_memory_to_agent() -> None:
    """Monkey-patch ``SimpleAgent`` so every turn is recorded in a MemorySystem.

    After the call, ``SimpleAgent.__init__`` also creates ``self.memory``.
    ``SimpleAgent.process`` writes ``memory.get_context()`` into
    ``state["context"]`` before delegating to the original implementation,
    then stores the user input and the response.

    The patch is applied at most once. Without that guard a second call would
    wrap the already patched methods and store every turn twice.
    """
    from step_03_simple_agent.concept import SimpleAgent

    if getattr(SimpleAgent, "_memory_patch_installed", False):
        return

    orig_init = SimpleAgent.__init__
    orig_process = SimpleAgent.process

    def patched_init(self, *args, **kwargs):
        orig_init(self, *args, **kwargs)
        self.memory = MemorySystem()

    def patched_process(self, user_input: str) -> str:
        # Agents created before the patch never ran patched_init.
        if getattr(self, "memory", None) is None:
            self.memory = MemorySystem()

        # Expose the remembered context to the decision step.
        self.state["context"] = self.memory.get_context()
        response = orig_process(self, user_input)

        # Record this turn only after it completed.
        self.memory.add_message("user", user_input)
        self.memory.add_message("assistant", response)
        return response

    SimpleAgent.__init__ = patched_init
    SimpleAgent.process = patched_process
    SimpleAgent._memory_patch_installed = True
# ═══════════════════════════════════════════════════════════════════════════════
# 测试
# ═══════════════════════════════════════════════════════════════════════════════
def test_answers():
    """Run the reference answers for Step 04 and print what they produce."""
    rule = "=" * 60
    print("\n" + rule)
    print("Step 04 练习答案测试")
    print(rule)

    # estimate_tokens must be installed first: maybe_compress checks for it.
    install_estimate_tokens()
    install_maybe_compress()

    mem = MemorySystem()
    for turn in range(20):
        mem.short_term.add("user", f"{turn} 轮对话内容,包含中文与 english words " * 5)
    estimated = mem.short_term.estimate_tokens()
    print(f"\n📝 练习 1: 注入 20 条后估算 token = {estimated}")

    compressed = mem.short_term.maybe_compress(max_tokens=200)
    remaining = len(mem.short_term.messages)
    print(f" maybe_compress() = {compressed}, 压缩后消息数 = {remaining}")
    print(f" 压缩后估算 token = {mem.short_term.estimate_tokens()}")

    print("\n📝 练习 3: SimpleAgent 接入 Memory")
    # Best effort: the step_03 import only resolves from the repository root,
    # so a failure here is reported instead of raised.
    try:
        install_memory_to_agent()
        from step_03_simple_agent.concept import SimpleAgent

        agent = SimpleAgent()
        agent.process("1 + 2")
        stored = len(agent.memory.short_term.messages)
        print(f" agent.memory 工作正常,消息数 = {stored}")
    except Exception as e:
        print(f" 接入失败(可忽略,需在 step_03 父目录运行): {e}")


if __name__ == "__main__":
    test_answers()
+17
View File
@@ -0,0 +1,17 @@
"""
Step 04: Memory - 记忆系统 主程序
运行方式
cd step_04_memory
python main.py
"""
from concept import demo
def main():
    """Run the Step 04 demo by calling ``demo`` imported from ``concept``."""
    demo()


# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
+15 -17
View File
@@ -200,30 +200,28 @@ class MultiAgentSystem:
def __init__(self):
# 注册各个 Agent
self.agents = {
"generator": GeneratorAgent(),
"validator": ValidatorAgent(),
"searcher": SearcherAgent(),
}
# 协调器
self.orchestrator = Orchestrator(self.agents)
self.agents: dict[str, Agent] = {}
def process(self, requirement: str) -> str:
"""协调多个 Agent 处理请求"""
def register(self, agent: Agent) -> None:
self.agents[agent.name] = agent
def process(self, requirement: str):
# 1. 搜索相关知识
context = self.agents["searcher"].search(requirement)
searcher = self.agents.get("searcher")
context = searcher.process(requirement) if searcher else ""
# 2. 生成(可能需要多轮)
for attempt in range(3):
draft = self.agents["generator"].generate(requirement, context)
generator = self.agents.get("generator")
draft = generator.process({"requirement": requirement, "context": context}) if generator else requirement
# 3. 验证
validation = self.agents["validator"].validate(draft)
validator = self.agents.get("validator")
if validator:
validation = validator.process(draft)
if not validation.get("passed", True):
return {"error": "验证失败", "validation": validation}
if validation["passed"]:
return validation["result"]
return "处理失败"
return draft
```
---
+3 -3
View File
@@ -4,6 +4,9 @@ Step 05-07: RAG / Self-Correction / Multi-Agent
进阶内容代码示例
"""
from dataclasses import dataclass
from typing import Any, Dict, List
# ═══════════════════════════════════════════════════════════════════════════════════════
# RAG 实现
# ═══════════════════════════════════════════════════════════════════════════════════════
@@ -152,9 +155,6 @@ class SelfCorrectingAgent:
# Multi-Agent 实现
# ═══════════════════════════════════════════════════════════════════════════════════════
from dataclasses import dataclass, field
from typing import Dict, List, Callable
@dataclass
class AgentMessage:
+88
View File
@@ -0,0 +1,88 @@
"""
Step 05-07 练习题进阶能力
🎯 练习目标
1. 体验 RAG 的检索质量
2. 写一个 Self-Correction 闭环
3. 设计多 Agent 编排
"""
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 1:提升 SimpleRAG 的检索质量
# ═══════════════════════════════════════════════════════════════════════════════
"""
任务
SimpleRAG.retrieve() 当前用 Jaccard 相似度 + 简单分词
改造为把分词改成 "中文按字 + 英文按词 + 大小写归一化" 后再算 Jaccard
要求
1. 复用 SimpleRAG 不要重写
2. 实现 upgrade_retrieve(rag) 替换 rag.retrieve 方法
3. 用一个含中英文的小语料验证
提示
- re.findall(r'[\u4e00-\u9fff]|[A-Za-z]+', text.lower())
"""
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 2:实现 Self-Correction 主循环
# ═══════════════════════════════════════════════════════════════════════════════
"""
任务
SelfCorrectingAgent concept.py 中是骨架请补全它的 run() 方法
def run(self, requirement: str, generate_fn, validate_fn, max_retries=3):
for attempt in range(max_retries):
output = generate_fn(requirement, attempt, feedback)
validation = validate_fn(output)
if validation.passed:
return output
feedback = self.build_feedback(validation)
return output
要求
1. 第一次 attempt 不带 feedback
2. 每次失败用 build_feedback 拼出新的 feedback
3. 超过 max_retries 返回最后一次 output不要抛异常
"""
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 3:给 MultiAgentSystem 加超时与失败回退
# ═══════════════════════════════════════════════════════════════════════════════
"""
任务
MultiAgentSystem.process() 是顺序调用 searcher -> generator -> validator
任何一个 Agent 抛异常都让整个流程崩
要求
1. process() 外层包 try/except失败时返回 {"error": str(e)}
2. 给每个 Agent timeout_seconds 参数 time.monotonic
3. 验证故意让 validator 抛异常确认 process() 不会让程序崩溃
提示
- time.monotonic() 不受系统时间影响
- 简单演示里可以靠 sleep + 时间比较实现超时
"""
def test_exercises():
    """Smoke check: index two documents and print the retrieval result."""
    from step_05_07_advanced.concept import SimpleRAG

    rag = SimpleRAG()
    corpus = (
        ("JasperReports 是一个 Java 报表库", {"source": "doc1"}),
        ("JRXML 是 JasperReports 模板格式", {"source": "doc2"}),
    )
    for text, metadata in corpus:
        rag.add_document(text, metadata)

    found = rag.retrieve("JasperReports")
    print(found)


if __name__ == "__main__":
    test_exercises()
+169
View File
@@ -0,0 +1,169 @@
"""
Step 05-07 练习题答案
先自己思考再看答案
答案不是唯一的这里只是其中一种实现
"""
import re
import time
from typing import Callable
from step_05_07_advanced.concept import (
Agent,
MultiAgentSystem,
SelfCorrectingAgent,
SimpleRAG,
ValidationResult,
)
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 1 答案:升级 SimpleRAG 分词
# ═══════════════════════════════════════════════════════════════════════════════
# A single CJK ideograph, or a run of ASCII letters.
_TOKEN_PATTERN = re.compile(r"[\u4e00-\u9fff]|[A-Za-z]+")


def _tokenize(text: str) -> set[str]:
    """Return the distinct tokens of ``text``.

    The text is lower-cased first. Chinese is split per character and
    English per word; digits and punctuation are dropped.
    """
    lowered = text.lower()
    return {match.group() for match in _TOKEN_PATTERN.finditer(lowered)}
def upgrade_retrieve(rag: SimpleRAG) -> None:
    """Replace ``SimpleRAG.retrieve`` with a Jaccard ranking over ``_tokenize``.

    The new method is assigned on the ``SimpleRAG`` class, so it applies to
    every instance; the ``rag`` argument is accepted but not read.
    """

    def retrieve(self, query: str, top_k: int = 3):
        """Return up to ``top_k`` documents, highest Jaccard score first."""
        query_tokens = _tokenize(query)
        ranked = []
        for document in self.documents:
            doc_tokens = _tokenize(document["text"])
            combined = query_tokens | doc_tokens
            # An empty union means neither side has tokens; skip the document.
            if combined:
                shared = query_tokens & doc_tokens
                ranked.append((len(shared) / len(combined), document))
        # sorted() is stable, so ties keep their insertion order.
        ranked = sorted(ranked, key=lambda pair: pair[0], reverse=True)
        return [document for _, document in ranked[:top_k]]

    SimpleRAG.retrieve = retrieve
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 2 答案:Self-Correction 主循环
# ═══════════════════════════════════════════════════════════════════════════════
def install_self_correction_run() -> None:
    """Attach a ``run()`` generate/validate/retry loop to ``SelfCorrectingAgent``."""

    def run(
        self: SelfCorrectingAgent,
        requirement: str,
        generate_fn: Callable,
        validate_fn: Callable,
        max_retries: int = 3,
    ):
        """Generate until validation passes or the attempts run out.

        ``generate_fn`` is called as ``(requirement, attempt, feedback)``,
        with ``feedback`` set to ``None`` on the first attempt.
        ``validate_fn`` receives the output and must return an object with a
        ``passed`` attribute.

        Returns:
            The first output that passes, otherwise the last output produced
            (``None`` when no attempt was made). Nothing is raised when the
            retries are exhausted.
        """
        feedback = None
        output = None
        for attempt in range(max_retries):
            output = generate_fn(requirement, attempt, feedback)
            verdict = validate_fn(output)
            if verdict.passed:
                break
            # Carry the failure details into the next generation attempt.
            feedback = self.build_feedback(verdict, output, attempt)
        return output

    SelfCorrectingAgent.run = run
# ═══════════════════════════════════════════════════════════════════════════════
# 练习 3 答案:MultiAgentSystem 超时与回退
# ═══════════════════════════════════════════════════════════════════════════════
def install_safe_process() -> None:
    """Replace ``MultiAgentSystem.process`` with a guarded, time-limited version.

    Three methods are installed on the class: ``process`` (the public entry
    point), ``_timed_process`` (the pipeline) and ``_check_timeout``.
    """

    def process(self: MultiAgentSystem, requirement: str, timeout_seconds: float = 2.0):
        """Run the pipeline and convert any failure into ``{"error": ...}``.

        Swallowing every exception is deliberate: this is the boundary that
        keeps one failing agent from crashing the caller.
        """
        try:
            return self._timed_process(requirement, timeout_seconds)
        except Exception as e:
            # Some exceptions have an empty message; fall back to the type
            # name so the caller never receives {"error": ""}.
            return {"error": str(e) or type(e).__name__}

    def _timed_process(self, requirement: str, timeout_seconds: float):
        """Run searcher -> generator -> validator, skipping unregistered agents.

        The deadline is checked before each stage and once more before the
        result is returned, so an overrun in the last stage is reported too.
        A running agent is not interrupted; the overrun is detected once it
        returns.
        """
        # time.monotonic() is unaffected by system clock adjustments.
        deadline = time.monotonic() + timeout_seconds

        context = ""
        searcher = self.agents.get("searcher")
        if searcher:
            self._check_timeout(deadline)
            context = searcher.process(requirement)

        # Without a generator the requirement itself is passed through.
        result = requirement
        generator = self.agents.get("generator")
        if generator:
            self._check_timeout(deadline)
            result = generator.process({"requirement": requirement, "context": context})

        validator = self.agents.get("validator")
        if validator:
            self._check_timeout(deadline)
            validation = validator.process(result)
            if not validation.get("passed", True):
                return {"error": "验证失败", "validation": validation}

        # Final check: the last stage may have overrun the deadline.
        self._check_timeout(deadline)
        return result

    def _check_timeout(self, deadline: float):
        """Raise ``TimeoutError`` once the monotonic clock has passed ``deadline``."""
        if time.monotonic() > deadline:
            raise TimeoutError("Multi-Agent 处理超时")

    MultiAgentSystem.process = process
    MultiAgentSystem._timed_process = _timed_process
    MultiAgentSystem._check_timeout = _check_timeout
# ═══════════════════════════════════════════════════════════════════════════════
# 测试
# ═══════════════════════════════════════════════════════════════════════════════
def test_answers():
    """Run the reference answers for Step 05-07 and print what they produce."""
    rule = "=" * 60
    print("\n" + rule)
    print("Step 05-07 练习答案测试")
    print(rule)

    print("\n📝 练习 1: 升级 SimpleRAG")
    rag = SimpleRAG()
    corpus = (
        ("JasperReports 是一个 Java 报表库", {"source": "doc1"}),
        ("JRXML 是 JasperReports 模板格式", {"source": "doc2"}),
    )
    for text, metadata in corpus:
        rag.add_document(text, metadata)
    upgrade_retrieve(rag)
    hits = rag.retrieve("JasperReports")
    print(f" 检索命中 {len(hits)}")
    for hit in hits:
        print(f" - {hit['text']}")

    print("\n📝 练习 2: Self-Correction run()")
    install_self_correction_run()
    corrector = SelfCorrectingAgent()

    def fake_generate(req, attempt, feedback):
        # Produces "v0", "v1", ...; only "v1" passes validation below.
        return f"v{attempt}"

    def fake_validate(output):
        if output == "v1":
            return ValidationResult(passed=True, score=1.0, issues=[])
        return ValidationResult(passed=False, score=0.2, issues=["不达标"])

    final = corrector.run("测试", fake_generate, fake_validate, max_retries=3)
    print(f" 最终结果 = {final}")

    print("\n📝 练习 3: Multi-Agent 安全 process()")
    install_safe_process()

    class BoomValidator(Agent):
        """Validator that always raises, used to exercise the error path."""

        name = "validator"

        def process(self, input_data):
            raise RuntimeError("故意崩溃")

    # Named "system" so the local does not shadow the stdlib module name "sys".
    system = MultiAgentSystem()
    system.agents["validator"] = BoomValidator()
    outcome = system.process("任何需求")
    print(f" 异常被吞掉: {outcome}")


if __name__ == "__main__":
    test_answers()
+17
View File
@@ -0,0 +1,17 @@
"""
Step 05-07: RAG / Self-Correction / Multi-Agent 主程序
运行方式
cd step_05_07_advanced
python main.py
"""
from concept import demo
def main():
    """Run the Step 05-07 demo by calling ``demo`` imported from ``concept``."""
    demo()


# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()