Initial commit: jaspersoft-agent-learn teaching project

2026-05-29 23:22:18 +08:00
commit 05bb511aab
20 changed files with 4476 additions and 0 deletions
@@ -0,0 +1,54 @@
+# Step 02: 理解 State - 状态管理
+
+## 🎯 学习目标
+
+- 理解什么是 Agent State（代理状态）
+- 理解为什么 Agent 需要状态
+- 学会设计合理的状态结构
+- 理解状态在多步骤任务中的作用
+
+---
+
+## 📖 概念讲解
+
+### 什么是 State？
+
+State（状态）是 Agent 的"记忆"——它记录了：
+
+1. **当前任务进展**：完成了多少，还剩多少
+2. **历史数据**：用户说过什么，生成过什么
+3. **中间结果**：每个步骤的输出是什么
+4. **工具调用结果**：工具返回了什么
+
+```
+没有 State 的 Agent：
+    用户: "生成报表"
+    Agent: 生成报表
+    用户: "把标题改成黑色"
+    Agent: ??? 我不记得你刚才生成的是什么报表
+
+有 State 的 Agent：
+    用户: "生成报表"
+    Agent: 生成报表，记录到 state
+            state = {current_jrxml: "..."}
+
+    用户: "把标题改成黑色"
+    Agent: 从 state 读取 current_jrxml
+            修改标题
+            更新 state = {current_jrxml: "新报表"}
+```
+
+### 为什么需要精心设计 State？
+
+一个好的 State 设计应该：
+
+1. **包含所有必要信息**：不遗漏关键数据
+2. **避免信息冗余**：不要重复存储相同数据
+3. **结构清晰**：易于读取和更新
+4. **类型安全**：有类型提示（供 IDE 和静态检查工具使用，运行时不会强制校验），减少 bug
+
+---
+
+## 💻 代码实现
+
+请打开 `concept.py` 查看详细代码注释。
@@ -0,0 +1,486 @@
+"""
+Step 02: 理解 State - 状态管理
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+🎓 本节内容：
+    1. 什么是 Agent State？
+    2. 如何设计 State 结构？
+    3. State 如何在多步骤任务中传递？
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+"""
+
+from typing import TypedDict, List, Dict, Any, Optional
+from dataclasses import dataclass, field
+from datetime import datetime
+import json
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 第一部分：理解为什么需要 State
+# ═══════════════════════════════════════════════════════════════════════════════
+
+"""
+在开始写代码之前，我们先理解 State 的本质。
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+场景：用户想生成一个报表，然后修改它
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+❌ 没有 State 的实现（错误）：
+    def handle_request(user_input):
+        if "生成" in user_input:
+            jrxml = generate_jrxml(user_input)
+            return jrxml  # 生成后就"丢"了
+
+        if "修改" in user_input:
+            # 糟糕！我不知道当前报表是什么
+            # 只能让用户重新描述
+            return "请重新描述你想要修改的报表"
+
+✅ 有 State 的实现（正确）：
+    class Agent:
+        def __init__(self):
+            self.state = {}  # 用一个字典存储状态
+
+        def handle_request(self, user_input):
+            if "生成" in user_input:
+                jrxml = generate_jrxml(user_input)
+                self.state["current_jrxml"] = jrxml  # 保存到状态
+                return jrxml
+
+            if "修改" in user_input:
+                current = self.state.get("current_jrxml")  # 从状态读取
+                if not current:
+                    return "没有可修改的报表"
+                modified = modify_jrxml(current, user_input)
+                self.state["current_jrxml"] = modified  # 更新状态
+                return modified
+
+这就是 State 的作用：在多次交互中保持信息！
+"""
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 第二部分：设计 State 的数据结构
+# ═══════════════════════════════════════════════════════════════════════════════
+
+"""
+设计 State 时，我们使用 Python 的 TypedDict
+这是因为：
+    1. 有类型提示，IDE 能帮你检查错误
+    2. 有代码补全，写代码更方便
+    3. 文档化，其他人知道 State 里有什么
+"""
+
+class AgentState(TypedDict, total=False):
+    """
+    Minimal teaching example of an agent state schema.
+
+    Why TypedDict rather than a dataclass?
+        A TypedDict reads like the plain dict it is at runtime, and
+        LangGraph accepts a TypedDict as its state schema.
+
+    Meaning of total=False:
+        Every key is optional, so a state can be created with only the
+        keys that are needed at that moment.
+
+    Purpose of the main fields:
+        - user_input: the user's current input
+        - current_jrxml: the report source currently being edited
+        - conversation_history: the dialogue so far
+        - status: processing / success / error
+        - error_msg: error details, if any
+    """
+    # === Core working fields ===
+    user_input: str                    # Current user input
+    current_jrxml: str                # Current JRXML source
+    status: str                       # Processing status: "processing" / "success" / "error"
+    error_msg: str                    # Error message
+
+    # === Conversation ===
+    conversation_history: List[dict]  # Dialogue history [{"role": "user", "content": "..."}]
+    full_conversation_history: List[dict]  # Complete dialogue history (with timestamps)
+
+    # === Generation ===
+    stage: str                       # Current stage: "initial" / "refine" / "mapping"
+    generated_jrxml: str            # Complete generated JRXML
+    is_modified: bool                # Whether there are unsaved modifications
+
+    # === Validation ===
+    validation_result: dict          # Validation result
+    retry_count: int                 # Number of retries so far
+
+    # === Metadata ===
+    session_id: str                  # Session ID
+    created_at: str                  # Creation time
+    updated_at: str                  # Last update time
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 第三部分：实际应用 - Jaspersoft 报表生成的完整状态
+# ═══════════════════════════════════════════════════════════════════════════════
+
+class JaspersoftAgentState(TypedDict, total=False):
+    """
+    Full state schema for the Jaspersoft report-generation agent.
+
+    Every key is optional (total=False). Fields are grouped by concern and
+    each group carries a short note on why it exists.
+    """
+
+    # ------------------------------------------------------------------
+    # 1. Basic session information
+    # ------------------------------------------------------------------
+    session_id: str          # Unique session identifier
+    session_name: str        # Human-friendly session name
+    created_at: str          # Creation time
+    updated_at: str          # Last update time
+
+    # ------------------------------------------------------------------
+    # 2. User input
+    # ------------------------------------------------------------------
+    user_input: str         # The user's current input
+    uploaded_file_path: str  # Path of an uploaded file, if any
+
+    # ------------------------------------------------------------------
+    # 3. Conversation history
+    #
+    # Two histories are kept on purpose:
+    #   - conversation_history: compact form sent to the LLM (saves tokens)
+    #   - full_conversation_history: complete form with timestamps and other
+    #     metadata (for auditing)
+    # An LLM has a bounded context window, so for long dialogues only the
+    # most recent turns can be sent; hence the compact/complete split.
+    # ------------------------------------------------------------------
+    conversation_history: List[dict]         # Compact dialogue history
+    full_conversation_history: List[dict]    # Complete dialogue history
+    compressed_history: str                  # Compressed summary of early turns
+
+    # ------------------------------------------------------------------
+    # 4. Report content (the core of the state)
+    #
+    # current_jrxml vs final_jrxml:
+    #   - current_jrxml: the version being edited, possibly not yet validated
+    #   - final_jrxml: the validated version that may be exported
+    # Version history exists to support undo, rolling back to an earlier
+    # version, and keeping a trail of every modification.
+    # ------------------------------------------------------------------
+    current_jrxml: str       # JRXML currently being edited
+    final_jrxml: str         # Confirmed JRXML (validation passed)
+
+    # Version management
+    jrxml_versions: List[dict]   # List of historical versions
+    history_states: List[dict]   # State snapshots (used for undo)
+    last_saved_version: int     # Number of the last saved version
+
+    # ------------------------------------------------------------------
+    # 5. Generation process (intermediate state)
+    #
+    # Recorded so that the user can see how far generation has progressed,
+    # failures can be traced to a step, and debugging is easier.
+    # ------------------------------------------------------------------
+    # Possible stage values:
+    #   - "initial_generation": first generation
+    #   - "layout_refine": layout refinement
+    #   - "field_mapping": field mapping
+    #   - "validation": validation
+    #   - "correction": correction
+    stage: str              # Current stage
+
+    # Possible intent values:
+    #   - "initial_generation": generate a new report
+    #   - "modify_report": modify the existing report
+    #   - "preview_report": preview the report
+    #   - "consult_question": ask a question
+    intent: str             # User intent
+
+    # Intermediate results of generation
+    retrieved_context: str      # Context retrieved by RAG
+    layout_schema: dict         # Layout information derived from OCR
+    ocr_extraction_result: dict # Fields extracted by OCR
+
+    # ------------------------------------------------------------------
+    # 6. Validation and error handling
+    #
+    # retry_count is meant to be incremented after each failed generation;
+    # retries stop once max_retries is reached, which avoids endless loops.
+    # ------------------------------------------------------------------
+    status: str                    # Status: success / error / processing
+    error_msg: str                 # Error message
+    retry_count: int               # Current retry count
+    max_retries: int               # Maximum number of retries
+
+    # Failure recovery: when retries are exhausted the failure details are
+    # stored here so they can be injected into the next user turn.
+    pending_failure_context: dict   # Failure context awaiting handling
+
+    # ------------------------------------------------------------------
+    # 7. Knowledge base (KB)
+    #
+    # Multi-tenant design: kb_id is the knowledge base bound to this
+    # session, so different users/projects can use different ones.
+    # ------------------------------------------------------------------
+    kb_id: str               # Current knowledge-base ID
+    kb_fields: List[dict]   # Field definitions from the knowledge base
+    kb_template_jrxml: str  # Template JRXML from the knowledge base
+
+    # ------------------------------------------------------------------
+    # 8. User-facing explanation
+    #
+    # Users want to know how the agent reasoned and, on failure, what went
+    # wrong; surfacing this improves transparency and trust.
+    # ------------------------------------------------------------------
+    natural_explanation: str  # Natural-language explanation for the user
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 第四部分：State 的操作工具函数
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def create_initial_state(session_id: str) -> JaspersoftAgentState:
+    """
+    Build the state for a brand-new session with every declared field set.
+
+    A factory function is used instead of ad-hoc dict literals so that
+    defaults live in one place and can be changed consistently.
+
+    Every field declared on JaspersoftAgentState receives a default here,
+    including user_input, uploaded_file_path and pending_failure_context.
+    Helpers that look keys up in the state (for example update_state)
+    therefore never hit a missing key on a freshly created state.
+
+    Args:
+        session_id: Identifier stored under "session_id".
+
+    Returns:
+        A new state dict; created_at and updated_at share the same
+        ISO-8601 timestamp taken from the local clock.
+    """
+    now = datetime.now().isoformat()
+
+    return JaspersoftAgentState(
+        # Basic session information
+        session_id=session_id,
+        session_name="新会话",
+        created_at=now,
+        updated_at=now,
+
+        # User input (empty until the first request arrives)
+        user_input="",
+        uploaded_file_path="",
+
+        # Conversation history
+        conversation_history=[],
+        full_conversation_history=[],
+        compressed_history="",
+
+        # Report content
+        current_jrxml="",
+        final_jrxml="",
+        jrxml_versions=[],
+        history_states=[],
+        last_saved_version=0,
+
+        # Generation process
+        stage="initial",
+        intent="initial_generation",
+        retrieved_context="",
+        layout_schema={},
+        ocr_extraction_result={},
+
+        # Validation and errors
+        status="processing",
+        error_msg="",
+        retry_count=0,
+        max_retries=5,
+        pending_failure_context={},
+
+        # Knowledge base
+        kb_id="",
+        kb_fields=[],
+        kb_template_jrxml="",
+
+        # User-facing explanation
+        natural_explanation="",
+    )
+
+
+def update_state(state: JaspersoftAgentState, **updates) -> JaspersoftAgentState:
+    """
+    Apply keyword updates to the state in place and refresh "updated_at".
+
+    A key is accepted when it is declared on JaspersoftAgentState or is
+    already present in the state. Checking the declared schema matters
+    because the schema is total=False: a legitimate field such as
+    user_input may simply not have been set yet.
+
+    All keys are validated before anything is written, so a rejected call
+    leaves the state untouched.
+
+    Usage:
+        state = update_state(state, current_jrxml="new content", status="success")
+
+    Args:
+        state: The state dict to mutate.
+        **updates: Field names and their new values.
+
+    Returns:
+        The same state object, for call chaining.
+
+    Raises:
+        KeyError: If a key is neither declared on the schema nor present
+            in the state. The message names the first offending key.
+    """
+    declared_fields = JaspersoftAgentState.__annotations__
+
+    # Validate first so that a bad key cannot leave a half-applied update.
+    for key in updates:
+        if key not in declared_fields and key not in state:
+            raise KeyError(f"State 没有字段: {key}")
+
+    state.update(updates)
+
+    # Stamp the modification time last so it always reflects this call,
+    # even if the caller passed updated_at explicitly.
+    state["updated_at"] = datetime.now().isoformat()
+
+    return state
+
+
+def save_state_snapshot(state: JaspersoftAgentState) -> dict:
+    """
+    Capture the undo-relevant part of the state as an independent snapshot.
+
+    Take a snapshot before an important operation; if the operation fails
+    or the user asks to undo, pass it to restore_state_snapshot.
+
+    The conversation history is deep-copied, so editing a message dict on
+    the live state afterwards cannot alter the snapshot.
+
+    Args:
+        state: The state to read; it is not modified.
+
+    Returns:
+        A dict with the keys current_jrxml, final_jrxml, status,
+        conversation_history, user_input, intent and timestamp. Missing
+        state keys fall back to "" (or [] for the history).
+    """
+    import copy  # local import: only this helper needs it
+
+    history = list(state.get("conversation_history", []))
+
+    return {
+        "current_jrxml": state.get("current_jrxml", ""),
+        "final_jrxml": state.get("final_jrxml", ""),
+        "status": state.get("status", ""),
+        "conversation_history": copy.deepcopy(history),
+        "user_input": state.get("user_input", ""),
+        "intent": state.get("intent", ""),
+        "timestamp": datetime.now().isoformat(),
+    }
+
+
+def restore_state_snapshot(state: JaspersoftAgentState, snapshot: dict) -> JaspersoftAgentState:
+    """
+    Roll the state back to a snapshot produced by save_state_snapshot.
+
+    Restores the report content, status and conversation history. It also
+    restores user_input and intent when the snapshot carries them, so an
+    undo does not leave the intent of the undone operation behind.
+    Snapshots without those two keys leave the current values untouched.
+
+    The history is copied out of the snapshot, so the same snapshot can be
+    restored again after the live state has been edited.
+
+    Args:
+        state: The state to mutate in place.
+        snapshot: A snapshot dict; absent report/status/history keys are
+            restored as "" / [].
+
+    Returns:
+        The same state object, with "updated_at" refreshed.
+    """
+    import copy  # local import: only this helper needs it
+
+    state["current_jrxml"] = snapshot.get("current_jrxml", "")
+    state["final_jrxml"] = snapshot.get("final_jrxml", "")
+    state["status"] = snapshot.get("status", "")
+    state["conversation_history"] = copy.deepcopy(
+        list(snapshot.get("conversation_history", []))
+    )
+
+    # Only overwrite these when the snapshot actually recorded them.
+    for key in ("user_input", "intent"):
+        if key in snapshot:
+            state[key] = snapshot[key]
+
+    state["updated_at"] = datetime.now().isoformat()
+
+    return state
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 第五部分：演示代码
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def demo():
+    """
+    Demonstrate the State helpers step by step, printing progress to stdout.
+
+    Covers: creating a state, updating it, taking a snapshot, modifying the
+    report, undoing via the snapshot, and a condensed generation flow.
+    """
+    print("=" * 60)
+    print("Step 02: 理解 State - 状态管理演示")
+    print("=" * 60)
+
+    # 1. Create the initial state
+    print("\n📦 步骤 1: 创建初始状态")
+    state = create_initial_state("session_001")
+    print(f"  会话 ID: {state['session_id']}")
+    print(f"  创建时间: {state['created_at']}")
+    print(f"  当前状态: {state['status']}")
+
+    # 2. Update the state through the helper
+    print("\n🔄 步骤 2: 更新状态")
+    state = update_state(
+        state,
+        user_input="生成一个销售报表",
+        current_jrxml='<?xml version="1.0"?><jasperReport/>',
+        status="success",
+    )
+    print(f"  用户输入: {state['user_input']}")
+    print(f"  生成状态: {state['status']}")
+    print(f"  JRXML 长度: {len(state['current_jrxml'])} 字符")
+
+    # 3. Save a snapshot
+    print("\n📸 步骤 3: 保存状态快照")
+    snapshot = save_state_snapshot(state)
+    print(f"  快照时间: {snapshot['timestamp']}")
+    print(f"  快照内容: JRXML ({len(snapshot['current_jrxml'])} 字符)")
+
+    # 4. Modify the state directly (simulating a user edit)
+    print("\n✏️ 步骤 4: 修改报表（模拟）")
+    state["current_jrxml"] = '<?xml version="1.0"?><jasperReport modified="true"/>'
+    state["status"] = "modified"
+    print(f"  新状态: {state['status']}")
+    print(f"  新 JRXML: {state['current_jrxml']}")
+
+    # 5. Undo (restore from the snapshot)
+    print("\n↩️ 步骤 5: 撤销操作")
+    state = restore_state_snapshot(state, snapshot)
+    print(f"  恢复状态: {state['status']}")
+    print(f"  恢复 JRXML: {state['current_jrxml']}")
+
+    # 6. Simulate a complete generation flow on a fresh session
+    print("\n🔄 步骤 6: 模拟完整生成流程")
+    state = create_initial_state("session_002")
+
+    # Phase 1: user input
+    state["user_input"] = "生成一个采购单报表"
+    state["intent"] = "initial_generation"
+    print(f"  [阶段1] 用户输入: {state['user_input']}")
+
+    # Phase 2: generation
+    state["current_jrxml"] = "<?xml>...生成的 JRXML...</xml>"
+    state["stage"] = "initial_generation"
+    print(f"  [阶段2] 生成完成，长度: {len(state['current_jrxml'])}")
+
+    # Phase 3: validation; the validated source becomes the final version
+    state["status"] = "success"
+    state["final_jrxml"] = state["current_jrxml"]
+    print(f"  [阶段3] 验证通过，状态: {state['status']}")
+
+    print("\n" + "=" * 60)
+    print("✅ State 管理演示完成")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    demo()
@@ -0,0 +1,230 @@
+"""
+Step 02 练习题：设计你的第一个 Agent State
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+🎯 练习目标：
+    1. 巩固 State 的基本结构
+    2. 设计一个业务相关的 State
+    3. 理解状态在多步骤任务中的作用
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+"""
+
+from typing import TypedDict, List, Dict, Any
+import json
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 练习 1：完善一个简单的聊天机器人状态
+# ═══════════════════════════════════════════════════════════════════════════════
+
+"""
+任务：
+    设计一个客服聊天机器人的状态
+
+要求：
+    1. 记录用户信息（用户名、ID）
+    2. 记录对话历史
+    3. 记录当前正在处理的问题
+    4. 记录问题解决状态
+    5. 记录用户满意度评分
+
+提示：
+    - 使用 TypedDict 定义状态
+    - 考虑哪些字段是必须的，哪些是可选的
+"""
+
+class CustomerServiceState(TypedDict, total=False):
+    """
+    State of a customer-service chatbot (exercise skeleton).
+
+    Complete the field definitions below.
+    """
+    # User information
+    user_id: str
+    user_name: str
+
+    # TODO: add more fields...
+    pass
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 练习 2：设计一个数据分析 Agent 的状态
+# ═══════════════════════════════════════════════════════════════════════════════
+
+"""
+任务：
+    设计一个数据分析 Agent 的状态
+
+功能：
+    1. 用户提出数据分析需求
+    2. Agent 连接数据源
+    3. Agent 执行查询
+    4. Agent 生成报告
+
+请设计状态来支持这个流程，包括：
+    - 用户需求
+    - 数据源配置
+    - 查询结果
+    - 生成的报告
+    - 中间状态
+"""
+
+class DataAnalysisState(TypedDict, total=False):
+    """
+    State of a data-analysis agent (exercise skeleton).
+
+    Complete the state definition...
+    """
+    pass
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 练习 3：实现状态快照和恢复
+# ═══════════════════════════════════════════════════════════════════════════════
+
+"""
+任务：
+    给以下状态实现快照和恢复功能
+
+提示：
+    - 快照应该保存足够的信息来恢复状态
+    - 恢复后状态应该是完整的
+    - 考虑哪些字段需要保存，哪些不需要
+"""
+
+def create_snapshot(state: dict) -> dict:
+    """
+    Create a snapshot of the state (exercise stub, not yet implemented).
+
+    The snapshot should keep:
+        - key business data
+        - but not temporary, intermediate computation results
+
+    Expected return format:
+        {
+            "data": {...},  # snapshot data
+            "timestamp": "..."  # timestamp
+        }
+    """
+    # TODO: implement this function
+    pass
+
+
+def restore_from_snapshot(state: dict, snapshot: dict) -> dict:
+    """
+    Restore the state from a snapshot (exercise stub, not yet implemented).
+
+    Args:
+        state: The current state (to be updated).
+        snapshot: A previously saved snapshot.
+
+    Returns:
+        The restored state.
+    """
+    # TODO: implement this function
+    pass
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 练习 4：状态验证
+# ═══════════════════════════════════════════════════════════════════════════════
+
+"""
+任务：
+    实现状态验证函数
+
+验证规则：
+    1. 必填字段不能为空
+    2. 字段类型要正确
+    3. 某些字段有取值范围限制
+
+返回：
+    {
+        "valid": True/False,
+        "errors": ["错误1", "错误2", ...]
+    }
+"""
+
+def validate_state(state: dict, rules: dict) -> dict:
+    """
+    Validate the state (exercise stub, not yet implemented).
+
+    Args:
+        state: The state to validate.
+        rules: Validation rules, in the form:
+            {
+                "field_name": {
+                    "type": int/str/list/dict,
+                    "required": True/False,
+                    "min": 0,  # optional, numeric minimum
+                    "max": 100,  # optional, numeric maximum
+                    "choices": ["a", "b"]  # optional, allowed values
+                }
+            }
+
+    Example:
+        rules = {
+            "user_id": {"type": str, "required": True},
+            "age": {"type": int, "min": 0, "max": 150},
+            "status": {"type": str, "choices": ["active", "inactive"]}
+        }
+    """
+    # TODO: implement this function
+    pass
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 测试
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_exercises():
+    """Run the exercise functions on sample data and print the results."""
+    print("\n" + "=" * 60)
+    print("测试练习答案")
+    print("=" * 60)
+
+    # Exercise 3
+    print("\n📝 练习 3: 快照和恢复")
+    # Sample state
+    sample_state = {
+        "user_id": "123",
+        "current_task": "数据分析",
+        "progress": 50,
+        "temp_data": ["计算中...", "处理中..."],  # temporary data
+    }
+
+    print(f"原始状态: {sample_state}")
+
+    # Create a snapshot
+    snapshot = create_snapshot(sample_state)
+    print(f"快照: {snapshot}")
+
+    # Modify the state
+    sample_state["progress"] = 100
+    print(f"修改后状态: {sample_state}")
+
+    # Restore
+    restored = restore_from_snapshot(sample_state, snapshot)
+    print(f"恢复后状态: {restored}")
+
+    # Exercise 4
+    print("\n📝 练习 4: 状态验证")
+    rules = {
+        "user_id": {"type": str, "required": True},
+        "age": {"type": int, "min": 0, "max": 150},
+        "status": {"type": str, "choices": ["active", "inactive"]}
+    }
+
+    # A state that satisfies the rules
+    valid_state = {"user_id": "123", "age": 25, "status": "active"}
+    print(f"验证有效状态: {validate_state(valid_state, rules)}")
+
+    # A state that violates the "max" and "choices" rules
+    invalid_state = {"user_id": "123", "age": 200, "status": "unknown"}
+    print(f"验证无效状态: {validate_state(invalid_state, rules)}")
+
+
+if __name__ == "__main__":
+    test_exercises()
@@ -0,0 +1,274 @@
+"""
+Step 02 练习题答案
+
+⚠️ 先自己思考，再看答案！
+⚠️ 答案不是唯一的，这里只是其中一种实现
+"""
+
+from typing import TypedDict, List, Dict, Any
+from datetime import datetime
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 练习 1 答案：客服聊天机器人状态
+# ═══════════════════════════════════════════════════════════════════════════════
+
+class CustomerServiceState(TypedDict, total=False):
+    """State of a customer-service chatbot; every key is optional."""
+
+    # --- User information ---
+    user_id: str                    # User ID
+    user_name: str                  # User name
+    user_email: str                 # User e-mail (optional)
+
+    # --- Conversation history ---
+    # Format: [{"role": "user", "content": "..."},
+    #          {"role": "assistant", "content": "..."}]
+    conversation_history: List[dict]  # Dialogue history
+
+    # --- Issue handling ---
+    current_issue: str              # Description of the issue being handled
+    issue_status: str              # "open" / "investigating" / "resolved" / "closed"
+    issue_priority: str             # "low" / "medium" / "high" / "urgent"
+
+    # --- Solution ---
+    proposed_solution: str          # Proposed solution
+    solution_steps: List[str]       # List of resolution steps
+    is_resolved: bool              # Whether the issue is resolved
+
+    # --- User feedback ---
+    satisfaction_rating: int        # Satisfaction rating, 1-5
+    feedback_comment: str           # Free-text feedback
+
+    # --- Metadata ---
+    session_start: str              # Session start time
+    last_interaction: str           # Time of the last interaction
+    agent_id: str                   # ID of the agent handling this session
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 练习 2 答案：数据分析 Agent 状态
+# ═══════════════════════════════════════════════════════════════════════════════
+
+class DataAnalysisState(TypedDict, total=False):
+    """State of a data-analysis agent; every key is optional."""
+
+    # --- User request ---
+    user_request: str               # The analysis the user asked for
+    session_id: str                 # Session ID
+
+    # --- Data-source configuration ---
+    data_source_type: str            # "database" / "file" / "api"
+    # Example config:
+    #     {
+    #         "host": "localhost",
+    #         "database": "sales_db",
+    #         "table": "orders"
+    #     }
+    data_source_config: dict        # Data-source settings (connection info etc.)
+
+    # --- Query and result ---
+    query: str                      # Executed SQL or query condition
+    query_result: Any               # Query result
+    result_row_count: int           # Number of result rows
+    result_columns: List[str]       # Result column names
+
+    # --- Analysis process ---
+    # Stage values:
+    #     - "initial": initial state
+    #     - "connecting": connecting to the data source
+    #     - "querying": running the query
+    #     - "analyzing": analysing the data
+    #     - "generating_report": generating the report
+    #     - "completed": done
+    #     - "error": failed
+    stage: str                      # Current stage
+
+    # --- Generated report ---
+    generated_report: str           # Content of the generated report
+    report_format: str              # "json" / "csv" / "markdown" / "html"
+
+    # --- Error handling ---
+    error_message: str              # Error message, if any
+    retry_count: int                # Number of retries
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 练习 3 答案：快照和恢复
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def create_snapshot(state: dict) -> dict:
+    """
+    Build a snapshot holding only the business-critical part of the state.
+
+    Temporary or derived values are deliberately left out; only the
+    whitelisted keys that are present in the state are copied.
+
+    Returns:
+        {"data": <copied fields>, "timestamp": <ISO time>, "version": "1.0"}
+    """
+    # Whitelist of business-critical keys (extend as the state grows).
+    persisted_keys = ("user_id", "user_name", "current_task", "progress", "status")
+
+    kept = {key: state[key] for key in persisted_keys if key in state}
+    taken_at = datetime.now().isoformat()
+
+    return {"data": kept, "timestamp": taken_at, "version": "1.0"}
+
+
+def restore_from_snapshot(state: dict, snapshot: dict) -> dict:
+    """
+    Write a snapshot's data back into the state, in place.
+
+    An empty snapshot, or one without a "data" entry, leaves the state
+    untouched. Otherwise every snapshot field overwrites the matching key
+    and two bookkeeping keys are added: "_restored_at" (now) and
+    "_restored_from" (the snapshot's timestamp, or "unknown").
+
+    Returns:
+        The same state object that was passed in.
+    """
+    usable = bool(snapshot) and "data" in snapshot
+
+    if usable:
+        for name, saved_value in snapshot["data"].items():
+            state[name] = saved_value
+
+        # Record when, and from which snapshot, the restore happened.
+        state["_restored_at"] = datetime.now().isoformat()
+        state["_restored_from"] = snapshot.get("timestamp", "unknown")
+
+    return state
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 练习 4 答案：状态验证
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def _accepted_types(expected) -> tuple:
+    """Normalize a rule's "type" entry (a class or tuple of classes) to a tuple."""
+    return expected if isinstance(expected, tuple) else (expected,)
+
+
+def _type_matches(value, expected) -> bool:
+    """Return True if value satisfies the rule type, treating bool as distinct from int."""
+    accepted = _accepted_types(expected)
+    if isinstance(value, bool) and bool not in accepted:
+        # bool subclasses int, so True/False would otherwise pass an int rule.
+        accepted = tuple(t for t in accepted if t is not int)
+    return isinstance(value, accepted)
+
+
+def validate_state(state: dict, rules: dict) -> dict:
+    """
+    Validate a state dict against per-field rules.
+
+    Supported rule keys per field: "type" (a class or tuple of classes),
+    "required" (bool), "min" / "max" (numeric bounds, inclusive) and
+    "choices" (allowed values).
+
+    Checks run in this order for each field: required, type, numeric
+    range, choices. A missing/empty value (None or "") skips the remaining
+    checks, and a type mismatch skips the range and choices checks.
+
+    A bool value does not satisfy an int rule unless bool is listed
+    explicitly, and bool values are not range-checked.
+
+    Args:
+        state: The state to validate; it is not modified.
+        rules: Mapping of field name to its rule dict.
+
+    Returns:
+        {"valid": bool, "errors": [message, ...]}
+    """
+    errors = []
+
+    for field_name, field_rules in rules.items():
+        value = state.get(field_name)
+        field_type = field_rules.get("type")
+
+        # 1. Required check; empty optional fields skip everything else.
+        if value is None or value == "":
+            if field_rules.get("required", False):
+                errors.append(f"字段 '{field_name}' 是必填的")
+            continue
+
+        # 2. Type check
+        if field_type and not _type_matches(value, field_type):
+            expected_name = "/".join(t.__name__ for t in _accepted_types(field_type))
+            errors.append(
+                f"字段 '{field_name}' 类型错误: "
+                f"期望 {expected_name}, 实际 {type(value).__name__}"
+            )
+            continue
+
+        # 3. Numeric range check (bools are not treated as numbers)
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
+            if "min" in field_rules and value < field_rules["min"]:
+                errors.append(
+                    f"字段 '{field_name}' 小于最小值: "
+                    f"{value} < {field_rules['min']}"
+                )
+            if "max" in field_rules and value > field_rules["max"]:
+                errors.append(
+                    f"字段 '{field_name}' 大于最大值: "
+                    f"{value} > {field_rules['max']}"
+                )
+
+        # 4. Allowed-values check
+        if "choices" in field_rules and value not in field_rules["choices"]:
+            errors.append(
+                f"字段 '{field_name}' 值不在允许范围内: "
+                f"{value} not in {field_rules['choices']}"
+            )
+
+    return {
+        "valid": not errors,
+        "errors": errors
+    }
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# 测试
+# ═══════════════════════════════════════════════════════════════════════════════
+
+def test_answers():
+    """Exercise the reference answers on sample data and print the results."""
+    print("\n" + "=" * 60)
+    print("测试练习答案")
+    print("=" * 60)
+
+    # Exercise 1: a partially filled customer-service state
+    print("\n📝 练习 1: 客服状态")
+    cs_state: CustomerServiceState = {
+        "user_id": "user_001",
+        "user_name": "张三",
+        "issue_status": "open",
+        "issue_priority": "high",
+    }
+    print(f"  状态: {cs_state}")
+
+    # Exercise 2: a partially filled data-analysis state
+    print("\n📝 练习 2: 数据分析状态")
+    da_state: DataAnalysisState = {
+        "user_request": "分析本月销售数据",
+        "data_source_type": "database",
+        "stage": "querying",
+    }
+    print(f"  状态: {da_state}")
+
+    # Exercise 3: snapshot, mutate, then restore
+    print("\n📝 练习 3: 快照和恢复")
+    sample_state = {
+        "user_id": "123",
+        "current_task": "数据分析",
+        "progress": 50,
+        "temp_data": ["计算中...", "处理中..."],
+    }
+    print(f"  原始状态: {sample_state}")
+
+    snapshot = create_snapshot(sample_state)
+    print(f"  快照: {snapshot}")
+
+    sample_state["progress"] = 100
+    restored = restore_from_snapshot(sample_state, snapshot)
+    print(f"  恢复后状态: {restored}")
+
+    # Exercise 4: validate one passing and one failing state
+    print("\n📝 练习 4: 状态验证")
+    rules = {
+        "user_id": {"type": str, "required": True},
+        "age": {"type": int, "min": 0, "max": 150},
+        "status": {"type": str, "choices": ["active", "inactive"]}
+    }
+
+    valid_state = {"user_id": "123", "age": 25, "status": "active"}
+    print(f"  有效状态: {validate_state(valid_state, rules)}")
+
+    invalid_state = {"user_id": "123", "age": 200, "status": "unknown"}
+    print(f"  无效状态: {validate_state(invalid_state, rules)}")
+
+
+if __name__ == "__main__":
+    test_answers()
@@ -0,0 +1,196 @@
+"""
+Step 02: State 状态管理 - 主程序
+
+运行方式：
+    cd step_02_state
+    python main.py
+"""
+
+from concept import (
+    JaspersoftAgentState,
+    create_initial_state,
+    update_state,
+    save_state_snapshot,
+    restore_state_snapshot,
+)
+
+
+def main():
+    """Walk through a complete state-management scenario, printing each step."""
+
+    print("=" * 70)
+    print(" Step 02: 理解 State - 状态管理")
+    print("=" * 70)
+    print()
+
+    # ═══════════════════════════════════════════════════════════════════════════════
+    # Scenario: a user generates a report and then modifies it
+    # ═══════════════════════════════════════════════════════════════════════════════
+
+    print("📦 场景：用户生成并修改报表的完整流程")
+    print("-" * 70)
+    print("""
+假设用户执行以下操作：
+    1. 描述需求：生成一个销售报表
+    2. Agent 生成报表
+    3. Agent 验证通过
+    4. 用户要求修改标题
+    5. 用户要求撤销修改
+
+我们需要状态来追踪这个完整的流程。
+""")
+
+    # ═══════════════════════════════════════════════════════════════════════════════
+    # Step 1: create the initial state
+    # ═══════════════════════════════════════════════════════════════════════════════
+
+    print("\n📋 步骤 1: 创建初始状态")
+    print("-" * 40)
+
+    state = create_initial_state("session_001")
+    print(f"  会话 ID: {state['session_id']}")
+    print(f"  创建时间: {state['created_at']}")
+    print(f"  初始状态: {state['status']}")
+
+    # ═══════════════════════════════════════════════════════════════════════════════
+    # Step 2: the user describes the requirement
+    # ═══════════════════════════════════════════════════════════════════════════════
+
+    print("\n📝 步骤 2: 用户描述需求")
+    print("-" * 40)
+
+    state["user_input"] = "生成一个销售报表，显示月度汇总"
+    state["intent"] = "initial_generation"
+    state["stage"] = "initial"
+
+    print(f"  用户输入: {state['user_input']}")
+    print(f"  意图: {state['intent']}")
+    print(f"  阶段: {state['stage']}")
+
+    # ═══════════════════════════════════════════════════════════════════════════════
+    # Step 3: generate the report (simulated)
+    # ═══════════════════════════════════════════════════════════════════════════════
+
+    print("\n🔧 步骤 3: Agent 生成报表")
+    print("-" * 40)
+
+    # Simulated generation: a hard-coded JRXML stands in for the LLM output
+    state["stage"] = "generation"
+    generated_jrxml = '''<?xml version="1.0" encoding="UTF-8"?>
+<jasperReport name="SalesReport">
+    <title>月度销售汇总报表</title>
+    <queryString>SELECT product, SUM(amount) FROM sales GROUP BY product</queryString>
+    <field name="product" class="java.lang.String"/>
+    <field name="amount" class="java.math.BigDecimal"/>
+    <band height="100">
+        <staticText><text>产品</text></staticText>
+        <textField><textFieldExpression>$F{product}</textFieldExpression></textField>
+    </band>
+</jasperReport>'''
+
+    state["current_jrxml"] = generated_jrxml
+    print(f"  生成完成!")
+    print(f"  JRXML 长度: {len(generated_jrxml)} 字符")
+    print(f"  当前阶段: {state['stage']}")
+
+    # ═══════════════════════════════════════════════════════════════════════════════
+    # Step 4: validation passes
+    # ═══════════════════════════════════════════════════════════════════════════════
+
+    print("\n✅ 步骤 4: 验证通过")
+    print("-" * 40)
+
+    # Save a snapshot as a backup before the state is changed
+    state_snapshot = save_state_snapshot(state)
+    print(f"  ✓ 保存状态快照")
+    print(f"    快照时间: {state_snapshot['timestamp']}")
+    print(f"    包含内容: current_jrxml, conversation_history 等")
+
+    # Validation passed: promote the current source to the final version
+    state["status"] = "success"
+    state["final_jrxml"] = state["current_jrxml"]
+    state["stage"] = "completed"
+
+    print(f"  验证状态: {state['status']}")
+    print(f"  最终版本: ✓ 已保存")
+
+    # ═══════════════════════════════════════════════════════════════════════════════
+    # Step 5: the user asks to change the title
+    # ═══════════════════════════════════════════════════════════════════════════════
+
+    print("\n✏️ 步骤 5: 用户要求修改标题")
+    print("-" * 40)
+
+    # Snapshot the state before modifying it, so the change can be undone
+    pre_modify_snapshot = save_state_snapshot(state)
+    print(f"  ✓ 修改前保存快照")
+
+    # Apply the modification
+    new_jrxml = state["current_jrxml"].replace("月度销售汇总报表", "2024年销售汇总报表")
+    state["current_jrxml"] = new_jrxml
+    state["intent"] = "modify_report"
+    state["stage"] = "modification"
+
+    print(f"  修改内容: 标题从'月度销售汇总报表'改为'2024年销售汇总报表'")
+    print(f"  当前意图: {state['intent']}")
+
+    # ═══════════════════════════════════════════════════════════════════════════════
+    # Step 6: the user undoes the modification
+    # ═══════════════════════════════════════════════════════════════════════════════
+
+    print("\n↩️ 步骤 6: 用户撤销修改")
+    print("-" * 40)
+
+    # Roll back to the snapshot taken before the modification
+    state = restore_state_snapshot(state, pre_modify_snapshot)
+    print(f"  ✓ 撤销成功!")
+    print(f"  恢复标题: {state['current_jrxml'][:50]}...")
+    print(f"  当前意图: {state['intent']}")
+
+    # ═══════════════════════════════════════════════════════════════════════════════
+    # Show the resulting state
+    # ═══════════════════════════════════════════════════════════════════════════════
+
+    print("\n\n" + "=" * 70)
+    print("📊 完整状态一览")
+    print("=" * 70)
+
+    # (state key, display label) pairs, printed in this order
+    key_fields = [
+        ("session_id", "会话ID"),
+        ("status", "状态"),
+        ("intent", "意图"),
+        ("stage", "阶段"),
+        ("user_input", "用户输入"),
+        ("current_jrxml", "当前JRXML"),
+        ("final_jrxml", "最终JRXML"),
+        ("created_at", "创建时间"),
+        ("updated_at", "更新时间"),
+    ]
+
+    for field, desc in key_fields:
+        value = state.get(field, "")
+        # Truncate long JRXML sources so the summary stays readable
+        if field in ["current_jrxml", "final_jrxml"] and value:
+            value = f"{value[:50]}..." if len(value) > 50 else value
+        print(f"  {desc}: {value}")
+
+    # ═══════════════════════════════════════════════════════════════════════════════
+    # Summary
+    # ═══════════════════════════════════════════════════════════════════════════════
+
+    print("\n\n" + "=" * 70)
+    print(" ✅ Step 02 完成!")
+    print("=" * 70)
+    print("""
+学到的关键概念：
+    1. State 是 Agent 的"记忆"，在多步骤任务中保持信息
+    2. 使用 TypedDict 定义状态，有类型提示更安全
+    3. 状态快照用于"撤销"功能
+    4. 不同字段用于不同目的：业务数据、对话历史、元信息
+
+下一步：
+    继续 Step 03，学习如何把 Tool + State 组合成简单的 Agent
+""")
+
+
+if __name__ == "__main__":
+    main()