1. 统一为使用基于pydantic的.env环境变量管理配置

2. 全项目基于loguru进行日志管理
2025-11-05 14:56:49 +08:00
parent 1d2e23d8c1
commit 537d682861
50 changed files with 1404 additions and 1731 deletions
@@ -5,69 +5,74 @@

 from abc import ABC, abstractmethod
 from typing import Any, Dict, Optional
+from loguru import logger
 from ..llms.base import LLMClient
 from ..state.state import State


 class BaseNode(ABC):
    """节点基类"""
-    
+
    def __init__(self, llm_client: LLMClient, node_name: str = ""):
        """
        初始化节点
-        
+
        Args:
            llm_client: LLM客户端
            node_name: 节点名称
        """
        self.llm_client = llm_client
        self.node_name = node_name or self.__class__.__name__
-    
+
    @abstractmethod
    def run(self, input_data: Any, **kwargs) -> Any:
        """
        执行节点处理逻辑
-        
+
        Args:
            input_data: 输入数据
            **kwargs: 额外参数
-            
+
        Returns:
            处理结果
        """
        pass
-    
+
    def validate_input(self, input_data: Any) -> bool:
        """
        验证输入数据
-        
+
        Args:
            input_data: 输入数据
-            
+
        Returns:
            验证是否通过
        """
        return True
-    
+
    def process_output(self, output: Any) -> Any:
        """
        处理输出数据
-        
+
        Args:
            output: 原始输出
-            
+
        Returns:
            处理后的输出
        """
        return output
-    
+
    def log_info(self, message: str):
        """记录信息日志"""
-        print(f"[{self.node_name}] {message}")
+        logger.info(f"[{self.node_name}] {message}")
    
+    def log_warning(self, message: str):
+        """记录警告日志"""
+        logger.warning(f"[{self.node_name}] 警告: {message}")
+
    def log_error(self, message: str):
        """记录错误日志"""
-        print(f"[{self.node_name}] 错误: {message}")
+        logger.error(f"[{self.node_name}] 错误: {message}")


 class StateMutationNode(BaseNode):
@@ -7,6 +7,7 @@ import json
 from typing import List, Dict, Any

 from .base_node import BaseNode
+from loguru import logger
 from ..prompts import SYSTEM_PROMPT_REPORT_FORMATTING
 from ..utils.text_processing import (
    remove_reasoning_from_output,
@@ -65,7 +66,7 @@ class ReportFormattingNode(BaseNode):
            else:
                message = json.dumps(input_data, ensure_ascii=False)
            
-            self.log_info("正在格式化最终报告")
+            logger.info("正在格式化最终报告")
            
            # 调用LLM生成Markdown格式
            response = self.llm_client.invoke(
@@ -76,11 +77,11 @@ class ReportFormattingNode(BaseNode):
            # 处理响应
            processed_response = self.process_output(response)
            
-            self.log_info("成功生成格式化报告")
+            logger.info("成功生成格式化报告")
            return processed_response
            
        except Exception as e:
-            self.log_error(f"报告格式化失败: {str(e)}")
+            logger.exception(f"报告格式化失败: {str(e)}")
            raise e
    
    def process_output(self, output: str) -> str:
@@ -109,7 +110,7 @@ class ReportFormattingNode(BaseNode):
            return cleaned_output.strip()
            
        except Exception as e:
-            self.log_error(f"处理输出失败: {str(e)}")
+            logger.exception(f"处理输出失败: {str(e)}")
            return "# 报告处理失败\n\n报告格式化过程中发生错误。"
    
    def format_report_manually(self, paragraphs_data: List[Dict[str, str]], 
@@ -125,7 +126,7 @@ class ReportFormattingNode(BaseNode):
            格式化的Markdown报告
        """
        try:
-            self.log_info("使用手动格式化方法")
+            logger.info("使用手动格式化方法")
            
            # 构建报告
            report_lines = [
@@ -163,5 +164,5 @@ class ReportFormattingNode(BaseNode):
            return "\n".join(report_lines)
            
        except Exception as e:
-            self.log_error(f"手动格式化失败: {str(e)}")
+            logger.exception(f"手动格式化失败: {str(e)}")
            return "# 报告生成失败\n\n无法完成报告格式化。"
@@ -6,6 +6,7 @@
 import json
 from typing import Dict, Any, List
 from json.decoder import JSONDecodeError
+from loguru import logger

 from .base_node import StateMutationNode
 from ..state.state import State
@@ -48,7 +49,7 @@ class ReportStructureNode(StateMutationNode):
            报告结构列表
        """
        try:
-            self.log_info(f"正在为查询生成报告结构: {self.query}")
+            logger.info(f"正在为查询生成报告结构: {self.query}")
            
            # 调用LLM
            response = self.llm_client.invoke(SYSTEM_PROMPT_REPORT_STRUCTURE, self.query)
@@ -56,11 +57,11 @@ class ReportStructureNode(StateMutationNode):
            # 处理响应
            processed_response = self.process_output(response)
            
-            self.log_info(f"成功生成 {len(processed_response)} 个段落结构")
+            logger.info(f"成功生成 {len(processed_response)} 个段落结构")
            return processed_response
            
        except Exception as e:
-            self.log_error(f"生成报告结构失败: {str(e)}")
+            logger.exception(f"生成报告结构失败: {str(e)}")
            raise e
    
    def process_output(self, output: str) -> List[Dict[str, str]]:
@@ -79,54 +80,54 @@ class ReportStructureNode(StateMutationNode):
            cleaned_output = clean_json_tags(cleaned_output)
            
            # 记录清理后的输出用于调试
-            self.log_info(f"清理后的输出: {cleaned_output}")
+            logger.info(f"清理后的输出: {cleaned_output}")
            
            # 解析JSON
            try:
                report_structure = json.loads(cleaned_output)
-                self.log_info("JSON解析成功")
+                logger.info("JSON解析成功")
            except JSONDecodeError as e:
-                self.log_info(f"JSON解析失败: {str(e)}")
+                logger.exception(f"JSON解析失败: {str(e)}")
                # 使用更强大的提取方法
                report_structure = extract_clean_response(cleaned_output)
                if "error" in report_structure:
-                    self.log_error("JSON解析失败，尝试修复...")
+                    logger.error("JSON解析失败，尝试修复...")
                    # 尝试修复JSON
                    fixed_json = fix_incomplete_json(cleaned_output)
                    if fixed_json:
                        try:
                            report_structure = json.loads(fixed_json)
-                            self.log_info("JSON修复成功")
+                            logger.info("JSON修复成功")
                        except JSONDecodeError:
-                            self.log_error("JSON修复失败")
+                            logger.error("JSON修复失败")
                            # 返回默认结构
                            return self._generate_default_structure()
                    else:
-                        self.log_error("无法修复JSON，使用默认结构")
+                        logger.error("无法修复JSON，使用默认结构")
                        return self._generate_default_structure()
            
            # 验证结构
            if not isinstance(report_structure, list):
-                self.log_info("报告结构不是列表，尝试转换...")
+                logger.info("报告结构不是列表，尝试转换...")
                if isinstance(report_structure, dict):
                    # 如果是单个对象，包装成列表
                    report_structure = [report_structure]
                else:
-                    self.log_error("报告结构格式无效，使用默认结构")
+                    logger.error("报告结构格式无效，使用默认结构")
                    return self._generate_default_structure()
            
            # 验证每个段落
            validated_structure = []
            for i, paragraph in enumerate(report_structure):
                if not isinstance(paragraph, dict):
-                    self.log_warning(f"段落 {i+1} 不是字典格式，跳过")
+                    logger.warning(f"段落 {i+1} 不是字典格式，跳过")
                    continue
                
                title = paragraph.get("title", f"段落 {i+1}")
                content = paragraph.get("content", "")
                
                if not title or not content:
-                    self.log_warning(f"段落 {i+1} 缺少标题或内容，跳过")
+                    logger.warning(f"段落 {i+1} 缺少标题或内容，跳过")
                    continue
                
                validated_structure.append({
@@ -135,14 +136,14 @@ class ReportStructureNode(StateMutationNode):
                })
            
            if not validated_structure:
-                self.log_warning("没有有效的段落结构，使用默认结构")
+                logger.warning("没有有效的段落结构，使用默认结构")
                return self._generate_default_structure()
            
-            self.log_info(f"成功验证 {len(validated_structure)} 个段落结构")
+            logger.info(f"成功验证 {len(validated_structure)} 个段落结构")
            return validated_structure
            
        except Exception as e:
-            self.log_error(f"处理输出失败: {str(e)}")
+            logger.exception(f"处理输出失败: {str(e)}")
            return self._generate_default_structure()
    
    def _generate_default_structure(self) -> List[Dict[str, str]]:
@@ -152,7 +153,7 @@ class ReportStructureNode(StateMutationNode):
        Returns:
            默认的报告结构列表
        """
-        self.log_info("生成默认报告结构")
+        logger.info("生成默认报告结构")
        return [
            {
                "title": "研究概述",
@@ -195,9 +196,9 @@ class ReportStructureNode(StateMutationNode):
                    content=paragraph_data["content"]
                )
            
-            self.log_info(f"已将 {len(report_structure)} 个段落添加到状态中")
+            logger.info(f"已将 {len(report_structure)} 个段落添加到状态中")
            return state
            
        except Exception as e:
-            self.log_error(f"状态更新失败: {str(e)}")
+            logger.exception(f"状态更新失败: {str(e)}")
            raise e
@@ -6,6 +6,7 @@
 import json
 from typing import Dict, Any
 from json.decoder import JSONDecodeError
+from loguru import logger

 from .base_node import BaseNode
 from ..prompts import SYSTEM_PROMPT_FIRST_SEARCH, SYSTEM_PROMPT_REFLECTION
@@ -62,7 +63,7 @@ class FirstSearchNode(BaseNode):
            else:
                message = json.dumps(input_data, ensure_ascii=False)
            
-            self.log_info("正在生成首次搜索查询")
+            logger.info("正在生成首次搜索查询")
            
            # 调用LLM
            response = self.llm_client.invoke(SYSTEM_PROMPT_FIRST_SEARCH, message)
@@ -70,11 +71,11 @@ class FirstSearchNode(BaseNode):
            # 处理响应
            processed_response = self.process_output(response)
            
-            self.log_info(f"生成搜索查询: {processed_response.get('search_query', 'N/A')}")
+            logger.info(f"生成搜索查询: {processed_response.get('search_query', 'N/A')}")
            return processed_response
            
        except Exception as e:
-            self.log_error(f"生成首次搜索查询失败: {str(e)}")
+            logger.exception(f"生成首次搜索查询失败: {str(e)}")
            raise e
    
    def process_output(self, output: str) -> Dict[str, str]:
@@ -93,30 +94,30 @@ class FirstSearchNode(BaseNode):
            cleaned_output = clean_json_tags(cleaned_output)
            
            # 记录清理后的输出用于调试
-            self.log_info(f"清理后的输出: {cleaned_output}")
+            logger.info(f"清理后的输出: {cleaned_output}")
            
            # 解析JSON
            try:
                result = json.loads(cleaned_output)
-                self.log_info("JSON解析成功")
+                logger.info("JSON解析成功")
            except JSONDecodeError as e:
-                self.log_info(f"JSON解析失败: {str(e)}")
+                logger.exception(f"JSON解析失败: {str(e)}")
                # 使用更强大的提取方法
                result = extract_clean_response(cleaned_output)
                if "error" in result:
-                    self.log_error("JSON解析失败，尝试修复...")
+                    logger.error("JSON解析失败，尝试修复...")
                    # 尝试修复JSON
                    fixed_json = fix_incomplete_json(cleaned_output)
                    if fixed_json:
                        try:
                            result = json.loads(fixed_json)
-                            self.log_info("JSON修复成功")
+                            logger.info("JSON修复成功")
                        except JSONDecodeError:
-                            self.log_error("JSON修复失败")
+                            logger.error("JSON修复失败")
                            # 返回默认查询
                            return self._get_default_search_query()
                    else:
-                        self.log_error("无法修复JSON，使用默认查询")
+                        logger.error("无法修复JSON，使用默认查询")
                        return self._get_default_search_query()
            
            # 验证和清理结果
@@ -124,7 +125,7 @@ class FirstSearchNode(BaseNode):
            reasoning = result.get("reasoning", "")
            
            if not search_query:
-                self.log_warning("未找到搜索查询，使用默认查询")
+                logger.warning("未找到搜索查询，使用默认查询")
                return self._get_default_search_query()
            
            return {
@@ -197,7 +198,7 @@ class ReflectionNode(BaseNode):
            else:
                message = json.dumps(input_data, ensure_ascii=False)
            
-            self.log_info("正在进行反思并生成新搜索查询")
+            logger.info("正在进行反思并生成新搜索查询")
            
            # 调用LLM
            response = self.llm_client.invoke(SYSTEM_PROMPT_REFLECTION, message)
@@ -205,11 +206,11 @@ class ReflectionNode(BaseNode):
            # 处理响应
            processed_response = self.process_output(response)
            
-            self.log_info(f"反思生成搜索查询: {processed_response.get('search_query', 'N/A')}")
+            logger.info(f"反思生成搜索查询: {processed_response.get('search_query', 'N/A')}")
            return processed_response
            
        except Exception as e:
-            self.log_error(f"反思生成搜索查询失败: {str(e)}")
+            logger.exception(f"反思生成搜索查询失败: {str(e)}")
            raise e
    
    def process_output(self, output: str) -> Dict[str, str]:
@@ -228,30 +229,30 @@ class ReflectionNode(BaseNode):
            cleaned_output = clean_json_tags(cleaned_output)
            
            # 记录清理后的输出用于调试
-            self.log_info(f"清理后的输出: {cleaned_output}")
+            logger.info(f"清理后的输出: {cleaned_output}")
            
            # 解析JSON
            try:
                result = json.loads(cleaned_output)
-                self.log_info("JSON解析成功")
+                logger.info("JSON解析成功")
            except JSONDecodeError as e:
-                self.log_info(f"JSON解析失败: {str(e)}")
+                logger.exception(f"JSON解析失败: {str(e)}")
                # 使用更强大的提取方法
                result = extract_clean_response(cleaned_output)
                if "error" in result:
-                    self.log_error("JSON解析失败，尝试修复...")
+                    logger.error("JSON解析失败，尝试修复...")
                    # 尝试修复JSON
                    fixed_json = fix_incomplete_json(cleaned_output)
                    if fixed_json:
                        try:
                            result = json.loads(fixed_json)
-                            self.log_info("JSON修复成功")
+                            logger.info("JSON修复成功")
                        except JSONDecodeError:
-                            self.log_error("JSON修复失败")
+                            logger.error("JSON修复失败")
                            # 返回默认查询
                            return self._get_default_reflection_query()
                    else:
-                        self.log_error("无法修复JSON，使用默认查询")
+                        logger.error("无法修复JSON，使用默认查询")
                        return self._get_default_reflection_query()
            
            # 验证和清理结果
@@ -259,7 +260,7 @@ class ReflectionNode(BaseNode):
            reasoning = result.get("reasoning", "")
            
            if not search_query:
-                self.log_warning("未找到搜索查询，使用默认查询")
+                logger.warning("未找到搜索查询，使用默认查询")
                return self._get_default_reflection_query()
            
            return {
@@ -268,7 +269,7 @@ class ReflectionNode(BaseNode):
            }
            
        except Exception as e:
-            self.log_error(f"处理输出失败: {str(e)}")
+            logger.exception(f"处理输出失败: {str(e)}")
            # 返回默认查询
            return self._get_default_reflection_query()
    
@@ -6,6 +6,7 @@
 import json
 from typing import Dict, Any, List
 from json.decoder import JSONDecodeError
+from loguru import logger

 from .base_node import StateMutationNode
 from ..state.state import State
@@ -27,7 +28,7 @@ try:
    FORUM_READER_AVAILABLE = True
 except ImportError:
    FORUM_READER_AVAILABLE = False
-    print("警告: 无法导入forum_reader模块，将跳过HOST发言读取功能")
+    logger.warning("警告: 无法导入forum_reader模块，将跳过HOST发言读取功能")


 class FirstSummaryNode(StateMutationNode):
@@ -84,9 +85,9 @@ class FirstSummaryNode(StateMutationNode):
                    if host_speech:
                        # 将HOST发言添加到输入数据中
                        data['host_speech'] = host_speech
-                        self.log_info(f"已读取HOST发言，长度: {len(host_speech)}字符")
+                        logger.info(f"已读取HOST发言，长度: {len(host_speech)}字符")
                except Exception as e:
-                    self.log_info(f"读取HOST发言失败: {str(e)}")
+                    logger.exception(f"读取HOST发言失败: {str(e)}")
            
            # 转换为JSON字符串
            message = json.dumps(data, ensure_ascii=False)
@@ -96,7 +97,7 @@ class FirstSummaryNode(StateMutationNode):
                formatted_host = format_host_speech_for_prompt(data['host_speech'])
                message = formatted_host + "\n" + message
            
-            self.log_info("正在生成首次段落总结")
+            logger.info("正在生成首次段落总结")
            
            # 调用LLM生成总结
            response = self.llm_client.invoke(
@@ -107,11 +108,11 @@ class FirstSummaryNode(StateMutationNode):
            # 处理响应
            processed_response = self.process_output(response)
            
-            self.log_info("成功生成首次段落总结")
+            logger.info("成功生成首次段落总结")
            return processed_response
            
        except Exception as e:
-            self.log_error(f"生成首次总结失败: {str(e)}")
+            logger.exception(f"生成首次总结失败: {str(e)}")
            raise e
    
    def process_output(self, output: str) -> str:
@@ -130,26 +131,26 @@ class FirstSummaryNode(StateMutationNode):
            cleaned_output = clean_json_tags(cleaned_output)
            
            # 记录清理后的输出用于调试
-            self.log_info(f"清理后的输出: {cleaned_output}")
+            logger.info(f"清理后的输出: {cleaned_output}")
            
            # 解析JSON
            try:
                result = json.loads(cleaned_output)
-                self.log_info("JSON解析成功")
+                logger.info("JSON解析成功")
            except JSONDecodeError as e:
-                self.log_info(f"JSON解析失败: {str(e)}")
+                logger.exception(f"JSON解析失败: {str(e)}")
                # 尝试修复JSON
                fixed_json = fix_incomplete_json(cleaned_output)
                if fixed_json:
                    try:
                        result = json.loads(fixed_json)
-                        self.log_info("JSON修复成功")
+                        logger.info("JSON修复成功")
                    except JSONDecodeError:
-                        self.log_info("JSON修复失败，直接使用清理后的文本")
+                        logger.exception("JSON修复失败，直接使用清理后的文本")
                        # 如果不是JSON格式，直接返回清理后的文本
                        return cleaned_output
                else:
-                    self.log_info("无法修复JSON，直接使用清理后的文本")
+                    logger.exception("无法修复JSON，直接使用清理后的文本")
                    # 如果不是JSON格式，直接返回清理后的文本
                    return cleaned_output
            
@@ -163,7 +164,7 @@ class FirstSummaryNode(StateMutationNode):
            return cleaned_output
            
        except Exception as e:
-            self.log_error(f"处理输出失败: {str(e)}")
+            logger.exception(f"处理输出失败: {str(e)}")
            return "段落总结生成失败"
    
    def mutate_state(self, input_data: Any, state: State, paragraph_index: int, **kwargs) -> State:
@@ -186,7 +187,7 @@ class FirstSummaryNode(StateMutationNode):
            # 更新状态
            if 0 <= paragraph_index < len(state.paragraphs):
                state.paragraphs[paragraph_index].research.latest_summary = summary
-                self.log_info(f"已更新段落 {paragraph_index} 的首次总结")
+                logger.info(f"已更新段落 {paragraph_index} 的首次总结")
            else:
                raise ValueError(f"段落索引 {paragraph_index} 超出范围")
            
@@ -194,7 +195,7 @@ class FirstSummaryNode(StateMutationNode):
            return state
            
        except Exception as e:
-            self.log_error(f"状态更新失败: {str(e)}")
+            logger.exception(f"状态更新失败: {str(e)}")
            raise e


@@ -252,9 +253,9 @@ class ReflectionSummaryNode(StateMutationNode):
                    if host_speech:
                        # 将HOST发言添加到输入数据中
                        data['host_speech'] = host_speech
-                        self.log_info(f"已读取HOST发言，长度: {len(host_speech)}字符")
+                        logger.info(f"已读取HOST发言，长度: {len(host_speech)}字符")
                except Exception as e:
-                    self.log_info(f"读取HOST发言失败: {str(e)}")
+                    logger.exception(f"读取HOST发言失败: {str(e)}")
            
            # 转换为JSON字符串
            message = json.dumps(data, ensure_ascii=False)
@@ -264,7 +265,7 @@ class ReflectionSummaryNode(StateMutationNode):
                formatted_host = format_host_speech_for_prompt(data['host_speech'])
                message = formatted_host + "\n" + message
            
-            self.log_info("正在生成反思总结")
+            logger.info("正在生成反思总结")
            
            # 调用LLM生成总结
            response = self.llm_client.invoke(
@@ -275,11 +276,11 @@ class ReflectionSummaryNode(StateMutationNode):
            # 处理响应
            processed_response = self.process_output(response)
            
-            self.log_info("成功生成反思总结")
+            logger.info("成功生成反思总结")
            return processed_response
            
        except Exception as e:
-            self.log_error(f"生成反思总结失败: {str(e)}")
+            logger.exception(f"生成反思总结失败: {str(e)}")
            raise e
    
    def process_output(self, output: str) -> str:
@@ -298,26 +299,26 @@ class ReflectionSummaryNode(StateMutationNode):
            cleaned_output = clean_json_tags(cleaned_output)
            
            # 记录清理后的输出用于调试
-            self.log_info(f"清理后的输出: {cleaned_output}")
+            logger.info(f"清理后的输出: {cleaned_output}")
            
            # 解析JSON
            try:
                result = json.loads(cleaned_output)
-                self.log_info("JSON解析成功")
+                logger.info("JSON解析成功")
            except JSONDecodeError as e:
-                self.log_info(f"JSON解析失败: {str(e)}")
+                logger.exception(f"JSON解析失败: {str(e)}")
                # 尝试修复JSON
                fixed_json = fix_incomplete_json(cleaned_output)
                if fixed_json:
                    try:
                        result = json.loads(fixed_json)
-                        self.log_info("JSON修复成功")
+                        logger.info("JSON修复成功")
                    except JSONDecodeError:
-                        self.log_info("JSON修复失败，直接使用清理后的文本")
+                        logger.exception("JSON修复失败，直接使用清理后的文本")
                        # 如果不是JSON格式，直接返回清理后的文本
                        return cleaned_output
                else:
-                    self.log_info("无法修复JSON，直接使用清理后的文本")
+                    logger.exception("无法修复JSON，直接使用清理后的文本")
                    # 如果不是JSON格式，直接返回清理后的文本
                    return cleaned_output
            
@@ -331,7 +332,7 @@ class ReflectionSummaryNode(StateMutationNode):
            return cleaned_output
            
        except Exception as e:
-            self.log_error(f"处理输出失败: {str(e)}")
+            logger.exception(f"处理输出失败: {str(e)}")
            return "反思总结生成失败"
    
    def mutate_state(self, input_data: Any, state: State, paragraph_index: int, **kwargs) -> State:
@@ -355,7 +356,7 @@ class ReflectionSummaryNode(StateMutationNode):
            if 0 <= paragraph_index < len(state.paragraphs):
                state.paragraphs[paragraph_index].research.latest_summary = updated_summary
                state.paragraphs[paragraph_index].research.increment_reflection()
-                self.log_info(f"已更新段落 {paragraph_index} 的反思总结")
+                logger.info(f"已更新段落 {paragraph_index} 的反思总结")
            else:
                raise ValueError(f"段落索引 {paragraph_index} 超出范围")
            
@@ -363,5 +364,5 @@ class ReflectionSummaryNode(StateMutationNode):
            return state
            
        except Exception as e:
-            self.log_error(f"状态更新失败: {str(e)}")
+            logger.exception(f"状态更新失败: {str(e)}")
            raise e