1. 统一为使用基于pydantic的.env环境变量管理配置

2. 全项目基于loguru进行日志管理
2025-11-05 14:56:49 +08:00
parent 1d2e23d8c1
commit 537d682861
50 changed files with 1404 additions and 1731 deletions
@@ -12,15 +12,15 @@ from .text_processing import (
    format_search_results_for_prompt
 )

-from .config import Config, load_config
+from .config import Settings, settings

 __all__ = [
    "clean_json_tags",
    "clean_markdown_tags",
-    "remove_reasoning_from_output", 
+    "remove_reasoning_from_output",
    "extract_clean_response",
    "update_state_with_search_results",
    "format_search_results_for_prompt",
-    "Config",
-    "load_config"
+    "Settings",
+    "settings"
 ]
@@ -1,157 +1,83 @@
 """
-Configuration management module for the Media Engine.
+Configuration management module for the Media Engine (pydantic_settings style).
 """

-import os
-from dataclasses import dataclass
+from pathlib import Path
+from pydantic_settings import BaseSettings
+from pydantic import Field
 from typing import Optional


-def _get_value(source, key: str, default=None, *fallback_keys: str):
-    candidates = (key,) + fallback_keys
-    value = None
-    for candidate in candidates:
-        if isinstance(source, dict):
-            value = source.get(candidate)
-        else:
-            value = getattr(source, candidate, None)
-        if value not in (None, ""):
-            break
-    if value in (None, ""):
-        for candidate in candidates:
-            env_val = os.getenv(candidate)
-            if env_val not in (None, ""):
-                value = env_val
-                break
-    return value if value not in (None, "") else default
+# Resolve which .env to load: prefer the current working directory, then fall back to the project root
+PROJECT_ROOT: Path = Path(__file__).resolve().parents[2]
+CWD_ENV: Path = Path.cwd() / ".env"
+ENV_FILE: str = str(CWD_ENV if CWD_ENV.exists() else (PROJECT_ROOT / ".env"))
+
+class Settings(BaseSettings):
+    """
+    Global configuration, loaded automatically from the .env file and environment variables.
+    Field names keep the upper-case names of the original config.py for a smooth migration.
+    """
+    # ====================== Database settings ======================
+    DB_HOST: str = Field("your_db_host", description="数据库主机，例如localhost 或 127.0.0.1。我们也提供云数据库资源便捷配置，日均10w+数据，可免费申请，联系我们：670939375@qq.com NOTE：为进行数据合规性审查与服务升级，云数据库自2025年10月1日起暂停接收新的使用申请")
+    DB_PORT: int = Field(3306, description="数据库端口号，默认为3306")
+    DB_USER: str = Field("your_db_user", description="数据库用户名")
+    DB_PASSWORD: str = Field("your_db_password", description="数据库密码")
+    DB_NAME: str = Field("your_db_name", description="数据库名称")
+    DB_CHARSET: str = Field("utf8mb4", description="数据库字符集，推荐utf8mb4，兼容emoji")
+    DB_DIALECT: str = Field("mysql", description="数据库类型，例如 'mysql' 或 'postgresql'。用于支持多种数据库后端（如 SQLAlchemy，请与连接信息共同配置）")
+
+    # ======================= LLM settings (API keys default to None, hence Optional) =======================
+    INSIGHT_ENGINE_API_KEY: Optional[str] = Field(None, description="Insight Agent（推荐Kimi，https://platform.moonshot.cn/）API密钥，用于主LLM。您可以更改每个部分LLM使用的API，🚩只要兼容OpenAI请求格式都可以，定义好KEY、BASE_URL与MODEL_NAME即可正常使用。重要提醒：我们强烈推荐您先使用推荐的配置申请API，先跑通再进行您的更改！")
+    INSIGHT_ENGINE_BASE_URL: Optional[str] = Field("https://api.moonshot.cn/v1", description="Insight Agent LLM接口BaseUrl，可自定义厂商API")
+    INSIGHT_ENGINE_MODEL_NAME: str = Field("kimi-k2-0711-preview", description="Insight Agent LLM模型名称，如kimi-k2-0711-preview")
+
+    MEDIA_ENGINE_API_KEY: Optional[str] = Field(None, description="Media Agent（推荐Gemini，这里我用了一个中转厂商，你也可以换成你自己的，申请地址：https://www.chataiapi.com/）API密钥")
+    MEDIA_ENGINE_BASE_URL: Optional[str] = Field("https://www.chataiapi.com/v1", description="Media Agent LLM接口BaseUrl")
+    MEDIA_ENGINE_MODEL_NAME: str = Field("gemini-2.5-pro", description="Media Agent LLM模型名称，如gemini-2.5-pro")
+
+    # BOCHA_WEB_SEARCH_API_KEY is declared once, in the network tools section below (it was duplicated here).
+    BOCHA_API_KEY: Optional[str] = Field(None, description="Bocha 兼容键（别名）")
+
+    SEARCH_TIMEOUT: int = Field(240, description="搜索超时（秒）")
+    SEARCH_CONTENT_MAX_LENGTH: int = Field(20000, description="用于提示的最长内容长度")
+    MAX_REFLECTIONS: int = Field(2, description="最大反思轮数")
+    MAX_PARAGRAPHS: int = Field(5, description="最大段落数")
+
+    MINDSPIDER_API_KEY: Optional[str] = Field(None, description="MindSpider API密钥")
+    MINDSPIDER_BASE_URL: Optional[str] = Field("https://api.deepseek.com", description="MindSpider LLM接口BaseUrl")
+    MINDSPIDER_MODEL_NAME: str = Field("deepseek-reasoner", description="MindSpider LLM模型名称，如deepseek-reasoner")
+
+    OUTPUT_DIR: str = Field("reports", description="输出目录")
+    SAVE_INTERMEDIATE_STATES: bool = Field(True, description="是否保存中间状态")
+
+
+    QUERY_ENGINE_API_KEY: Optional[str] = Field(None, description="Query Agent（推荐DeepSeek，https://www.deepseek.com/）API密钥")
+    QUERY_ENGINE_BASE_URL: Optional[str] = Field("https://api.deepseek.com", description="Query Agent LLM接口BaseUrl")
+    QUERY_ENGINE_MODEL_NAME: str = Field("deepseek-reasoner", description="Query Agent LLM模型，如deepseek-reasoner")
+
+    REPORT_ENGINE_API_KEY: Optional[str] = Field(None, description="Report Agent（推荐Gemini，这里我用了一个中转厂商，你也可以换成你自己的，申请地址：https://www.chataiapi.com/）API密钥")
+    REPORT_ENGINE_BASE_URL: Optional[str] = Field("https://www.chataiapi.com/v1", description="Report Agent LLM接口BaseUrl")
+    REPORT_ENGINE_MODEL_NAME: str = Field("gemini-2.5-pro", description="Report Agent LLM模型，如gemini-2.5-pro")
+
+    FORUM_HOST_API_KEY: Optional[str] = Field(None, description="Forum Host（Qwen3最新模型，这里我使用了硅基流动这个平台，申请地址：https://cloud.siliconflow.cn/）API密钥")
+    FORUM_HOST_BASE_URL: Optional[str] = Field("https://api.siliconflow.cn/v1", description="Forum Host LLM BaseUrl")
+    FORUM_HOST_MODEL_NAME: str = Field("Qwen/Qwen3-235B-A22B-Instruct-2507", description="Forum Host LLM模型名，如Qwen/Qwen3-235B-A22B-Instruct-2507")
+
+    KEYWORD_OPTIMIZER_API_KEY: Optional[str] = Field(None, description="SQL keyword Optimizer（小参数Qwen3模型，这里我使用了硅基流动这个平台，申请地址：https://cloud.siliconflow.cn/）API密钥")
+    KEYWORD_OPTIMIZER_BASE_URL: Optional[str] = Field("https://api.siliconflow.cn/v1", description="Keyword Optimizer BaseUrl")
+    KEYWORD_OPTIMIZER_MODEL_NAME: str = Field("Qwen/Qwen3-30B-A3B-Instruct-2507", description="Keyword Optimizer LLM模型名称，如Qwen/Qwen3-30B-A3B-Instruct-2507")
+
+    # ================== Network tool settings ====================
+    TAVILY_API_KEY: Optional[str] = Field(None, description="Tavily API（申请地址：https://www.tavily.com/）API密钥，用于Tavily网络搜索")
+    BOCHA_BASE_URL: Optional[str] = Field("https://api.bochaai.com/v1/ai-search", description="Bocha AI 搜索BaseUrl或博查网页搜索BaseUrl")
+    BOCHA_WEB_SEARCH_API_KEY: Optional[str] = Field(None, description="Bocha API（申请地址：https://open.bochaai.com/）API密钥，用于Bocha搜索")
+
+    class Config:
+        env_file = ENV_FILE
+        env_prefix = ""
+        case_sensitive = False
+        extra = "allow"


-@dataclass
-class Config:
-    """Media Engine configuration."""
-
-    llm_api_key: Optional[str] = None
-    llm_base_url: Optional[str] = None
-    llm_model_name: Optional[str] = None
-    llm_provider: Optional[str] = None  # compatibility
-
-    bocha_api_key: Optional[str] = None
-
-    search_timeout: int = 240
-    max_content_length: int = 20000
-    max_reflections: int = 2
-    max_paragraphs: int = 5
-
-    output_dir: str = "reports"
-    save_intermediate_states: bool = True
-
-    def __post_init__(self):
-        if not self.llm_provider and self.llm_model_name:
-            self.llm_provider = self.llm_model_name
-
-    def validate(self) -> bool:
-        if not self.llm_api_key:
-            print("错误: Media Engine LLM API Key 未设置 (MEDIA_ENGINE_API_KEY)。")
-            return False
-        if not self.llm_model_name:
-            print("错误: Media Engine 模型名称未设置 (MEDIA_ENGINE_MODEL_NAME)。")
-            return False
-        if not self.bocha_api_key:
-            print("错误: Bocha API Key 未设置 (BOCHA_WEB_SEARCH_API_KEY)。")
-            return False
-        return True
-
-    @classmethod
-    def from_file(cls, config_file: str) -> "Config":
-        if config_file.endswith(".py"):
-            import importlib.util
-
-            spec = importlib.util.spec_from_file_location("config", config_file)
-            config_module = importlib.util.module_from_spec(spec)
-            spec.loader.exec_module(config_module)
-
-            return cls(
-                llm_api_key=_get_value(config_module, "MEDIA_ENGINE_API_KEY"),
-                llm_base_url=_get_value(config_module, "MEDIA_ENGINE_BASE_URL"),
-                llm_model_name=_get_value(config_module, "MEDIA_ENGINE_MODEL_NAME"),
-                bocha_api_key=_get_value(
-                    config_module,
-                    "BOCHA_WEB_SEARCH_API_KEY",
-                    None,
-                    "BOCHA_API_KEY",
-                ),
-                search_timeout=int(_get_value(config_module, "SEARCH_TIMEOUT", 240)),
-                max_content_length=int(_get_value(config_module, "SEARCH_CONTENT_MAX_LENGTH", 20000)),
-                max_reflections=int(_get_value(config_module, "MAX_REFLECTIONS", 2)),
-                max_paragraphs=int(_get_value(config_module, "MAX_PARAGRAPHS", 5)),
-                output_dir=_get_value(config_module, "OUTPUT_DIR", "reports"),
-                save_intermediate_states=str(
-                    _get_value(config_module, "SAVE_INTERMEDIATE_STATES", "true")
-                ).lower()
-                in ("true", "1", "yes"),
-            )
-
-        config_dict = {}
-        if os.path.exists(config_file):
-            with open(config_file, "r", encoding="utf-8") as f:
-                for line in f:
-                    line = line.strip()
-                    if line and not line.startswith("#") and "=" in line:
-                        key, value = line.split("=", 1)
-                        config_dict[key.strip()] = value.strip()
-
-        return cls(
-            llm_api_key=_get_value(config_dict, "MEDIA_ENGINE_API_KEY"),
-            llm_base_url=_get_value(config_dict, "MEDIA_ENGINE_BASE_URL"),
-            llm_model_name=_get_value(config_dict, "MEDIA_ENGINE_MODEL_NAME"),
-            bocha_api_key=_get_value(
-                config_dict,
-                "BOCHA_WEB_SEARCH_API_KEY",
-                None,
-                "BOCHA_API_KEY",
-            ),
-            search_timeout=int(_get_value(config_dict, "SEARCH_TIMEOUT", 240)),
-            max_content_length=int(_get_value(config_dict, "SEARCH_CONTENT_MAX_LENGTH", 20000)),
-            max_reflections=int(_get_value(config_dict, "MAX_REFLECTIONS", 2)),
-            max_paragraphs=int(_get_value(config_dict, "MAX_PARAGRAPHS", 5)),
-            output_dir=_get_value(config_dict, "OUTPUT_DIR", "reports"),
-            save_intermediate_states=str(
-                _get_value(config_dict, "SAVE_INTERMEDIATE_STATES", "true")
-            ).lower()
-            in ("true", "1", "yes"),
-        )
-
-
-def load_config(config_file: Optional[str] = None) -> Config:
-    if config_file:
-        if not os.path.exists(config_file):
-            raise FileNotFoundError(f"配置文件不存在: {config_file}")
-        file_to_load = config_file
-    else:
-        for candidate in ("config.py", "config.env", ".env"):
-            if os.path.exists(candidate):
-                file_to_load = candidate
-                print(f"已找到配置文件: {candidate}")
-                break
-        else:
-            raise FileNotFoundError("未找到配置文件，请创建 config.py。")
-
-    config = Config.from_file(file_to_load)
-    if not config.validate():
-        raise ValueError("配置校验失败，请检查 config.py 中的相关配置。")
-    return config
-
-
-def print_config(config: Config):
-    print("\n=== Media Engine 配置 ===")
-    print(f"LLM 模型: {config.llm_model_name}")
-    print(f"LLM Base URL: {config.llm_base_url or '(默认)'}")
-    print(f"Bocha API Key: {'已配置' if config.bocha_api_key else '未配置'}")
-    print(f"搜索超时: {config.search_timeout} 秒")
-    print(f"最长内容长度: {config.max_content_length}")
-    print(f"最大反思次数: {config.max_reflections}")
-    print(f"最大段落数: {config.max_paragraphs}")
-    print(f"输出目录: {config.output_dir}")
-    print(f"保存中间状态: {config.save_intermediate_states}")
-    print(f"LLM API Key: {'已配置' if config.llm_api_key else '未配置'}")
-    print("========================\n")
+settings = Settings()  # shared instance, built at import time from ENV_FILE and the process environment