Completely refactor the LLM integration method to easily replace the LLM used by each module and optimize the retransmission mechanism.

This commit is contained in:
666ghj
2025-10-09 13:45:39 +08:00
parent ce74f00137
commit 154b29c0d7
73 changed files with 942 additions and 51758 deletions
+7 -21
View File
@@ -9,7 +9,7 @@ import re
from datetime import datetime
from typing import Optional, Dict, Any, List, Union
from .llms import DeepSeekLLM, OpenAILLM, KimiLLM, BaseLLM
from .llms import LLMClient
from .nodes import (
ReportStructureNode,
FirstSearchNode,
@@ -67,27 +67,13 @@ class DeepSearchAgent:
print(f"搜索工具集: MediaCrawlerDB (支持5种本地数据库查询工具)")
print(f"情感分析: WeiboMultilingualSentiment (支持22种语言的情感分析)")
def _initialize_llm(self) -> BaseLLM:
def _initialize_llm(self) -> LLMClient:
"""初始化LLM客户端"""
if self.config.default_llm_provider == "deepseek":
return DeepSeekLLM(
api_key=self.config.deepseek_api_key,
model_name=self.config.deepseek_model,
base_url=self.config.deepseek_base_url
)
elif self.config.default_llm_provider == "openai":
return OpenAILLM(
api_key=self.config.openai_api_key,
model_name=self.config.openai_model
)
elif self.config.default_llm_provider == "kimi":
return KimiLLM(
api_key=self.config.kimi_api_key,
model_name=self.config.kimi_model,
base_url=self.config.kimi_base_url
)
else:
raise ValueError(f"不支持的LLM提供商: {self.config.default_llm_provider}")
return LLMClient(
api_key=self.config.llm_api_key,
model_name=self.config.llm_model_name,
base_url=self.config.llm_base_url,
)
def _initialize_nodes(self):
"""初始化处理节点"""
+4 -7
View File
@@ -1,11 +1,8 @@
"""
LLM调用模块
支持多种大语言模型的统一接口
LLM module
Provides a unified OpenAI-compatible client for the Insight Engine.
"""
from .base import BaseLLM
from .deepseek import DeepSeekLLM
from .openai_llm import OpenAILLM
from .kimi import KimiLLM
from .base import LLMClient
__all__ = ["BaseLLM", "DeepSeekLLM", "OpenAILLM", "KimiLLM"]
__all__ = ["LLMClient"]
+78 -50
View File
@@ -1,61 +1,89 @@
"""
LLM基础抽象类
定义所有LLM实现需要遵循的接口标准
Unified OpenAI-compatible LLM client for the Insight Engine, with retry support.
"""
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any
import os
import sys
from typing import Any, Dict, Optional
from openai import OpenAI
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(os.path.dirname(current_dir))
utils_dir = os.path.join(project_root, "utils")
if utils_dir not in sys.path:
sys.path.append(utils_dir)
try:
from retry_helper import with_retry, LLM_RETRY_CONFIG
except ImportError:
def with_retry(config=None):
def decorator(func):
return func
return decorator
LLM_RETRY_CONFIG = None
class BaseLLM(ABC):
"""LLM基础抽象类"""
def __init__(self, api_key: str, model_name: Optional[str] = None):
"""
初始化LLM客户端
Args:
api_key: API密钥
model_name: 模型名称,如果不指定则使用默认模型
"""
class LLMClient:
"""Minimal wrapper around the OpenAI-compatible chat completion API."""
def __init__(self, api_key: str, model_name: str, base_url: Optional[str] = None):
if not api_key:
raise ValueError("Insight Engine LLM API key is required.")
if not model_name:
raise ValueError("Insight Engine model name is required.")
self.api_key = api_key
self.base_url = base_url
self.model_name = model_name
@abstractmethod
self.provider = model_name
timeout_fallback = os.getenv("LLM_REQUEST_TIMEOUT") or os.getenv("INSIGHT_ENGINE_REQUEST_TIMEOUT") or "180"
try:
self.timeout = float(timeout_fallback)
except ValueError:
self.timeout = 300.0
client_kwargs: Dict[str, Any] = {
"api_key": api_key,
"max_retries": 0,
}
if base_url:
client_kwargs["base_url"] = base_url
self.client = OpenAI(**client_kwargs)
@with_retry(LLM_RETRY_CONFIG)
def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
"""
调用LLM生成回复
Args:
system_prompt: 系统提示词
user_prompt: 用户输入
**kwargs: 其他参数,如temperature、max_tokens等
Returns:
LLM生成的回复文本
"""
pass
@abstractmethod
def get_default_model(self) -> str:
"""
获取默认模型名称
Returns:
默认模型名称
"""
pass
def validate_response(self, response: str) -> str:
"""
验证和清理响应内容
Args:
response: LLM原始响应
Returns:
清理后的响应内容
"""
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
]
allowed_keys = {"temperature", "top_p", "presence_penalty", "frequency_penalty", "stream"}
extra_params = {key: value for key, value in kwargs.items() if key in allowed_keys and value is not None}
timeout = kwargs.pop("timeout", self.timeout)
response = self.client.chat.completions.create(
model=self.model_name,
messages=messages,
timeout=timeout,
**extra_params,
)
if response.choices and response.choices[0].message:
return self.validate_response(response.choices[0].message.content)
return ""
@staticmethod
def validate_response(response: Optional[str]) -> str:
if response is None:
return ""
return response.strip()
def get_model_info(self) -> Dict[str, Any]:
return {
"provider": self.provider,
"model": self.model_name,
"api_base": self.base_url or "default",
}
-119
View File
@@ -1,119 +0,0 @@
"""
DeepSeek LLM实现
使用DeepSeek API进行文本生成
"""
import os
from typing import Optional, Dict, Any
from openai import OpenAI
from .base import BaseLLM
import sys
DEFAULT_DEEPSEEK_BASE_URL = "https://api.deepseek.com"
# 添加utils目录到Python路径
current_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.dirname(os.path.dirname(current_dir))
utils_dir = os.path.join(root_dir, 'utils')
if utils_dir not in sys.path:
sys.path.append(utils_dir)
try:
from retry_helper import with_retry, with_graceful_retry, LLM_RETRY_CONFIG
except ImportError:
# 如果无法导入重试模块,使用空装饰器
def with_retry(config):
def decorator(func):
return func
return decorator
LLM_RETRY_CONFIG = None
class DeepSeekLLM(BaseLLM):
"""DeepSeek LLM实现类"""
def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None, base_url: Optional[str] = None):
"""
初始化DeepSeek客户端
Args:
api_key: DeepSeek API密钥,如果不提供则从环境变量读取
model_name: 模型名称,默认使用deepseek-chat
base_url: DeepSeek API基础地址
"""
if api_key is None:
api_key = os.getenv("DEEPSEEK_API_KEY")
if not api_key:
raise ValueError("DeepSeek API Key未找到!请设置DEEPSEEK_API_KEY环境变量或在初始化时提供")
super().__init__(api_key, model_name)
self.base_url = base_url or os.getenv("DEEPSEEK_BASE_URL") or DEFAULT_DEEPSEEK_BASE_URL
# 初始化OpenAI客户端,使用DeepSeek的endpoint
self.client = OpenAI(
api_key=self.api_key,
base_url=self.base_url
)
self.default_model = model_name or self.get_default_model()
def get_default_model(self) -> str:
"""获取默认模型名称"""
return "deepseek-chat"
@with_retry(LLM_RETRY_CONFIG)
def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
"""
调用DeepSeek API生成回复
Args:
system_prompt: 系统提示词
user_prompt: 用户输入
**kwargs: 其他参数,如temperature、max_tokens等
Returns:
DeepSeek生成的回复文本
"""
try:
# 构建消息
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]
# 设置默认参数
params = {
"model": self.default_model,
"messages": messages,
"temperature": kwargs.get("temperature", 0.7),
"max_tokens": kwargs.get("max_tokens", 4000),
"stream": False
}
# 调用API
response = self.client.chat.completions.create(**params)
# 提取回复内容
if response.choices and response.choices[0].message:
content = response.choices[0].message.content
return self.validate_response(content)
else:
return ""
except Exception as e:
print(f"DeepSeek API调用错误: {str(e)}")
raise e
def get_model_info(self) -> Dict[str, Any]:
"""
获取当前模型信息
Returns:
模型信息字典
"""
return {
"provider": "DeepSeek",
"model": self.default_model,
"api_base": self.base_url
}
-167
View File
@@ -1,167 +0,0 @@
"""
Kimi LLM实现
使用Moonshot AI的Kimi API进行文本生成
"""
import os
import sys
from typing import Optional, Dict, Any
from openai import OpenAI
# 假设 .base 模块和 BaseLLM 类已存在
from .base import BaseLLM
DEFAULT_KIMI_BASE_URL = "https://api.moonshot.cn/v1"
# 添加utils目录到Python路径并导入重试模块
try:
current_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.dirname(os.path.dirname(current_dir))
utils_dir = os.path.join(root_dir, 'utils')
if utils_dir not in sys.path:
sys.path.append(utils_dir)
from retry_helper import with_retry, with_graceful_retry, LLM_RETRY_CONFIG
except ImportError:
# 如果无法导入重试模块,使用空装饰器避免报错
def with_retry(config):
def decorator(func):
return func
return decorator
LLM_RETRY_CONFIG = None
class KimiLLM(BaseLLM):
"""Kimi LLM实现类"""
def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None, base_url: Optional[str] = None):
"""
初始化Kimi客户端
Args:
api_key: Kimi API密钥,如果不提供则从环境变量读取
model_name: 模型名称,默认使用kimi-k2-0711-preview
base_url: Kimi API基础地址
"""
if api_key is None:
api_key = os.getenv("KIMI_API_KEY")
if not api_key:
raise ValueError("Kimi API Key未找到!请设置KIMI_API_KEY环境变量或在初始化时提供")
super().__init__(api_key, model_name)
self.base_url = base_url or os.getenv("KIMI_BASE_URL") or DEFAULT_KIMI_BASE_URL
# 初始化OpenAI客户端,使用Kimi的endpoint
self.client = OpenAI(
api_key=self.api_key,
base_url=self.base_url
)
self.default_model = model_name or self.get_default_model()
def get_default_model(self) -> str:
"""获取默认模型名称"""
return "kimi-k2-0711-preview"
@with_retry(LLM_RETRY_CONFIG)
def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
"""
调用Kimi API生成回复
Args:
system_prompt: 系统提示词
user_prompt: 用户输入
**kwargs: 其他参数,如temperature、max_tokens等
Returns:
Kimi生成的回复文本
"""
try:
# 构建消息
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]
# 智能计算max_tokens - 根据输入长度自动调整输出长度
input_length = len(system_prompt) + len(user_prompt)
if input_length > 100000: # 超长文本
default_max_tokens = 81920
elif input_length > 50000: # 超长文本
default_max_tokens = 40960
elif input_length > 20000: # 长文本
default_max_tokens = 16384
elif input_length > 5000: # 中等文本
default_max_tokens = 8192
else: # 短文本
default_max_tokens = 4096
# 设置默认参数,针对长文本处理优化
params = {
"model": self.default_model,
"messages": messages,
"temperature": kwargs.get("temperature", 0.6), # Kimi建议使用0.6
"max_tokens": kwargs.get("max_tokens", default_max_tokens), # 智能调整token限制
"stream": False
}
# 添加其他可选参数
if "top_p" in kwargs:
params["top_p"] = kwargs["top_p"]
if "presence_penalty" in kwargs:
params["presence_penalty"] = kwargs["presence_penalty"]
if "frequency_penalty" in kwargs:
params["frequency_penalty"] = kwargs["frequency_penalty"]
if "stop" in kwargs:
params["stop"] = kwargs["stop"]
# 输出调试信息(仅在使用Kimi时)
print(f"[Kimi] 输入长度: {input_length}, 使用max_tokens: {params['max_tokens']}")
# 调用API
response = self.client.chat.completions.create(**params)
# 提取回复内容
if response.choices and response.choices[0].message:
content = response.choices[0].message.content
return self.validate_response(content)
else:
return ""
except Exception as e:
print(f"Kimi API调用错误: {str(e)}")
raise e
def get_model_info(self) -> Dict[str, Any]:
"""
获取当前模型信息
Returns:
模型信息字典
"""
return {
"provider": "Kimi",
"model": self.default_model,
"api_base": self.base_url,
"max_context_length": "长文本支持(200K+ tokens"
}
# ==================== 代码修改部分 ====================
def invoke_long_context(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
"""
专门用于长文本处理的调用方法 (作为invoke的兼容接口)。
此方法通过设置推荐的默认参数,然后调用通用的invoke方法来处理请求。
Args:
system_prompt: 系统提示词
user_prompt: 用户输入
**kwargs: 其他参数
Returns:
Kimi生成的回复文本
"""
# 为长文本场景,设置一个慷慨的默认 max_tokens,仅当用户未指定时生效。
# 您原有的16384是一个非常合理的值。
kwargs.setdefault("max_tokens", 16384)
# 直接调用核心的invoke方法,将所有参数(包括预设的默认值)传递给它。
return self.invoke(system_prompt, user_prompt, **kwargs)
-108
View File
@@ -1,108 +0,0 @@
"""
OpenAI LLM实现
使用OpenAI API进行文本生成
"""
import os
import sys
from typing import Optional, Dict, Any
from openai import OpenAI
from .base import BaseLLM
# 添加utils目录到Python路径并导入重试模块
try:
current_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.dirname(os.path.dirname(current_dir))
utils_dir = os.path.join(root_dir, 'utils')
if utils_dir not in sys.path:
sys.path.append(utils_dir)
from retry_helper import with_retry, with_graceful_retry, LLM_RETRY_CONFIG
except ImportError:
# 如果无法导入重试模块,使用空装饰器避免报错
def with_retry(config):
def decorator(func):
return func
return decorator
LLM_RETRY_CONFIG = None
class OpenAILLM(BaseLLM):
"""OpenAI LLM实现类"""
def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None):
"""
初始化OpenAI客户端
Args:
api_key: OpenAI API密钥,如果不提供则从环境变量读取
model_name: 模型名称,默认使用gpt-4o-mini
"""
if api_key is None:
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
raise ValueError("OpenAI API Key未找到!请设置OPENAI_API_KEY环境变量或在初始化时提供")
super().__init__(api_key, model_name)
# 初始化OpenAI客户端
self.client = OpenAI(api_key=self.api_key)
self.default_model = model_name or self.get_default_model()
def get_default_model(self) -> str:
"""获取默认模型名称"""
return "gpt-4o-mini"
@with_retry(LLM_RETRY_CONFIG)
def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
"""
调用OpenAI API生成回复
Args:
system_prompt: 系统提示词
user_prompt: 用户输入
**kwargs: 其他参数,如temperature、max_tokens等
Returns:
OpenAI生成的回复文本
"""
try:
# 构建消息
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]
# 设置默认参数
params = {
"model": self.default_model,
"messages": messages,
"temperature": kwargs.get("temperature", 0.7),
"max_tokens": kwargs.get("max_tokens", 4000)
}
# 调用API
response = self.client.chat.completions.create(**params)
# 提取回复内容
if response.choices and response.choices[0].message:
content = response.choices[0].message.content
return self.validate_response(content)
else:
return ""
except Exception as e:
print(f"OpenAI API调用错误: {str(e)}")
raise e
def get_model_info(self) -> Dict[str, Any]:
"""
获取当前模型信息
Returns:
模型信息字典
"""
return {
"provider": "OpenAI",
"model": self.default_model,
"api_base": "https://api.openai.com"
}
+2 -2
View File
@@ -5,14 +5,14 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
from ..llms.base import BaseLLM
from ..llms.base import LLMClient
from ..state.state import State
class BaseNode(ABC):
"""节点基类"""
def __init__(self, llm_client: BaseLLM, node_name: str = ""):
def __init__(self, llm_client: LLMClient, node_name: str = ""):
"""
初始化节点
+6 -7
View File
@@ -12,7 +12,7 @@ from dataclasses import dataclass
# 添加项目根目录到Python路径以导入config
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from config import GUIJI_QWEN3_API_KEY, GUIJI_QWEN3_BASE_URL
from config import KEYWORD_OPTIMIZER_API_KEY, KEYWORD_OPTIMIZER_BASE_URL, KEYWORD_OPTIMIZER_MODEL_NAME
# 添加utils目录到Python路径
current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -38,7 +38,7 @@ class KeywordOptimizer:
使用硅基流动的Qwen3模型将Agent生成的搜索词优化为更贴近真实舆情的关键词
"""
def __init__(self, api_key: str = None, base_url: str = None):
def __init__(self, api_key: str = None, base_url: str = None, model_name: str = None):
"""
初始化关键词优化器
@@ -46,18 +46,18 @@ class KeywordOptimizer:
api_key: 硅基流动API密钥,如果不提供则从配置文件读取
base_url: 接口基础地址,默认使用配置文件提供的SiliconFlow地址
"""
self.api_key = api_key or GUIJI_QWEN3_API_KEY
self.api_key = api_key or KEYWORD_OPTIMIZER_API_KEY
if not self.api_key:
raise ValueError("未找到硅基流动API密钥,请在config.py中设置GUIJI_QWEN3_API_KEY")
raise ValueError("未找到硅基流动API密钥,请在config.py中设置KEYWORD_OPTIMIZER_API_KEY")
self.base_url = base_url or GUIJI_QWEN3_BASE_URL
self.base_url = base_url or KEYWORD_OPTIMIZER_BASE_URL
self.client = OpenAI(
api_key=self.api_key,
base_url=self.base_url
)
self.model = "Qwen/Qwen3-30B-A3B-Instruct-2507"
self.model = model_name or KEYWORD_OPTIMIZER_MODEL_NAME
def optimize_keywords(self, original_query: str, context: str = "") -> KeywordOptimizationResponse:
"""
@@ -192,7 +192,6 @@ class KeywordOptimizer:
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
max_tokens=10000,
temperature=0.7,
)
+161 -171
View File
@@ -1,6 +1,6 @@
"""
配置管理模块
处理环境变量和配置参数
Configuration management module for the Insight Engine.
Handles environment variables and config file parameters.
"""
import os
@@ -8,226 +8,216 @@ from dataclasses import dataclass
from typing import Optional
def _get_value(source, key: str, default=None):
"""
Helper to fetch a configuration value with environment fallback.
"""
value = None
if isinstance(source, dict):
value = source.get(key)
else:
value = getattr(source, key, None)
if value is None:
value = os.getenv(key, default)
return value if value not in ("", None) else default
@dataclass
class Config:
"""配置类"""
# API密钥
deepseek_api_key: Optional[str] = None
openai_api_key: Optional[str] = None
kimi_api_key: Optional[str] = None
deepseek_base_url: str = "https://api.deepseek.com"
openai_base_url: Optional[str] = None
kimi_base_url: str = "https://api.moonshot.cn/v1"
# 数据库配置
"""Insight Engine configuration."""
# LLM configuration
llm_api_key: Optional[str] = None
llm_base_url: Optional[str] = None
llm_model_name: Optional[str] = None
llm_provider: Optional[str] = None # kept for backward compatibility
# Database configuration
db_host: Optional[str] = None
db_user: Optional[str] = None
db_password: Optional[str] = None
db_name: Optional[str] = None
db_port: int = 3306
db_charset: str = "utf8mb4"
# 模型配置
default_llm_provider: str = "deepseek" # deepseek、openai 或 kimi
deepseek_model: str = "deepseek-chat"
openai_model: str = "gpt-4o-mini"
kimi_model: str = "kimi-k2-0711-preview"
# 搜索配置
# Model behaviour configuration
max_reflections: int = 3
max_paragraphs: int = 6
search_timeout: int = 240
max_content_length: int = 500000 # 提高5倍以充分利用Kimi的长文本能力
# 数据库查询限制
max_content_length: int = 500000
# Search result limits
default_search_hot_content_limit: int = 100
default_search_topic_globally_limit_per_table: int = 50
default_search_topic_by_date_limit_per_table: int = 100
default_get_comments_for_topic_limit: int = 500
default_search_topic_on_platform_limit: int = 200
# Agent配置
max_reflections: int = 3
max_paragraphs: int = 6
# 结果处理限制
max_search_results_for_llm: int = 0 # 0表示不限制,传递所有搜索结果给LLM
max_high_confidence_sentiment_results: int = 0 # 0表示不限制,返回所有高置信度情感分析结果
# 输出配置
max_search_results_for_llm: int = 0
max_high_confidence_sentiment_results: int = 0
# Output configuration
output_dir: str = "reports"
save_intermediate_states: bool = True
def __post_init__(self):
if not self.llm_provider and self.llm_model_name:
# Provider is no longer used, but keep the attribute for compatibility.
self.llm_provider = self.llm_model_name
def validate(self) -> bool:
"""验证配置"""
# 检查必需的API密钥
if self.default_llm_provider == "deepseek" and not self.deepseek_api_key:
print("错误: DeepSeek API Key未设置")
"""Validate configuration."""
if not self.llm_api_key:
print("错误: Insight Engine LLM API Key 未设置 (INSIGHT_ENGINE_API_KEY)。")
return False
if self.default_llm_provider == "openai" and not self.openai_api_key:
print("错误: OpenAI API Key未设置")
if not self.llm_model_name:
print("错误: Insight Engine 模型名称未设置 (INSIGHT_ENGINE_MODEL_NAME)。")
return False
if not all([self.db_host, self.db_user, self.db_password, self.db_name]):
print("错误: 数据库连接信息不完整")
print("错误: 数据库连接信息不完整,请检查 config.py 中的 DB_* 配置。")
return False
return True
@classmethod
def from_file(cls, config_file: str) -> "Config":
"""从配置文件创建配置"""
if config_file.endswith('.py'):
# Python配置文件
"""Create configuration from file."""
if config_file.endswith(".py"):
import importlib.util
# 动态导入配置文件
spec = importlib.util.spec_from_file_location("config", config_file)
config_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(config_module)
return cls(
deepseek_api_key=getattr(config_module, "DEEPSEEK_API_KEY", None),
openai_api_key=getattr(config_module, "OPENAI_API_KEY", None),
kimi_api_key=getattr(config_module, "KIMI_API_KEY", None),
deepseek_base_url=getattr(config_module, "DEEPSEEK_BASE_URL", "https://api.deepseek.com"),
openai_base_url=getattr(config_module, "OPENAI_BASE_URL", None),
kimi_base_url=getattr(config_module, "KIMI_BASE_URL", "https://api.moonshot.cn/v1"),
db_host=getattr(config_module, "DB_HOST", None),
db_user=getattr(config_module, "DB_USER", None),
db_password=getattr(config_module, "DB_PASSWORD", None),
db_name=getattr(config_module, "DB_NAME", None),
db_port=getattr(config_module, "DB_PORT", 3306),
db_charset=getattr(config_module, "DB_CHARSET", "utf8mb4"),
default_llm_provider=getattr(config_module, "DEFAULT_LLM_PROVIDER", "deepseek"),
deepseek_model=getattr(config_module, "DEEPSEEK_MODEL", "deepseek-chat"),
openai_model=getattr(config_module, "OPENAI_MODEL", "gpt-4o-mini"),
search_timeout=getattr(config_module, "SEARCH_TIMEOUT", 240),
max_content_length=getattr(config_module, "SEARCH_CONTENT_MAX_LENGTH", 200000),
default_search_hot_content_limit=getattr(config_module, "DEFAULT_SEARCH_HOT_CONTENT_LIMIT", 100),
default_search_topic_globally_limit_per_table=getattr(config_module, "DEFAULT_SEARCH_TOPIC_GLOBALLY_LIMIT_PER_TABLE", 50),
default_search_topic_by_date_limit_per_table=getattr(config_module, "DEFAULT_SEARCH_TOPIC_BY_DATE_LIMIT_PER_TABLE", 100),
default_get_comments_for_topic_limit=getattr(config_module, "DEFAULT_GET_COMMENTS_FOR_TOPIC_LIMIT", 500),
default_search_topic_on_platform_limit=getattr(config_module, "DEFAULT_SEARCH_TOPIC_ON_PLATFORM_LIMIT", 200),
max_reflections=getattr(config_module, "MAX_REFLECTIONS", 2),
max_paragraphs=getattr(config_module, "MAX_PARAGRAPHS", 5),
max_search_results_for_llm=getattr(config_module, "MAX_SEARCH_RESULTS_FOR_LLM", 0),
max_high_confidence_sentiment_results=getattr(config_module, "MAX_HIGH_CONFIDENCE_SENTIMENT_RESULTS", 0),
output_dir=getattr(config_module, "OUTPUT_DIR", "reports"),
save_intermediate_states=getattr(config_module, "SAVE_INTERMEDIATE_STATES", True)
)
else:
# .env格式配置文件
config_dict = {}
if os.path.exists(config_file):
with open(config_file, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line and not line.startswith('#') and '=' in line:
key, value = line.split('=', 1)
config_dict[key.strip()] = value.strip()
return cls(
deepseek_api_key=config_dict.get("DEEPSEEK_API_KEY"),
openai_api_key=config_dict.get("OPENAI_API_KEY"),
kimi_api_key=config_dict.get("KIMI_API_KEY"),
deepseek_base_url=config_dict.get("DEEPSEEK_BASE_URL", "https://api.deepseek.com"),
openai_base_url=config_dict.get("OPENAI_BASE_URL"),
kimi_base_url=config_dict.get("KIMI_BASE_URL", "https://api.moonshot.cn/v1"),
db_host=config_dict.get("DB_HOST"),
db_user=config_dict.get("DB_USER"),
db_password=config_dict.get("DB_PASSWORD"),
db_name=config_dict.get("DB_NAME"),
db_port=int(config_dict.get("DB_PORT", "3306")),
db_charset=config_dict.get("DB_CHARSET", "utf8mb4"),
default_llm_provider=config_dict.get("DEFAULT_LLM_PROVIDER", "deepseek"),
deepseek_model=config_dict.get("DEEPSEEK_MODEL", "deepseek-chat"),
openai_model=config_dict.get("OPENAI_MODEL", "gpt-4o-mini"),
kimi_model=config_dict.get("KIMI_MODEL", "kimi-k2-0711-preview"),
search_timeout=int(config_dict.get("SEARCH_TIMEOUT", "240")),
max_content_length=int(config_dict.get("SEARCH_CONTENT_MAX_LENGTH", "500000")),
default_search_hot_content_limit=int(config_dict.get("DEFAULT_SEARCH_HOT_CONTENT_LIMIT", "100")),
default_search_topic_globally_limit_per_table=int(config_dict.get("DEFAULT_SEARCH_TOPIC_GLOBALLY_LIMIT_PER_TABLE", "50")),
default_search_topic_by_date_limit_per_table=int(config_dict.get("DEFAULT_SEARCH_TOPIC_BY_DATE_LIMIT_PER_TABLE", "100")),
default_get_comments_for_topic_limit=int(config_dict.get("DEFAULT_GET_COMMENTS_FOR_TOPIC_LIMIT", "500")),
default_search_topic_on_platform_limit=int(config_dict.get("DEFAULT_SEARCH_TOPIC_ON_PLATFORM_LIMIT", "200")),
max_reflections=int(config_dict.get("MAX_REFLECTIONS", "2")),
max_paragraphs=int(config_dict.get("MAX_PARAGRAPHS", "5")),
max_search_results_for_llm=int(config_dict.get("MAX_SEARCH_RESULTS_FOR_LLM", "0")),
max_high_confidence_sentiment_results=int(config_dict.get("MAX_HIGH_CONFIDENCE_SENTIMENT_RESULTS", "0")),
output_dir=config_dict.get("OUTPUT_DIR", "reports"),
save_intermediate_states=config_dict.get("SAVE_INTERMEDIATE_STATES", "true").lower() == "true"
llm_api_key=_get_value(config_module, "INSIGHT_ENGINE_API_KEY"),
llm_base_url=_get_value(config_module, "INSIGHT_ENGINE_BASE_URL"),
llm_model_name=_get_value(config_module, "INSIGHT_ENGINE_MODEL_NAME"),
db_host=_get_value(config_module, "DB_HOST"),
db_user=_get_value(config_module, "DB_USER"),
db_password=_get_value(config_module, "DB_PASSWORD"),
db_name=_get_value(config_module, "DB_NAME"),
db_port=int(_get_value(config_module, "DB_PORT", 3306)),
db_charset=_get_value(config_module, "DB_CHARSET", "utf8mb4"),
max_reflections=int(_get_value(config_module, "MAX_REFLECTIONS", 3)),
max_paragraphs=int(_get_value(config_module, "MAX_PARAGRAPHS", 6)),
search_timeout=int(_get_value(config_module, "SEARCH_TIMEOUT", 240)),
max_content_length=int(_get_value(config_module, "SEARCH_CONTENT_MAX_LENGTH", 500000)),
default_search_hot_content_limit=int(
_get_value(config_module, "DEFAULT_SEARCH_HOT_CONTENT_LIMIT", 100)
),
default_search_topic_globally_limit_per_table=int(
_get_value(config_module, "DEFAULT_SEARCH_TOPIC_GLOBALLY_LIMIT_PER_TABLE", 50)
),
default_search_topic_by_date_limit_per_table=int(
_get_value(config_module, "DEFAULT_SEARCH_TOPIC_BY_DATE_LIMIT_PER_TABLE", 100)
),
default_get_comments_for_topic_limit=int(
_get_value(config_module, "DEFAULT_GET_COMMENTS_FOR_TOPIC_LIMIT", 500)
),
default_search_topic_on_platform_limit=int(
_get_value(config_module, "DEFAULT_SEARCH_TOPIC_ON_PLATFORM_LIMIT", 200)
),
max_search_results_for_llm=int(_get_value(config_module, "MAX_SEARCH_RESULTS_FOR_LLM", 0)),
max_high_confidence_sentiment_results=int(
_get_value(config_module, "MAX_HIGH_CONFIDENCE_SENTIMENT_RESULTS", 0)
),
output_dir=_get_value(config_module, "OUTPUT_DIR", "reports"),
save_intermediate_states=str(
_get_value(config_module, "SAVE_INTERMEDIATE_STATES", "true")
).lower()
in ("true", "1", "yes"),
)
# .env style configuration
config_dict = {}
if os.path.exists(config_file):
with open(config_file, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line and not line.startswith("#") and "=" in line:
key, value = line.split("=", 1)
config_dict[key.strip()] = value.strip()
return cls(
llm_api_key=_get_value(config_dict, "INSIGHT_ENGINE_API_KEY"),
llm_base_url=_get_value(config_dict, "INSIGHT_ENGINE_BASE_URL"),
llm_model_name=_get_value(config_dict, "INSIGHT_ENGINE_MODEL_NAME"),
db_host=_get_value(config_dict, "DB_HOST"),
db_user=_get_value(config_dict, "DB_USER"),
db_password=_get_value(config_dict, "DB_PASSWORD"),
db_name=_get_value(config_dict, "DB_NAME"),
db_port=int(_get_value(config_dict, "DB_PORT", 3306)),
db_charset=_get_value(config_dict, "DB_CHARSET", "utf8mb4"),
max_reflections=int(_get_value(config_dict, "MAX_REFLECTIONS", 3)),
max_paragraphs=int(_get_value(config_dict, "MAX_PARAGRAPHS", 6)),
search_timeout=int(_get_value(config_dict, "SEARCH_TIMEOUT", 240)),
max_content_length=int(_get_value(config_dict, "SEARCH_CONTENT_MAX_LENGTH", 500000)),
default_search_hot_content_limit=int(
_get_value(config_dict, "DEFAULT_SEARCH_HOT_CONTENT_LIMIT", 100)
),
default_search_topic_globally_limit_per_table=int(
_get_value(config_dict, "DEFAULT_SEARCH_TOPIC_GLOBALLY_LIMIT_PER_TABLE", 50)
),
default_search_topic_by_date_limit_per_table=int(
_get_value(config_dict, "DEFAULT_SEARCH_TOPIC_BY_DATE_LIMIT_PER_TABLE", 100)
),
default_get_comments_for_topic_limit=int(
_get_value(config_dict, "DEFAULT_GET_COMMENTS_FOR_TOPIC_LIMIT", 500)
),
default_search_topic_on_platform_limit=int(
_get_value(config_dict, "DEFAULT_SEARCH_TOPIC_ON_PLATFORM_LIMIT", 200)
),
max_search_results_for_llm=int(_get_value(config_dict, "MAX_SEARCH_RESULTS_FOR_LLM", 0)),
max_high_confidence_sentiment_results=int(
_get_value(config_dict, "MAX_HIGH_CONFIDENCE_SENTIMENT_RESULTS", 0)
),
output_dir=_get_value(config_dict, "OUTPUT_DIR", "reports"),
save_intermediate_states=str(
_get_value(config_dict, "SAVE_INTERMEDIATE_STATES", "true")
).lower()
in ("true", "1", "yes"),
)
def load_config(config_file: Optional[str] = None) -> Config:
"""
加载配置
Args:
config_file: 配置文件路径,如果不指定则使用默认路径
Returns:
配置对象
Load configuration.
"""
# 确定配置文件路径
if config_file:
if not os.path.exists(config_file):
raise FileNotFoundError(f"配置文件不存在: {config_file}")
file_to_load = config_file
else:
# 尝试加载常见的配置文件
for config_path in ["config.py", "config.env", ".env"]:
if os.path.exists(config_path):
file_to_load = config_path
print(f"已找到配置文件: {config_path}")
for candidate in ("config.py", "config.env", ".env"):
if os.path.exists(candidate):
file_to_load = candidate
print(f"已找到配置文件: {candidate}")
break
else:
raise FileNotFoundError("未找到配置文件,请创建 config.py 文件")
# 创建配置对象
raise FileNotFoundError("未找到配置文件,请创建 config.py")
config = Config.from_file(file_to_load)
# 验证配置
if not config.validate():
raise ValueError("配置验失败,请检查配置文件中的API密钥")
raise ValueError("配置验失败,请检查 config.py 中的相关配置。")
return config
def print_config(config: Config):
"""打印配置信息(隐藏敏感信息)"""
print("\n=== 当前配置 ===")
print(f"LLM提供商: {config.default_llm_provider}")
print(f"DeepSeek模型: {config.deepseek_model}")
print(f"OpenAI模型: {config.openai_model}")
print(f"搜索超时: {config.search_timeout}")
print(f"最大内容长度: {config.max_content_length}")
"""Print configuration (sensitive values masked)."""
print("\n=== Insight Engine 配置 ===")
print(f"LLM 模型: {config.llm_model_name}")
print(f"LLM Base URL: {config.llm_base_url or '(默认)'}")
print(f"搜索超时: {config.search_timeout}")
print(f"最长内容长度: {config.max_content_length}")
print(f"最大反思次数: {config.max_reflections}")
print(f"最大段落数: {config.max_paragraphs}")
print(f"输出目录: {config.output_dir}")
print(f"保存中间状态: {config.save_intermediate_states}")
# 显示API密钥和数据库状态(不显示实际密钥)
print(f"DeepSeek API Key: {'已设置' if config.deepseek_api_key else '未设置'}")
print(f"OpenAI API Key: {'已设置' if config.openai_api_key else '未设置'}")
print(f"LLM API Key: {'已配置' if config.llm_api_key else '未配置'}")
print(f"数据库连接: {'已配置' if all([config.db_host, config.db_user, config.db_password, config.db_name]) else '未配置'}")
print(f"数据库主机: {config.db_host}")
print(f"数据库端口: {config.db_port}")
print(f"数据库名称: {config.db_name}")
print("==================\n")
print("========================\n")