Completely refactor the LLM integration so that the LLM used by each module can be replaced easily, and improve the retry mechanism.
This commit is contained in:
+8
-15
@@ -9,7 +9,7 @@ import re
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from .llms import DeepSeekLLM, OpenAILLM, BaseLLM
|
||||
from .llms import LLMClient
|
||||
from .nodes import (
|
||||
ReportStructureNode,
|
||||
FirstSearchNode,
|
||||
@@ -35,6 +35,7 @@ class DeepSearchAgent:
|
||||
"""
|
||||
# 加载配置
|
||||
self.config = config or load_config()
|
||||
os.environ["TAVILY_API_KEY"] = self.config.tavily_api_key or ""
|
||||
|
||||
# 初始化LLM客户端
|
||||
self.llm_client = self._initialize_llm()
|
||||
@@ -55,21 +56,13 @@ class DeepSearchAgent:
|
||||
print(f"使用LLM: {self.llm_client.get_model_info()}")
|
||||
print(f"搜索工具集: TavilyNewsAgency (支持6种搜索工具)")
|
||||
|
||||
def _initialize_llm(self) -> BaseLLM:
|
||||
def _initialize_llm(self) -> LLMClient:
|
||||
"""初始化LLM客户端"""
|
||||
if self.config.default_llm_provider == "deepseek":
|
||||
return DeepSeekLLM(
|
||||
api_key=self.config.deepseek_api_key,
|
||||
model_name=self.config.deepseek_model,
|
||||
base_url=self.config.deepseek_base_url
|
||||
)
|
||||
elif self.config.default_llm_provider == "openai":
|
||||
return OpenAILLM(
|
||||
api_key=self.config.openai_api_key,
|
||||
model_name=self.config.openai_model
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"不支持的LLM提供商: {self.config.default_llm_provider}")
|
||||
return LLMClient(
|
||||
api_key=self.config.llm_api_key,
|
||||
model_name=self.config.llm_model_name,
|
||||
base_url=self.config.llm_base_url,
|
||||
)
|
||||
|
||||
def _initialize_nodes(self):
|
||||
"""初始化处理节点"""
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
"""
|
||||
LLM调用模块
|
||||
支持多种大语言模型的统一接口
|
||||
LLM module for the Query Engine.
|
||||
"""
|
||||
|
||||
from .base import BaseLLM
|
||||
from .deepseek import DeepSeekLLM
|
||||
from .openai_llm import OpenAILLM
|
||||
from .base import LLMClient
|
||||
|
||||
__all__ = ["BaseLLM", "DeepSeekLLM", "OpenAILLM"]
|
||||
__all__ = ["LLMClient"]
|
||||
|
||||
+78
-50
@@ -1,61 +1,89 @@
|
||||
"""
|
||||
LLM基础抽象类
|
||||
定义所有LLM实现需要遵循的接口标准
|
||||
Unified OpenAI-compatible LLM client for the Query Engine, with retry support.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional, Dict, Any
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
project_root = os.path.dirname(os.path.dirname(current_dir))
|
||||
utils_dir = os.path.join(project_root, "utils")
|
||||
if utils_dir not in sys.path:
|
||||
sys.path.append(utils_dir)
|
||||
|
||||
try:
|
||||
from retry_helper import with_retry, LLM_RETRY_CONFIG
|
||||
except ImportError:
|
||||
def with_retry(config=None):
|
||||
def decorator(func):
|
||||
return func
|
||||
return decorator
|
||||
|
||||
LLM_RETRY_CONFIG = None
|
||||
|
||||
|
||||
class BaseLLM(ABC):
|
||||
"""LLM基础抽象类"""
|
||||
|
||||
def __init__(self, api_key: str, model_name: Optional[str] = None):
|
||||
"""
|
||||
初始化LLM客户端
|
||||
|
||||
Args:
|
||||
api_key: API密钥
|
||||
model_name: 模型名称,如果不指定则使用默认模型
|
||||
"""
|
||||
class LLMClient:
|
||||
"""Minimal wrapper around the OpenAI-compatible chat completion API."""
|
||||
|
||||
def __init__(self, api_key: str, model_name: str, base_url: Optional[str] = None):
|
||||
if not api_key:
|
||||
raise ValueError("Query Engine LLM API key is required.")
|
||||
if not model_name:
|
||||
raise ValueError("Query Engine model name is required.")
|
||||
|
||||
self.api_key = api_key
|
||||
self.base_url = base_url
|
||||
self.model_name = model_name
|
||||
|
||||
@abstractmethod
|
||||
self.provider = model_name
|
||||
timeout_fallback = os.getenv("LLM_REQUEST_TIMEOUT") or os.getenv("QUERY_ENGINE_REQUEST_TIMEOUT") or "180"
|
||||
try:
|
||||
self.timeout = float(timeout_fallback)
|
||||
except ValueError:
|
||||
self.timeout = 180.0
|
||||
|
||||
client_kwargs: Dict[str, Any] = {
|
||||
"api_key": api_key,
|
||||
"max_retries": 0,
|
||||
}
|
||||
if base_url:
|
||||
client_kwargs["base_url"] = base_url
|
||||
self.client = OpenAI(**client_kwargs)
|
||||
|
||||
@with_retry(LLM_RETRY_CONFIG)
|
||||
def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
|
||||
"""
|
||||
调用LLM生成回复
|
||||
|
||||
Args:
|
||||
system_prompt: 系统提示词
|
||||
user_prompt: 用户输入
|
||||
**kwargs: 其他参数,如temperature、max_tokens等
|
||||
|
||||
Returns:
|
||||
LLM生成的回复文本
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_default_model(self) -> str:
|
||||
"""
|
||||
获取默认模型名称
|
||||
|
||||
Returns:
|
||||
默认模型名称
|
||||
"""
|
||||
pass
|
||||
|
||||
def validate_response(self, response: str) -> str:
|
||||
"""
|
||||
验证和清理响应内容
|
||||
|
||||
Args:
|
||||
response: LLM原始响应
|
||||
|
||||
Returns:
|
||||
清理后的响应内容
|
||||
"""
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt},
|
||||
]
|
||||
|
||||
allowed_keys = {"temperature", "top_p", "presence_penalty", "frequency_penalty", "stream"}
|
||||
extra_params = {key: value for key, value in kwargs.items() if key in allowed_keys and value is not None}
|
||||
|
||||
timeout = kwargs.pop("timeout", self.timeout)
|
||||
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model_name,
|
||||
messages=messages,
|
||||
timeout=timeout,
|
||||
**extra_params,
|
||||
)
|
||||
|
||||
if response.choices and response.choices[0].message:
|
||||
return self.validate_response(response.choices[0].message.content)
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def validate_response(response: Optional[str]) -> str:
|
||||
if response is None:
|
||||
return ""
|
||||
return response.strip()
|
||||
|
||||
def get_model_info(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"provider": self.provider,
|
||||
"model": self.model_name,
|
||||
"api_base": self.base_url or "default",
|
||||
}
|
||||
|
||||
@@ -1,118 +0,0 @@
|
||||
"""
|
||||
DeepSeek LLM实现
|
||||
使用DeepSeek API进行文本生成
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional, Dict, Any
|
||||
from openai import OpenAI
|
||||
from .base import BaseLLM
|
||||
|
||||
DEFAULT_DEEPSEEK_BASE_URL = "https://api.deepseek.com"
|
||||
|
||||
# 添加utils目录到Python路径并导入重试模块
|
||||
try:
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
root_dir = os.path.dirname(os.path.dirname(current_dir))
|
||||
utils_dir = os.path.join(root_dir, 'utils')
|
||||
if utils_dir not in sys.path:
|
||||
sys.path.append(utils_dir)
|
||||
from retry_helper import with_retry, with_graceful_retry, LLM_RETRY_CONFIG
|
||||
except ImportError:
|
||||
# 如果无法导入重试模块,使用空装饰器避免报错
|
||||
def with_retry(config):
|
||||
def decorator(func):
|
||||
return func
|
||||
return decorator
|
||||
LLM_RETRY_CONFIG = None
|
||||
|
||||
|
||||
class DeepSeekLLM(BaseLLM):
|
||||
"""DeepSeek LLM实现类"""
|
||||
|
||||
def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None, base_url: Optional[str] = None):
|
||||
"""
|
||||
初始化DeepSeek客户端
|
||||
|
||||
Args:
|
||||
api_key: DeepSeek API密钥,如果不提供则从环境变量读取
|
||||
model_name: 模型名称,默认使用deepseek-chat
|
||||
base_url: DeepSeek API基础地址
|
||||
"""
|
||||
if api_key is None:
|
||||
api_key = os.getenv("DEEPSEEK_API_KEY")
|
||||
if not api_key:
|
||||
raise ValueError("DeepSeek API Key未找到!请设置DEEPSEEK_API_KEY环境变量或在初始化时提供")
|
||||
|
||||
super().__init__(api_key, model_name)
|
||||
|
||||
self.base_url = base_url or os.getenv("DEEPSEEK_BASE_URL") or DEFAULT_DEEPSEEK_BASE_URL
|
||||
|
||||
# 初始化OpenAI客户端,使用DeepSeek的endpoint
|
||||
self.client = OpenAI(
|
||||
api_key=self.api_key,
|
||||
base_url=self.base_url
|
||||
)
|
||||
|
||||
self.default_model = model_name or self.get_default_model()
|
||||
|
||||
def get_default_model(self) -> str:
|
||||
"""获取默认模型名称"""
|
||||
return "deepseek-chat"
|
||||
|
||||
@with_retry(LLM_RETRY_CONFIG)
|
||||
def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
|
||||
"""
|
||||
调用DeepSeek API生成回复
|
||||
|
||||
Args:
|
||||
system_prompt: 系统提示词
|
||||
user_prompt: 用户输入
|
||||
**kwargs: 其他参数,如temperature、max_tokens等
|
||||
|
||||
Returns:
|
||||
DeepSeek生成的回复文本
|
||||
"""
|
||||
try:
|
||||
# 构建消息
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt}
|
||||
]
|
||||
|
||||
# 设置默认参数
|
||||
params = {
|
||||
"model": self.default_model,
|
||||
"messages": messages,
|
||||
"temperature": kwargs.get("temperature", 0.7),
|
||||
"max_tokens": kwargs.get("max_tokens", 8192), # 提高到30000以支持一万字报告
|
||||
"stream": False
|
||||
}
|
||||
|
||||
# 调用API
|
||||
response = self.client.chat.completions.create(**params)
|
||||
|
||||
# 提取回复内容
|
||||
if response.choices and response.choices[0].message:
|
||||
content = response.choices[0].message.content
|
||||
return self.validate_response(content)
|
||||
else:
|
||||
return ""
|
||||
|
||||
except Exception as e:
|
||||
print(f"DeepSeek API调用错误: {str(e)}")
|
||||
raise e
|
||||
|
||||
def get_model_info(self) -> Dict[str, Any]:
|
||||
"""
|
||||
获取当前模型信息
|
||||
|
||||
Returns:
|
||||
模型信息字典
|
||||
"""
|
||||
return {
|
||||
"provider": "DeepSeek",
|
||||
"model": self.default_model,
|
||||
"api_base": self.base_url
|
||||
}
|
||||
@@ -1,108 +0,0 @@
|
||||
"""
|
||||
OpenAI LLM实现
|
||||
使用OpenAI API进行文本生成
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional, Dict, Any
|
||||
from openai import OpenAI
|
||||
from .base import BaseLLM
|
||||
|
||||
# 添加utils目录到Python路径并导入重试模块
|
||||
try:
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
root_dir = os.path.dirname(os.path.dirname(current_dir))
|
||||
utils_dir = os.path.join(root_dir, 'utils')
|
||||
if utils_dir not in sys.path:
|
||||
sys.path.append(utils_dir)
|
||||
from retry_helper import with_retry, with_graceful_retry, LLM_RETRY_CONFIG
|
||||
except ImportError:
|
||||
# 如果无法导入重试模块,使用空装饰器避免报错
|
||||
def with_retry(config):
|
||||
def decorator(func):
|
||||
return func
|
||||
return decorator
|
||||
LLM_RETRY_CONFIG = None
|
||||
|
||||
|
||||
class OpenAILLM(BaseLLM):
|
||||
"""OpenAI LLM实现类"""
|
||||
|
||||
def __init__(self, api_key: Optional[str] = None, model_name: Optional[str] = None):
|
||||
"""
|
||||
初始化OpenAI客户端
|
||||
|
||||
Args:
|
||||
api_key: OpenAI API密钥,如果不提供则从环境变量读取
|
||||
model_name: 模型名称,默认使用gpt-4o-mini
|
||||
"""
|
||||
if api_key is None:
|
||||
api_key = os.getenv("OPENAI_API_KEY")
|
||||
if not api_key:
|
||||
raise ValueError("OpenAI API Key未找到!请设置OPENAI_API_KEY环境变量或在初始化时提供")
|
||||
|
||||
super().__init__(api_key, model_name)
|
||||
|
||||
# 初始化OpenAI客户端
|
||||
self.client = OpenAI(api_key=self.api_key)
|
||||
self.default_model = model_name or self.get_default_model()
|
||||
|
||||
def get_default_model(self) -> str:
|
||||
"""获取默认模型名称"""
|
||||
return "gpt-4o-mini"
|
||||
|
||||
@with_retry(LLM_RETRY_CONFIG)
|
||||
def invoke(self, system_prompt: str, user_prompt: str, **kwargs) -> str:
|
||||
"""
|
||||
调用OpenAI API生成回复
|
||||
|
||||
Args:
|
||||
system_prompt: 系统提示词
|
||||
user_prompt: 用户输入
|
||||
**kwargs: 其他参数,如temperature、max_tokens等
|
||||
|
||||
Returns:
|
||||
OpenAI生成的回复文本
|
||||
"""
|
||||
try:
|
||||
# 构建消息
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt}
|
||||
]
|
||||
|
||||
# 设置默认参数
|
||||
params = {
|
||||
"model": self.default_model,
|
||||
"messages": messages,
|
||||
"temperature": kwargs.get("temperature", 0.7),
|
||||
"max_tokens": kwargs.get("max_tokens", 8192) # 提高到30000以支持一万字报告
|
||||
}
|
||||
|
||||
# 调用API
|
||||
response = self.client.chat.completions.create(**params)
|
||||
|
||||
# 提取回复内容
|
||||
if response.choices and response.choices[0].message:
|
||||
content = response.choices[0].message.content
|
||||
return self.validate_response(content)
|
||||
else:
|
||||
return ""
|
||||
|
||||
except Exception as e:
|
||||
print(f"OpenAI API调用错误: {str(e)}")
|
||||
raise e
|
||||
|
||||
def get_model_info(self) -> Dict[str, Any]:
|
||||
"""
|
||||
获取当前模型信息
|
||||
|
||||
Returns:
|
||||
模型信息字典
|
||||
"""
|
||||
return {
|
||||
"provider": "OpenAI",
|
||||
"model": self.default_model,
|
||||
"api_base": "https://api.openai.com"
|
||||
}
|
||||
@@ -5,14 +5,14 @@
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, Optional
|
||||
from ..llms.base import BaseLLM
|
||||
from ..llms.base import LLMClient
|
||||
from ..state.state import State
|
||||
|
||||
|
||||
class BaseNode(ABC):
|
||||
"""节点基类"""
|
||||
|
||||
def __init__(self, llm_client: BaseLLM, node_name: str = ""):
|
||||
def __init__(self, llm_client: LLMClient, node_name: str = ""):
|
||||
"""
|
||||
初始化节点
|
||||
|
||||
|
||||
@@ -67,11 +67,10 @@ class ReportFormattingNode(BaseNode):
|
||||
|
||||
self.log_info("正在格式化最终报告")
|
||||
|
||||
# 调用LLM,传递更大的max_tokens以支持长文本报告
|
||||
# 调用LLM生成Markdown格式
|
||||
response = self.llm_client.invoke(
|
||||
SYSTEM_PROMPT_REPORT_FORMATTING,
|
||||
message,
|
||||
max_tokens=8192 # 支持一万字的报告输出
|
||||
SYSTEM_PROMPT_REPORT_FORMATTING,
|
||||
message,
|
||||
)
|
||||
|
||||
# 处理响应
|
||||
|
||||
@@ -98,11 +98,10 @@ class FirstSummaryNode(StateMutationNode):
|
||||
|
||||
self.log_info("正在生成首次段落总结")
|
||||
|
||||
# 调用LLM,增加max_tokens以支持更长的总结
|
||||
# 调用LLM生成总结
|
||||
response = self.llm_client.invoke(
|
||||
SYSTEM_PROMPT_FIRST_SUMMARY,
|
||||
SYSTEM_PROMPT_FIRST_SUMMARY,
|
||||
message,
|
||||
max_tokens=8192 # 支持更长的总结内容
|
||||
)
|
||||
|
||||
# 处理响应
|
||||
@@ -267,11 +266,10 @@ class ReflectionSummaryNode(StateMutationNode):
|
||||
|
||||
self.log_info("正在生成反思总结")
|
||||
|
||||
# 调用LLM,增加max_tokens以支持更长的总结
|
||||
# 调用LLM生成总结
|
||||
response = self.llm_client.invoke(
|
||||
SYSTEM_PROMPT_REFLECTION_SUMMARY,
|
||||
SYSTEM_PROMPT_REFLECTION_SUMMARY,
|
||||
message,
|
||||
max_tokens=8192 # 支持更长的总结内容
|
||||
)
|
||||
|
||||
# 处理响应
|
||||
|
||||
+99
-116
@@ -1,6 +1,5 @@
|
||||
"""
|
||||
配置管理模块
|
||||
处理环境变量和配置参数
|
||||
Configuration management module for the Query Engine.
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -8,161 +7,145 @@ from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _get_value(source, key: str, default=None, *fallback_keys: str):
|
||||
candidates = (key,) + fallback_keys
|
||||
value = None
|
||||
for candidate in candidates:
|
||||
if isinstance(source, dict):
|
||||
value = source.get(candidate)
|
||||
else:
|
||||
value = getattr(source, candidate, None)
|
||||
if value not in (None, ""):
|
||||
break
|
||||
if value in (None, ""):
|
||||
for candidate in candidates:
|
||||
env_val = os.getenv(candidate)
|
||||
if env_val not in (None, ""):
|
||||
value = env_val
|
||||
break
|
||||
return value if value not in (None, "") else default
|
||||
|
||||
|
||||
@dataclass
|
||||
class Config:
|
||||
"""配置类"""
|
||||
# API密钥
|
||||
deepseek_api_key: Optional[str] = None
|
||||
openai_api_key: Optional[str] = None
|
||||
"""Query Engine configuration."""
|
||||
|
||||
llm_api_key: Optional[str] = None
|
||||
llm_base_url: Optional[str] = None
|
||||
llm_model_name: Optional[str] = None
|
||||
llm_provider: Optional[str] = None # compatibility
|
||||
|
||||
tavily_api_key: Optional[str] = None
|
||||
deepseek_base_url: str = "https://api.deepseek.com"
|
||||
openai_base_url: Optional[str] = None
|
||||
|
||||
# 模型配置
|
||||
default_llm_provider: str = "deepseek" # deepseek 或 openai
|
||||
deepseek_model: str = "deepseek-chat"
|
||||
openai_model: str = "gpt-4o-mini"
|
||||
|
||||
# 搜索配置
|
||||
|
||||
search_timeout: int = 240
|
||||
max_content_length: int = 20000
|
||||
|
||||
# Agent配置
|
||||
max_reflections: int = 2
|
||||
max_paragraphs: int = 5
|
||||
|
||||
# 输出配置
|
||||
max_search_results: int = 20
|
||||
|
||||
output_dir: str = "reports"
|
||||
save_intermediate_states: bool = True
|
||||
|
||||
|
||||
def __post_init__(self):
|
||||
if not self.llm_provider and self.llm_model_name:
|
||||
self.llm_provider = self.llm_model_name
|
||||
|
||||
def validate(self) -> bool:
|
||||
"""验证配置"""
|
||||
# 检查必需的API密钥
|
||||
if self.default_llm_provider == "deepseek" and not self.deepseek_api_key:
|
||||
print("错误: DeepSeek API Key未设置")
|
||||
if not self.llm_api_key:
|
||||
print("错误: Query Engine LLM API Key 未设置 (QUERY_ENGINE_API_KEY)。")
|
||||
return False
|
||||
|
||||
if self.default_llm_provider == "openai" and not self.openai_api_key:
|
||||
print("错误: OpenAI API Key未设置")
|
||||
if not self.llm_model_name:
|
||||
print("错误: Query Engine 模型名称未设置 (QUERY_ENGINE_MODEL_NAME)。")
|
||||
return False
|
||||
|
||||
if not self.tavily_api_key:
|
||||
print("错误: Tavily API Key未设置")
|
||||
print("错误: Tavily API Key 未设置 (TAVILY_API_KEY)。")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, config_file: str) -> "Config":
|
||||
"""从配置文件创建配置"""
|
||||
if config_file.endswith('.py'):
|
||||
# Python配置文件
|
||||
if config_file.endswith(".py"):
|
||||
import importlib.util
|
||||
|
||||
# 动态导入配置文件
|
||||
|
||||
spec = importlib.util.spec_from_file_location("config", config_file)
|
||||
config_module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(config_module)
|
||||
|
||||
return cls(
|
||||
deepseek_api_key=getattr(config_module, "DEEPSEEK_API_KEY", None),
|
||||
openai_api_key=getattr(config_module, "OPENAI_API_KEY", None),
|
||||
tavily_api_key=getattr(config_module, "TAVILY_API_KEY", None),
|
||||
deepseek_base_url=getattr(config_module, "DEEPSEEK_BASE_URL", "https://api.deepseek.com"),
|
||||
openai_base_url=getattr(config_module, "OPENAI_BASE_URL", None),
|
||||
default_llm_provider=getattr(config_module, "DEFAULT_LLM_PROVIDER", "deepseek"),
|
||||
deepseek_model=getattr(config_module, "DEEPSEEK_MODEL", "deepseek-chat"),
|
||||
openai_model=getattr(config_module, "OPENAI_MODEL", "gpt-4o-mini"),
|
||||
|
||||
search_timeout=getattr(config_module, "SEARCH_TIMEOUT", 240),
|
||||
max_content_length=getattr(config_module, "SEARCH_CONTENT_MAX_LENGTH", 20000),
|
||||
max_reflections=getattr(config_module, "MAX_REFLECTIONS", 2),
|
||||
max_paragraphs=getattr(config_module, "MAX_PARAGRAPHS", 5),
|
||||
output_dir=getattr(config_module, "OUTPUT_DIR", "reports"),
|
||||
save_intermediate_states=getattr(config_module, "SAVE_INTERMEDIATE_STATES", True)
|
||||
)
|
||||
else:
|
||||
# .env格式配置文件
|
||||
config_dict = {}
|
||||
|
||||
if os.path.exists(config_file):
|
||||
with open(config_file, 'r', encoding='utf-8') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#') and '=' in line:
|
||||
key, value = line.split('=', 1)
|
||||
config_dict[key.strip()] = value.strip()
|
||||
|
||||
return cls(
|
||||
deepseek_api_key=config_dict.get("DEEPSEEK_API_KEY"),
|
||||
openai_api_key=config_dict.get("OPENAI_API_KEY"),
|
||||
tavily_api_key=config_dict.get("TAVILY_API_KEY"),
|
||||
deepseek_base_url=config_dict.get("DEEPSEEK_BASE_URL", "https://api.deepseek.com"),
|
||||
openai_base_url=config_dict.get("OPENAI_BASE_URL"),
|
||||
default_llm_provider=config_dict.get("DEFAULT_LLM_PROVIDER", "deepseek"),
|
||||
deepseek_model=config_dict.get("DEEPSEEK_MODEL", "deepseek-chat"),
|
||||
openai_model=config_dict.get("OPENAI_MODEL", "gpt-4o-mini"),
|
||||
|
||||
search_timeout=int(config_dict.get("SEARCH_TIMEOUT", "240")),
|
||||
max_content_length=int(config_dict.get("SEARCH_CONTENT_MAX_LENGTH", "20000")),
|
||||
max_reflections=int(config_dict.get("MAX_REFLECTIONS", "2")),
|
||||
max_paragraphs=int(config_dict.get("MAX_PARAGRAPHS", "5")),
|
||||
output_dir=config_dict.get("OUTPUT_DIR", "reports"),
|
||||
save_intermediate_states=config_dict.get("SAVE_INTERMEDIATE_STATES", "true").lower() == "true"
|
||||
llm_api_key=_get_value(config_module, "QUERY_ENGINE_API_KEY"),
|
||||
llm_base_url=_get_value(config_module, "QUERY_ENGINE_BASE_URL"),
|
||||
llm_model_name=_get_value(config_module, "QUERY_ENGINE_MODEL_NAME"),
|
||||
tavily_api_key=_get_value(config_module, "TAVILY_API_KEY"),
|
||||
search_timeout=int(_get_value(config_module, "SEARCH_TIMEOUT", 240)),
|
||||
max_content_length=int(_get_value(config_module, "SEARCH_CONTENT_MAX_LENGTH", 20000)),
|
||||
max_reflections=int(_get_value(config_module, "MAX_REFLECTIONS", 2)),
|
||||
max_paragraphs=int(_get_value(config_module, "MAX_PARAGRAPHS", 5)),
|
||||
max_search_results=int(_get_value(config_module, "MAX_SEARCH_RESULTS", 20)),
|
||||
output_dir=_get_value(config_module, "OUTPUT_DIR", "reports"),
|
||||
save_intermediate_states=str(
|
||||
_get_value(config_module, "SAVE_INTERMEDIATE_STATES", "true")
|
||||
).lower()
|
||||
in ("true", "1", "yes"),
|
||||
)
|
||||
|
||||
config_dict = {}
|
||||
if os.path.exists(config_file):
|
||||
with open(config_file, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line and not line.startswith("#") and "=" in line:
|
||||
key, value = line.split("=", 1)
|
||||
config_dict[key.strip()] = value.strip()
|
||||
|
||||
return cls(
|
||||
llm_api_key=_get_value(config_dict, "QUERY_ENGINE_API_KEY"),
|
||||
llm_base_url=_get_value(config_dict, "QUERY_ENGINE_BASE_URL"),
|
||||
llm_model_name=_get_value(config_dict, "QUERY_ENGINE_MODEL_NAME"),
|
||||
tavily_api_key=_get_value(config_dict, "TAVILY_API_KEY"),
|
||||
search_timeout=int(_get_value(config_dict, "SEARCH_TIMEOUT", 240)),
|
||||
max_content_length=int(_get_value(config_dict, "SEARCH_CONTENT_MAX_LENGTH", 20000)),
|
||||
max_reflections=int(_get_value(config_dict, "MAX_REFLECTIONS", 2)),
|
||||
max_paragraphs=int(_get_value(config_dict, "MAX_PARAGRAPHS", 5)),
|
||||
max_search_results=int(_get_value(config_dict, "MAX_SEARCH_RESULTS", 20)),
|
||||
output_dir=_get_value(config_dict, "OUTPUT_DIR", "reports"),
|
||||
save_intermediate_states=str(
|
||||
_get_value(config_dict, "SAVE_INTERMEDIATE_STATES", "true")
|
||||
).lower()
|
||||
in ("true", "1", "yes"),
|
||||
)
|
||||
|
||||
|
||||
def load_config(config_file: Optional[str] = None) -> Config:
|
||||
"""
|
||||
加载配置
|
||||
|
||||
Args:
|
||||
config_file: 配置文件路径,如果不指定则使用默认路径
|
||||
|
||||
Returns:
|
||||
配置对象
|
||||
"""
|
||||
# 确定配置文件路径
|
||||
if config_file:
|
||||
if not os.path.exists(config_file):
|
||||
raise FileNotFoundError(f"配置文件不存在: {config_file}")
|
||||
file_to_load = config_file
|
||||
else:
|
||||
# 尝试加载常见的配置文件
|
||||
for config_path in ["config.py", "config.env", ".env"]:
|
||||
if os.path.exists(config_path):
|
||||
file_to_load = config_path
|
||||
print(f"已找到配置文件: {config_path}")
|
||||
for candidate in ("config.py", "config.env", ".env"):
|
||||
if os.path.exists(candidate):
|
||||
file_to_load = candidate
|
||||
print(f"已找到配置文件: {candidate}")
|
||||
break
|
||||
else:
|
||||
raise FileNotFoundError("未找到配置文件,请创建 config.py 文件")
|
||||
|
||||
# 创建配置对象
|
||||
raise FileNotFoundError("未找到配置文件,请创建 config.py。")
|
||||
|
||||
config = Config.from_file(file_to_load)
|
||||
|
||||
# 验证配置
|
||||
if not config.validate():
|
||||
raise ValueError("配置验证失败,请检查配置文件中的API密钥")
|
||||
|
||||
raise ValueError("配置校验失败,请检查 config.py 中的相关配置。")
|
||||
return config
|
||||
|
||||
|
||||
def print_config(config: Config):
|
||||
"""打印配置信息(隐藏敏感信息)"""
|
||||
print("\n=== 当前配置 ===")
|
||||
print(f"LLM提供商: {config.default_llm_provider}")
|
||||
print(f"DeepSeek模型: {config.deepseek_model}")
|
||||
print(f"OpenAI模型: {config.openai_model}")
|
||||
print(f"最大搜索结果数: {config.max_search_results}")
|
||||
print(f"搜索超时: {config.search_timeout}秒")
|
||||
print(f"最大内容长度: {config.max_content_length}")
|
||||
print("\n=== Query Engine 配置 ===")
|
||||
print(f"LLM 模型: {config.llm_model_name}")
|
||||
print(f"LLM Base URL: {config.llm_base_url or '(默认)'}")
|
||||
print(f"Tavily API Key: {'已配置' if config.tavily_api_key else '未配置'}")
|
||||
print(f"搜索超时: {config.search_timeout} 秒")
|
||||
print(f"最长内容长度: {config.max_content_length}")
|
||||
print(f"最大反思次数: {config.max_reflections}")
|
||||
print(f"最大段落数: {config.max_paragraphs}")
|
||||
print(f"最大搜索结果数: {config.max_search_results}")
|
||||
print(f"输出目录: {config.output_dir}")
|
||||
print(f"保存中间状态: {config.save_intermediate_states}")
|
||||
|
||||
# 显示API密钥状态(不显示实际密钥)
|
||||
print(f"DeepSeek API Key: {'已设置' if config.deepseek_api_key else '未设置'}")
|
||||
print(f"OpenAI API Key: {'已设置' if config.openai_api_key else '未设置'}")
|
||||
print(f"Tavily API Key: {'已设置' if config.tavily_api_key else '未设置'}")
|
||||
print("==================\n")
|
||||
print(f"LLM API Key: {'已配置' if config.llm_api_key else '未配置'}")
|
||||
print("========================\n")
|
||||
|
||||
Reference in New Issue
Block a user