Different types of base models adapted for each agent.

This commit is contained in:
戒酒的李白
2025-08-23 20:19:57 +08:00
parent f448ddd466
commit 339545f7fb
16 changed files with 1072 additions and 89 deletions
+191 -23
View File
@@ -7,9 +7,9 @@ import json
import os
import re
from datetime import datetime
from typing import Optional, Dict, Any, List
from typing import Optional, Dict, Any, List, Union
from .llms import DeepSeekLLM, OpenAILLM, BaseLLM
from .llms import DeepSeekLLM, OpenAILLM, KimiLLM, BaseLLM
from .nodes import (
ReportStructureNode,
FirstSearchNode,
@@ -19,7 +19,7 @@ from .nodes import (
ReportFormattingNode
)
from .state import State
from .tools import MediaCrawlerDB, DBResponse, keyword_optimizer
from .tools import MediaCrawlerDB, DBResponse, keyword_optimizer, multilingual_sentiment_analyzer
from .utils import Config, load_config, format_search_results_for_prompt
@@ -50,6 +50,9 @@ class DeepSearchAgent:
# 初始化搜索工具集
self.search_agency = MediaCrawlerDB()
# 初始化情感分析器
self.sentiment_analyzer = multilingual_sentiment_analyzer
# 初始化节点
self._initialize_nodes()
@@ -62,6 +65,7 @@ class DeepSearchAgent:
print(f"Deep Search Agent 已初始化")
print(f"使用LLM: {self.llm_client.get_model_info()}")
print(f"搜索工具集: MediaCrawlerDB (支持5种本地数据库查询工具)")
print(f"情感分析: WeiboMultilingualSentiment (支持22种语言的情感分析)")
def _initialize_llm(self) -> BaseLLM:
"""初始化LLM客户端"""
@@ -75,6 +79,11 @@ class DeepSearchAgent:
api_key=self.config.openai_api_key,
model_name=self.config.openai_model
)
elif self.config.default_llm_provider == "kimi":
return KimiLLM(
api_key=self.config.kimi_api_key,
model_name=self.config.kimi_model
)
else:
raise ValueError(f"不支持的LLM提供商: {self.config.default_llm_provider}")
@@ -113,7 +122,7 @@ class DeepSearchAgent:
def execute_search_tool(self, tool_name: str, query: str, **kwargs) -> DBResponse:
"""
执行指定的数据库查询工具(集成关键词优化中间件)
执行指定的数据库查询工具(集成关键词优化中间件和情感分析)
Args:
tool_name: 工具名称,可选值:
@@ -122,11 +131,13 @@ class DeepSearchAgent:
- "search_topic_by_date": 按日期搜索话题
- "get_comments_for_topic": 获取话题评论
- "search_topic_on_platform": 平台定向搜索
- "analyze_sentiment": 对查询结果进行情感分析
query: 搜索关键词/话题
**kwargs: 额外参数(如start_date, end_date, platform, limit等)
**kwargs: 额外参数(如start_date, end_date, platform, limit, enable_sentiment等)
enable_sentiment: 是否自动对搜索结果进行情感分析(默认True)
Returns:
DBResponse对象
DBResponse对象(可能包含情感分析结果)
"""
print(f" → 执行数据库查询工具: {tool_name}")
@@ -134,7 +145,36 @@ class DeepSearchAgent:
if tool_name == "search_hot_content":
time_period = kwargs.get("time_period", "week")
limit = kwargs.get("limit", 100)
return self.search_agency.search_hot_content(time_period=time_period, limit=limit)
response = self.search_agency.search_hot_content(time_period=time_period, limit=limit)
# 检查是否需要进行情感分析
enable_sentiment = kwargs.get("enable_sentiment", True)
if enable_sentiment and response.results and len(response.results) > 0:
print(f" 🎭 开始对热点内容进行情感分析...")
sentiment_analysis = self._perform_sentiment_analysis(response.results)
if sentiment_analysis:
# 将情感分析结果添加到响应的parameters中
response.parameters["sentiment_analysis"] = sentiment_analysis
print(f" ✅ 情感分析完成")
return response
# 独立情感分析工具
if tool_name == "analyze_sentiment":
texts = kwargs.get("texts", query) # 可以通过texts参数传递,或使用query
sentiment_result = self.analyze_sentiment_only(texts)
# 构建DBResponse格式的响应
return DBResponse(
tool_name="analyze_sentiment",
parameters={
"texts": texts if isinstance(texts, list) else [texts],
**kwargs
},
results=[], # 情感分析不返回搜索结果
results_count=0,
metadata=sentiment_result
)
# 对于需要搜索词的工具,使用关键词优化中间件
optimized_response = keyword_optimizer.optimize_keywords(
@@ -154,31 +194,35 @@ class DeepSearchAgent:
try:
if tool_name == "search_topic_globally":
limit_per_table = kwargs.get("limit_per_table", 100)
# 使用配置文件中的默认值,忽略agent提供的limit_per_table参数
limit_per_table = self.config.default_search_topic_globally_limit_per_table
response = self.search_agency.search_topic_globally(topic=keyword, limit_per_table=limit_per_table)
elif tool_name == "search_topic_by_date":
start_date = kwargs.get("start_date")
end_date = kwargs.get("end_date")
limit_per_table = kwargs.get("limit_per_table", 100)
# 使用配置文件中的默认值,忽略agent提供的limit_per_table参数
limit_per_table = self.config.default_search_topic_by_date_limit_per_table
if not start_date or not end_date:
raise ValueError("search_topic_by_date工具需要start_date和end_date参数")
response = self.search_agency.search_topic_by_date(topic=keyword, start_date=start_date, end_date=end_date, limit_per_table=limit_per_table)
elif tool_name == "get_comments_for_topic":
limit = kwargs.get("limit", 500) // len(optimized_response.optimized_keywords)
# 使用配置文件中的默认值,按关键词数量分配,但保证最小值
limit = self.config.default_get_comments_for_topic_limit // len(optimized_response.optimized_keywords)
limit = max(limit, 50)
response = self.search_agency.get_comments_for_topic(topic=keyword, limit=limit)
elif tool_name == "search_topic_on_platform":
platform = kwargs.get("platform")
start_date = kwargs.get("start_date")
end_date = kwargs.get("end_date")
limit = kwargs.get("limit", 200) // len(optimized_response.optimized_keywords)
# 使用配置文件中的默认值,按关键词数量分配,但保证最小值
limit = self.config.default_search_topic_on_platform_limit // len(optimized_response.optimized_keywords)
limit = max(limit, 30)
if not platform:
raise ValueError("search_topic_on_platform工具需要platform参数")
response = self.search_agency.search_topic_on_platform(platform=platform, topic=keyword, start_date=start_date, end_date=end_date, limit=limit)
else:
print(f" 未知的搜索工具: {tool_name},使用默认全局搜索")
response = self.search_agency.search_topic_globally(topic=keyword, limit_per_table=100)
response = self.search_agency.search_topic_globally(topic=keyword, limit_per_table=self.config.default_search_topic_globally_limit_per_table)
# 收集结果
if response.results:
@@ -209,6 +253,16 @@ class DeepSearchAgent:
results_count=len(unique_results)
)
# 检查是否需要进行情感分析
enable_sentiment = kwargs.get("enable_sentiment", True)
if enable_sentiment and unique_results and len(unique_results) > 0:
print(f" 🎭 开始对搜索结果进行情感分析...")
sentiment_analysis = self._perform_sentiment_analysis(unique_results)
if sentiment_analysis:
# 将情感分析结果添加到响应的parameters中
integrated_response.parameters["sentiment_analysis"] = sentiment_analysis
print(f" ✅ 情感分析完成")
return integrated_response
def _deduplicate_results(self, results: List) -> List:
@@ -227,6 +281,99 @@ class DeepSearchAgent:
return unique_results
def _perform_sentiment_analysis(self, results: List) -> Optional[Dict[str, Any]]:
    """
    Run sentiment analysis over a list of search results.

    Args:
        results: Search result objects. Each one is read for its
            ``title_or_content``, ``platform``, ``author_nickname``,
            ``url`` and ``publish_time`` attributes.

    Returns:
        The value stored under the ``"sentiment_analysis"`` key of the
        analyzer's output, or None when the analyzer fails to initialize
        or any exception is raised along the way.
    """
    try:
        analyzer = self.sentiment_analyzer

        # Initialize the analyzer on first use; give up quietly on failure.
        if not analyzer.is_initialized:
            print(" 初始化情感分析模型...")
            model_ready = analyzer.initialize()
            if not model_ready:
                print(" ❌ 情感分析模型初始化失败")
                return None

        # Flatten every result object into the plain dict handed to the analyzer.
        payload = [
            {
                "content": item.title_or_content,
                "platform": item.platform,
                "author": item.author_nickname,
                "url": item.url,
                "publish_time": str(item.publish_time) if item.publish_time else None,
            }
            for item in results
        ]

        # The analyzer reads the text from the "content" field of each dict.
        analysis = analyzer.analyze_query_results(
            query_results=payload,
            text_field="content",
            min_confidence=0.5,
        )
        return analysis.get("sentiment_analysis")

    except Exception as e:
        # Best-effort step: report the problem and let the caller continue.
        print(f" ❌ 情感分析过程中发生错误: {str(e)}")
        return None
def analyze_sentiment_only(self, texts: Union[str, List[str]]) -> Dict[str, Any]:
    """
    Standalone sentiment-analysis tool (no database search involved).

    Args:
        texts: A single text or a list of texts to analyze.

    Returns:
        A dict that always carries ``"success"`` and ``"results"``.
        On success it also holds ``"total_analyzed"``; for list input it
        additionally holds ``"success_count"``, ``"failed_count"`` and
        ``"average_confidence"``. On failure it holds ``"error"`` and an
        empty ``"results"`` list.
    """
    print(f" → 执行独立情感分析")

    try:
        analyzer = self.sentiment_analyzer

        # Initialize the analyzer on first use; report failure in the result.
        if not analyzer.is_initialized:
            print(" 初始化情感分析模型...")
            if not analyzer.initialize():
                return {
                    "success": False,
                    "error": "情感分析模型初始化失败",
                    "results": []
                }

        # A bare string is analyzed on its own and wrapped in a one-item list.
        if isinstance(texts, str):
            single = analyzer.analyze_single_text(texts)
            return {
                "success": True,
                "total_analyzed": 1,
                "results": [vars(single)]
            }

        # Anything else is handed to the batch API as-is.
        batch = analyzer.analyze_batch(texts, show_progress=True)
        summary = {
            "success": True,
            "total_analyzed": batch.total_processed,
            "success_count": batch.success_count,
            "failed_count": batch.failed_count,
            "average_confidence": batch.average_confidence,
        }
        summary["results"] = [vars(entry) for entry in batch.results]
        return summary

    except Exception as e:
        # Errors are reported through the result dict rather than raised.
        print(f" ❌ 情感分析过程中发生错误: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "results": []
        }
def research(self, query: str, save_report: bool = True) -> str:
"""
执行深度研究
@@ -356,17 +503,23 @@ class DeepSearchAgent:
print(f" ⚠️ search_topic_on_platform工具缺少平台参数,改用全局搜索")
search_tool = "search_topic_globally"
# 处理限制参数
# 处理限制参数,使用配置文件中的默认值而不是agent提供的参数
if search_tool == "search_hot_content":
time_period = search_output.get("time_period", "week")
limit = search_output.get("limit", 100)
limit = self.config.default_search_hot_content_limit
search_kwargs["time_period"] = time_period
search_kwargs["limit"] = limit
elif search_tool in ["search_topic_globally", "search_topic_by_date"]:
limit_per_table = search_output.get("limit_per_table", 100)
if search_tool == "search_topic_globally":
limit_per_table = self.config.default_search_topic_globally_limit_per_table
else: # search_topic_by_date
limit_per_table = self.config.default_search_topic_by_date_limit_per_table
search_kwargs["limit_per_table"] = limit_per_table
elif search_tool in ["get_comments_for_topic", "search_topic_on_platform"]:
limit = search_output.get("limit", 200)
if search_tool == "get_comments_for_topic":
limit = self.config.default_get_comments_for_topic_limit
else: # search_topic_on_platform
limit = self.config.default_search_topic_on_platform_limit
search_kwargs["limit"] = limit
search_response = self.execute_search_tool(search_tool, search_query, **search_kwargs)
@@ -374,8 +527,11 @@ class DeepSearchAgent:
# 转换为兼容格式
search_results = []
if search_response and search_response.results:
# 每种搜索工具都有其特定的结果数量,这里取前100个作为上限
max_results = min(len(search_response.results), 100)
# 使用配置文件控制传递给LLM的结果数量,0表示不限制
if self.config.max_search_results_for_llm > 0:
max_results = min(len(search_response.results), self.config.max_search_results_for_llm)
else:
max_results = len(search_response.results) # 不限制,传递所有结果
for result in search_response.results[:max_results]:
search_results.append({
'title': result.title_or_content,
@@ -479,14 +635,23 @@ class DeepSearchAgent:
# 处理限制参数
if search_tool == "search_hot_content":
time_period = reflection_output.get("time_period", "week")
limit = reflection_output.get("limit", 10)
# 使用配置文件中的默认值,不允许agent控制limit参数
limit = self.config.default_search_hot_content_limit
search_kwargs["time_period"] = time_period
search_kwargs["limit"] = limit
elif search_tool in ["search_topic_globally", "search_topic_by_date"]:
limit_per_table = reflection_output.get("limit_per_table", 5)
# 使用配置文件中的默认值,不允许agent控制limit_per_table参数
if search_tool == "search_topic_globally":
limit_per_table = self.config.default_search_topic_globally_limit_per_table
else: # search_topic_by_date
limit_per_table = self.config.default_search_topic_by_date_limit_per_table
search_kwargs["limit_per_table"] = limit_per_table
elif search_tool in ["get_comments_for_topic", "search_topic_on_platform"]:
limit = reflection_output.get("limit", 20)
# 使用配置文件中的默认值,不允许agent控制limit参数
if search_tool == "get_comments_for_topic":
limit = self.config.default_get_comments_for_topic_limit
else: # search_topic_on_platform
limit = self.config.default_search_topic_on_platform_limit
search_kwargs["limit"] = limit
search_response = self.execute_search_tool(search_tool, search_query, **search_kwargs)
@@ -494,8 +659,11 @@ class DeepSearchAgent:
# 转换为兼容格式
search_results = []
if search_response and search_response.results:
# 每种搜索工具都有其特定的结果数量,这里取前100个作为上限
max_results = min(len(search_response.results), 100)
# 使用配置文件控制传递给LLM的结果数量,0表示不限制
if self.config.max_search_results_for_llm > 0:
max_results = min(len(search_response.results), self.config.max_search_results_for_llm)
else:
max_results = len(search_response.results) # 不限制,传递所有结果
for result in search_response.results[:max_results]:
search_results.append({
'title': result.title_or_content,