diff --git a/InsightEngine/agent.py b/InsightEngine/agent.py index fbca388..b1bdd68 100644 --- a/InsightEngine/agent.py +++ b/InsightEngine/agent.py @@ -292,13 +292,14 @@ class DeepSearchAgent: 情感分析结果字典,如果失败则返回None """ try: - # 初始化情感分析器(如果尚未初始化) - if not self.sentiment_analyzer.is_initialized: + # 初始化情感分析器(如果尚未初始化且未被禁用) + if not self.sentiment_analyzer.is_initialized and not self.sentiment_analyzer.is_disabled: print(" 初始化情感分析模型...") if not self.sentiment_analyzer.initialize(): - print(" ❌ 情感分析模型初始化失败") - return None - + print(" 情感分析模型初始化失败,将直接透传原始文本") + elif self.sentiment_analyzer.is_disabled: + print(" 情感分析功能已禁用,直接透传原始文本") + # 将查询结果转换为字典格式 results_dict = [] for result in results: @@ -337,34 +338,46 @@ class DeepSearchAgent: print(f" → 执行独立情感分析") try: - # 初始化情感分析器(如果尚未初始化) - if not self.sentiment_analyzer.is_initialized: + # 初始化情感分析器(如果尚未初始化且未被禁用) + if not self.sentiment_analyzer.is_initialized and not self.sentiment_analyzer.is_disabled: print(" 初始化情感分析模型...") if not self.sentiment_analyzer.initialize(): - return { - "success": False, - "error": "情感分析模型初始化失败", - "results": [] - } + print(" 情感分析模型初始化失败,将直接透传原始文本") + elif self.sentiment_analyzer.is_disabled: + print(" 情感分析功能已禁用,直接透传原始文本") # 执行分析 if isinstance(texts, str): result = self.sentiment_analyzer.analyze_single_text(texts) - return { - "success": True, - "total_analyzed": 1, - "results": [result.__dict__] + result_dict = result.__dict__ + response = { + "success": result.success and result.analysis_performed, + "total_analyzed": 1 if result.analysis_performed and result.success else 0, + "results": [result_dict] } + if not result.analysis_performed: + response["success"] = False + response["warning"] = result.error_message or "情感分析功能不可用,已直接返回原始文本" + return response else: - batch_result = self.sentiment_analyzer.analyze_batch(texts, show_progress=True) - return { - "success": True, - "total_analyzed": batch_result.total_processed, + texts_list = list(texts) + batch_result = 
self.sentiment_analyzer.analyze_batch(texts_list, show_progress=True) + response = { + "success": batch_result.analysis_performed and batch_result.success_count > 0, + "total_analyzed": batch_result.total_processed if batch_result.analysis_performed else 0, "success_count": batch_result.success_count, "failed_count": batch_result.failed_count, - "average_confidence": batch_result.average_confidence, + "average_confidence": batch_result.average_confidence if batch_result.analysis_performed else 0.0, "results": [result.__dict__ for result in batch_result.results] } + if not batch_result.analysis_performed: + warning = next( + (r.error_message for r in batch_result.results if r.error_message), + "情感分析功能不可用,已直接返回原始文本" + ) + response["success"] = False + response["warning"] = warning + return response except Exception as e: print(f" ❌ 情感分析过程中发生错误: {str(e)}") @@ -486,11 +499,11 @@ class DeepSearchAgent: search_kwargs["end_date"] = end_date print(f" - 时间范围: {start_date} 到 {end_date}") else: - print(f" ⚠️ 日期格式错误(应为YYYY-MM-DD),改用全局搜索") + print(f" 日期格式错误(应为YYYY-MM-DD),改用全局搜索") print(f" 提供的日期: start_date={start_date}, end_date={end_date}") search_tool = "search_topic_globally" elif search_tool == "search_topic_by_date": - print(f" ⚠️ search_topic_by_date工具缺少时间参数,改用全局搜索") + print(f" search_topic_by_date工具缺少时间参数,改用全局搜索") search_tool = "search_topic_globally" # 处理需要平台参数的工具 @@ -500,7 +513,7 @@ class DeepSearchAgent: search_kwargs["platform"] = platform print(f" - 指定平台: {platform}") else: - print(f" ⚠️ search_topic_on_platform工具缺少平台参数,改用全局搜索") + print(f" search_topic_on_platform工具缺少平台参数,改用全局搜索") search_tool = "search_topic_globally" # 处理限制参数,使用配置文件中的默认值而不是agent提供的参数 @@ -615,11 +628,11 @@ class DeepSearchAgent: search_kwargs["end_date"] = end_date print(f" 时间范围: {start_date} 到 {end_date}") else: - print(f" ⚠️ 日期格式错误(应为YYYY-MM-DD),改用全局搜索") + print(f" 日期格式错误(应为YYYY-MM-DD),改用全局搜索") print(f" 提供的日期: start_date={start_date}, end_date={end_date}") search_tool = "search_topic_globally" 
elif search_tool == "search_topic_by_date": - print(f" ⚠️ search_topic_by_date工具缺少时间参数,改用全局搜索") + print(f" search_topic_by_date工具缺少时间参数,改用全局搜索") search_tool = "search_topic_globally" # 处理需要平台参数的工具 @@ -629,7 +642,7 @@ class DeepSearchAgent: search_kwargs["platform"] = platform print(f" 指定平台: {platform}") else: - print(f" ⚠️ search_topic_on_platform工具缺少平台参数,改用全局搜索") + print(f" search_topic_on_platform工具缺少平台参数,改用全局搜索") search_tool = "search_topic_globally" # 处理限制参数 diff --git a/InsightEngine/tools/keyword_optimizer.py b/InsightEngine/tools/keyword_optimizer.py index 21adf70..a862cd7 100644 --- a/InsightEngine/tools/keyword_optimizer.py +++ b/InsightEngine/tools/keyword_optimizer.py @@ -147,6 +147,8 @@ class KeywordOptimizer: 5. **数量控制**:最少提供10个关键词,最多提供20个关键词 6. **避免重复**:不要脱离初始查询的主题 +**重要提醒**:每个关键词都必须是一个不可分割的独立词条,严禁在词条内部包含空格。例如,应使用 "雷军班争议" 而不是错误的 "雷军班 争议"。 + **输出格式**: 请以JSON格式返回结果: { diff --git a/InsightEngine/tools/sentiment_analyzer.py b/InsightEngine/tools/sentiment_analyzer.py index 1ff3a8f..c20c49f 100644 --- a/InsightEngine/tools/sentiment_analyzer.py +++ b/InsightEngine/tools/sentiment_analyzer.py @@ -16,7 +16,6 @@ project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(_ weibo_sentiment_path = os.path.join(project_root, "SentimentAnalysisModel", "WeiboMultilingualSentiment") sys.path.append(weibo_sentiment_path) - @dataclass class SentimentResult: """情感分析结果数据类""" @@ -26,6 +25,7 @@ class SentimentResult: probability_distribution: Dict[str, float] success: bool = True error_message: Optional[str] = None + analysis_performed: bool = True @dataclass @@ -36,6 +36,7 @@ class BatchSentimentResult: success_count: int failed_count: int average_confidence: float + analysis_performed: bool = True class WeiboMultilingualSentimentAnalyzer: @@ -50,6 +51,7 @@ class WeiboMultilingualSentimentAnalyzer: self.tokenizer = None self.device = None self.is_initialized = False + self.is_disabled = False # 情感标签映射(5级分类) self.sentiment_map = { @@ -69,6 +71,10 @@ 
class WeiboMultilingualSentimentAnalyzer: Returns: 是否初始化成功 """ + if self.is_disabled: + print("情感分析功能已禁用,跳过模型加载") + return False + if self.is_initialized: print("模型已经初始化,无需重复加载") return True @@ -102,6 +108,7 @@ class WeiboMultilingualSentimentAnalyzer: self.model.to(self.device) self.model.eval() self.is_initialized = True + self.is_disabled = False print(f"模型加载成功! 使用设备: {self.device}") print("支持语言: 中文、英文、西班牙文、阿拉伯文、日文、韩文等22种语言") @@ -113,6 +120,11 @@ class WeiboMultilingualSentimentAnalyzer: print(f"模型加载失败: {e}") print("请检查网络连接或模型文件") self.is_initialized = False + self.is_disabled = True + self.model = None + self.tokenizer = None + self.device = None + print("情感分析功能已禁用,将直接返回原始文本内容") return False def _preprocess_text(self, text: str) -> str: @@ -144,6 +156,17 @@ class WeiboMultilingualSentimentAnalyzer: Returns: SentimentResult对象 """ + if self.is_disabled: + return SentimentResult( + text=text, + sentiment_label="情感分析未执行", + confidence=0.0, + probability_distribution={}, + success=False, + error_message="情感分析功能已禁用", + analysis_performed=False + ) + if not self.is_initialized: return SentimentResult( text=text, @@ -151,13 +174,14 @@ class WeiboMultilingualSentimentAnalyzer: confidence=0.0, probability_distribution={}, success=False, - error_message="模型未初始化,请先调用 initialize() 方法" + error_message="模型未初始化,请先调用initialize() 方法", + analysis_performed=False ) - + try: # 预处理文本 processed_text = self._preprocess_text(text) - + if not processed_text: return SentimentResult( text=text, @@ -165,9 +189,10 @@ class WeiboMultilingualSentimentAnalyzer: confidence=0.0, probability_distribution={}, success=False, - error_message="输入文本为空或无效" + error_message="输入文本为空或无效内容", + analysis_performed=False ) - + # 分词编码 inputs = self.tokenizer( processed_text, @@ -176,26 +201,26 @@ class WeiboMultilingualSentimentAnalyzer: truncation=True, return_tensors='pt' ) - + # 转移到设备 inputs = {k: v.to(self.device) for k, v in inputs.items()} - + # 预测 with torch.no_grad(): outputs = self.model(**inputs) 
logits = outputs.logits probabilities = torch.softmax(logits, dim=1) prediction = torch.argmax(probabilities, dim=1).item() - + # 构建结果 confidence = probabilities[0][prediction].item() label = self.sentiment_map[prediction] - + # 构建概率分布字典 prob_dist = {} - for i, (label_name, prob) in enumerate(zip(self.sentiment_map.values(), probabilities[0])): + for label_name, prob in zip(self.sentiment_map.values(), probabilities[0]): prob_dist[label_name] = prob.item() - + return SentimentResult( text=text, sentiment_label=label, @@ -203,7 +228,7 @@ class WeiboMultilingualSentimentAnalyzer: probability_distribution=prob_dist, success=True ) - + except Exception as e: return SentimentResult( text=text, @@ -211,9 +236,10 @@ class WeiboMultilingualSentimentAnalyzer: confidence=0.0, probability_distribution={}, success=False, - error_message=f"预测时发生错误: {str(e)}" + error_message=f"预测时发生错误: {str(e)}", + analysis_performed=False ) - + def analyze_batch(self, texts: List[str], show_progress: bool = True) -> BatchSentimentResult: """ 批量情感分析 @@ -231,7 +257,30 @@ class WeiboMultilingualSentimentAnalyzer: total_processed=0, success_count=0, failed_count=0, - average_confidence=0.0 + average_confidence=0.0, + analysis_performed=not self.is_disabled and self.is_initialized + ) + + if self.is_disabled or not self.is_initialized: + passthrough_results = [ + SentimentResult( + text=text, + sentiment_label="情感分析未执行", + confidence=0.0, + probability_distribution={}, + success=False, + error_message="情感分析功能不可用", + analysis_performed=False + ) + for text in texts + ] + return BatchSentimentResult( + results=passthrough_results, + total_processed=len(texts), + success_count=0, + failed_count=len(texts), + average_confidence=0.0, + analysis_performed=False ) results = [] @@ -257,9 +306,46 @@ class WeiboMultilingualSentimentAnalyzer: total_processed=len(texts), success_count=success_count, failed_count=failed_count, - average_confidence=average_confidence + average_confidence=average_confidence, + 
analysis_performed=True ) + def _build_passthrough_analysis( + self, + original_data: List[Dict[str, Any]], + reason: str, + texts: Optional[List[str]] = None, + results: Optional[List[SentimentResult]] = None + ) -> Dict[str, Any]: + """ + 构建在情感分析不可用时的透传结果 + """ + total_items = len(texts) if texts is not None else len(original_data) + response: Dict[str, Any] = { + "sentiment_analysis": { + "available": False, + "reason": reason, + "total_analyzed": 0, + "success_rate": f"0/{total_items}", + "average_confidence": 0.0, + "sentiment_distribution": {}, + "high_confidence_results": [], + "summary": f"情感分析未执行:{reason}", + "original_texts": original_data + } + } + + if texts is not None: + response["sentiment_analysis"]["passthrough_texts"] = texts + + if results is not None: + response["sentiment_analysis"]["results"] = [ + result.__dict__ if isinstance(result, SentimentResult) else result + for result in results + ] + + return response + def analyze_query_results(self, query_results: List[Dict[str, Any]], text_field: str = "content", min_confidence: float = 0.5) -> Dict[str, Any]: @@ -311,10 +397,30 @@ class WeiboMultilingualSentimentAnalyzer: } } + if self.is_disabled: + return self._build_passthrough_analysis( + original_data=original_data, + reason="情感分析模型不可用", + texts=texts_to_analyze + ) + # 执行批量情感分析 print(f"正在对{len(texts_to_analyze)}条内容进行情感分析...") batch_result = self.analyze_batch(texts_to_analyze, show_progress=True) + if not batch_result.analysis_performed: + reason = "情感分析功能不可用" + if batch_result.results: + candidate_error = next((r.error_message for r in batch_result.results if r.error_message), None) + if candidate_error: + reason = candidate_error + return self._build_passthrough_analysis( + original_data=original_data, + reason=reason, + texts=texts_to_analyze, + results=batch_result.results + ) + # 统计情感分布 sentiment_distribution = {} high_confidence_results = [] @@ -392,31 +498,18 @@ def analyze_sentiment(text_or_texts: Union[str, List[str]], Returns: 
SentimentResult或BatchSentimentResult """ - if initialize_if_needed and not multilingual_sentiment_analyzer.is_initialized: - if not multilingual_sentiment_analyzer.initialize(): - # 如果初始化失败,返回失败结果 - if isinstance(text_or_texts, str): - return SentimentResult( - text=text_or_texts, - sentiment_label="初始化失败", - confidence=0.0, - probability_distribution={}, - success=False, - error_message="模型初始化失败" - ) - else: - return BatchSentimentResult( - results=[], - total_processed=0, - success_count=0, - failed_count=len(text_or_texts), - average_confidence=0.0 - ) + if ( + initialize_if_needed + and not multilingual_sentiment_analyzer.is_initialized + and not multilingual_sentiment_analyzer.is_disabled + ): + multilingual_sentiment_analyzer.initialize() if isinstance(text_or_texts, str): return multilingual_sentiment_analyzer.analyze_single_text(text_or_texts) else: - return multilingual_sentiment_analyzer.analyze_batch(text_or_texts) + texts_list = list(text_or_texts) + return multilingual_sentiment_analyzer.analyze_batch(texts_list) if __name__ == "__main__":