""" HTML生成节点将整合后的内容转换为美观的HTML报告 """ import json from datetime import datetime from typing import Dict, Any from .base_node import StateMutationNode from ..llms.base import BaseLLM from ..state.state import ReportState from ..prompts import SYSTEM_PROMPT_HTML_GENERATION # 不再需要text_processing依赖 class HTMLGenerationNode(StateMutationNode): """HTML生成处理节点""" def __init__(self, llm_client: BaseLLM): """ 初始化HTML生成节点 Args: llm_client: LLM客户端 """ super().__init__(llm_client, "HTMLGenerationNode") def run(self, input_data: Dict[str, Any], **kwargs) -> str: """ 执行HTML生成 Args: input_data: 包含报告数据的字典 - query: 原始查询 - query_engine_report: QueryEngine报告内容 - media_engine_report: MediaEngine报告内容 - insight_engine_report: InsightEngine报告内容 - forum_logs: 论坛日志内容 - selected_template: 选择的模板内容 Returns: 生成的HTML内容 """ self.log_info("开始生成HTML报告...") try: # 准备LLM输入数据 llm_input = { "query": input_data.get('query', ''), "query_engine_report": input_data.get('query_engine_report', ''), "media_engine_report": input_data.get('media_engine_report', ''), "insight_engine_report": input_data.get('insight_engine_report', ''), "forum_logs": input_data.get('forum_logs', ''), "selected_template": input_data.get('selected_template', '') } # 转换为JSON格式 message = json.dumps(llm_input, ensure_ascii=False, indent=2) # 调用LLM生成HTML response = self.llm_client.invoke(SYSTEM_PROMPT_HTML_GENERATION, message) # 处理响应 processed_response = self.process_output(response) self.log_info("HTML报告生成完成") return processed_response except Exception as e: self.log_error(f"HTML生成失败: {str(e)}") # 返回备用HTML return self._generate_fallback_html(input_data) def mutate_state(self, input_data: Dict[str, Any], state: ReportState, **kwargs) -> ReportState: """ 修改报告状态，添加生成的HTML内容 Args: input_data: 输入数据 state: 当前报告状态 **kwargs: 额外参数 Returns: 更新后的报告状态 """ # 生成HTML html_content = self.run(input_data, **kwargs) # 更新状态 state.html_content = html_content state.mark_completed() return state def process_output(self, output: str) -> str: """ 处理LLM输出，提取HTML内容 Args: output: LLM原始输出 Returns: 清理后的HTML内容 """ try: self.log_info(f"处理LLM原始输出，长度: {len(output)} 字符") html_content = "" # 尝试解析JSON响应 try: result = json.loads(output) html_content = result.get('html_content', '') self.log_info("成功从JSON中提取html_content") except json.JSONDecodeError: self.log_info("不是JSON格式，直接使用原始输出") html_content = output # 如果还是没有内容，尝试其他提取方法 if not html_content.strip(): # 查找HTML标记 if '' in output: start_idx = output.find('') html_content = output[start_idx:] elif ' 智能舆情分析报告 {html_content} """ self.log_info(f"HTML处理完成，最终长度: {len(html_content)} 字符") return html_content.strip() except Exception as e: self.log_error(f"处理HTML输出失败: {str(e)}") return self._generate_error_html(str(e)) def _generate_fallback_html(self, input_data: Dict[str, Any]) -> str: """ 生成备用HTML报告（当LLM失败时使用） Args: input_data: 输入数据 Returns: 备用HTML内容 """ self.log_info("使用备用HTML生成方法") query = input_data.get('query', '智能舆情分析报告') query_report = input_data.get('query_engine_report', '') media_report = input_data.get('media_engine_report', '') insight_report = input_data.get('insight_engine_report', '') forum_logs = input_data.get('forum_logs', '') generation_time = datetime.now().strftime("%Y年%m月%d日 %H:%M:%S") html_content = f""" {query} - 智能舆情分析报告

{query}

报告生成时间: {generation_time}
数据来源: QueryEngine、MediaEngine、InsightEngine、ForumEngine
报告类型: 综合舆情分析报告

执行摘要

本报告整合了多个分析引擎的研究结果，为您提供全面的舆情分析洞察。通过对查询主题"{query}"的深度分析，我们从多个维度展现了当前的舆情态势。

{f'

QueryEngine分析结果

{query_report}

' if query_report else ''} {f'

MediaEngine分析结果

{media_report}

' if media_report else ''} {f'

InsightEngine分析结果

{insight_report}

' if insight_report else ''} {f'

论坛监控数据

{forum_logs}

' if forum_logs else ''}

综合结论

基于多个分析引擎的综合研究，我们对"{query}"主题进行了全面分析。各引擎从不同角度提供了深入洞察，为决策提供了重要参考。

""" return html_content def _generate_error_html(self, error_message: str) -> str: """ 生成错误HTML页面 Args: error_message: 错误信息 Returns: 错误HTML内容 """ return f""" 报告生成失败

报告生成失败

错误信息: {error_message}

请检查输入数据或稍后重试。

"""