341 lines
11 KiB
Python
341 lines
11 KiB
Python
"""
|
|
HTML生成节点
|
|
将整合后的内容转换为美观的HTML报告
|
|
"""
|
|
|
|
import json
|
|
from datetime import datetime
|
|
from typing import Dict, Any
|
|
|
|
from .base_node import StateMutationNode
|
|
from ..llms.base import BaseLLM
|
|
from ..state.state import ReportState
|
|
from ..prompts import SYSTEM_PROMPT_HTML_GENERATION
|
|
# 不再需要text_processing依赖
|
|
|
|
|
|
class HTMLGenerationNode(StateMutationNode):
    """Node that asks the LLM for an HTML report and cleans up its reply.

    ``run`` sends the collected engine reports to the LLM,
    ``process_output`` turns the raw reply into a standalone HTML document,
    and two private renderers build a static fallback report or an error
    page when the LLM path fails.
    """

    # Keys copied from ``input_data`` into the LLM request, in request order.
    _LLM_INPUT_KEYS = (
        'query',
        'query_engine_report',
        'media_engine_report',
        'insight_engine_report',
        'forum_logs',
        'selected_template',
    )

    # (heading, input_data key) for each optional section of the fallback report.
    _FALLBACK_SECTIONS = (
        ('QueryEngine分析结果', 'query_engine_report'),
        ('MediaEngine分析结果', 'media_engine_report'),
        ('InsightEngine分析结果', 'insight_engine_report'),
        ('论坛监控数据', 'forum_logs'),
    )

    # Literal backslash sequences turned back into real characters, applied
    # in this order by ``process_output``.
    _ESCAPE_REPLACEMENTS = (
        ('\\n', '\n'),
        ('\\t', '\t'),
        ('\\r', '\r'),
        ('\\"', '"'),
        ("\\'", "'"),
    )

    # Stylesheet of the fallback report (kept outside the f-string so the
    # CSS braces need no doubling).
    _FALLBACK_CSS = """body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
line-height: 1.6;
color: #333;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background: #f5f5f5;
}
.container {
background: white;
padding: 40px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
h1 {
color: #2c3e50;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
}
h2 {
color: #34495e;
margin-top: 30px;
margin-bottom: 15px;
}
.section {
margin-bottom: 30px;
padding: 20px;
border-left: 4px solid #3498db;
background: #f8f9fa;
}
.meta {
background: #e9ecef;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}
.footer {
margin-top: 40px;
padding-top: 20px;
border-top: 1px solid #eee;
text-align: center;
color: #666;
}
pre {
background: #f4f4f4;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
white-space: pre-wrap;
}"""

    # Stylesheet of the error page.
    _ERROR_CSS = """body {
font-family: Arial, sans-serif;
text-align: center;
padding: 50px;
background: #f8f9fa;
}
.error-container {
background: white;
padding: 40px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
max-width: 600px;
margin: 0 auto;
}
.error-title {
color: #e74c3c;
font-size: 24px;
margin-bottom: 20px;
}
.error-message {
color: #666;
margin-bottom: 20px;
}"""

    def __init__(self, llm_client: BaseLLM):
        """Create the node.

        Args:
            llm_client: LLM client forwarded to the base-class constructor
                together with the node name ``"HTMLGenerationNode"``.
        """
        super().__init__(llm_client, "HTMLGenerationNode")

    def run(self, input_data: Dict[str, Any], **kwargs) -> str:
        """Generate the HTML report for ``input_data``.

        Args:
            input_data: Report data. The keys read are ``query``,
                ``query_engine_report``, ``media_engine_report``,
                ``insight_engine_report``, ``forum_logs`` and
                ``selected_template``; a missing key is sent as ``''``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The processed HTML returned by ``process_output``. If building
            the request, invoking the LLM or processing the reply raises,
            the static fallback report is returned instead.
        """
        self.log_info("开始生成HTML报告...")

        try:
            llm_input = {key: input_data.get(key, '') for key in self._LLM_INPUT_KEYS}

            # The whole request is sent to the LLM as one pretty-printed JSON
            # document; ensure_ascii=False keeps non-ASCII text readable.
            message = json.dumps(llm_input, ensure_ascii=False, indent=2)

            response = self.llm_client.invoke(SYSTEM_PROMPT_HTML_GENERATION, message)
            processed_response = self.process_output(response)

            self.log_info("HTML报告生成完成")
            return processed_response

        except Exception as e:
            # Deliberate best effort: any failure degrades to the static
            # fallback report instead of propagating to the caller.
            self.log_error(f"HTML生成失败: {str(e)}")
            return self._generate_fallback_html(input_data)

    def mutate_state(self, input_data: Dict[str, Any], state: ReportState, **kwargs) -> ReportState:
        """Generate the HTML and store it on ``state``.

        Args:
            input_data: Input data, forwarded to ``run``.
            state: Current report state; updated in place.
            **kwargs: Extra arguments, forwarded to ``run``.

        Returns:
            The same ``state`` object, with ``html_content`` set and
            ``mark_completed()`` called.
        """
        state.html_content = self.run(input_data, **kwargs)
        state.mark_completed()
        return state

    def process_output(self, output: str) -> str:
        """Turn the raw LLM reply into a standalone HTML document.

        Steps: pull ``html_content`` out of a JSON envelope when there is
        one, remove a wrapping Markdown code fence, drop any text before the
        document start and after the last ``</html>``, convert literal
        backslash escapes, and wrap a bare fragment in a minimal HTML
        skeleton.

        Args:
            output: Raw LLM output.

        Returns:
            The cleaned HTML. If anything raises during processing
            (including an empty result), the error page from
            ``_generate_error_html`` is returned instead.
        """
        try:
            self.log_info(f"处理LLM原始输出,长度: {len(output)} 字符")

            html_content = self._extract_candidate(output)
            html_content = self._strip_code_fence(html_content)
            html_content = self._trim_to_document(html_content)

            # NOTE(review): this unescaping is applied unconditionally, as
            # before. It repairs double-escaped replies but would also
            # rewrite intentional sequences such as "\n" inside inline
            # JavaScript -- confirm which case the prompt actually produces.
            for escaped, literal in self._ESCAPE_REPLACEMENTS:
                html_content = html_content.replace(escaped, literal)

            html_content = html_content.strip()
            if not html_content:
                raise ValueError("生成的HTML内容为空")

            # Case-insensitive so that "<!doctype html>" is recognised as a
            # full document and not wrapped a second time.
            if not html_content[:9].lower().startswith(('<!doctype', '<html')):
                self.log_info("HTML缺少基本结构,添加包装")
                html_content = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>智能舆情分析报告</title>
</head>
<body>
{html_content}
</body>
</html>"""

            self.log_info(f"HTML处理完成,最终长度: {len(html_content)} 字符")
            return html_content.strip()

        except Exception as e:
            self.log_error(f"处理HTML输出失败: {str(e)}")
            return self._generate_error_html(str(e))

    def _extract_candidate(self, output: str) -> str:
        """Return ``html_content`` from a JSON reply, else the raw reply."""
        try:
            # strict=False also accepts raw control characters (e.g. real
            # newlines) inside JSON strings, which strict parsing rejects.
            parsed = json.loads(output, strict=False)
        except json.JSONDecodeError:
            self.log_info("不是JSON格式,直接使用原始输出")
            return output

        # A JSON object carries the HTML under "html_content"; a bare JSON
        # string is taken to be the HTML itself.
        candidate = parsed.get('html_content') if isinstance(parsed, dict) else parsed
        if isinstance(candidate, str) and candidate.strip():
            self.log_info("成功从JSON中提取html_content")
            return candidate

        # Valid JSON without usable HTML: let the later steps search the
        # raw reply instead of failing on a non-string value.
        return output

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove one wrapping Markdown code fence, keeping inner backticks."""
        stripped = text.strip()
        if not stripped.startswith('```'):
            return stripped

        body = stripped[3:]
        first_line, _, remainder = body.partition('\n')
        tag = first_line.strip()
        if not tag or tag.isalnum():
            # The first line held only the fence and an optional language tag.
            body = remainder
        elif body[:4].lower() == 'html':
            # "```html<html>...": language tag with no newline after it.
            body = body[4:]

        body = body.rstrip()
        if body.endswith('```'):
            body = body[:-3]
        return body.strip()

    @staticmethod
    def _trim_to_document(text: str) -> str:
        """Cut ``text`` down to the span from the document start to ``</html>``.

        Text without a ``<!DOCTYPE html`` or ``<html`` marker is returned
        unchanged, so that bare fragments can still be wrapped by the caller.
        """
        import re  # function-scope import: keeps this block self-contained

        opening = (
            re.search(r'<!doctype\s+html', text, re.IGNORECASE)
            or re.search(r'<html\b', text, re.IGNORECASE)
        )
        if opening is None:
            return text

        start = opening.start()
        stop = len(text)
        closing_tag = re.compile(r'</html\s*>', re.IGNORECASE)
        # Keep the last closing tag: an earlier "</html>" may sit inside a
        # script or string within the document.
        for closing in closing_tag.finditer(text, start):
            stop = closing.end()
        return text[start:stop]

    @staticmethod
    def _escape(value: Any) -> str:
        """HTML-escape ``value`` (coerced to ``str``) for safe interpolation."""
        from html import escape  # function-scope import, see _trim_to_document

        return escape(str(value), quote=True)

    def _render_section(self, title: str, body: Any) -> str:
        """Render one fallback-report section, or ``''`` when ``body`` is falsy."""
        if not body:
            return ''
        return f'<h2>{title}</h2><div class="section"><pre>{self._escape(body)}</pre></div>'

    def _generate_fallback_html(self, input_data: Dict[str, Any]) -> str:
        """Build the static fallback report used when the LLM path fails.

        The query and every report body are HTML-escaped before being
        placed in the page, so markup inside them is shown as text.

        Args:
            input_data: Input data; reads ``query`` and the four report /
                log keys listed in ``_FALLBACK_SECTIONS``.

        Returns:
            The fallback HTML document.
        """
        self.log_info("使用备用HTML生成方法")

        # ``or`` rather than a .get default: an empty query also falls back
        # to the generic title instead of rendering an empty heading.
        query = self._escape(input_data.get('query') or '智能舆情分析报告')
        generation_time = datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")

        # Sections with no content render as '' but keep their line, so the
        # surrounding layout does not depend on which reports are present.
        sections = "\n\n".join(
            self._render_section(title, input_data.get(key, ''))
            for title, key in self._FALLBACK_SECTIONS
        )

        return f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{query} - 智能舆情分析报告</title>
<style>
{self._FALLBACK_CSS}
</style>
</head>
<body>
<div class="container">
<h1>{query}</h1>

<div class="meta">
<strong>报告生成时间:</strong> {generation_time}<br>
<strong>数据来源:</strong> QueryEngine、MediaEngine、InsightEngine、ForumEngine<br>
<strong>报告类型:</strong> 综合舆情分析报告
</div>

<h2>执行摘要</h2>
<div class="section">
本报告整合了多个分析引擎的研究结果,为您提供全面的舆情分析洞察。
通过对查询主题"{query}"的深度分析,我们从多个维度展现了当前的舆情态势。
</div>

{sections}

<h2>综合结论</h2>
<div class="section">
基于多个分析引擎的综合研究,我们对"{query}"主题进行了全面分析。
各引擎从不同角度提供了深入洞察,为决策提供了重要参考。
</div>

<div class="footer">
<p>本报告由智能舆情分析平台自动生成</p>
<p>ReportEngine v1.0 | 生成时间: {generation_time}</p>
</div>
</div>
</body>
</html>"""

    def _generate_error_html(self, error_message: str) -> str:
        """Build the error page shown when the LLM output cannot be processed.

        Args:
            error_message: Error text; HTML-escaped before it is embedded,
                since exception messages may contain ``<`` or ``&``.

        Returns:
            The error HTML document.
        """
        safe_message = self._escape(error_message)
        return f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>报告生成失败</title>
<style>
{self._ERROR_CSS}
</style>
</head>
<body>
<div class="error-container">
<div class="error-title">报告生成失败</div>
<div class="error-message">错误信息: {safe_message}</div>
<p>请检查输入数据或稍后重试。</p>
</div>
</body>
</html>"""
|