Blocked HTML
This commit is contained in:
+357
-97
@@ -5,15 +5,28 @@ Report Agent主类
|
||||
|
||||
import json
|
||||
import os
|
||||
from loguru import logger
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from .core import (
|
||||
ChapterStorage,
|
||||
DocumentComposer,
|
||||
TemplateSection,
|
||||
parse_template_sections,
|
||||
)
|
||||
from .ir import IRValidator
|
||||
from .llms import LLMClient
|
||||
from .nodes import (
|
||||
TemplateSelectionNode,
|
||||
HTMLGenerationNode
|
||||
ChapterGenerationNode,
|
||||
DocumentLayoutNode,
|
||||
WordBudgetNode,
|
||||
)
|
||||
from .renderers import HTMLRenderer
|
||||
from .state import ReportState
|
||||
from .utils.config import settings, Settings
|
||||
|
||||
@@ -128,6 +141,12 @@ class ReportAgent:
|
||||
# 初始化LLM客户端
|
||||
self.llm_client = self._initialize_llm()
|
||||
|
||||
# 初始化章级存储/校验/渲染组件
|
||||
self.chapter_storage = ChapterStorage(self.config.CHAPTER_OUTPUT_DIR)
|
||||
self.document_composer = DocumentComposer()
|
||||
self.validator = IRValidator()
|
||||
self.renderer = HTMLRenderer()
|
||||
|
||||
# 初始化节点
|
||||
self._initialize_nodes()
|
||||
|
||||
@@ -139,6 +158,7 @@ class ReportAgent:
|
||||
|
||||
# 确保输出目录存在
|
||||
os.makedirs(self.config.OUTPUT_DIR, exist_ok=True)
|
||||
os.makedirs(self.config.DOCUMENT_IR_OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
logger.info("Report Agent已初始化")
|
||||
logger.info(f"使用LLM: {self.llm_client.get_model_info()}")
|
||||
@@ -175,61 +195,144 @@ class ReportAgent:
|
||||
self.llm_client,
|
||||
self.config.TEMPLATE_DIR
|
||||
)
|
||||
self.html_generation_node = HTMLGenerationNode(self.llm_client)
|
||||
self.document_layout_node = DocumentLayoutNode(self.llm_client)
|
||||
self.word_budget_node = WordBudgetNode(self.llm_client)
|
||||
self.chapter_generation_node = ChapterGenerationNode(
|
||||
self.llm_client,
|
||||
self.validator,
|
||||
self.chapter_storage
|
||||
)
|
||||
|
||||
def generate_report(self, query: str, reports: List[Any], forum_logs: str = "",
|
||||
custom_template: str = "", save_report: bool = True) -> str:
|
||||
def generate_report(self, query: str, reports: List[Any], forum_logs: str = "",
|
||||
custom_template: str = "", save_report: bool = True) -> str:
|
||||
"""
|
||||
生成综合报告
|
||||
生成综合报告(章节JSON → IR → HTML)
|
||||
|
||||
Args:
|
||||
query: 原始查询
|
||||
reports: 三个子agent的报告列表(按顺序:QueryEngine, MediaEngine, InsightEngine)
|
||||
forum_logs: 论坛日志内容
|
||||
custom_template: 用户自定义模板(可选)
|
||||
save_report: 是否保存报告到文件
|
||||
|
||||
Returns:
|
||||
dict: 包含HTML内容与保存文件信息
|
||||
dict: HTML内容以及保存的文件路径信息
|
||||
"""
|
||||
start_time = datetime.now()
|
||||
|
||||
# 为新的查询重置状态,确保文件命名信息完整
|
||||
self.state = ReportState(query=query)
|
||||
self.state.metadata.query = query
|
||||
report_id = f"report-{uuid4().hex[:8]}"
|
||||
self.state.task_id = report_id
|
||||
self.state.query = query
|
||||
self.state.metadata.query = query
|
||||
self.state.mark_processing()
|
||||
|
||||
logger.info(f"开始生成报告: {query}")
|
||||
logger.info(f"输入数据 - 报告数量: {len(reports)}, 论坛日志长度: {len(forum_logs)}")
|
||||
|
||||
|
||||
normalized_reports = self._normalize_reports(reports)
|
||||
logger.info(f"开始生成报告 {report_id}: {query}")
|
||||
logger.info(f"输入数据 - 报告数量: {len(reports)}, 论坛日志长度: {len(str(forum_logs))}")
|
||||
|
||||
try:
|
||||
# Step 1: 模板选择
|
||||
template_result = self._select_template(query, reports, forum_logs, custom_template)
|
||||
|
||||
# Step 2: 直接生成HTML报告
|
||||
html_report = self._generate_html_report(query, reports, forum_logs, template_result)
|
||||
|
||||
# Step 3: 保存报告
|
||||
self.state.metadata.template_used = template_result.get('template_name', '')
|
||||
sections = self._slice_template(template_result.get('template_content', ''))
|
||||
if not sections:
|
||||
raise ValueError("模板无法解析出章节,请检查模板内容。")
|
||||
|
||||
template_text = template_result.get('template_content', '')
|
||||
template_overview = self._build_template_overview(template_text, sections)
|
||||
# 基于模板骨架+三引擎内容设计全局标题、目录与视觉主题
|
||||
layout_design = self.document_layout_node.run(
|
||||
sections,
|
||||
template_text,
|
||||
normalized_reports,
|
||||
forum_logs,
|
||||
query,
|
||||
template_overview,
|
||||
)
|
||||
# 使用刚生成的设计稿对全书进行篇幅规划,约束各章字数与重点
|
||||
word_plan = self.word_budget_node.run(
|
||||
sections,
|
||||
layout_design,
|
||||
normalized_reports,
|
||||
forum_logs,
|
||||
query,
|
||||
template_overview,
|
||||
)
|
||||
# 记录每个章节的目标字数/强调点,后续传给章节LLM
|
||||
chapter_targets = {
|
||||
entry.get("chapterId"): entry
|
||||
for entry in word_plan.get("chapters", [])
|
||||
if entry.get("chapterId")
|
||||
}
|
||||
|
||||
generation_context = self._build_generation_context(
|
||||
query,
|
||||
normalized_reports,
|
||||
forum_logs,
|
||||
template_result,
|
||||
layout_design,
|
||||
chapter_targets,
|
||||
word_plan,
|
||||
template_overview,
|
||||
)
|
||||
# IR/渲染需要的全局元数据,带上设计稿给出的标题/主题/目录/篇幅信息
|
||||
manifest_meta = {
|
||||
"query": query,
|
||||
"title": layout_design.get("title") or (f"{query} - 舆情洞察报告" if query else template_result.get("template_name")),
|
||||
"subtitle": layout_design.get("subtitle"),
|
||||
"tagline": layout_design.get("tagline"),
|
||||
"templateName": template_result.get("template_name"),
|
||||
"selectionReason": template_result.get("selection_reason"),
|
||||
"themeTokens": generation_context.get("theme_tokens", {}),
|
||||
"toc": {
|
||||
"depth": 3,
|
||||
"autoNumbering": True,
|
||||
"title": layout_design.get("tocTitle") or "目录",
|
||||
},
|
||||
"hero": layout_design.get("hero"),
|
||||
"layoutNotes": layout_design.get("layoutNotes"),
|
||||
"wordPlan": {
|
||||
"totalWords": word_plan.get("totalWords"),
|
||||
"globalGuidelines": word_plan.get("globalGuidelines"),
|
||||
},
|
||||
"templateOverview": template_overview,
|
||||
}
|
||||
if layout_design.get("themeTokens"):
|
||||
manifest_meta["themeTokens"] = layout_design["themeTokens"]
|
||||
if layout_design.get("tocPlan"):
|
||||
manifest_meta["toc"]["customEntries"] = layout_design["tocPlan"]
|
||||
# 初始化章节输出目录并写入manifest,方便流式存盘
|
||||
run_dir = self.chapter_storage.start_session(report_id, manifest_meta)
|
||||
self._persist_planning_artifacts(run_dir, layout_design, word_plan, template_overview)
|
||||
|
||||
chapters = []
|
||||
for section in sections:
|
||||
logger.info(f"生成章节: {section.title}")
|
||||
chapter = self.chapter_generation_node.run(
|
||||
section,
|
||||
generation_context,
|
||||
run_dir
|
||||
)
|
||||
chapters.append(chapter)
|
||||
|
||||
document_ir = self.document_composer.build_document(
|
||||
report_id,
|
||||
manifest_meta,
|
||||
chapters
|
||||
)
|
||||
html_report = self.renderer.render(document_ir)
|
||||
|
||||
self.state.html_content = html_report
|
||||
self.state.mark_completed()
|
||||
|
||||
saved_files = {}
|
||||
if save_report:
|
||||
saved_files = self._save_report(html_report)
|
||||
|
||||
# 更新生成时间
|
||||
end_time = datetime.now()
|
||||
generation_time = (end_time - start_time).total_seconds()
|
||||
saved_files = self._save_report(html_report, document_ir, report_id)
|
||||
|
||||
generation_time = (datetime.now() - start_time).total_seconds()
|
||||
self.state.metadata.generation_time = generation_time
|
||||
|
||||
logger.info(f"报告生成完成,耗时: {generation_time:.2f} 秒")
|
||||
|
||||
return {
|
||||
'html_content': html_report,
|
||||
'report_id': report_id,
|
||||
**saved_files
|
||||
}
|
||||
|
||||
|
||||
except Exception as e:
|
||||
self.state.mark_failed(str(e))
|
||||
logger.exception(f"报告生成过程中发生错误: {str(e)}")
|
||||
raise e
|
||||
raise
|
||||
|
||||
def _select_template(self, query: str, reports: List[Any], forum_logs: str, custom_template: str):
|
||||
"""选择报告模板"""
|
||||
@@ -271,38 +374,153 @@ class ReportAgent:
|
||||
self.state.metadata.template_used = fallback_template['template_name']
|
||||
return fallback_template
|
||||
|
||||
def _generate_html_report(self, query: str, reports: List[Any], forum_logs: str, template_result: Dict[str, Any]) -> str:
|
||||
"""生成HTML报告"""
|
||||
logger.info("多轮生成HTML报告...")
|
||||
|
||||
# 准备报告内容,确保有3个报告
|
||||
query_report = reports[0] if len(reports) > 0 else ""
|
||||
media_report = reports[1] if len(reports) > 1 else ""
|
||||
insight_report = reports[2] if len(reports) > 2 else ""
|
||||
|
||||
# 转换为字符串格式
|
||||
query_report = str(query_report) if query_report else ""
|
||||
media_report = str(media_report) if media_report else ""
|
||||
insight_report = str(insight_report) if insight_report else ""
|
||||
|
||||
html_input = {
|
||||
'query': query,
|
||||
'query_engine_report': query_report,
|
||||
'media_engine_report': media_report,
|
||||
'insight_engine_report': insight_report,
|
||||
'forum_logs': forum_logs,
|
||||
'selected_template': template_result.get('template_content', '')
|
||||
def _slice_template(self, template_markdown: str) -> List[TemplateSection]:
    """
    Split the Markdown template into an ordered list of chapters.

    Delegates to ``parse_template_sections``. When that yields nothing, a
    warning is logged and a single default chapter skeleton is returned, so
    the result is never empty.

    Args:
        template_markdown: Raw Markdown text of the selected template.

    Returns:
        The parsed sections, or a one-element list holding the default chapter.
    """
    parsed = parse_template_sections(template_markdown)
    if not parsed:
        logger.warning("模板未解析出章节,使用默认章节骨架")
        default_title = "1.0 综合分析"
        parsed = [
            TemplateSection(
                title=default_title,
                slug="section-1-0",
                order=10,
                depth=1,
                raw_title=default_title,
                number="1.0",
                chapter_id="S1",
                outline=["1.1 摘要", "1.2 数据亮点", "1.3 风险提示"],
            )
        ]
    return parsed
|
||||
|
||||
def _build_generation_context(
    self,
    query: str,
    reports: Dict[str, str],
    forum_logs: str,
    template_result: Dict[str, Any],
    layout_design: Dict[str, Any],
    chapter_directives: Dict[str, Any],
    word_plan: Dict[str, Any],
    template_overview: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Assemble the shared context handed to every chapter-generation call.

    The document design, per-chapter directives, word plan and theme tokens
    are gathered once here so they are not rebuilt for each chapter.

    Args:
        query: The user's original query.
        reports: Normalized engine reports keyed by engine name.
        forum_logs: Forum log content; stringified via ``self._stringify``.
        template_result: Template selection result; ``template_name`` is read.
        layout_design: Design draft; its ``themeTokens`` win when present.
        chapter_directives: Per-chapter targets keyed by chapter id.
        word_plan: Whole-document length plan.
        template_overview: Template title and chapter skeleton.

    Returns:
        A dict with the keys consumed by the chapter generation node.
    """
    # Theme tokens from the design draft take priority; otherwise use defaults.
    custom_tokens = layout_design.get("themeTokens") if layout_design else None
    theme_tokens = custom_tokens if custom_tokens else self._default_theme_tokens()

    context: Dict[str, Any] = {}
    context["query"] = query
    context["template_name"] = template_result.get("template_name")
    context["reports"] = reports
    context["forum_logs"] = self._stringify(forum_logs)
    context["theme_tokens"] = theme_tokens
    context["style_directives"] = {
        "tone": "analytical",
        "audience": "executive",
        "language": "zh-CN",
    }
    context["data_bundles"] = []
    # Never exceed 6000 even if the configured content length is larger.
    context["max_tokens"] = min(self.config.MAX_CONTENT_LENGTH, 6000)
    context["layout"] = layout_design or {}
    context["template_overview"] = template_overview or {}
    context["chapter_directives"] = chapter_directives or {}
    context["word_plan"] = word_plan or {}
    return context
|
||||
|
||||
# 使用HTML生成节点生成报告
|
||||
html_content = self.html_generation_node.run(html_input)
|
||||
|
||||
# 更新状态
|
||||
self.state.html_content = html_content
|
||||
self.state.mark_completed()
|
||||
|
||||
logger.info("HTML报告生成完成")
|
||||
return html_content
|
||||
|
||||
def _normalize_reports(self, reports: List[Any]) -> Dict[str, str]:
    """
    Map the positional engine reports onto named, stringified entries.

    Positions 0, 1 and 2 become ``query_engine``, ``media_engine`` and
    ``insight_engine``. A missing position is treated as an empty string
    before being passed through ``self._stringify``; extra entries are ignored.
    """
    engine_keys = ("query_engine", "media_engine", "insight_engine")
    return {
        key: self._stringify(reports[position] if position < len(reports) else "")
        for position, key in enumerate(engine_keys)
    }
|
||||
|
||||
def _stringify(self, value: Any) -> str:
    """
    Convert an arbitrary value to text without raising.

    ``None`` becomes an empty string and strings pass through unchanged.
    Dicts and lists are rendered as indented, non-ASCII-escaped JSON, falling
    back to ``str()`` if serialization fails. Anything else uses ``str()``.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if not isinstance(value, (dict, list)):
        return str(value)
    try:
        return json.dumps(value, ensure_ascii=False, indent=2)
    except Exception:
        # Best effort: containers holding non-serializable items still produce text.
        return str(value)
|
||||
|
||||
def _default_theme_tokens(self) -> Dict[str, Any]:
    """
    Return the built-in theme tokens used when the design draft supplies none.

    A fresh dict is built on every call, grouped into ``colors``, ``fonts``,
    ``spacing`` and ``vars``.
    """
    palette = {
        "bg": "#f8f9fa",
        "text": "#212529",
        "primary": "#007bff",
        "secondary": "#6c757d",
        "card": "#ffffff",
        "border": "#dee2e6",
        "accent1": "#17a2b8",
        "accent2": "#28a745",
        "accent3": "#ffc107",
        "accent4": "#dc3545",
    }
    font_stacks = {
        "body": "-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, 'Noto Sans', sans-serif",
        "heading": "'Source Han Sans SC', 'PingFang SC', 'Microsoft YaHei', sans-serif",
    }
    layout_spacing = {"container": "1200px", "gutter": "24px"}
    feature_vars = {
        "header_sticky": True,
        "toc_depth": 3,
        "enable_dark_mode": True,
    }
    return {
        "colors": palette,
        "fonts": font_stacks,
        "spacing": layout_spacing,
        "vars": feature_vars,
    }
|
||||
|
||||
def _build_template_overview(
    self,
    template_markdown: str,
    sections: List[TemplateSection],
) -> Dict[str, Any]:
    """
    Summarize the template as a title plus its chapter skeleton.

    The title comes from ``self._extract_template_title``, with the first
    section's title as fallback. Each section contributes its id, titles,
    number, slug, order, depth and outline.

    Returns:
        ``{"title": ..., "chapters": [...]}``.
    """
    default_title = sections[0].title if sections else ""
    title = self._extract_template_title(template_markdown, default_title)
    chapter_entries = [
        {
            "chapterId": sec.chapter_id,
            "title": sec.title,
            "rawTitle": sec.raw_title,
            "number": sec.number,
            "slug": sec.slug,
            "order": sec.order,
            "depth": sec.depth,
            "outline": sec.outline,
        }
        for sec in sections
    ]
    return {"title": title, "chapters": chapter_entries}
|
||||
|
||||
@staticmethod
def _extract_template_title(template_markdown: str, fallback: str = "") -> str:
    """
    Extract a document title from the Markdown template.

    The first heading line (starting with ``#``) that has non-empty text wins.
    If there is none, the result is ``fallback``, else the first non-empty
    non-heading line, else a built-in default title.

    Args:
        template_markdown: Raw template text; ``None`` or empty is treated as
            having no lines.
        fallback: Title preferred over plain text lines when no heading exists.

    Returns:
        The resolved title; never an empty string.
    """
    first_text_line = ""
    for line in (template_markdown or "").splitlines():
        stripped = line.strip()
        if not stripped:
            continue
        if stripped.startswith("#"):
            heading = stripped.lstrip("#").strip()
            if heading:
                return heading
            # A bare "#"/"###" carries no title text; keep scanning.
            continue
        if not first_text_line:
            first_text_line = stripped
    return fallback or first_text_line or "智能舆情分析报告"
|
||||
|
||||
def _get_fallback_template_content(self) -> str:
|
||||
"""获取备用模板内容"""
|
||||
@@ -353,40 +571,82 @@ class ReportAgent:
|
||||
*生成时间:{generation_time}*
|
||||
"""
|
||||
|
||||
def _save_report(self, html_content: str):
|
||||
"""保存报告到文件"""
|
||||
# 生成文件名
|
||||
def _save_report(self, html_content: str, document_ir: Dict[str, Any], report_id: str) -> Dict[str, Any]:
|
||||
"""保存HTML与IR到文件并返回路径信息"""
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
query_safe = "".join(c for c in self.state.metadata.query if c.isalnum() or c in (' ', '-', '_')).rstrip()
|
||||
query_safe = query_safe.replace(' ', '_')[:30]
|
||||
|
||||
filename = f"final_report_{query_safe}_{timestamp}.html"
|
||||
filepath = os.path.join(self.config.OUTPUT_DIR, filename)
|
||||
|
||||
# 保存HTML报告
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
f.write(html_content)
|
||||
|
||||
abs_report_path = os.path.abspath(filepath)
|
||||
rel_report_path = os.path.relpath(abs_report_path, os.getcwd())
|
||||
logger.info(f"报告已保存到: {abs_report_path}")
|
||||
|
||||
# 保存状态
|
||||
query_safe = "".join(
|
||||
c for c in self.state.metadata.query if c.isalnum() or c in (" ", "-", "_")
|
||||
).rstrip()
|
||||
query_safe = query_safe.replace(" ", "_")[:30] or "report"
|
||||
|
||||
html_filename = f"final_report_{query_safe}_{timestamp}.html"
|
||||
html_path = Path(self.config.OUTPUT_DIR) / html_filename
|
||||
html_path.write_text(html_content, encoding="utf-8")
|
||||
html_abs = str(html_path.resolve())
|
||||
html_rel = os.path.relpath(html_abs, os.getcwd())
|
||||
|
||||
ir_path = self._save_document_ir(document_ir, query_safe, timestamp)
|
||||
ir_abs = str(ir_path.resolve())
|
||||
ir_rel = os.path.relpath(ir_abs, os.getcwd())
|
||||
|
||||
state_filename = f"report_state_{query_safe}_{timestamp}.json"
|
||||
state_filepath = os.path.join(self.config.OUTPUT_DIR, state_filename)
|
||||
self.state.save_to_file(state_filepath)
|
||||
abs_state_path = os.path.abspath(state_filepath)
|
||||
rel_state_path = os.path.relpath(abs_state_path, os.getcwd())
|
||||
logger.info(f"状态已保存到: {abs_state_path}")
|
||||
state_path = Path(self.config.OUTPUT_DIR) / state_filename
|
||||
self.state.save_to_file(str(state_path))
|
||||
state_abs = str(state_path.resolve())
|
||||
state_rel = os.path.relpath(state_abs, os.getcwd())
|
||||
|
||||
logger.info(f"HTML报告已保存: {html_path}")
|
||||
logger.info(f"Document IR已保存: {ir_path}")
|
||||
logger.info(f"状态已保存到: {state_path}")
|
||||
|
||||
return {
|
||||
'report_filename': filename,
|
||||
'report_filepath': abs_report_path,
|
||||
'report_relative_path': rel_report_path,
|
||||
'report_filename': html_filename,
|
||||
'report_filepath': html_abs,
|
||||
'report_relative_path': html_rel,
|
||||
'ir_filename': ir_path.name,
|
||||
'ir_filepath': ir_abs,
|
||||
'ir_relative_path': ir_rel,
|
||||
'state_filename': state_filename,
|
||||
'state_filepath': abs_state_path,
|
||||
'state_relative_path': rel_state_path
|
||||
'state_filepath': state_abs,
|
||||
'state_relative_path': state_rel,
|
||||
}
|
||||
|
||||
def _save_document_ir(self, document_ir: Dict[str, Any], query_safe: str, timestamp: str) -> Path:
    """
    Write the whole-document IR as JSON into the IR output directory.

    The file is named ``report_ir_{query_safe}_{timestamp}.json`` under
    ``self.config.DOCUMENT_IR_OUTPUT_DIR``, encoded as UTF-8 with indentation
    and without ASCII escaping.

    Returns:
        The path of the written file.
    """
    target = Path(self.config.DOCUMENT_IR_OUTPUT_DIR) / f"report_ir_{query_safe}_{timestamp}.json"
    serialized = json.dumps(document_ir, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
    return target
|
||||
|
||||
def _persist_planning_artifacts(
    self,
    run_dir: Path,
    layout_design: Dict[str, Any],
    word_plan: Dict[str, Any],
    template_overview: Dict[str, Any],
):
    """
    Save the layout design, word plan and template overview as JSON files.

    Each non-empty payload is written to ``run_dir`` as
    ``document_layout.json``, ``word_plan.json`` or ``template_overview.json``
    so the planning decisions can be inspected later. Empty payloads are
    skipped, and a failed write is logged as a warning instead of being raised.
    """
    named_payloads = (
        ("document_layout", layout_design),
        ("word_plan", word_plan),
        ("template_overview", template_overview),
    )
    for name, payload in named_payloads:
        if payload:
            target = run_dir / f"{name}.json"
            try:
                serialized = json.dumps(payload, ensure_ascii=False, indent=2)
                target.write_text(serialized, encoding="utf-8")
            except Exception as exc:
                # Best effort only: planning artifacts are debugging aids.
                logger.warning(f"写入{name}失败: {exc}")
|
||||
|
||||
def get_progress_summary(self) -> Dict[str, Any]:
|
||||
"""获取进度摘要"""
|
||||
@@ -515,4 +775,4 @@ def create_agent(config_file: Optional[str] = None) -> ReportAgent:
|
||||
"""
|
||||
|
||||
config = Settings() # 以空配置初始化,而从从环境变量初始化
|
||||
return ReportAgent(config)
|
||||
return ReportAgent(config)
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
Report Engine核心工具集合。
|
||||
|
||||
包含模板切片、章节存储等基础能力,供agent流水线复用。
|
||||
"""
|
||||
|
||||
from .template_parser import TemplateSection, parse_template_sections
|
||||
from .chapter_storage import ChapterStorage
|
||||
from .stitcher import DocumentComposer
|
||||
|
||||
__all__ = [
|
||||
"TemplateSection",
|
||||
"parse_template_sections",
|
||||
"ChapterStorage",
|
||||
"DocumentComposer",
|
||||
]
|
||||
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
章节JSON的落盘与清单管理。
|
||||
|
||||
每一章在流式生成时会立即写入raw文件,完成校验后再写入
|
||||
格式化的chapter.json,并在manifest中记录元数据,便于后续装订。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, Generator, List, Optional
|
||||
|
||||
|
||||
@dataclass
class ChapterRecord:
    """Metadata for one chapter as stored in the manifest."""

    chapter_id: str
    slug: str
    title: str
    order: int
    status: str
    # Relative file paths keyed by role (the storage class uses "raw" and "json").
    files: Dict[str, str] = field(default_factory=dict)
    errors: List[str] = field(default_factory=list)
    # Creation time of the record as a naive-UTC ISO string with a "Z" suffix.
    updated_at: str = field(default_factory=lambda: f"{datetime.utcnow().isoformat()}Z")

    def to_dict(self) -> Dict[str, object]:
        """Return the record as a dict using the manifest's camelCase keys."""
        key_to_attr = (
            ("chapterId", "chapter_id"),
            ("slug", "slug"),
            ("title", "title"),
            ("order", "order"),
            ("status", "status"),
            ("files", "files"),
            ("errors", "errors"),
            ("updatedAt", "updated_at"),
        )
        return {key: getattr(self, attr) for key, attr in key_to_attr}
|
||||
|
||||
|
||||
class ChapterStorage:
    """
    Writes chapter JSON to disk and maintains the per-report manifest.

    Usage:
        run_dir = storage.start_session(report_id, {...})
        chapter_dir = storage.begin_chapter(run_dir, meta)
        with storage.capture_stream(chapter_dir) as fp:
            fp.write(chunk)
        storage.persist_chapter(run_dir, meta, payload, errors)
    """

    def __init__(self, base_dir: str):
        """Create ``base_dir`` if needed and start with an empty manifest cache."""
        self.base_dir = Path(base_dir)
        self.base_dir.mkdir(parents=True, exist_ok=True)
        # In-memory manifests keyed by the resolved run-directory path.
        self._manifests: Dict[str, Dict[str, object]] = {}

    # ======== session & manifest ========

    def start_session(self, report_id: str, metadata: Dict[str, object]) -> Path:
        """
        Create the output directory for one report and write its initial manifest.

        Args:
            report_id: Used as the directory name under ``base_dir``.
            metadata: Stored verbatim under the manifest's ``metadata`` key.

        Returns:
            The run directory for this report.
        """
        run_dir = self.base_dir / report_id
        run_dir.mkdir(parents=True, exist_ok=True)
        manifest = {
            "reportId": report_id,
            "createdAt": datetime.utcnow().isoformat() + "Z",
            "metadata": metadata,
            "chapters": [],
        }
        self._manifests[self._key(run_dir)] = manifest
        self._write_manifest(run_dir, manifest)
        return run_dir

    def begin_chapter(self, run_dir: Path, chapter_meta: Dict[str, object]) -> Path:
        """
        Create the chapter sub-directory and mark the chapter as ``streaming``.

        Returns:
            The chapter directory (``{order:03d}-{slug}`` under ``run_dir``).
        """
        slug_value, order = self._slug_and_order(chapter_meta)
        chapter_dir = self._chapter_dir(run_dir, slug_value, order)
        record = ChapterRecord(
            chapter_id=str(chapter_meta.get("chapterId")),
            slug=slug_value,
            title=str(chapter_meta.get("title")),
            order=order,
            status="streaming",
            files={"raw": str(self._raw_stream_path(chapter_dir).relative_to(run_dir))},
        )
        self._upsert_record(run_dir, record)
        return chapter_dir

    def persist_chapter(
        self,
        run_dir: Path,
        chapter_meta: Dict[str, object],
        payload: Dict[str, object],
        errors: Optional[List[str]] = None,
    ) -> Path:
        """
        Write the final ``chapter.json`` and update the manifest status.

        The status is ``ready`` when ``errors`` is empty or ``None``, otherwise
        ``invalid``.

        Returns:
            The path of the written ``chapter.json``.
        """
        slug_value, order = self._slug_and_order(chapter_meta)
        chapter_dir = self._chapter_dir(run_dir, slug_value, order)
        final_path = chapter_dir / "chapter.json"
        final_path.write_text(
            json.dumps(payload, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

        record = ChapterRecord(
            chapter_id=str(chapter_meta.get("chapterId")),
            slug=slug_value,
            title=str(chapter_meta.get("title")),
            order=order,
            status="ready" if not errors else "invalid",
            files={
                "raw": str(self._raw_stream_path(chapter_dir).relative_to(run_dir)),
                "json": str(final_path.relative_to(run_dir)),
            },
            errors=errors or [],
        )
        self._upsert_record(run_dir, record)
        return final_path

    def load_chapters(self, run_dir: Path) -> List[Dict[str, object]]:
        """
        Load every ``chapter.json`` found in the sub-directories of ``run_dir``.

        Files that are not valid JSON, or whose top-level value is not an
        object, are skipped. The result is sorted by each payload's ``order``
        (missing ``order`` counts as 0).
        """
        payloads: List[Dict[str, object]] = []
        for child in sorted(run_dir.iterdir()):
            if not child.is_dir():
                continue
            chapter_path = child / "chapter.json"
            if not chapter_path.exists():
                continue
            try:
                payload = json.loads(chapter_path.read_text(encoding="utf-8"))
            except json.JSONDecodeError:
                continue
            # A non-object payload has no "order" and would break the sort below.
            if isinstance(payload, dict):
                payloads.append(payload)
        payloads.sort(key=lambda item: item.get("order", 0))
        return payloads

    # ======== file operations ========

    @contextmanager
    def capture_stream(self, chapter_dir: Path) -> Generator:
        """Open the chapter's raw stream file for writing and yield the handle."""
        raw_path = self._raw_stream_path(chapter_dir)
        raw_path.parent.mkdir(parents=True, exist_ok=True)
        with raw_path.open("w", encoding="utf-8") as fp:
            yield fp

    # ======== internal helpers ========

    @staticmethod
    def _slug_and_order(chapter_meta: Dict[str, object]):
        """Return ``(slug, order)``, tolerating a missing or ``None`` order."""
        slug_value = str(
            chapter_meta.get("slug") or chapter_meta.get("chapterId") or "section"
        )
        order = int(chapter_meta.get("order") or 0)
        return slug_value, order

    def _chapter_dir(self, run_dir: Path, slug: str, order: int) -> Path:
        """Return (creating if needed) the ``{order:03d}-{slug}`` directory."""
        folder = f"{order:03d}-{self._safe_slug(slug)}"
        path = run_dir / folder
        path.mkdir(parents=True, exist_ok=True)
        return path

    def _safe_slug(self, slug: str) -> str:
        """Replace spaces and path separators so the slug is one path component."""
        for unsafe in (" ", "/", "\\"):
            slug = slug.replace(unsafe, "-")
        return slug or "section"

    def _raw_stream_path(self, chapter_dir: Path) -> Path:
        """Path of the raw streamed output inside a chapter directory."""
        return chapter_dir / "stream.raw"

    def _key(self, run_dir: Path) -> str:
        """Cache key for a run directory: its resolved absolute path."""
        return str(run_dir.resolve())

    def _manifest_path(self, run_dir: Path) -> Path:
        """Path of the manifest file inside a run directory."""
        return run_dir / "manifest.json"

    def _write_manifest(self, run_dir: Path, manifest: Dict[str, object]):
        """Serialize the manifest to ``manifest.json`` as indented UTF-8 JSON."""
        self._manifest_path(run_dir).write_text(
            json.dumps(manifest, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    def _read_manifest(self, run_dir: Path) -> Dict[str, object]:
        """Read the manifest from disk, or return an empty one if it is absent."""
        manifest_path = self._manifest_path(run_dir)
        if manifest_path.exists():
            return json.loads(manifest_path.read_text(encoding="utf-8"))
        return {"reportId": run_dir.name, "chapters": []}

    def _upsert_record(self, run_dir: Path, record: "ChapterRecord"):
        """
        Insert or replace a chapter record in the manifest and write it to disk.

        Records are matched by ``chapterId`` and kept sorted by ``order``.
        ``updatedAt`` is refreshed on every call.
        """
        key = self._key(run_dir)
        manifest = self._manifests.get(key) or self._read_manifest(run_dir)
        chapters: List[Dict[str, object]] = [
            entry
            for entry in manifest.get("chapters", [])
            if entry.get("chapterId") != record.chapter_id
        ]
        chapters.append(record.to_dict())
        chapters.sort(key=lambda item: item.get("order", 0))
        manifest["chapters"] = chapters
        # Assign rather than setdefault: the timestamp must track the latest change.
        manifest["updatedAt"] = datetime.utcnow().isoformat() + "Z"
        self._manifests[key] = manifest
        self._write_manifest(run_dir, manifest)
|
||||
|
||||
|
||||
__all__ = ["ChapterStorage", "ChapterRecord"]
|
||||
@@ -0,0 +1,60 @@
|
||||
"""
|
||||
章节装订器:负责把多个章节JSON合并为整本IR。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Set
|
||||
|
||||
from ..ir import IR_VERSION
|
||||
|
||||
|
||||
class DocumentComposer:
    """
    Simple binder that stitches chapter payloads into a Document IR.
    """

    def __init__(self):
        # Anchors handed out for the document currently being built.
        self._seen_anchors: Set[str] = set()

    def build_document(
        self,
        report_id: str,
        metadata: Dict[str, object],
        chapters: List[Dict[str, object]],
    ) -> Dict[str, object]:
        """
        Sort chapters by ``order``, give each a unique anchor, and wrap them in an IR.

        Chapters are updated in place: missing ``chapterId`` and ``order`` get
        positional defaults and ``anchor`` is always set. Anchor uniqueness is
        scoped to one document, so a composer instance can be reused for
        several reports without anchors from earlier reports leaking in.

        Args:
            report_id: Identifier stored as ``reportId``.
            metadata: Document metadata; ``generatedAt``, ``themeTokens`` and
                ``assets`` are read from it when present.
            chapters: Chapter payload dicts.

        Returns:
            The assembled document dict.
        """
        # Start every document with a clean anchor namespace.
        self._seen_anchors = set()

        ordered = sorted(chapters, key=lambda c: c.get("order", 0))
        for idx, chapter in enumerate(ordered, start=1):
            chapter.setdefault("chapterId", f"S{idx}")
            anchor = chapter.get("anchor") or f"section-{idx}"
            chapter["anchor"] = self._ensure_unique_anchor(anchor)
            chapter.setdefault("order", idx * 10)

        generated_at = metadata.get("generatedAt") or datetime.utcnow().isoformat() + "Z"
        return {
            "version": IR_VERSION,
            "reportId": report_id,
            "metadata": {**metadata, "generatedAt": generated_at},
            "themeTokens": metadata.get("themeTokens", {}),
            "chapters": ordered,
            "assets": metadata.get("assets", {}),
        }

    def _ensure_unique_anchor(self, anchor: str) -> str:
        """Return ``anchor``, suffixed with ``-2``, ``-3``, ... if already used."""
        base = anchor
        counter = 2
        while anchor in self._seen_anchors:
            anchor = f"{base}-{counter}"
            counter += 1
        self._seen_anchors.add(anchor)
        return anchor
|
||||
|
||||
|
||||
__all__ = ["DocumentComposer"]
|
||||
@@ -0,0 +1,208 @@
|
||||
"""
|
||||
Markdown模板切片工具。
|
||||
|
||||
LLM需要“按章调用”,因此必须把Markdown模板解析为结构化章节队列。
|
||||
这里通过轻量正则和缩进启发式,兼容“# 标题”与
|
||||
“- **1.0 标题** / - 1.1 子标题”等多种写法。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional
|
||||
|
||||
SECTION_ORDER_STEP = 10
|
||||
|
||||
|
||||
@dataclass
class TemplateSection:
    """One chapter of the template, as produced by the template parser."""

    title: str
    slug: str
    order: int
    depth: int
    raw_title: str
    number: str = ""
    chapter_id: str = ""
    outline: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the section as a dict; ``raw_title`` is not included."""
        exported = (
            ("title", self.title),
            ("slug", self.slug),
            ("order", self.order),
            ("depth", self.depth),
            ("number", self.number),
            ("chapterId", self.chapter_id),
            ("outline", self.outline),
        )
        return dict(exported)
|
||||
|
||||
|
||||
heading_pattern = re.compile(r"^(#{1,6})\s+(.*)$")
|
||||
bullet_pattern = re.compile(r"^[-*+]\s+(.*)$")
|
||||
number_pattern = re.compile(r"^(?P<num>\d+(?:\.\d+)*)(?:[\s、::.-]+(?P<label>.*))?$")
|
||||
|
||||
|
||||
def parse_template_sections(template_md: str) -> List[TemplateSection]:
    """
    Split a Markdown template into a list of chapters (top-level headings).

    Every recognized line is classified by ``_classify_line``. Section lines
    open a new ``TemplateSection`` with a unique slug and an ``order`` that
    increases by ``SECTION_ORDER_STEP``; other recognized lines are appended to
    the current section's outline. Lines seen before the first section, and
    unrecognized lines, are ignored.

    Args:
        template_md: Raw Markdown text. ``None`` or an empty string yields an
            empty list.

    Returns:
        Sections in document order, with ``chapter_id`` set to ``S1``, ``S2``, ...
    """
    sections: List[TemplateSection] = []
    current: Optional[TemplateSection] = None
    order = SECTION_ORDER_STEP
    used_slugs = set()

    for raw_line in (template_md or "").splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue

        # Expand tabs first so tab-indented sub-items count as indented and
        # are not mistaken for top-level sections.
        expanded = raw_line.expandtabs(4)
        indent = len(expanded) - len(expanded.lstrip(" "))

        meta = _classify_line(stripped, indent)
        if not meta:
            continue

        if meta["is_section"]:
            current = TemplateSection(
                title=meta["title"],
                slug=_ensure_unique_slug(meta["slug"], used_slugs),
                order=order,
                depth=meta["depth"],
                raw_title=meta["raw"],
                number=meta["number"],
            )
            sections.append(current)
            order += SECTION_ORDER_STEP
        elif current:
            # Non-section lines become outline entries of the open chapter.
            current.outline.append(meta["title"])

    # Stable, position-based ids so later stages can reference chapters.
    for idx, section in enumerate(sections, start=1):
        section.chapter_id = f"S{idx}"

    return sections
|
||||
|
||||
|
||||
def _classify_line(stripped: str, indent: int) -> Optional[dict]:
    """
    Classify one non-empty template line as a section, an outline item, or neither.

    Three forms are recognized, in this order:
      * ``#`` headings: a section when the level is 1 or 2.
      * ``-``/``*``/``+`` bullets: a section when ``indent`` is at most 1.
      * bare numbered lines such as ``1.1 title``: a section when unindented
        and the number has at most one dot.

    Args:
        stripped: The line with surrounding whitespace removed.
        indent: Number of leading spaces on the original line.

    Returns:
        A dict with ``is_section``, ``depth``, ``title``, ``raw``, ``number``
        and ``slug``, or ``None`` when the line matches none of the forms.
    """
    heading = heading_pattern.match(stripped)
    bullet = None if heading else bullet_pattern.match(stripped)

    if heading or bullet:
        if heading:
            depth = len(heading.group(1))
            is_section = depth <= 2
            text = heading.group(2)
        else:
            is_section = indent <= 1
            depth = 1 if is_section else 2
            text = bullet.group(1)
        payload = _strip_markup(text.strip())
        info = _split_number(payload)
        return {
            "is_section": is_section,
            "depth": depth,
            "title": info["display"],
            "raw": payload,
            "number": info["number"],
            "slug": _build_slug(info["number"], info["title"]),
        }

    # Lines like "1.1 ..." that carry no heading or bullet prefix.
    numbered = number_pattern.match(stripped)
    if not numbered or not numbered.group("label"):
        return None

    number = numbered.group("num")
    title = numbered.group("label").strip()
    is_section = indent == 0 and number.count(".") <= 1
    return {
        "is_section": is_section,
        "depth": 1 if is_section else 2,
        "title": f"{number} {title}" if title else number,
        "raw": stripped,
        "number": number,
        "slug": _build_slug(number, title),
    }
|
||||
|
||||
|
||||
def _strip_markup(text: str) -> str:
|
||||
"""去除包裹的**、__等简单强调标记"""
|
||||
if text.startswith(("**", "__")) and text.endswith(("**", "__")) and len(text) > 4:
|
||||
return text[2:-2].strip()
|
||||
return text
|
||||
|
||||
|
||||
def _split_number(payload: str) -> dict:
    """Split *payload* into its numbering prefix and its title text.

    Uses the module-level ``number_pattern`` (defined elsewhere), reading
    its ``num`` and ``label`` groups.

    Returns:
        A dict with ``number`` (empty string when the pattern does not
        match), ``title`` (the label, or the whole payload when the label is
        empty) and ``display`` (number and label joined by a space, or the
        title when there is no number).
    """
    found = number_pattern.match(payload)
    if found:
        number = found.group("num")
        label = found.group("label")
    else:
        number = ""
        label = payload
    label = (label or "").strip()

    if number:
        display = f"{number} {label}".strip()
    else:
        display = label or payload

    return {
        "number": number,
        "title": label or payload,
        "display": display,
    }
|
||||
|
||||
|
||||
def _build_slug(number: str, title: str) -> str:
|
||||
"""根据编号/标题生成锚点"""
|
||||
if number:
|
||||
token = number.replace(".", "-")
|
||||
else:
|
||||
token = _slugify_text(title)
|
||||
token = token or "section"
|
||||
return f"section-{token}"
|
||||
|
||||
|
||||
def _slugify_text(text: str) -> str:
|
||||
text = unicodedata.normalize("NFKD", text)
|
||||
text = text.replace("·", "-").replace(" ", "-")
|
||||
text = re.sub(r"[^0-9a-zA-Z\u4e00-\u9fff-]+", "-", text)
|
||||
text = re.sub(r"-{2,}", "-", text)
|
||||
return text.strip("-").lower()
|
||||
|
||||
|
||||
def _ensure_unique_slug(slug: str, used: set) -> str:
|
||||
if slug not in used:
|
||||
used.add(slug)
|
||||
return slug
|
||||
base = slug
|
||||
idx = 2
|
||||
while slug in used:
|
||||
slug = f"{base}-{idx}"
|
||||
idx += 1
|
||||
used.add(slug)
|
||||
return slug
|
||||
|
||||
|
||||
__all__ = ["TemplateSection", "parse_template_sections"]
|
||||
@@ -78,7 +78,9 @@ class ReportTask:
|
||||
'has_result': bool(self.html_content),
|
||||
'report_file_ready': bool(self.report_file_path),
|
||||
'report_file_name': self.report_file_name,
|
||||
'report_file_path': self.report_file_relative_path
|
||||
'report_file_path': self.report_file_relative_path or self.report_file_path,
|
||||
'state_file_ready': bool(self.state_file_path),
|
||||
'state_file_path': self.state_file_relative_path or self.state_file_path
|
||||
}
|
||||
|
||||
|
||||
@@ -135,17 +137,21 @@ def run_report_generation(task: ReportTask, query: str, custom_template: str = "
|
||||
save_report=True
|
||||
)
|
||||
|
||||
html_report = generation_result.get('html_content', '')
|
||||
if isinstance(generation_result, dict):
|
||||
html_report = generation_result.get('html_content', '')
|
||||
else:
|
||||
html_report = generation_result
|
||||
|
||||
task.update_status("running", 90)
|
||||
|
||||
# 保存结果
|
||||
task.html_content = html_report
|
||||
task.report_file_path = generation_result.get('report_filepath', '')
|
||||
task.report_file_relative_path = generation_result.get('report_relative_path', '')
|
||||
task.report_file_name = generation_result.get('report_filename', '')
|
||||
task.state_file_path = generation_result.get('state_filepath', '')
|
||||
task.state_file_relative_path = generation_result.get('state_relative_path', '')
|
||||
if isinstance(generation_result, dict):
|
||||
task.report_file_path = generation_result.get('report_filepath', '')
|
||||
task.report_file_relative_path = generation_result.get('report_relative_path', '')
|
||||
task.report_file_name = generation_result.get('report_filename', '')
|
||||
task.state_file_path = generation_result.get('state_filepath', '')
|
||||
task.state_file_relative_path = generation_result.get('state_relative_path', '')
|
||||
task.update_status("completed", 100)
|
||||
|
||||
except Exception as e:
|
||||
@@ -269,7 +275,9 @@ def get_progress(task_id: str):
|
||||
'has_result': True,
|
||||
'report_file_ready': False,
|
||||
'report_file_name': '',
|
||||
'report_file_path': ''
|
||||
'report_file_path': '',
|
||||
'state_file_ready': False,
|
||||
'state_file_path': ''
|
||||
}
|
||||
})
|
||||
|
||||
@@ -534,4 +542,4 @@ def clear_log():
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'error': f'清空日志失败: {str(e)}'
|
||||
}), 500
|
||||
}), 500
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
"""
|
||||
Report Engine的可执行JSON契约(IR)定义与校验工具。
|
||||
|
||||
该模块暴露统一的Schema文本与校验器,供提示词、章节生成、
|
||||
以及最终装订流程共同复用,确保从LLM到渲染的产物结构一致。
|
||||
"""
|
||||
|
||||
from .schema import (
|
||||
IR_VERSION,
|
||||
CHAPTER_JSON_SCHEMA,
|
||||
CHAPTER_JSON_SCHEMA_TEXT,
|
||||
ALLOWED_BLOCK_TYPES,
|
||||
ALLOWED_INLINE_MARKS,
|
||||
)
|
||||
from .validator import IRValidator
|
||||
|
||||
__all__ = [
|
||||
"IR_VERSION",
|
||||
"CHAPTER_JSON_SCHEMA",
|
||||
"CHAPTER_JSON_SCHEMA_TEXT",
|
||||
"ALLOWED_BLOCK_TYPES",
|
||||
"ALLOWED_INLINE_MARKS",
|
||||
"IRValidator",
|
||||
]
|
||||
@@ -0,0 +1,369 @@
|
||||
"""
|
||||
Report Engine JSON契约(IR)Schema定义。
|
||||
|
||||
这里集中维护所有章节级别的Schema与可用于提示词的文本表示,
|
||||
确保章节生成、校验与渲染对同一个结构有统一认知。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Version tag of the chapter IR contract; used as the default
# schema_version of IRValidator.
IR_VERSION = "1.0"

# ====== Base constants ======
# Inline mark types accepted on an inline run. Used as the enum of
# inline_mark_schema["properties"]["type"].
ALLOWED_INLINE_MARKS: List[str] = [
    "bold",
    "italic",
    "underline",
    "strike",
    "code",
    "link",
    "color",
    "font",
    "highlight",
    "subscript",
    "superscript",
    "math",
]

# Block types a chapter may contain. Each entry has a matching *_block
# schema dict defined further down in this module.
ALLOWED_BLOCK_TYPES: List[str] = [
    "heading",
    "paragraph",
    "list",
    "table",
    "blockquote",
    "hr",
    "code",
    "math",
    "figure",
    "callout",
    "kpiGrid",
    "widget",
    "toc",
]
|
||||
|
||||
# ====== Schema definitions ======
# One inline mark: only "type" is required, and it must be one of
# ALLOWED_INLINE_MARKS. Unknown extra keys are allowed.
inline_mark_schema: Dict[str, Any] = {
    "type": "object",
    "required": ["type"],
    "properties": {
        "type": {"type": "string", "enum": ALLOWED_INLINE_MARKS},
        "value": {"type": ["string", "number", "object"]},
        "href": {"type": "string", "format": "uri-reference"},
        "title": {"type": "string"},
        "style": {"type": "object"},
    },
    "additionalProperties": True,
}

# One inline run: a required "text" string plus an optional list of marks.
inline_run_schema: Dict[str, Any] = {
    "type": "object",
    "required": ["text"],
    "properties": {
        "text": {"type": "string"},
        "marks": {
            "type": "array",
            "items": {"$ref": "#/definitions/inlineMark"},
        },
    },
    "additionalProperties": True,
}
|
||||
|
||||
# Heading: level 1-6 with text and an anchor; numbering and subtitle are optional.
heading_block: Dict[str, Any] = {
    "title": "HeadingBlock",
    "type": "object",
    "properties": {
        "type": {"const": "heading"},
        "level": {"type": "integer", "minimum": 1, "maximum": 6},
        "text": {"type": "string"},
        "anchor": {"type": "string"},
        "numbering": {"type": "string"},
        "subtitle": {"type": "string"},
    },
    "required": ["type", "level", "text", "anchor"],
    "additionalProperties": True,
}

# Paragraph: a list of inline runs with an optional alignment.
paragraph_block: Dict[str, Any] = {
    "title": "ParagraphBlock",
    "type": "object",
    "properties": {
        "type": {"const": "paragraph"},
        "inlines": {
            "type": "array",
            "items": {"$ref": "#/definitions/inlineRun"},
        },
        "align": {"type": "string", "enum": ["left", "center", "right", "justify"]},
    },
    "required": ["type", "inlines"],
    "additionalProperties": True,
}

# List: "items" is an array of arrays; each inner array holds the blocks of one list item.
list_block: Dict[str, Any] = {
    "title": "ListBlock",
    "type": "object",
    "properties": {
        "type": {"const": "list"},
        "listType": {"type": "string", "enum": ["ordered", "bullet", "task"]},
        "items": {
            "type": "array",
            "items": {
                "type": "array",
                "items": {"$ref": "#/definitions/block"},
            },
        },
    },
    "required": ["type", "listType", "items"],
    "additionalProperties": True,
}

# Table: rows -> cells -> blocks; every cell must carry a "blocks" array.
table_block: Dict[str, Any] = {
    "title": "TableBlock",
    "type": "object",
    "properties": {
        "type": {"const": "table"},
        "colgroup": {"type": "array", "items": {"type": "object"}},
        "rows": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "cells": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "rowspan": {"type": "integer", "minimum": 1},
                                "colspan": {"type": "integer", "minimum": 1},
                                "align": {
                                    "type": "string",
                                    "enum": ["left", "center", "right"],
                                },
                                "blocks": {
                                    "type": "array",
                                    "items": {"$ref": "#/definitions/block"},
                                },
                            },
                            "required": ["blocks"],
                            "additionalProperties": True,
                        },
                    }
                },
                "required": ["cells"],
                "additionalProperties": True,
            },
        },
        "caption": {"type": "string"},
        "zebra": {"type": "boolean"},
    },
    "required": ["type", "rows"],
    "additionalProperties": True,
}

# Blockquote: a container of nested blocks with an optional variant name.
blockquote_block: Dict[str, Any] = {
    "title": "BlockquoteBlock",
    "type": "object",
    "properties": {
        "type": {"const": "blockquote"},
        "blocks": {
            "type": "array",
            "items": {"$ref": "#/definitions/block"},
        },
        "variant": {"type": "string"},
    },
    "required": ["type", "blocks"],
    "additionalProperties": True,
}

# Horizontal rule: only "type" is required.
hr_block: Dict[str, Any] = {
    "title": "HorizontalRuleBlock",
    "type": "object",
    "properties": {
        "type": {"const": "hr"},
        "variant": {"type": "string"},
    },
    "required": ["type"],
    "additionalProperties": True,
}
|
||||
|
||||
# Code listing: "content" is required; language and caption are optional.
code_block: Dict[str, Any] = {
    "title": "CodeBlock",
    "type": "object",
    "properties": {
        "type": {"const": "code"},
        "lang": {"type": "string"},
        "content": {"type": "string"},
        "caption": {"type": "string"},
    },
    "required": ["type", "content"],
    "additionalProperties": True,
}

# Math block: a required LaTeX source string.
math_block: Dict[str, Any] = {
    "title": "MathBlock",
    "type": "object",
    "properties": {
        "type": {"const": "math"},
        "latex": {"type": "string"},
        "displayMode": {"type": "boolean"},
    },
    "required": ["type", "latex"],
    "additionalProperties": True,
}

# Figure: an "img" object (src required) with an optional caption.
figure_block: Dict[str, Any] = {
    "title": "FigureBlock",
    "type": "object",
    "properties": {
        "type": {"const": "figure"},
        "img": {
            "type": "object",
            "properties": {
                "src": {"type": "string"},
                "alt": {"type": "string"},
                "width": {"type": "number"},
                "height": {"type": "number"},
                "srcset": {"type": "string"},
            },
            "required": ["src"],
            "additionalProperties": True,
        },
        "caption": {"type": "string"},
        "responsive": {"type": "boolean"},
    },
    "required": ["type", "img"],
    "additionalProperties": True,
}

# Callout: a toned container of nested blocks.
callout_block: Dict[str, Any] = {
    "title": "CalloutBlock",
    "type": "object",
    "properties": {
        "type": {"const": "callout"},
        "tone": {
            "type": "string",
            "enum": ["info", "warning", "success", "danger"],
        },
        "title": {"type": "string"},
        "blocks": {
            "type": "array",
            "items": {"$ref": "#/definitions/block"},
        },
    },
    "required": ["type", "tone", "blocks"],
    "additionalProperties": True,
}

# KPI grid: items that each need a label and a value (both strings).
kpi_block: Dict[str, Any] = {
    "title": "KPIGridBlock",
    "type": "object",
    "properties": {
        "type": {"const": "kpiGrid"},
        "items": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "label": {"type": "string"},
                    "value": {"type": "string"},
                    "unit": {"type": "string"},
                    "delta": {"type": "string"},
                    "deltaTone": {"type": "string", "enum": ["up", "down", "neutral"]},
                },
                "required": ["label", "value"],
                "additionalProperties": True,
            },
        },
        "cols": {"type": "integer"},
    },
    "required": ["type", "items"],
    "additionalProperties": True,
}

# Widget: identified by widgetId/widgetType. This schema does not require
# "data" or "dataRef", although IRValidator._validate_widget_block demands
# one of them.
widget_block: Dict[str, Any] = {
    "title": "WidgetBlock",
    "type": "object",
    "properties": {
        "type": {"const": "widget"},
        "widgetId": {"type": "string"},
        "widgetType": {"type": "string"},
        "props": {"type": "object"},
        "data": {"type": "object"},
        "dataRef": {"type": "string"},
    },
    "required": ["type", "widgetId", "widgetType"],
    "additionalProperties": True,
}

# Table-of-contents placeholder: depth is limited to 1-4.
toc_block: Dict[str, Any] = {
    "title": "TOCBlock",
    "type": "object",
    "properties": {
        "type": {"const": "toc"},
        "depth": {"type": "integer", "minimum": 1, "maximum": 4},
        "autoNumbering": {"type": "boolean"},
    },
    "required": ["type"],
    "additionalProperties": True,
}

# All block schemas, in the same order as ALLOWED_BLOCK_TYPES; used as the
# "oneOf" alternatives of the shared "block" definition.
block_variants: List[Dict[str, Any]] = [
    heading_block,
    paragraph_block,
    list_block,
    table_block,
    blockquote_block,
    hr_block,
    code_block,
    math_block,
    figure_block,
    callout_block,
    kpi_block,
    widget_block,
    toc_block,
]
|
||||
|
||||
# Top-level chapter schema. The "#/definitions/..." references used by the
# block schemas resolve against the "definitions" member below.
# NOTE(review): "$schema" names draft 2020-12, whose keyword for shared
# definitions is "$defs"; the "definitions" spelling comes from earlier
# drafts. Confirm how consumers of this schema resolve it.
CHAPTER_JSON_SCHEMA: Dict[str, Any] = {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "title": "ReportEngineChapterIR",
    "type": "object",
    "required": ["chapterId", "title", "anchor", "order", "blocks"],
    "properties": {
        "chapterId": {"type": "string"},
        "anchor": {"type": "string"},
        "title": {"type": "string"},
        "order": {"type": "number"},
        "summary": {"type": "string"},
        "blocks": {
            "type": "array",
            "items": {"$ref": "#/definitions/block"},
        },
        "xrefs": {"type": "object"},
        "widgets": {"type": "array", "items": {"type": "string"}},
        "footnotes": {"type": "array", "items": {"type": "object"}},
        "errors": {"type": "array", "items": {"type": "string"}},
        "metadata": {"type": "object"},
    },
    "additionalProperties": True,
    "definitions": {
        "inlineMark": inline_mark_schema,
        "inlineRun": inline_run_schema,
        "block": {"oneOf": block_variants},
    },
}

# Pretty-printed JSON text of the schema. ensure_ascii=False keeps any
# non-ASCII characters unescaped.
CHAPTER_JSON_SCHEMA_TEXT: str = json.dumps(
    CHAPTER_JSON_SCHEMA,
    ensure_ascii=False,
    indent=2,
)
|
||||
|
||||
__all__ = [
|
||||
"IR_VERSION",
|
||||
"ALLOWED_INLINE_MARKS",
|
||||
"ALLOWED_BLOCK_TYPES",
|
||||
"CHAPTER_JSON_SCHEMA",
|
||||
"CHAPTER_JSON_SCHEMA_TEXT",
|
||||
]
|
||||
@@ -0,0 +1,218 @@
|
||||
"""
|
||||
章节级JSON结构校验器。
|
||||
|
||||
LLM按章节生成IR后,需要在落盘与装订前经过严格校验,以避免
|
||||
渲染期的结构性崩溃。本模块实现轻量级的Python校验逻辑,
|
||||
无需依赖jsonschema库即可快速定位错误。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
from .schema import ALLOWED_BLOCK_TYPES, ALLOWED_INLINE_MARKS, IR_VERSION
|
||||
|
||||
|
||||
class IRValidator:
    """
    Structural validator for chapter IR objects.

    Notes:
    - ``validate_chapter`` returns ``(passed, error_list)``.
    - Each error message starts with a path such as
      ``blocks[0].items[1][0]`` so the offending node is easy to find.
    """

    def __init__(self, schema_version: str = IR_VERSION):
        self.schema_version = schema_version

    # ======== Public API ========

    def validate_chapter(self, chapter: Dict[str, Any]) -> Tuple[bool, List[str]]:
        """Check a chapter's required fields and the structure of every block."""
        if not isinstance(chapter, dict):
            return False, ["chapter必须是对象"]

        errors: List[str] = [
            f"missing chapter.{field}"
            for field in ("chapterId", "title", "anchor", "order", "blocks")
            if field not in chapter
        ]

        blocks = chapter.get("blocks")
        if not (isinstance(blocks, list) and blocks):
            # Without a usable block list there is nothing left to walk.
            errors.append("chapter.blocks必须是非空数组")
            return False, errors

        for position, block in enumerate(blocks):
            self._validate_block(block, f"blocks[{position}]", errors)
        return not errors, errors

    # ======== Internal helpers ========

    def _validate_block(self, block: Any, path: str, errors: List[str]):
        """Dispatch to ``_validate_<type>_block`` when the block type has one."""
        if not isinstance(block, dict):
            errors.append(f"{path} 必须是对象")
            return

        block_type = block.get("type")
        if block_type not in ALLOWED_BLOCK_TYPES:
            errors.append(f"{path}.type 不被支持: {block_type}")
            return

        # Types without a dedicated checker are accepted as they are.
        handler = getattr(self, f"_validate_{block_type}_block", None)
        if handler:
            handler(block, path, errors)

    def _check_nested_blocks(self, blocks: Any, owner_path: str, errors: List[str]):
        """Require a non-empty ``blocks`` list under *owner_path* and validate each child."""
        if not isinstance(blocks, list) or not blocks:
            errors.append(f"{owner_path}.blocks 必须是非空数组")
            return
        for position, child in enumerate(blocks):
            self._validate_block(child, f"{owner_path}.blocks[{position}]", errors)

    def _validate_heading_block(self, block: Dict[str, Any], path: str, errors: List[str]):
        """A heading needs an integer level plus text and anchor."""
        if not isinstance(block.get("level"), int):
            errors.append(f"{path}.level 必须是整数")
        for key in ("text", "anchor"):
            if key not in block:
                errors.append(f"{path}.{key} 缺失")

    def _validate_paragraph_block(self, block: Dict[str, Any], path: str, errors: List[str]):
        """A paragraph needs a non-empty ``inlines`` list; each run is checked."""
        runs = block.get("inlines")
        if not (isinstance(runs, list) and runs):
            errors.append(f"{path}.inlines 必须是非空数组")
            return
        for position, run in enumerate(runs):
            self._validate_inline_run(run, f"{path}.inlines[{position}]", errors)

    def _validate_list_block(self, block: Dict[str, Any], path: str, errors: List[str]):
        """A list needs a valid ``listType``; every item must be a list of blocks."""
        if block.get("listType") not in {"ordered", "bullet", "task"}:
            errors.append(f"{path}.listType 取值非法")

        entries = block.get("items")
        if not (isinstance(entries, list) and entries):
            errors.append(f"{path}.items 必须是非空列表")
            return

        for item_no, entry in enumerate(entries):
            if isinstance(entry, list):
                for child_no, child in enumerate(entry):
                    self._validate_block(
                        child, f"{path}.items[{item_no}][{child_no}]", errors
                    )
            else:
                errors.append(f"{path}.items[{item_no}] 必须是区块数组")

    def _validate_table_block(self, block: Dict[str, Any], path: str, errors: List[str]):
        """A table needs rows -> cells -> blocks; cell contents are checked recursively."""
        rows = block.get("rows")
        if not (isinstance(rows, list) and rows):
            errors.append(f"{path}.rows 必须是非空数组")
            return

        for row_no, row in enumerate(rows):
            row_path = f"{path}.rows[{row_no}]"
            cells = row.get("cells") if isinstance(row, dict) else None
            if not (isinstance(cells, list) and cells):
                errors.append(f"{row_path}.cells 必须是非空数组")
                continue
            for cell_no, cell in enumerate(cells):
                cell_path = f"{row_path}.cells[{cell_no}]"
                if not isinstance(cell, dict):
                    errors.append(f"{cell_path} 必须是对象")
                    continue
                self._check_nested_blocks(cell.get("blocks"), cell_path, errors)

    def _validate_blockquote_block(
        self, block: Dict[str, Any], path: str, errors: List[str]
    ):
        """A blockquote must contain at least one child block."""
        self._check_nested_blocks(block.get("blocks"), path, errors)

    def _validate_callout_block(self, block: Dict[str, Any], path: str, errors: List[str]):
        """A callout needs a known tone and at least one child block."""
        tone = block.get("tone")
        if tone not in {"info", "warning", "success", "danger"}:
            errors.append(f"{path}.tone 取值非法: {tone}")
        self._check_nested_blocks(block.get("blocks"), path, errors)

    def _validate_kpiGrid_block(self, block: Dict[str, Any], path: str, errors: List[str]):
        """A KPI grid needs non-empty items, each an object with label and value."""
        cards = block.get("items")
        if not (isinstance(cards, list) and cards):
            errors.append(f"{path}.items 必须是非空数组")
            return
        for position, card in enumerate(cards):
            if not isinstance(card, dict):
                errors.append(f"{path}.items[{position}] 必须是对象")
            elif "label" not in card or "value" not in card:
                errors.append(f"{path}.items[{position}] 需要label与value")

    def _validate_widget_block(self, block: Dict[str, Any], path: str, errors: List[str]):
        """A widget needs widgetId, widgetType, and either data or dataRef."""
        for key in ("widgetId", "widgetType"):
            if key not in block:
                errors.append(f"{path}.{key} 缺失")
        if not ("data" in block or "dataRef" in block):
            errors.append(f"{path} 需要 data 或 dataRef 其一")

    def _validate_code_block(self, block: Dict[str, Any], path: str, errors: List[str]):
        """A code block must carry ``content``."""
        if "content" not in block:
            errors.append(f"{path}.content 缺失")

    def _validate_math_block(self, block: Dict[str, Any], path: str, errors: List[str]):
        """A math block must carry ``latex``."""
        if "latex" not in block:
            errors.append(f"{path}.latex 缺失")

    def _validate_figure_block(
        self, block: Dict[str, Any], path: str, errors: List[str]
    ):
        """A figure needs an ``img`` object that has at least ``src``."""
        image = block.get("img")
        if not isinstance(image, dict):
            errors.append(f"{path}.img 必须是对象")
        elif "src" not in image:
            errors.append(f"{path}.img.src 缺失")

    def _validate_inline_run(
        self, run: Any, path: str, errors: List[str]
    ):
        """Check one inline run of a paragraph and the types of its marks."""
        if not isinstance(run, dict):
            errors.append(f"{path} 必须是对象")
            return
        if "text" not in run:
            errors.append(f"{path}.text 缺失")

        marks = run.get("marks", [])
        if marks is None:
            # An explicit null is treated the same as "no marks".
            return
        if not isinstance(marks, list):
            errors.append(f"{path}.marks 必须是数组")
            return

        for position, mark in enumerate(marks):
            if not isinstance(mark, dict):
                errors.append(f"{path}.marks[{position}] 必须是对象")
                continue
            mark_type = mark.get("type")
            if mark_type not in ALLOWED_INLINE_MARKS:
                errors.append(f"{path}.marks[{position}].type 不被支持: {mark_type}")
||||
|
||||
|
||||
__all__ = ["IRValidator"]
|
||||
@@ -5,11 +5,15 @@ Report Engine节点处理模块
|
||||
|
||||
from .base_node import BaseNode, StateMutationNode
|
||||
from .template_selection_node import TemplateSelectionNode
|
||||
from .html_generation_node import HTMLGenerationNode
|
||||
from .chapter_generation_node import ChapterGenerationNode
|
||||
from .document_layout_node import DocumentLayoutNode
|
||||
from .word_budget_node import WordBudgetNode
|
||||
|
||||
__all__ = [
|
||||
"BaseNode",
|
||||
"StateMutationNode",
|
||||
"StateMutationNode",
|
||||
"TemplateSelectionNode",
|
||||
"HTMLGenerationNode"
|
||||
"ChapterGenerationNode",
|
||||
"DocumentLayoutNode",
|
||||
"WordBudgetNode",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,506 @@
|
||||
"""
|
||||
章节级JSON生成节点。
|
||||
|
||||
每个章节依据Markdown模板切片独立调用LLM,流式写入Raw文件,
|
||||
完成后校验并落盘标准化JSON。该节点只负责“拿到合规章节”。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
import re
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from ..core import TemplateSection, ChapterStorage
|
||||
from ..ir import ALLOWED_BLOCK_TYPES, IRValidator
|
||||
from ..prompts import (
|
||||
SYSTEM_PROMPT_CHAPTER_JSON,
|
||||
build_chapter_user_prompt,
|
||||
)
|
||||
from .base_node import BaseNode
|
||||
|
||||
try:
|
||||
from json_repair import repair_json as _json_repair_fn
|
||||
except ImportError: # pragma: no cover - optional dependency
|
||||
_json_repair_fn = None
|
||||
|
||||
|
||||
class ChapterGenerationNode(BaseNode):
|
||||
"""负责按章节调用LLM并校验JSON结构"""
|
||||
|
||||
_COLON_EQUALS_PATTERN = re.compile(r'(":\s*)=')
|
||||
|
||||
def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage):
    """Store the validator and storage collaborators used by ``run``.

    Args:
        llm_client: LLM client forwarded to the base node.
        validator: Validator applied to each parsed chapter.
        storage: Storage used for raw stream capture and chapter persistence.
    """
    super().__init__(llm_client, "ChapterGenerationNode")
    self.storage = storage
    self.validator = validator
|
||||
|
||||
def run(
    self,
    section: TemplateSection,
    context: Dict[str, Any],
    run_dir: Path,
    **kwargs,
) -> Dict[str, Any]:
    """Generate, validate and persist the JSON for one chapter.

    Args:
        section: Template section describing the chapter to write.
        context: Shared generation context passed on to the prompt payload.
        run_dir: Directory of the current run, handed to the storage layer.
        **kwargs: Forwarded to ``_stream_llm`` (sampling options).

    Returns:
        The validated chapter dict.

    Raises:
        ValueError: When the chapter fails validation. The chapter is
            persisted together with its errors before this is raised.
    """
    chapter_meta = {
        "chapterId": section.chapter_id,
        "slug": section.slug,
        "title": section.title,
        "order": section.order,
    }
    chapter_dir = self.storage.begin_chapter(run_dir, chapter_meta)

    prompt = build_chapter_user_prompt(self._build_payload(section, context))
    chapter_json = self._parse_chapter(self._stream_llm(prompt, chapter_dir, **kwargs))

    # Fill in key fields the model left out before validating.
    fallbacks = (
        ("chapterId", section.chapter_id),
        ("anchor", section.slug),
        ("title", section.title),
        ("order", section.order),
    )
    for key, fallback in fallbacks:
        chapter_json.setdefault(key, fallback)
    self._sanitize_chapter_blocks(chapter_json)

    valid, errors = self.validator.validate_chapter(chapter_json)
    self.storage.persist_chapter(
        run_dir,
        chapter_meta,
        chapter_json,
        errors=None if valid else errors,
    )

    if valid:
        return chapter_json
    raise ValueError(
        f"{section.title} 章节JSON校验失败: {'; '.join(errors[:5])}"
    )
|
||||
|
||||
# ====== 内部方法 ======
|
||||
|
||||
def _build_payload(self, section: TemplateSection, context: Dict[str, Any]) -> Dict[str, Any]:
    """Assemble the payload dict that is turned into the chapter user prompt.

    Combines the section's own fields, global context values, the three
    engine reports, forum logs, data bundles and generation constraints.
    When ``context["chapter_directives"]`` has a plan for this chapter, its
    word targets, emphasis and per-section budgets are copied into the
    constraints.
    """
    reports = context.get("reports", {})

    # Per-chapter plan (keyed by chapter id) guiding length and emphasis.
    directives = context.get("chapter_directives", {})
    chapter_plan = directives.get(section.chapter_id) if directives else {}

    section_info = {
        "chapterId": section.chapter_id,
        "title": section.title,
        "slug": section.slug,
        "order": section.order,
        "number": section.number,
        "outline": section.outline,
    }
    global_context = {
        "query": context.get("query"),
        "templateName": context.get("template_name"),
        "themeTokens": context.get("theme_tokens", {}),
        "styleDirectives": context.get("style_directives", {}),
        # Layout data is passed through so the chapter can see it.
        "layout": context.get("layout"),
        "templateOverview": context.get("template_overview", {}),
    }
    engine_reports = {
        name: reports.get(name, "")
        for name in ("query_engine", "media_engine", "insight_engine")
    }
    constraints = {
        "language": "zh-CN",
        "maxTokens": context.get("max_tokens", 4096),
        "allowedBlocks": ALLOWED_BLOCK_TYPES,
        "styleHints": {
            "expectWidgets": True,
            "forceHeadingAnchors": True,
            "allowInlineMix": True,
        },
    }

    payload = {
        "section": section_info,
        "globalContext": global_context,
        "reports": engine_reports,
        "forumLogs": context.get("forum_logs", ""),
        "dataBundles": context.get("data_bundles", []),
        "constraints": constraints,
        "chapterPlan": chapter_plan,
        "wordPlan": context.get("word_plan"),
    }

    if not chapter_plan:
        return payload

    # Copy only the plan entries that hold a truthy value.
    plan_to_constraint = (
        ("targetWords", "wordTarget"),
        ("minWords", "minWords"),
        ("maxWords", "maxWords"),
        ("emphasis", "emphasis"),
    )
    for plan_key, constraint_key in plan_to_constraint:
        if chapter_plan.get(plan_key):
            constraints[constraint_key] = chapter_plan[plan_key]
    if chapter_plan.get("sections"):
        constraints["sectionBudgets"] = chapter_plan["sections"]
        global_context["sectionBudgets"] = chapter_plan["sections"]
    return payload
|
||||
|
||||
def _stream_llm(self, user_message: str, chapter_dir: Path, **kwargs) -> str:
    """Stream the LLM reply, writing each delta to the raw capture as it arrives.

    ``temperature`` and ``top_p`` are read from *kwargs*, defaulting to 0.2
    and 0.95. Returns the concatenation of all deltas.
    """
    temperature = kwargs.get("temperature", 0.2)
    top_p = kwargs.get("top_p", 0.95)
    pieces: List[str] = []
    with self.storage.capture_stream(chapter_dir) as raw_file:
        deltas = self.llm_client.stream_invoke(
            SYSTEM_PROMPT_CHAPTER_JSON,
            user_message,
            temperature=temperature,
            top_p=top_p,
        )
        for piece in deltas:
            raw_file.write(piece)
            pieces.append(piece)
    return "".join(pieces)
|
||||
|
||||
def _parse_chapter(self, raw_text: str) -> Dict[str, Any]:
    """Clean the raw LLM output and extract the chapter object from it.

    Steps:
      1. Strip surrounding whitespace and a wrapping Markdown code fence.
      2. Parse the cleaned text, then the locally repaired text from
         ``_repair_llm_json`` if it differs.
      3. If both fail, try ``_attempt_json_repair`` as a last resort.
      4. Accept ``{"chapter": {...}}``, a bare chapter dict holding
         ``chapterId``/``title``/``blocks``, or a list whose first suitable
         element has one of those shapes.

    Args:
        raw_text: The complete text returned by the model.

    Returns:
        The chapter dict.

    Raises:
        ValueError: If the text is empty, cannot be parsed as JSON, or holds
            no chapter object. This now also covers a top-level list, string
            or number, which previously escaped as ``TypeError``.
    """
    cleaned = raw_text.strip()
    if cleaned.startswith("```json"):
        cleaned = cleaned[7:]
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    cleaned = cleaned.strip()
    if not cleaned:
        raise ValueError("LLM返回空内容")

    candidate_payloads = [cleaned]
    repaired = self._repair_llm_json(cleaned)
    if repaired != cleaned:
        candidate_payloads.append(repaired)

    try:
        data = self._parse_with_candidates(candidate_payloads)
    except json.JSONDecodeError as exc:
        # Last resort: the optional repair helper, tried on its own.
        repaired_payload = self._attempt_json_repair(cleaned)
        if not repaired_payload:
            raise ValueError(f"章节JSON解析失败: {exc}") from exc
        candidate_payloads.append(repaired_payload)
        try:
            data = self._parse_with_candidates(candidate_payloads[-1:])
        except json.JSONDecodeError as inner_exc:
            raise ValueError(f"章节JSON解析失败: {inner_exc}") from inner_exc

    required_keys = ("chapterId", "title", "blocks")

    # Check the type first: valid JSON may be a list, string or number, and
    # the old `"chapter" in data` / `data["chapter"]` pair raised TypeError
    # on those.
    if isinstance(data, dict):
        if isinstance(data.get("chapter"), dict):
            return data["chapter"]
        if all(key in data for key in required_keys):
            return data
    elif isinstance(data, list):
        for item in data:
            if not isinstance(item, dict):
                continue
            if isinstance(item.get("chapter"), dict):
                return item["chapter"]
            if all(key in item for key in required_keys):
                return item
    raise ValueError("章节JSON缺少chapter字段")
|
||||
|
||||
def _repair_llm_json(self, text: str) -> str:
    """Apply the local repair passes for common LLM JSON mistakes.

    The passes run in this order: drop a stray ``=`` after ``":``, escape
    raw control characters inside strings, balance brackets, insert missing
    commas between adjacent objects/arrays. A warning is logged for each
    pass that reports a change. The original *text* is returned when no
    pass reports one.
    """
    working = self._COLON_EQUALS_PATTERN.sub(r"\1", text)
    changed_any = working != text
    if changed_any:
        logger.warning("检测到章节JSON中的\":=\"字符,已自动移除多余的'='号")

    repair_passes = (
        (
            self._escape_in_string_controls,
            "检测到章节JSON字符串中存在未转义的控制字符,已自动转换为转义序列",
        ),
        (
            self._balance_brackets,
            "检测到章节JSON括号不平衡,已自动补齐/剔除异常括号",
        ),
        (
            self._fix_missing_commas,
            "检测到章节JSON对象/数组之间缺少逗号,已自动补齐",
        ),
    )
    for repair, warning in repair_passes:
        working, changed = repair(working)
        if changed:
            logger.warning(warning)
            changed_any = True

    return working if changed_any else text
|
||||
|
||||
def _escape_in_string_controls(self, text: str) -> Tuple[str, bool]:
|
||||
"""
|
||||
将字符串字面量中的裸换行/制表符/控制字符替换为JSON合法的转义序列。
|
||||
"""
|
||||
if not text:
|
||||
return text, False
|
||||
|
||||
result: List[str] = []
|
||||
in_string = False
|
||||
escaped = False
|
||||
mutated = False
|
||||
control_map = {"\n": "\\n", "\r": "\\n", "\t": "\\t"}
|
||||
|
||||
for ch in text:
|
||||
if escaped:
|
||||
result.append(ch)
|
||||
escaped = False
|
||||
continue
|
||||
|
||||
if ch == "\\":
|
||||
result.append(ch)
|
||||
escaped = True
|
||||
continue
|
||||
|
||||
if ch == '"':
|
||||
result.append(ch)
|
||||
in_string = not in_string
|
||||
continue
|
||||
|
||||
if in_string and ch in control_map:
|
||||
result.append(control_map[ch])
|
||||
mutated = True
|
||||
continue
|
||||
|
||||
if in_string and ord(ch) < 0x20:
|
||||
result.append(f"\\u{ord(ch):04x}")
|
||||
mutated = True
|
||||
continue
|
||||
|
||||
result.append(ch)
|
||||
|
||||
return "".join(result), mutated
|
||||
|
||||
def _fix_missing_commas(self, text: str) -> Tuple[str, bool]:
|
||||
"""在对象/数组连续出现时自动补逗号"""
|
||||
if not text:
|
||||
return text, False
|
||||
|
||||
chars: List[str] = []
|
||||
mutated = False
|
||||
in_string = False
|
||||
escaped = False
|
||||
length = len(text)
|
||||
i = 0
|
||||
while i < length:
|
||||
ch = text[i]
|
||||
chars.append(ch)
|
||||
if escaped:
|
||||
escaped = False
|
||||
i += 1
|
||||
continue
|
||||
if ch == "\\":
|
||||
escaped = True
|
||||
i += 1
|
||||
continue
|
||||
if ch == '"':
|
||||
in_string = not in_string
|
||||
i += 1
|
||||
continue
|
||||
if not in_string and ch in "}]":
|
||||
j = i + 1
|
||||
while j < length and text[j] in " \t\r\n":
|
||||
j += 1
|
||||
if j < length:
|
||||
next_ch = text[j]
|
||||
if next_ch in "{[":
|
||||
chars.append(",")
|
||||
mutated = True
|
||||
i += 1
|
||||
return "".join(chars), mutated
|
||||
|
||||
def _balance_brackets(self, text: str) -> Tuple[str, bool]:
|
||||
"""尝试修复因LLM多写/少写括号导致的不平衡结构"""
|
||||
if not text:
|
||||
return text, False
|
||||
|
||||
result: List[str] = []
|
||||
stack: List[str] = []
|
||||
mutated = False
|
||||
in_string = False
|
||||
escaped = False
|
||||
|
||||
opener_map = {"{": "}", "[": "]"}
|
||||
|
||||
for ch in text:
|
||||
if escaped:
|
||||
result.append(ch)
|
||||
escaped = False
|
||||
continue
|
||||
|
||||
if ch == "\\":
|
||||
result.append(ch)
|
||||
escaped = True
|
||||
continue
|
||||
|
||||
if ch == '"':
|
||||
result.append(ch)
|
||||
in_string = not in_string
|
||||
continue
|
||||
|
||||
if in_string:
|
||||
result.append(ch)
|
||||
continue
|
||||
|
||||
if ch in "{[":
|
||||
stack.append(ch)
|
||||
result.append(ch)
|
||||
continue
|
||||
|
||||
if ch in "}]":
|
||||
if stack and ((ch == "}" and stack[-1] == "{") or (ch == "]" and stack[-1] == "[")):
|
||||
stack.pop()
|
||||
result.append(ch)
|
||||
else:
|
||||
mutated = True
|
||||
continue
|
||||
|
||||
result.append(ch)
|
||||
|
||||
while stack:
|
||||
opener = stack.pop()
|
||||
result.append(opener_map[opener])
|
||||
mutated = True
|
||||
|
||||
return "".join(result), mutated
|
||||
|
||||
def _attempt_json_repair(self, text: str) -> str | None:
    """Try the optional ``json_repair`` hook on text the local passes could not fix.

    Args:
        text: JSON text that failed to parse.

    Returns:
        The repaired text, or ``None`` when the module-level hook
        ``_json_repair_fn`` is unset, raised, or returned the input unchanged.
    """
    repair = _json_repair_fn
    if not repair:
        return None

    try:
        candidate = repair(text)
    except Exception as err:  # pragma: no cover - library failure
        logger.warning(f"json_repair 修复章节JSON失败: {err}")
        return None
    else:
        if candidate != text:
            logger.warning("已使用json_repair自动修复章节JSON语法")
            return candidate

    # The hook ran but changed nothing, so there is nothing new to parse.
    return None
|
||||
|
||||
def _sanitize_chapter_blocks(self, chapter: Dict[str, Any]):
|
||||
"""修正常见的结构性错误(例如list.items嵌套过深)"""
|
||||
|
||||
def walk(blocks: List[Dict[str, Any]] | None):
|
||||
if not isinstance(blocks, list):
|
||||
return
|
||||
for block in blocks:
|
||||
if not isinstance(block, dict):
|
||||
continue
|
||||
self._ensure_block_type(block)
|
||||
block_type = block.get("type")
|
||||
if block_type == "list":
|
||||
items = block.get("items")
|
||||
normalized = self._normalize_list_items(items)
|
||||
if normalized:
|
||||
block["items"] = normalized
|
||||
for entry in block.get("items", []):
|
||||
walk(entry)
|
||||
elif block_type in {"callout", "blockquote"}:
|
||||
walk(block.get("blocks"))
|
||||
elif block_type == "table":
|
||||
for row in block.get("rows", []):
|
||||
cells = row.get("cells") or []
|
||||
for cell in cells:
|
||||
walk(cell.get("blocks"))
|
||||
elif block_type == "widget":
|
||||
self._normalize_widget_block(block)
|
||||
else:
|
||||
nested = block.get("blocks")
|
||||
if isinstance(nested, list):
|
||||
walk(nested)
|
||||
|
||||
walk(chapter.get("blocks"))
|
||||
|
||||
def _normalize_list_items(self, items: Any) -> List[List[Dict[str, Any]]]:
|
||||
"""确保list block的items为[[block, block], ...]结构"""
|
||||
if not isinstance(items, list):
|
||||
return []
|
||||
normalized: List[List[Dict[str, Any]]] = []
|
||||
for item in items:
|
||||
normalized.extend(self._coerce_list_item(item))
|
||||
return [entry for entry in normalized if entry]
|
||||
|
||||
def _coerce_list_item(self, item: Any) -> List[List[Dict[str, Any]]]:
|
||||
"""将各种嵌套写法统一折算为区块数组"""
|
||||
result: List[List[Dict[str, Any]]] = []
|
||||
if isinstance(item, dict):
|
||||
self._ensure_block_type(item)
|
||||
result.append([item])
|
||||
return result
|
||||
if isinstance(item, list):
|
||||
dicts = [elem for elem in item if isinstance(elem, dict)]
|
||||
if dicts:
|
||||
for elem in dicts:
|
||||
self._ensure_block_type(elem)
|
||||
result.append(dicts)
|
||||
for elem in item:
|
||||
if isinstance(elem, list):
|
||||
result.extend(self._coerce_list_item(elem))
|
||||
elif isinstance(elem, dict):
|
||||
continue
|
||||
elif isinstance(elem, str):
|
||||
result.append([self._as_paragraph_block(elem)])
|
||||
elif isinstance(elem, (int, float)):
|
||||
result.append([self._as_paragraph_block(str(elem))])
|
||||
elif isinstance(item, str):
|
||||
result.append([self._as_paragraph_block(item)])
|
||||
elif isinstance(item, (int, float)):
|
||||
result.append([self._as_paragraph_block(str(item))])
|
||||
return result
|
||||
|
||||
def _normalize_widget_block(self, block: Dict[str, Any]):
|
||||
"""确保widget具备顶层data或dataRef"""
|
||||
has_data = block.get("data") is not None or block.get("dataRef") is not None
|
||||
if has_data:
|
||||
return
|
||||
props = block.get("props")
|
||||
if isinstance(props, dict) and "data" in props:
|
||||
block["data"] = props.pop("data")
|
||||
return
|
||||
block["data"] = {"labels": [], "datasets": []}
|
||||
|
||||
def _ensure_block_type(self, block: Dict[str, Any]):
|
||||
"""若block缺少合法type,则降级为paragraph"""
|
||||
block_type = block.get("type")
|
||||
if isinstance(block_type, str) and block_type in ALLOWED_BLOCK_TYPES:
|
||||
return
|
||||
text = ""
|
||||
for key in ("text", "content", "title"):
|
||||
value = block.get(key)
|
||||
if isinstance(value, str) and value.strip():
|
||||
text = value.strip()
|
||||
break
|
||||
if not text:
|
||||
try:
|
||||
text = json.dumps(block, ensure_ascii=False)
|
||||
except Exception:
|
||||
text = str(block)
|
||||
block.clear()
|
||||
block["type"] = "paragraph"
|
||||
block["inlines"] = [{"text": text}]
|
||||
|
||||
@staticmethod
|
||||
def _as_paragraph_block(text: str) -> Dict[str, Any]:
|
||||
return {
|
||||
"type": "paragraph",
|
||||
"inlines": [{"text": text or ""}],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _parse_with_candidates(payloads: List[str]) -> Dict[str, Any]:
|
||||
"""按顺序尝试多个payload,直到解析成功"""
|
||||
last_exc: json.JSONDecodeError | None = None
|
||||
for payload in payloads:
|
||||
try:
|
||||
return json.loads(payload)
|
||||
except json.JSONDecodeError as exc:
|
||||
last_exc = exc
|
||||
assert last_exc is not None
|
||||
raise last_exc
|
||||
|
||||
|
||||
__all__ = ["ChapterGenerationNode"]
|
||||
@@ -0,0 +1,81 @@
|
||||
"""
|
||||
根据模板目录与多源报告,生成整本报告的标题/目录/主题设计。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from ..core import TemplateSection
|
||||
from ..prompts import (
|
||||
SYSTEM_PROMPT_DOCUMENT_LAYOUT,
|
||||
build_document_layout_prompt,
|
||||
)
|
||||
from .base_node import BaseNode
|
||||
|
||||
|
||||
class DocumentLayoutNode(BaseNode):
    """Node that asks the LLM for the report-wide title, table of contents and hero design."""

    def __init__(self, llm_client):
        """Register the node with ``BaseNode`` under the name ``DocumentLayoutNode``.

        Args:
            llm_client: Client object used later by :meth:`run`.
        """
        super().__init__(llm_client, "DocumentLayoutNode")

    def run(
        self,
        sections: List[TemplateSection],
        template_markdown: str,
        reports: Dict[str, str],
        forum_logs: str,
        query: str,
        template_overview: Dict[str, Any] | None = None,
    ) -> Dict[str, Any]:
        """Build the layout prompt, call the LLM and return the parsed design.

        Args:
            sections: Template sections; each is serialised with ``to_dict()``.
            template_markdown: Raw template text, sent as ``template.raw``.
            reports: Engine reports, forwarded unchanged.
            forum_logs: Forum log text, forwarded unchanged.
            query: The user's original query.
            template_overview: Optional precomputed overview. When falsy, one
                is derived from ``sections`` (first section title plus all
                section dicts).

        Returns:
            The design dict decoded from the LLM response.

        Raises:
            ValueError: If the response is empty, is not valid JSON, or is
                not a JSON object.
        """
        if not template_overview:
            template_overview = {
                "title": sections[0].title if sections else "",
                "chapters": [section.to_dict() for section in sections],
            }

        # The raw template, its sliced structure and all source material are
        # sent together so the model sees both hierarchy and content.
        payload = {
            "query": query,
            "template": {
                "raw": template_markdown,
                "sections": [section.to_dict() for section in sections],
            },
            "templateOverview": template_overview,
            "reports": reports,
            "forumLogs": forum_logs,
        }

        user_message = build_document_layout_prompt(payload)
        response = self.llm_client.stream_invoke_to_string(
            SYSTEM_PROMPT_DOCUMENT_LAYOUT,
            user_message,
            temperature=0.3,
            top_p=0.9,
        )
        design = self._parse_response(response)
        logger.info("文档标题/目录设计已生成")
        return design

    def _parse_response(self, raw: str) -> Dict[str, Any]:
        """Decode the LLM's JSON reply, stripping an optional Markdown code fence.

        Args:
            raw: Raw LLM output; ``None`` is treated as empty.

        Returns:
            The decoded JSON object.

        Raises:
            ValueError: If the reply is empty, is not valid JSON, or decodes
                to something other than a JSON object.
        """
        cleaned = (raw or "").strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[7:]
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()
        if not cleaned:
            raise ValueError("文档设计LLM返回空内容")

        try:
            design = json.loads(cleaned)
        except json.JSONDecodeError as exc:
            raise ValueError(f"文档设计JSON解析失败: {exc}") from exc

        # Fail here with a clear message instead of letting a list or scalar
        # reach code that expects a dict.
        if not isinstance(design, dict):
            raise ValueError(
                f"文档设计JSON解析失败: 期望JSON对象, 实际为{type(design).__name__}"
            )
        return design
|
||||
|
||||
|
||||
__all__ = ["DocumentLayoutNode"]
|
||||
@@ -1,254 +0,0 @@
|
||||
"""
|
||||
HTML生成节点
|
||||
将整合后的内容转换为美观的HTML报告
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
from loguru import logger
|
||||
|
||||
from .base_node import StateMutationNode
|
||||
from ..llms.base import LLMClient
|
||||
from ..state.state import ReportState
|
||||
from ..prompts import SYSTEM_PROMPT_HTML_GENERATION
|
||||
# 不再需要text_processing依赖
|
||||
|
||||
|
||||
class HTMLGenerationNode(StateMutationNode):
    """Node that turns the merged engine reports into a complete HTML report."""

    def __init__(self, llm_client: LLMClient):
        """Register the node with its base class under the name ``HTMLGenerationNode``.

        Args:
            llm_client: LLM client used to generate the HTML.
        """
        super().__init__(llm_client, "HTMLGenerationNode")

    def run(self, input_data: Dict[str, Any], **kwargs) -> str:
        """Generate the HTML report, falling back to a static page on failure.

        Args:
            input_data: Report inputs. The keys read are ``query``,
                ``query_engine_report``, ``media_engine_report``,
                ``insight_engine_report``, ``forum_logs`` and
                ``selected_template``; missing keys default to ``''``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The HTML produced by the LLM, or the fallback HTML when any
            exception is raised while generating it.
        """
        logger.info("开始生成HTML报告...")

        try:
            llm_input = {
                "query": input_data.get('query', ''),
                "query_engine_report": input_data.get('query_engine_report', ''),
                "media_engine_report": input_data.get('media_engine_report', ''),
                "insight_engine_report": input_data.get('insight_engine_report', ''),
                "forum_logs": input_data.get('forum_logs', ''),
                "selected_template": input_data.get('selected_template', ''),
            }

            # The whole input is handed to the LLM as one JSON document.
            message = json.dumps(llm_input, ensure_ascii=False, indent=2)
            response = self.llm_client.stream_invoke_to_string(
                SYSTEM_PROMPT_HTML_GENERATION, message
            )
            processed_response = self.process_output(response)

            logger.info("HTML报告生成完成")
            return processed_response

        except Exception as e:
            # Deliberate best-effort: any failure still yields a report.
            logger.exception(f"HTML生成失败: {str(e)}")
            return self._generate_fallback_html(input_data)

    def mutate_state(self, input_data: Dict[str, Any], state: ReportState, **kwargs) -> ReportState:
        """Generate the HTML, store it on ``state`` and mark the state completed.

        Args:
            input_data: Inputs forwarded to :meth:`run`.
            state: Report state to update.
            **kwargs: Extra arguments forwarded to :meth:`run`.

        Returns:
            The same ``state`` object after the update.
        """
        html_content = self.run(input_data, **kwargs)

        state.html_content = html_content
        state.mark_completed()

        return state

    def process_output(self, output: str) -> str:
        """Extract the HTML from raw LLM output by removing a Markdown code fence.

        Args:
            output: Raw LLM output.

        Returns:
            The stripped HTML. The raw ``output`` is returned instead when
            stripping leaves nothing or processing raises.
        """
        try:
            logger.info(f"处理LLM原始输出,长度: {len(output)} 字符")

            html_content = output.strip()

            # Remove a Markdown code fence if one is present.
            if html_content.startswith('```html'):
                html_content = html_content[7:]
                if html_content.endswith('```'):
                    html_content = html_content[:-3]
            elif html_content.startswith('```') and html_content.endswith('```'):
                html_content = html_content[3:-3]

            html_content = html_content.strip()

            if not html_content:
                logger.info("处理后内容为空,返回原始输出")
                html_content = output

            logger.info(f"HTML处理完成,最终长度: {len(html_content)} 字符")
            return html_content

        except Exception as e:
            logger.exception(f"处理HTML输出失败: {str(e)},返回原始输出")
            return output

    def _generate_fallback_html(self, input_data: Dict[str, Any]) -> str:
        """Build a static HTML report, used when LLM generation fails.

        The query and report bodies come from users and upstream engines, so
        they are HTML-escaped before being placed in the page.

        Args:
            input_data: Same mapping as for :meth:`run`.

        Returns:
            A complete HTML document as a string.
        """
        from html import escape  # local import keeps this block self-contained

        logger.info("使用备用HTML生成方法")

        query = escape(str(input_data.get('query', '智能舆情分析报告')))
        generation_time = datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")

        def render_section(title: str, body: Any) -> str:
            """Return one titled <pre> section, or '' when there is no body."""
            if not body:
                return ''
            return (
                f'<h2>{title}</h2>'
                f'<div class="section"><pre>{escape(str(body))}</pre></div>'
            )

        query_section = render_section(
            'QueryEngine分析结果', input_data.get('query_engine_report', ''))
        media_section = render_section(
            'MediaEngine分析结果', input_data.get('media_engine_report', ''))
        insight_section = render_section(
            'InsightEngine分析结果', input_data.get('insight_engine_report', ''))
        forum_section = render_section(
            '论坛监控数据', input_data.get('forum_logs', ''))

        html_content = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{query} - 智能舆情分析报告</title>
    <style>
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background: #f5f5f5;
        }}
        .container {{
            background: white;
            padding: 40px;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }}
        h1 {{
            color: #2c3e50;
            border-bottom: 3px solid #3498db;
            padding-bottom: 10px;
        }}
        h2 {{
            color: #34495e;
            margin-top: 30px;
            margin-bottom: 15px;
        }}
        .section {{
            margin-bottom: 30px;
            padding: 20px;
            border-left: 4px solid #3498db;
            background: #f8f9fa;
        }}
        .meta {{
            background: #e9ecef;
            padding: 15px;
            border-radius: 5px;
            margin-bottom: 20px;
        }}
        .footer {{
            margin-top: 40px;
            padding-top: 20px;
            border-top: 1px solid #eee;
            text-align: center;
            color: #666;
        }}
        pre {{
            background: #f4f4f4;
            padding: 15px;
            border-radius: 5px;
            overflow-x: auto;
            white-space: pre-wrap;
        }}
    </style>
</head>
<body>
    <div class="container">
        <h1>{query}</h1>

        <div class="meta">
            <strong>报告生成时间:</strong> {generation_time}<br>
            <strong>数据来源:</strong> QueryEngine、MediaEngine、InsightEngine、ForumEngine<br>
            <strong>报告类型:</strong> 综合舆情分析报告
        </div>

        <h2>执行摘要</h2>
        <div class="section">
            本报告整合了多个分析引擎的研究结果,为您提供全面的舆情分析洞察。
            通过对查询主题"{query}"的深度分析,我们从多个维度展现了当前的舆情态势。
        </div>

        {query_section}

        {media_section}

        {insight_section}

        {forum_section}

        <h2>综合结论</h2>
        <div class="section">
            基于多个分析引擎的综合研究,我们对"{query}"主题进行了全面分析。
            各引擎从不同角度提供了深入洞察,为决策提供了重要参考。
        </div>

        <div class="footer">
            <p>本报告由智能舆情分析平台自动生成</p>
            <p>ReportEngine v1.0 | 生成时间: {generation_time}</p>
        </div>
    </div>
</body>
</html>"""

        return html_content
|
||||
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
"""
|
||||
章节篇幅规划节点。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from ..core import TemplateSection
|
||||
from ..prompts import (
|
||||
SYSTEM_PROMPT_WORD_BUDGET,
|
||||
build_word_budget_prompt,
|
||||
)
|
||||
from .base_node import BaseNode
|
||||
|
||||
|
||||
class WordBudgetNode(BaseNode):
    """Node that asks the LLM to plan word counts and emphasis per chapter."""

    def __init__(self, llm_client):
        """Register the node with ``BaseNode`` under the name ``WordBudgetNode``.

        Args:
            llm_client: Client object used later by :meth:`run`.
        """
        super().__init__(llm_client, "WordBudgetNode")

    def run(
        self,
        sections: List[TemplateSection],
        design: Dict[str, Any],
        reports: Dict[str, str],
        forum_logs: str,
        query: str,
        template_overview: Dict[str, Any] | None = None,
    ) -> Dict[str, Any]:
        """Build the word-budget prompt, call the LLM and return the parsed plan.

        Args:
            sections: Template sections; each is serialised with ``to_dict()``.
            design: Output of the layout node, forwarded unchanged.
            reports: Engine reports, forwarded unchanged.
            forum_logs: Forum log text, forwarded unchanged.
            query: The user's original query.
            template_overview: Optional precomputed overview. When falsy, one
                is derived from ``sections`` (first section title plus all
                section dicts).

        Returns:
            The plan dict decoded from the LLM response.

        Raises:
            ValueError: If the response is empty, is not valid JSON, or is
                not a JSON object.
        """
        if not template_overview:
            template_overview = {
                "title": sections[0].title if sections else "",
                "chapters": [section.to_dict() for section in sections],
            }

        # The layout design is included alongside the chapter skeleton so
        # the model can weigh visual prominence when allocating length.
        payload = {
            "query": query,
            "design": design,
            "sections": [section.to_dict() for section in sections],
            "templateOverview": template_overview,
            "reports": reports,
            "forumLogs": forum_logs,
        }

        user = build_word_budget_prompt(payload)
        response = self.llm_client.stream_invoke_to_string(
            SYSTEM_PROMPT_WORD_BUDGET,
            user,
            temperature=0.25,
            top_p=0.85,
        )
        plan = self._parse_response(response)
        logger.info("章节字数规划已生成")
        return plan

    def _parse_response(self, raw: str) -> Dict[str, Any]:
        """Decode the LLM's JSON reply, stripping an optional Markdown code fence.

        Args:
            raw: Raw LLM output; ``None`` is treated as empty.

        Returns:
            The decoded JSON object.

        Raises:
            ValueError: If the reply is empty, is not valid JSON, or decodes
                to something other than a JSON object.
        """
        cleaned = (raw or "").strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[7:]
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()
        if not cleaned:
            raise ValueError("篇幅规划LLM返回空内容")

        try:
            plan = json.loads(cleaned)
        except json.JSONDecodeError as exc:
            raise ValueError(f"篇幅规划JSON解析失败: {exc}") from exc

        # Fail here with a clear message instead of letting a list or scalar
        # reach code that expects a dict.
        if not isinstance(plan, dict):
            raise ValueError(
                f"篇幅规划JSON解析失败: 期望JSON对象, 实际为{type(plan).__name__}"
            )
        return plan
|
||||
|
||||
|
||||
__all__ = ["WordBudgetNode"]
|
||||
@@ -6,13 +6,27 @@ Report Engine提示词模块
|
||||
from .prompts import (
|
||||
SYSTEM_PROMPT_TEMPLATE_SELECTION,
|
||||
SYSTEM_PROMPT_HTML_GENERATION,
|
||||
SYSTEM_PROMPT_CHAPTER_JSON,
|
||||
SYSTEM_PROMPT_DOCUMENT_LAYOUT,
|
||||
SYSTEM_PROMPT_WORD_BUDGET,
|
||||
output_schema_template_selection,
|
||||
input_schema_html_generation
|
||||
input_schema_html_generation,
|
||||
chapter_generation_input_schema,
|
||||
build_chapter_user_prompt,
|
||||
build_document_layout_prompt,
|
||||
build_word_budget_prompt,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"SYSTEM_PROMPT_TEMPLATE_SELECTION",
|
||||
"SYSTEM_PROMPT_HTML_GENERATION",
|
||||
"SYSTEM_PROMPT_HTML_GENERATION",
|
||||
"SYSTEM_PROMPT_CHAPTER_JSON",
|
||||
"SYSTEM_PROMPT_DOCUMENT_LAYOUT",
|
||||
"SYSTEM_PROMPT_WORD_BUDGET",
|
||||
"output_schema_template_selection",
|
||||
"input_schema_html_generation"
|
||||
"input_schema_html_generation",
|
||||
"chapter_generation_input_schema",
|
||||
"build_chapter_user_prompt",
|
||||
"build_document_layout_prompt",
|
||||
"build_word_budget_prompt",
|
||||
]
|
||||
|
||||
@@ -5,6 +5,12 @@ Report Engine 的所有提示词定义
|
||||
|
||||
import json
|
||||
|
||||
from ..ir import (
|
||||
ALLOWED_BLOCK_TYPES,
|
||||
CHAPTER_JSON_SCHEMA_TEXT,
|
||||
IR_VERSION,
|
||||
)
|
||||
|
||||
# ===== JSON Schema 定义 =====
|
||||
|
||||
# 模板选择输出Schema
|
||||
@@ -30,6 +36,58 @@ input_schema_html_generation = {
|
||||
}
|
||||
}
|
||||
|
||||
# Input schema for per-chapter JSON generation. It documents the payload
# fields for the prompt.
chapter_generation_input_schema = {
    "type": "object",
    "properties": {
        # The section being generated; title, slug and order are mandatory.
        "section": {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "slug": {"type": "string"},
                "order": {"type": "number"},
                "number": {"type": "string"},
                "outline": {"type": "array", "items": {"type": "string"}}
            },
            "required": ["title", "slug", "order"]
        },
        # Report-wide context shared by every chapter.
        "globalContext": {
            "type": "object",
            "properties": {
                "query": {"type": "string"},
                "templateName": {"type": "string"},
                "themeTokens": {"type": "object"},
                "styleDirectives": {"type": "object"}
            }
        },
        # Source reports, one string per engine.
        "reports": {
            "type": "object",
            "properties": {
                "query_engine": {"type": "string"},
                "media_engine": {"type": "string"},
                "insight_engine": {"type": "string"}
            }
        },
        "forumLogs": {"type": "string"},
        "dataBundles": {
            "type": "array",
            "items": {"type": "object"}
        },
        # Generation constraints: language, token cap, permitted block types.
        "constraints": {
            "type": "object",
            "properties": {
                "language": {"type": "string"},
                "maxTokens": {"type": "number"},
                "allowedBlocks": {
                    "type": "array",
                    "items": {"type": "string"}
                }
            }
        }
    },
    "required": ["section", "globalContext", "reports"]
}
|
||||
|
||||
# HTML报告生成输出Schema - 已简化,不再使用JSON格式
|
||||
# output_schema_html_generation = {
|
||||
# "type": "object",
|
||||
@@ -39,6 +97,96 @@ input_schema_html_generation = {
|
||||
# "required": ["html_content"]
|
||||
# }
|
||||
|
||||
# Output schema for the document title / table-of-contents design. It
# describes the fields expected from DocumentLayoutNode and is embedded in
# SYSTEM_PROMPT_DOCUMENT_LAYOUT.
document_layout_output_schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "subtitle": {"type": "string"},
        "tagline": {"type": "string"},
        "tocTitle": {"type": "string"},
        # Hero area: summary, highlights, KPI cards and action hints.
        "hero": {
            "type": "object",
            "properties": {
                "summary": {"type": "string"},
                "highlights": {"type": "array", "items": {"type": "string"}},
                "kpis": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "label": {"type": "string"},
                            "value": {"type": "string"},
                            "delta": {"type": "string"},
                            "tone": {"type": "string", "enum": ["up", "down", "neutral"]},
                        },
                        "required": ["label", "value"],
                    },
                },
                "actions": {"type": "array", "items": {"type": "string"}},
            },
        },
        "themeTokens": {"type": "object"},
        # One table-of-contents entry per chapter.
        "tocPlan": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "chapterId": {"type": "string"},
                    "anchor": {"type": "string"},
                    "display": {"type": "string"},
                    "description": {"type": "string"},
                },
                "required": ["chapterId", "display"],
            },
        },
        "layoutNotes": {"type": "array", "items": {"type": "string"}},
    },
    "required": ["title", "tocPlan"],
}
|
||||
|
||||
# Output schema for chapter word-count planning. It describes the structure
# expected from WordBudgetNode and is embedded in SYSTEM_PROMPT_WORD_BUDGET.
word_budget_output_schema = {
    "type": "object",
    "properties": {
        "totalWords": {"type": "number"},
        "tolerance": {"type": "number"},
        "globalGuidelines": {"type": "array", "items": {"type": "string"}},
        # Per-chapter budget, optionally broken down by sub-section.
        "chapters": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "chapterId": {"type": "string"},
                    "title": {"type": "string"},
                    "targetWords": {"type": "number"},
                    "minWords": {"type": "number"},
                    "maxWords": {"type": "number"},
                    "emphasis": {"type": "array", "items": {"type": "string"}},
                    "rationale": {"type": "string"},
                    "sections": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "title": {"type": "string"},
                                "anchor": {"type": "string"},
                                "targetWords": {"type": "number"},
                                "minWords": {"type": "number"},
                                "maxWords": {"type": "number"},
                                "notes": {"type": "string"},
                            },
                            "required": ["title", "targetWords"],
                        },
                    },
                },
                "required": ["chapterId", "targetWords"],
            },
        },
    },
    "required": ["totalWords", "chapters"],
}
|
||||
|
||||
# ===== 系统提示词定义 =====
|
||||
|
||||
# 模板选择的系统提示词
|
||||
@@ -133,3 +281,93 @@ SYSTEM_PROMPT_HTML_GENERATION = f"""
|
||||
|
||||
**重要:直接返回完整的HTML代码,不要包含任何解释、说明或其他文本。只返回HTML代码本身。**
|
||||
"""
|
||||
|
||||
# System prompt for per-chapter JSON generation.
# This is an f-string evaluated at import time: it interpolates IR_VERSION,
# the comma-joined ALLOWED_BLOCK_TYPES and CHAPTER_JSON_SCHEMA_TEXT. Doubled
# braces ({{ }}) are literal braces in the rendered prompt.
SYSTEM_PROMPT_CHAPTER_JSON = f"""
你是Report Engine的“章节装配工厂”,负责把不同章节的素材铣削成
符合《可执行JSON契约(IR)》的章节JSON。稍后我会提供单个章节要点、
全局数据与风格指令,你需要:
1. 完全遵循IR版本 {IR_VERSION} 的结构,严禁输出HTML或Markdown。
2. 仅使用以下Block类型:{', '.join(ALLOWED_BLOCK_TYPES)};其中图表用block.type=widget并填充Chart.js配置。
3. 所有段落都放入paragraph.inlines,混排样式通过marks表示(bold/italic/color/link等)。
4. 所有heading必须包含anchor,锚点与编号保持模板一致,比如section-2-1。
5. 表格需给出rows/cells/align,KPI卡请使用kpiGrid,分割线用hr。
6. 如需引用图表/交互组件,统一用widgetType表示(例如chart.js/line、chart.js/doughnut)。
7. 鼓励结合outline中列出的子标题,生成多层heading与细粒度内容,同时可补充callout、blockquote等。
8. 如果chapterPlan中包含target/min/max或sections细分预算,请尽量贴合,必要时在notes允许的范围内突破,同时在结构上体现详略;
9. 一级标题需使用中文数字(“一、二、三”),二级标题使用阿拉伯数字(“1.1、1.2”),heading.text中直接写好编号,与outline顺序对应;
10. 严禁输出外部图片/AI生图链接,仅可使用Chart.js图表、表格、色块、callout等HTML原生组件;如需视觉辅助请改为文字描述或数据表;
11. 段落混排需通过marks表达粗体、斜体、下划线、颜色等样式,禁止残留Markdown语法(如**text**);
12. 行间公式用block.type="math"并填入math.latex,行内公式在paragraph.inlines里将文本设为Latex并加上marks.type="math",渲染层会用MathJax处理;
13. widget配色需与CSS变量兼容,不要硬编码背景色或文字色,legend/ticks由渲染层控制;
14. 善用callout、kpiGrid、表格、widget等提升版面丰富度,但必须遵守模板章节范围。
15. 输出前务必自检JSON语法:禁止出现`{{}}{{`或`][`相连缺少逗号、列表项嵌套超过一层、未闭合的括号或未转义换行,`list` block的items必须是`[[block,...], ...]`结构,若无法满足则返回错误提示而不是输出不合法JSON。
16. 所有widget块必须在顶层提供`data`或`dataRef`(可将props中的`data`上移),确保Chart.js能够直接渲染;缺失数据时宁可输出表格或段落,绝不留空。
17. 任何block都必须声明合法`type`(heading/paragraph/list/...);若需要普通文本请使用`paragraph`并给出`inlines`,禁止返回`type:null`或未知值。

<CHAPTER JSON SCHEMA>
{CHAPTER_JSON_SCHEMA_TEXT}
</CHAPTER JSON SCHEMA>

输出格式:
{{"chapter": {{...遵循上述Schema的章节JSON...}}}}

严禁添加除JSON以外的任何文本或注释。
"""
|
||||
|
||||
# System prompt for the document title / table of contents / theme design.
# This is an f-string evaluated at import time: it embeds
# document_layout_output_schema serialised as indented JSON.
SYSTEM_PROMPT_DOCUMENT_LAYOUT = f"""
你是报告首席设计官,需要结合模板大纲与三个分析引擎的内容,为整本报告确定最终的标题、导语区、目录样式与美学要素。

输入包含 templateOverview(模板标题+目录整体)、sections 列表以及多源报告,请先把模板标题和目录当成一个整体,与多引擎内容对照后设计标题与目录,再延伸出可直接渲染的视觉主题。你的输出会被独立存储以便后续拼接,请确保字段齐备。

目标:
1. 生成具有中文叙事风格的 title/subtitle/tagline,并确保可直接放在封面中央,文案中需自然提到“文章总览”;
2. 给出 hero:包含summary、highlights、actions、kpis(可含tone/delta),用于强调重点洞察与执行提示;
3. 输出 tocPlan,一级目录固定用中文数字(“一、二、三”),二级目录用“1.1/1.2”,可在description里说明详略;如需定制目录标题,请填写 tocTitle;
4. 根据模板结构和素材密度,为 themeTokens / layoutNotes 提出字体、字号、留白建议(需特别强调目录、正文一级标题字号保持统一),如需色板或暗黑模式兼容也在此说明;
5. 严禁要求外部图片或AI生图,推荐Chart.js图表、表格、色块、KPI卡等可直接渲染的原生组件;
6. 不随意增删章节,仅优化命名或描述;若有排版或章节合并提示,请放入 layoutNotes,渲染层会严格遵循。

输出必须满足下述JSON Schema:
<OUTPUT JSON SCHEMA>
{json.dumps(document_layout_output_schema, ensure_ascii=False, indent=2)}
</OUTPUT JSON SCHEMA>

只返回JSON,勿附加额外文本。
"""
|
||||
|
||||
# System prompt for word-budget planning.
# This is an f-string evaluated at import time: it embeds
# word_budget_output_schema serialised as indented JSON.
SYSTEM_PROMPT_WORD_BUDGET = f"""
你是报告篇幅规划官,会拿到 templateOverview(模板标题+目录)、最新的标题/目录设计稿与全部素材,需要给每章及其子主题分配字数。

要求:
1. 总字数约40000字,可上下浮动5%,并给出 globalGuidelines 说明整体详略策略;
2. chapters 中每章需包含 targetWords/min/max、需要额外展开的 emphasis、sections 数组(为该章各小节/提纲分配字数与注意事项,可注明“允许在必要时超出10%补充案例”等);
3. rationale 必须解释该章篇幅配置理由,引用模板/素材中的关键信息;
4. 章节编号遵循一级中文数字、二级阿拉伯数字,便于后续统一字号;
5. 结果写成JSON并满足下述Schema,仅用于内部存储与章节生成,不直接输出给读者。

<OUTPUT JSON SCHEMA>
{json.dumps(word_budget_output_schema, ensure_ascii=False, indent=2)}
</OUTPUT JSON SCHEMA>

只返回JSON,无额外说明。
"""
|
||||
|
||||
|
||||
def build_chapter_user_prompt(payload: dict) -> str:
    """Serialise the chapter context into the user message for the LLM.

    Args:
        payload: JSON-serialisable chapter context.

    Returns:
        ``payload`` as 2-space-indented JSON with non-ASCII characters kept
        verbatim.
    """
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    return serialized
|
||||
|
||||
|
||||
def build_document_layout_prompt(payload: dict) -> str:
    """Serialise the document-design context into a JSON string.

    Args:
        payload: JSON-serialisable layout context.

    Returns:
        ``payload`` as 2-space-indented JSON with non-ASCII characters kept
        verbatim.
    """
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    return serialized
|
||||
|
||||
|
||||
def build_word_budget_prompt(payload: dict) -> str:
    """Serialise the word-budget planning input into a JSON string for the LLM.

    Args:
        payload: JSON-serialisable planning context.

    Returns:
        ``payload`` as 2-space-indented JSON with non-ASCII characters kept
        verbatim.
    """
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    return serialized
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
Report Engine渲染器集合。
|
||||
"""
|
||||
|
||||
from .html_renderer import HTMLRenderer
|
||||
|
||||
__all__ = ["HTMLRenderer"]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -17,6 +17,14 @@ class Settings(BaseSettings):
|
||||
REPORT_ENGINE_PROVIDER: Optional[str] = Field(None, description="模型服务商,仅兼容保留")
|
||||
MAX_CONTENT_LENGTH: int = Field(200000, description="最大内容长度")
|
||||
OUTPUT_DIR: str = Field("final_reports", description="主输出目录")
|
||||
# 章节分块JSON会存储在该目录,便于溯源与断点续传
|
||||
CHAPTER_OUTPUT_DIR: str = Field(
|
||||
"final_reports/chapters", description="章节JSON缓存目录"
|
||||
)
|
||||
# 装订后的整本IR/manifest也会持久化,方便调试与审计
|
||||
DOCUMENT_IR_OUTPUT_DIR: str = Field(
|
||||
"final_reports/ir", description="整本IR/Manifest输出目录"
|
||||
)
|
||||
TEMPLATE_DIR: str = Field("ReportEngine/report_template", description="多模板目录")
|
||||
API_TIMEOUT: float = Field(900.0, description="单API超时时间(秒)")
|
||||
MAX_RETRY_DELAY: float = Field(180.0, description="最大重试间隔(秒)")
|
||||
@@ -41,6 +49,8 @@ def print_config(config: Settings):
|
||||
message += f"LLM Base URL: {config.REPORT_ENGINE_BASE_URL or '(默认)'}\n"
|
||||
message += f"最大内容长度: {config.MAX_CONTENT_LENGTH}\n"
|
||||
message += f"输出目录: {config.OUTPUT_DIR}\n"
|
||||
message += f"章节JSON目录: {config.CHAPTER_OUTPUT_DIR}\n"
|
||||
message += f"整本IR目录: {config.DOCUMENT_IR_OUTPUT_DIR}\n"
|
||||
message += f"模板目录: {config.TEMPLATE_DIR}\n"
|
||||
message += f"API 超时时间: {config.API_TIMEOUT} 秒\n"
|
||||
message += f"最大重试间隔: {config.MAX_RETRY_DELAY} 秒\n"
|
||||
|
||||
+1
-1
@@ -15,6 +15,7 @@ requests==2.31.0
|
||||
httpx==0.28.1
|
||||
aiofiles==23.2.1
|
||||
aiohttp>=3.8.0
|
||||
PySocks>=1.7.1
|
||||
|
||||
# ===== LLM接口 =====
|
||||
openai>=1.3.0
|
||||
@@ -32,7 +33,6 @@ jieba==0.42.1
|
||||
# ===== 数据库 =====
|
||||
pymysql==1.1.0
|
||||
aiomysql==0.2.0
|
||||
asyncmy==0.2.9
|
||||
aiosqlite==0.21.0
|
||||
redis>=4.6.0
|
||||
SQLAlchemy==2.0.35
|
||||
|
||||
Reference in New Issue
Block a user