Blocked HTML

This commit is contained in:
马一丁
2025-11-13 10:56:28 +08:00
parent 403dbbd296
commit 4846b1f758
20 changed files with 3660 additions and 367 deletions
+7 -3
View File
@@ -5,11 +5,15 @@ Report Engine节点处理模块
from .base_node import BaseNode, StateMutationNode
from .template_selection_node import TemplateSelectionNode
from .html_generation_node import HTMLGenerationNode
from .chapter_generation_node import ChapterGenerationNode
from .document_layout_node import DocumentLayoutNode
from .word_budget_node import WordBudgetNode
# Public API of the nodes package. Every entry needs its own trailing comma:
# adjacent string literals are silently concatenated by Python, which would
# export a single bogus name instead of two real ones.
__all__ = [
    "BaseNode",
    "StateMutationNode",
    "TemplateSelectionNode",
    "HTMLGenerationNode",
    "ChapterGenerationNode",
    "DocumentLayoutNode",
    "WordBudgetNode",
]
@@ -0,0 +1,506 @@
"""
章节级JSON生成节点。
每个章节依据Markdown模板切片独立调用LLM,流式写入Raw文件,
完成后校验并落盘标准化JSON。该节点只负责“拿到合规章节”。
"""
from __future__ import annotations
import json
from pathlib import Path
import re
from typing import Any, Dict, List, Tuple
from loguru import logger
from ..core import TemplateSection, ChapterStorage
from ..ir import ALLOWED_BLOCK_TYPES, IRValidator
from ..prompts import (
SYSTEM_PROMPT_CHAPTER_JSON,
build_chapter_user_prompt,
)
from .base_node import BaseNode
try:
from json_repair import repair_json as _json_repair_fn
except ImportError: # pragma: no cover - optional dependency
_json_repair_fn = None
class ChapterGenerationNode(BaseNode):
    """Generate a single chapter as JSON through the LLM and validate it.

    For one template section the node builds the prompt payload, streams the
    LLM answer into the chapter's raw file, repairs and parses the JSON,
    normalizes common structural mistakes, validates the result and persists
    it through the storage object.
    """

    # A stray '=' right after a key's colon (e.g. `"key": = 1`). Group 1 keeps
    # the `":` prefix so that only the '=' is removed on substitution.
    _COLON_EQUALS_PATTERN = re.compile(r'(":\s*)=')

    def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage):
        """Store the collaborators used by :meth:`run`.

        Args:
            llm_client: Client exposing ``stream_invoke``.
            validator: Object exposing ``validate_chapter``.
            storage: Object exposing ``begin_chapter``, ``capture_stream`` and
                ``persist_chapter``.
        """
        super().__init__(llm_client, "ChapterGenerationNode")
        self.validator = validator
        self.storage = storage

    def run(
        self,
        section: TemplateSection,
        context: Dict[str, Any],
        run_dir: Path,
        **kwargs,
    ) -> Dict[str, Any]:
        """Generate, validate and persist the JSON for one chapter.

        Args:
            section: Template section describing the chapter to write.
            context: Shared generation context (reports, layout, plans, ...).
            run_dir: Directory of the current run, handed to the storage.
            **kwargs: Optional ``temperature`` / ``top_p`` sampling overrides.

        Returns:
            The validated chapter dictionary.

        Raises:
            ValueError: If the LLM output cannot be parsed, or if validation
                fails (the chapter and its errors are persisted first).
        """
        chapter_meta = {
            "chapterId": section.chapter_id,
            "slug": section.slug,
            "title": section.title,
            "order": section.order,
        }
        chapter_dir = self.storage.begin_chapter(run_dir, chapter_meta)
        llm_payload = self._build_payload(section, context)
        user_message = build_chapter_user_prompt(llm_payload)
        raw_text = self._stream_llm(user_message, chapter_dir, **kwargs)
        chapter_json = self._parse_chapter(raw_text)

        # Fill in key fields the model may have omitted before validating.
        chapter_json.setdefault("chapterId", section.chapter_id)
        chapter_json.setdefault("anchor", section.slug)
        chapter_json.setdefault("title", section.title)
        chapter_json.setdefault("order", section.order)
        self._sanitize_chapter_blocks(chapter_json)

        valid, errors = self.validator.validate_chapter(chapter_json)
        # Persist even when invalid so the failing chapter can be inspected.
        self.storage.persist_chapter(
            run_dir,
            chapter_meta,
            chapter_json,
            errors=None if valid else errors,
        )
        if not valid:
            raise ValueError(
                f"{section.title} 章节JSON校验失败: {'; '.join(errors[:5])}"
            )
        return chapter_json

    # ====== Internal helpers ======

    def _build_payload(self, section: TemplateSection, context: Dict[str, Any]) -> Dict[str, Any]:
        """Assemble the dictionary that is rendered into the user prompt."""
        reports = context.get("reports", {})
        # Per-chapter length plan, keyed by chapter id; guides word counts
        # and emphasis. Stays falsy when no plan exists for this chapter.
        chapter_plan_map = context.get("chapter_directives", {})
        chapter_plan = chapter_plan_map.get(section.chapter_id) if chapter_plan_map else {}

        payload = {
            "section": {
                "chapterId": section.chapter_id,
                "title": section.title,
                "slug": section.slug,
                "order": section.order,
                "number": section.number,
                "outline": section.outline,
            },
            "globalContext": {
                "query": context.get("query"),
                "templateName": context.get("template_name"),
                "themeTokens": context.get("theme_tokens", {}),
                "styleDirectives": context.get("style_directives", {}),
                # The layout is passed along so chapters can stay visually
                # consistent with the document-level design.
                "layout": context.get("layout"),
                "templateOverview": context.get("template_overview", {}),
            },
            "reports": {
                "query_engine": reports.get("query_engine", ""),
                "media_engine": reports.get("media_engine", ""),
                "insight_engine": reports.get("insight_engine", ""),
            },
            "forumLogs": context.get("forum_logs", ""),
            "dataBundles": context.get("data_bundles", []),
            "constraints": {
                "language": "zh-CN",
                "maxTokens": context.get("max_tokens", 4096),
                "allowedBlocks": ALLOWED_BLOCK_TYPES,
                "styleHints": {
                    "expectWidgets": True,
                    "forceHeadingAnchors": True,
                    "allowInlineMix": True,
                },
            },
            "chapterPlan": chapter_plan,
            "wordPlan": context.get("word_plan"),
        }

        if chapter_plan:
            constraints = payload["constraints"]
            # (key in the plan, key in the constraints); only truthy values
            # are copied over.
            plan_to_constraint = (
                ("targetWords", "wordTarget"),
                ("minWords", "minWords"),
                ("maxWords", "maxWords"),
                ("emphasis", "emphasis"),
            )
            for plan_key, constraint_key in plan_to_constraint:
                if chapter_plan.get(plan_key):
                    constraints[constraint_key] = chapter_plan[plan_key]
            if chapter_plan.get("sections"):
                constraints["sectionBudgets"] = chapter_plan["sections"]
                payload["globalContext"]["sectionBudgets"] = chapter_plan["sections"]
        return payload

    def _stream_llm(self, user_message: str, chapter_dir: Path, **kwargs) -> str:
        """Stream the LLM answer, mirroring every delta into the raw file.

        Returns:
            The concatenation of all streamed deltas.
        """
        chunks: List[str] = []
        with self.storage.capture_stream(chapter_dir) as stream_fp:
            stream = self.llm_client.stream_invoke(
                SYSTEM_PROMPT_CHAPTER_JSON,
                user_message,
                temperature=kwargs.get("temperature", 0.2),
                top_p=kwargs.get("top_p", 0.95),
            )
            for delta in stream:
                stream_fp.write(delta)
                chunks.append(delta)
        return "".join(chunks)

    def _parse_chapter(self, raw_text: str) -> Dict[str, Any]:
        """Clean the raw LLM output and extract the chapter dictionary.

        Accepted shapes: ``{"chapter": {...}}``, a bare chapter object that
        has ``chapterId``/``title``/``blocks``, or a list whose first matching
        item has one of those two shapes.

        Raises:
            ValueError: On empty output, unparseable JSON, or JSON that does
                not contain a chapter (including non-object values such as
                ``null`` or a number).
        """
        cleaned = raw_text.strip()
        # Strip a surrounding Markdown code fence if the model added one.
        if cleaned.startswith("```json"):
            cleaned = cleaned[7:]
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()
        if not cleaned:
            raise ValueError("LLM返回空内容")

        # First try the text as-is, then the locally repaired variant.
        candidate_payloads = [cleaned]
        repaired = self._repair_llm_json(cleaned)
        if repaired != cleaned:
            candidate_payloads.append(repaired)

        try:
            data = self._parse_with_candidates(candidate_payloads)
        except json.JSONDecodeError as exc:
            # Last resort: the optional json_repair library.
            repaired_payload = self._attempt_json_repair(cleaned)
            if not repaired_payload:
                raise ValueError(f"章节JSON解析失败: {exc}") from exc
            try:
                data = self._parse_with_candidates([repaired_payload])
            except json.JSONDecodeError as inner_exc:
                raise ValueError(f"章节JSON解析失败: {inner_exc}") from inner_exc

        required_keys = ("chapterId", "title", "blocks")
        # Treat a single object like a one-element list. Anything that is not
        # a dict is skipped, so scalars and null end in the ValueError below
        # instead of a TypeError from a membership test.
        candidates = data if isinstance(data, list) else [data]
        for item in candidates:
            if not isinstance(item, dict):
                continue
            if isinstance(item.get("chapter"), dict):
                return item["chapter"]
            if all(key in item for key in required_keys):
                return item
        raise ValueError("章节JSON缺少chapter字段")

    def _repair_llm_json(self, text: str) -> str:
        """Apply the local repairs for frequent LLM JSON mistakes.

        The passes run in a fixed order: drop ``":="`` typos, escape raw
        control characters inside strings, balance brackets, insert missing
        commas. Each pass that changes the text logs a warning.

        Returns:
            The repaired text, or ``text`` itself when nothing changed.
        """
        repaired = text
        mutated = False

        new_text = self._COLON_EQUALS_PATTERN.sub(r"\1", repaired)
        if new_text != repaired:
            logger.warning("检测到章节JSON中的\":=\"字符,已自动移除多余的'='")
            repaired = new_text
            mutated = True

        repaired, escaped = self._escape_in_string_controls(repaired)
        if escaped:
            logger.warning("检测到章节JSON字符串中存在未转义的控制字符,已自动转换为转义序列")
            mutated = True

        repaired, balanced = self._balance_brackets(repaired)
        if balanced:
            logger.warning("检测到章节JSON括号不平衡,已自动补齐/剔除异常括号")
            mutated = True

        repaired, commas_fixed = self._fix_missing_commas(repaired)
        if commas_fixed:
            logger.warning("检测到章节JSON对象/数组之间缺少逗号,已自动补齐")
            mutated = True

        return repaired if mutated else text

    def _escape_in_string_controls(self, text: str) -> Tuple[str, bool]:
        """Escape raw control characters that appear inside string literals.

        Newline, carriage return and tab become ``\\n``, ``\\r`` and ``\\t``;
        any other character below U+0020 becomes a ``\\uXXXX`` escape. Text
        outside of string literals is copied unchanged.

        Returns:
            ``(new_text, changed)``.
        """
        if not text:
            return text, False

        result: List[str] = []
        in_string = False
        escaped = False
        mutated = False
        # A carriage return keeps its own escape so that CRLF does not turn
        # into two newlines.
        control_map = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}

        for ch in text:
            if escaped:
                # Character following a backslash: copy verbatim.
                result.append(ch)
                escaped = False
            elif ch == "\\":
                result.append(ch)
                escaped = True
            elif ch == '"':
                result.append(ch)
                in_string = not in_string
            elif in_string and ch in control_map:
                result.append(control_map[ch])
                mutated = True
            elif in_string and ord(ch) < 0x20:
                result.append(f"\\u{ord(ch):04x}")
                mutated = True
            else:
                result.append(ch)
        return "".join(result), mutated

    def _fix_missing_commas(self, text: str) -> Tuple[str, bool]:
        """Insert a comma where a closing bracket is followed by an opener.

        Outside of string literals, a ``}`` or ``]`` whose next
        non-whitespace character is ``{`` or ``[`` gets a ``,`` appended.

        Returns:
            ``(new_text, changed)``.
        """
        if not text:
            return text, False

        chars: List[str] = []
        mutated = False
        in_string = False
        escaped = False
        length = len(text)

        for i, ch in enumerate(text):
            chars.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = not in_string
            elif not in_string and ch in "}]":
                # Look ahead past whitespace for the next significant char.
                j = i + 1
                while j < length and text[j] in " \t\r\n":
                    j += 1
                if j < length and text[j] in "{[":
                    chars.append(",")
                    mutated = True
        return "".join(chars), mutated

    def _balance_brackets(self, text: str) -> Tuple[str, bool]:
        """Drop unmatched closing brackets and append missing closers.

        Brackets inside string literals are ignored. A closer that does not
        match the innermost open bracket is removed; brackets still open at
        the end of the text are closed in reverse order.

        Returns:
            ``(new_text, changed)``.
        """
        if not text:
            return text, False

        result: List[str] = []
        stack: List[str] = []
        mutated = False
        in_string = False
        escaped = False
        closer_for = {"{": "}", "[": "]"}

        for ch in text:
            if escaped:
                result.append(ch)
                escaped = False
            elif ch == "\\":
                result.append(ch)
                escaped = True
            elif ch == '"':
                result.append(ch)
                in_string = not in_string
            elif in_string:
                result.append(ch)
            elif ch in closer_for:
                stack.append(ch)
                result.append(ch)
            elif ch in "}]":
                if stack and closer_for[stack[-1]] == ch:
                    stack.pop()
                    result.append(ch)
                else:
                    # Unmatched closer: drop it.
                    mutated = True
            else:
                result.append(ch)

        while stack:
            result.append(closer_for[stack.pop()])
            mutated = True
        return "".join(result), mutated

    def _attempt_json_repair(self, text: str) -> str | None:
        """Run the optional ``json_repair`` library on ``text``.

        Returns:
            The repaired text, or ``None`` when the library is not installed,
            raised an error, or returned the text unchanged.
        """
        if not _json_repair_fn:
            return None
        try:
            fixed = _json_repair_fn(text)
        except Exception as exc:  # pragma: no cover - library failure
            logger.warning(f"json_repair 修复章节JSON失败: {exc}")
            return None
        if fixed == text:
            return None
        logger.warning("已使用json_repair自动修复章节JSON语法")
        return fixed

    def _sanitize_chapter_blocks(self, chapter: Dict[str, Any]):
        """Normalize common structural mistakes in ``chapter["blocks"]`` in place.

        The walk is tolerant of malformed containers: ``items``, ``rows`` and
        ``cells`` that are not lists, and rows/cells that are not dicts, are
        skipped so that the validator reports them instead of this method
        failing with ``TypeError``/``AttributeError``.
        """

        def walk(blocks: List[Dict[str, Any]] | None):
            if not isinstance(blocks, list):
                return
            for block in blocks:
                if not isinstance(block, dict):
                    continue
                self._ensure_block_type(block)
                block_type = block.get("type")
                if block_type == "list":
                    normalized = self._normalize_list_items(block.get("items"))
                    if normalized:
                        block["items"] = normalized
                    items = block.get("items")
                    if isinstance(items, list):
                        for entry in items:
                            walk(entry)
                elif block_type in {"callout", "blockquote"}:
                    walk(block.get("blocks"))
                elif block_type == "table":
                    rows = block.get("rows")
                    for row in rows if isinstance(rows, list) else []:
                        if not isinstance(row, dict):
                            continue
                        cells = row.get("cells")
                        for cell in cells if isinstance(cells, list) else []:
                            if isinstance(cell, dict):
                                walk(cell.get("blocks"))
                elif block_type == "widget":
                    self._normalize_widget_block(block)
                else:
                    nested = block.get("blocks")
                    if isinstance(nested, list):
                        walk(nested)

        walk(chapter.get("blocks"))

    def _normalize_list_items(self, items: Any) -> List[List[Dict[str, Any]]]:
        """Coerce list-block items into ``[[block, ...], ...]``.

        Returns an empty list when ``items`` is not a list; empty entries are
        dropped.
        """
        if not isinstance(items, list):
            return []
        normalized: List[List[Dict[str, Any]]] = []
        for item in items:
            normalized.extend(self._coerce_list_item(item))
        return [entry for entry in normalized if entry]

    def _coerce_list_item(self, item: Any) -> List[List[Dict[str, Any]]]:
        """Turn one list item, however it was nested, into block arrays.

        A dict becomes a single-block entry. For a list, its direct dict
        members form one entry, nested lists are coerced recursively, and
        strings/numbers each become a paragraph entry. A bare string or
        number becomes a paragraph entry. Other values yield nothing.
        """
        result: List[List[Dict[str, Any]]] = []
        if isinstance(item, dict):
            self._ensure_block_type(item)
            result.append([item])
            return result
        if isinstance(item, list):
            dicts = [elem for elem in item if isinstance(elem, dict)]
            if dicts:
                for elem in dicts:
                    self._ensure_block_type(elem)
                result.append(dicts)
            for elem in item:
                if isinstance(elem, list):
                    result.extend(self._coerce_list_item(elem))
                elif isinstance(elem, dict):
                    # Already collected into ``dicts`` above.
                    continue
                elif isinstance(elem, str):
                    result.append([self._as_paragraph_block(elem)])
                elif isinstance(elem, (int, float)):
                    result.append([self._as_paragraph_block(str(elem))])
        elif isinstance(item, str):
            result.append([self._as_paragraph_block(item)])
        elif isinstance(item, (int, float)):
            result.append([self._as_paragraph_block(str(item))])
        return result

    def _normalize_widget_block(self, block: Dict[str, Any]):
        """Ensure a widget block has a top-level ``data`` or ``dataRef``.

        If neither is set, ``props["data"]`` is moved to the top level when
        present; otherwise an empty labels/datasets structure is inserted.
        """
        has_data = block.get("data") is not None or block.get("dataRef") is not None
        if has_data:
            return
        props = block.get("props")
        if isinstance(props, dict) and "data" in props:
            block["data"] = props.pop("data")
            return
        block["data"] = {"labels": [], "datasets": []}

    def _ensure_block_type(self, block: Dict[str, Any]):
        """Downgrade a block without an allowed ``type`` to a paragraph.

        The paragraph text is the first non-blank string among ``text``,
        ``content`` and ``title``; failing that, the JSON dump of the block.
        The block is rewritten in place.
        """
        block_type = block.get("type")
        if isinstance(block_type, str) and block_type in ALLOWED_BLOCK_TYPES:
            return

        text = ""
        for key in ("text", "content", "title"):
            value = block.get(key)
            if isinstance(value, str) and value.strip():
                text = value.strip()
                break
        if not text:
            try:
                text = json.dumps(block, ensure_ascii=False)
            except Exception:
                # Best effort: any serialization problem falls back to str().
                text = str(block)

        block.clear()
        block["type"] = "paragraph"
        block["inlines"] = [{"text": text}]

    @staticmethod
    def _as_paragraph_block(text: str) -> Dict[str, Any]:
        """Wrap ``text`` (or an empty string) in a paragraph block."""
        return {
            "type": "paragraph",
            "inlines": [{"text": text or ""}],
        }

    @staticmethod
    def _parse_with_candidates(payloads: List[str]) -> Any:
        """Return the parse result of the first payload that is valid JSON.

        Raises:
            json.JSONDecodeError: The error of the last payload when none of
                them parses.
            ValueError: If ``payloads`` is empty.
        """
        last_exc: json.JSONDecodeError | None = None
        for payload in payloads:
            try:
                return json.loads(payload)
            except json.JSONDecodeError as exc:
                last_exc = exc
        if last_exc is None:
            # Explicit check instead of ``assert``, which is stripped under -O.
            raise ValueError("章节JSON解析失败: 未提供候选内容")
        raise last_exc


__all__ = ["ChapterGenerationNode"]
@@ -0,0 +1,81 @@
"""
根据模板目录与多源报告,生成整本报告的标题/目录/主题设计。
"""
from __future__ import annotations
import json
from typing import Any, Dict, List
from loguru import logger
from ..core import TemplateSection
from ..prompts import (
SYSTEM_PROMPT_DOCUMENT_LAYOUT,
build_document_layout_prompt,
)
from .base_node import BaseNode
class DocumentLayoutNode(BaseNode):
    """Ask the LLM for the document-level design (title, contents, hero)."""

    def __init__(self, llm_client):
        """Register the node under the name ``DocumentLayoutNode``."""
        super().__init__(llm_client, "DocumentLayoutNode")

    def run(
        self,
        sections: List[TemplateSection],
        template_markdown: str,
        reports: Dict[str, str],
        forum_logs: str,
        query: str,
        template_overview: Dict[str, Any] | None = None,
    ) -> Dict[str, Any]:
        """Build the layout prompt, call the LLM and return the parsed design.

        Args:
            sections: Template sections; each is serialized with ``to_dict``.
            template_markdown: Raw template text.
            reports: Source reports passed through to the prompt.
            forum_logs: Forum log text passed through to the prompt.
            query: The user's query.
            template_overview: Optional overview; when falsy, one is derived
                from the first section's title and all sections.

        Returns:
            The design dictionary parsed from the LLM response.

        Raises:
            ValueError: If the response is empty, is not valid JSON, or is
                not a JSON object.
        """
        if not template_overview:
            template_overview = {
                "title": sections[0].title if sections else "",
                "chapters": [section.to_dict() for section in sections],
            }
        # The raw template, its sliced structure and all source material are
        # sent together so the model can see both hierarchy and content.
        payload = {
            "query": query,
            "template": {
                "raw": template_markdown,
                "sections": [section.to_dict() for section in sections],
            },
            "templateOverview": template_overview,
            "reports": reports,
            "forumLogs": forum_logs,
        }
        user_message = build_document_layout_prompt(payload)
        response = self.llm_client.stream_invoke_to_string(
            SYSTEM_PROMPT_DOCUMENT_LAYOUT,
            user_message,
            temperature=0.3,
            top_p=0.9,
        )
        design = self._parse_response(response)
        logger.info("文档标题/目录设计已生成")
        return design

    def _parse_response(self, raw: str) -> Dict[str, Any]:
        """Parse the LLM response into a dictionary.

        A surrounding Markdown code fence is removed first.

        Raises:
            ValueError: On empty content, invalid JSON, or a JSON value that
                is not an object.
        """
        cleaned = raw.strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[7:]
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()
        if not cleaned:
            raise ValueError("文档设计LLM返回空内容")
        try:
            design = json.loads(cleaned)
        except json.JSONDecodeError as exc:
            raise ValueError(f"文档设计JSON解析失败: {exc}") from exc
        # json.loads can also yield a list, string, number or None; fail here
        # rather than return a value that breaks the declared dict contract.
        if not isinstance(design, dict):
            raise ValueError(
                f"文档设计JSON解析失败: 期望对象,实际为{type(design).__name__}"
            )
        return design


__all__ = ["DocumentLayoutNode"]
-254
View File
@@ -1,254 +0,0 @@
"""
HTML生成节点
将整合后的内容转换为美观的HTML报告
"""
import json
from datetime import datetime
from typing import Dict, Any
from loguru import logger
from .base_node import StateMutationNode
from ..llms.base import LLMClient
from ..state.state import ReportState
from ..prompts import SYSTEM_PROMPT_HTML_GENERATION
# 不再需要text_processing依赖
class HTMLGenerationNode(StateMutationNode):
    """Turn the combined engine reports into an HTML report via the LLM."""

    def __init__(self, llm_client: LLMClient):
        """Initialize the node.

        Args:
            llm_client: LLM client used to generate the HTML.
        """
        super().__init__(llm_client, "HTMLGenerationNode")

    def run(self, input_data: Dict[str, Any], **kwargs) -> str:
        """Generate the HTML report.

        Args:
            input_data: Report data. Keys read here: ``query``,
                ``query_engine_report``, ``media_engine_report``,
                ``insight_engine_report``, ``forum_logs``,
                ``selected_template``; missing keys default to ``''``.

        Returns:
            The generated HTML, or the static fallback page when anything in
            the generation path raises.
        """
        logger.info("开始生成HTML报告...")
        try:
            input_keys = (
                "query",
                "query_engine_report",
                "media_engine_report",
                "insight_engine_report",
                "forum_logs",
                "selected_template",
            )
            llm_input = {key: input_data.get(key, '') for key in input_keys}
            # The input is handed to the LLM as a JSON document.
            message = json.dumps(llm_input, ensure_ascii=False, indent=2)
            response = self.llm_client.stream_invoke_to_string(SYSTEM_PROMPT_HTML_GENERATION, message)
            processed_response = self.process_output(response)
            logger.info("HTML报告生成完成")
            return processed_response
        except Exception as e:
            # Deliberate best effort: log and fall back to a static page.
            logger.exception(f"HTML生成失败: {str(e)}")
            return self._generate_fallback_html(input_data)

    def mutate_state(self, input_data: Dict[str, Any], state: ReportState, **kwargs) -> ReportState:
        """Generate the HTML and store it on the report state.

        Args:
            input_data: Input passed to :meth:`run`.
            state: Current report state; updated in place.
            **kwargs: Forwarded to :meth:`run`.

        Returns:
            The same ``state`` with ``html_content`` set and marked completed.
        """
        html_content = self.run(input_data, **kwargs)
        state.html_content = html_content
        state.mark_completed()
        return state

    def process_output(self, output: str) -> str:
        """Strip a Markdown code fence from the LLM output.

        Args:
            output: Raw LLM output.

        Returns:
            The HTML without the fence; the raw ``output`` when the stripped
            result is empty or when processing raises.
        """
        try:
            logger.info(f"处理LLM原始输出,长度: {len(output)} 字符")
            html_content = output.strip()
            if html_content.startswith('```html'):
                html_content = html_content[7:]  # drop the leading '```html'
                if html_content.endswith('```'):
                    html_content = html_content[:-3]  # drop the trailing '```'
            elif html_content.startswith('```') and html_content.endswith('```'):
                html_content = html_content[3:-3]  # drop both bare fences
            html_content = html_content.strip()
            if not html_content:
                logger.info("处理后内容为空,返回原始输出")
                html_content = output
            logger.info(f"HTML处理完成,最终长度: {len(html_content)} 字符")
            return html_content
        except Exception as e:
            logger.exception(f"处理HTML输出失败: {str(e)},返回原始输出")
            return output

    def _generate_fallback_html(self, input_data: Dict[str, Any]) -> str:
        """Build a static HTML report, used when LLM generation fails.

        All values taken from ``input_data`` are HTML-escaped before they are
        inserted into the page, because they are free text (user query,
        engine reports, forum logs) that may contain markup characters.

        Args:
            input_data: Same dictionary as passed to :meth:`run`.

        Returns:
            The fallback HTML document.
        """
        # Local import keeps this block self-contained.
        from html import escape

        logger.info("使用备用HTML生成方法")

        def _field(key: str, default: str = '') -> str:
            """Return the escaped value for ``key``, or '' when it is falsy."""
            value = input_data.get(key, default)
            return escape(str(value)) if value else ''

        query = _field('query', '智能舆情分析报告')
        query_report = _field('query_engine_report')
        media_report = _field('media_engine_report')
        insight_report = _field('insight_engine_report')
        forum_logs = _field('forum_logs')
        # Day and time are separated explicitly; without the separator the
        # day digits run straight into the hour.
        generation_time = datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")
        html_content = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{query} - 智能舆情分析报告</title>
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
line-height: 1.6;
color: #333;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background: #f5f5f5;
}}
.container {{
background: white;
padding: 40px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}}
h1 {{
color: #2c3e50;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
}}
h2 {{
color: #34495e;
margin-top: 30px;
margin-bottom: 15px;
}}
.section {{
margin-bottom: 30px;
padding: 20px;
border-left: 4px solid #3498db;
background: #f8f9fa;
}}
.meta {{
background: #e9ecef;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}}
.footer {{
margin-top: 40px;
padding-top: 20px;
border-top: 1px solid #eee;
text-align: center;
color: #666;
}}
pre {{
background: #f4f4f4;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
white-space: pre-wrap;
}}
</style>
</head>
<body>
<div class="container">
<h1>{query}</h1>
<div class="meta">
<strong>报告生成时间:</strong> {generation_time}<br>
<strong>数据来源:</strong> QueryEngine、MediaEngine、InsightEngine、ForumEngine<br>
<strong>报告类型:</strong> 综合舆情分析报告
</div>
<h2>执行摘要</h2>
<div class="section">
本报告整合了多个分析引擎的研究结果,为您提供全面的舆情分析洞察。
通过对查询主题"{query}"的深度分析,我们从多个维度展现了当前的舆情态势。
</div>
{f'<h2>QueryEngine分析结果</h2><div class="section"><pre>{query_report}</pre></div>' if query_report else ''}
{f'<h2>MediaEngine分析结果</h2><div class="section"><pre>{media_report}</pre></div>' if media_report else ''}
{f'<h2>InsightEngine分析结果</h2><div class="section"><pre>{insight_report}</pre></div>' if insight_report else ''}
{f'<h2>论坛监控数据</h2><div class="section"><pre>{forum_logs}</pre></div>' if forum_logs else ''}
<h2>综合结论</h2>
<div class="section">
基于多个分析引擎的综合研究,我们对"{query}"主题进行了全面分析。
各引擎从不同角度提供了深入洞察,为决策提供了重要参考。
</div>
<div class="footer">
<p>本报告由智能舆情分析平台自动生成</p>
<p>ReportEngine v1.0 | 生成时间: {generation_time}</p>
</div>
</div>
</body>
</html>"""
        return html_content
+78
View File
@@ -0,0 +1,78 @@
"""
章节篇幅规划节点。
"""
from __future__ import annotations
import json
from typing import Any, Dict, List
from loguru import logger
from ..core import TemplateSection
from ..prompts import (
SYSTEM_PROMPT_WORD_BUDGET,
build_word_budget_prompt,
)
from .base_node import BaseNode
class WordBudgetNode(BaseNode):
    """Ask the LLM to plan word counts and emphasis for each chapter."""

    def __init__(self, llm_client):
        """Register the node under the name ``WordBudgetNode``."""
        super().__init__(llm_client, "WordBudgetNode")

    def run(
        self,
        sections: List[TemplateSection],
        design: Dict[str, Any],
        reports: Dict[str, str],
        forum_logs: str,
        query: str,
        template_overview: Dict[str, Any] | None = None,
    ) -> Dict[str, Any]:
        """Build the word-budget prompt, call the LLM and return the plan.

        Args:
            sections: Template sections; each is serialized with ``to_dict``.
            design: Document design passed through to the prompt.
            reports: Source reports passed through to the prompt.
            forum_logs: Forum log text passed through to the prompt.
            query: The user's query.
            template_overview: Optional overview; when falsy, one is derived
                from the first section's title and all sections.

        Returns:
            The plan dictionary parsed from the LLM response.

        Raises:
            ValueError: If the response is empty, is not valid JSON, or is
                not a JSON object.
        """
        if not template_overview:
            template_overview = {
                "title": sections[0].title if sections else "",
                "chapters": [section.to_dict() for section in sections],
            }
        # The design is included next to the chapter skeleton so the budget
        # can take it into account.
        payload = {
            "query": query,
            "design": design,
            "sections": [section.to_dict() for section in sections],
            "templateOverview": template_overview,
            "reports": reports,
            "forumLogs": forum_logs,
        }
        user = build_word_budget_prompt(payload)
        response = self.llm_client.stream_invoke_to_string(
            SYSTEM_PROMPT_WORD_BUDGET,
            user,
            temperature=0.25,
            top_p=0.85,
        )
        plan = self._parse_response(response)
        logger.info("章节字数规划已生成")
        return plan

    def _parse_response(self, raw: str) -> Dict[str, Any]:
        """Parse the LLM response into a dictionary.

        A surrounding Markdown code fence is removed first.

        Raises:
            ValueError: On empty content, invalid JSON, or a JSON value that
                is not an object.
        """
        cleaned = raw.strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[7:]
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()
        if not cleaned:
            raise ValueError("篇幅规划LLM返回空内容")
        try:
            plan = json.loads(cleaned)
        except json.JSONDecodeError as exc:
            raise ValueError(f"篇幅规划JSON解析失败: {exc}") from exc
        # json.loads can also yield a list, string, number or None; fail here
        # rather than return a value that breaks the declared dict contract.
        if not isinstance(plan, dict):
            raise ValueError(
                f"篇幅规划JSON解析失败: 期望对象,实际为{type(plan).__name__}"
            )
        return plan


__all__ = ["WordBudgetNode"]