Blocked HTML

2025-11-13 10:56:28 +08:00
parent 403dbbd296
commit 4846b1f758
20 changed files with 3660 additions and 367 deletions
@@ -5,11 +5,15 @@ Report Engine节点处理模块

 from .base_node import BaseNode, StateMutationNode
 from .template_selection_node import TemplateSelectionNode
-from .html_generation_node import HTMLGenerationNode
+from .chapter_generation_node import ChapterGenerationNode
+from .document_layout_node import DocumentLayoutNode
+from .word_budget_node import WordBudgetNode

 __all__ = [
    "BaseNode",
-    "StateMutationNode", 
+    "StateMutationNode",
    "TemplateSelectionNode",
-    "HTMLGenerationNode"
+    "ChapterGenerationNode",
+    "DocumentLayoutNode",
+    "WordBudgetNode",
 ]
@@ -0,0 +1,506 @@
+"""
+章节级JSON生成节点。
+
+每个章节依据Markdown模板切片独立调用LLM，流式写入Raw文件，
+完成后校验并落盘标准化JSON。该节点只负责“拿到合规章节”。
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+import re
+from typing import Any, Dict, List, Tuple
+
+from loguru import logger
+
+from ..core import TemplateSection, ChapterStorage
+from ..ir import ALLOWED_BLOCK_TYPES, IRValidator
+from ..prompts import (
+    SYSTEM_PROMPT_CHAPTER_JSON,
+    build_chapter_user_prompt,
+)
+from .base_node import BaseNode
+
+try:
+    from json_repair import repair_json as _json_repair_fn
+except ImportError:  # pragma: no cover - optional dependency
+    _json_repair_fn = None
+
+
+class ChapterGenerationNode(BaseNode):
+    """负责按章节调用LLM并校验JSON结构"""
+
+    _COLON_EQUALS_PATTERN = re.compile(r'(":\s*)=')
+
+    def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage):
+        super().__init__(llm_client, "ChapterGenerationNode")  # second argument is presumably the node name — confirm in BaseNode
+        self.validator = validator  # run() calls validator.validate_chapter() on every generated chapter
+        self.storage = storage  # run()/_stream_llm() use its begin_chapter / capture_stream / persist_chapter
+
+    def run(
+        self,
+        section: TemplateSection,
+        context: Dict[str, Any],
+        run_dir: Path,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """Generate one chapter with the LLM, validate and persist its JSON, and return it; raises ValueError if validation fails."""
+        chapter_meta = {
+            "chapterId": section.chapter_id,
+            "slug": section.slug,
+            "title": section.title,
+            "order": section.order,
+        }
+        chapter_dir = self.storage.begin_chapter(run_dir, chapter_meta)
+        llm_payload = self._build_payload(section, context)
+        user_message = build_chapter_user_prompt(llm_payload)
+
+        raw_text = self._stream_llm(user_message, chapter_dir, **kwargs)  # kwargs may carry temperature / top_p
+        chapter_json = self._parse_chapter(raw_text)
+
+        # Backfill key fields from the template section before validating (values already present are kept)
+        chapter_json.setdefault("chapterId", section.chapter_id)
+        chapter_json.setdefault("anchor", section.slug)
+        chapter_json.setdefault("title", section.title)
+        chapter_json.setdefault("order", section.order)
+        self._sanitize_chapter_blocks(chapter_json)
+
+        valid, errors = self.validator.validate_chapter(chapter_json)
+        self.storage.persist_chapter(  # called for invalid chapters too, with the validation errors attached
+            run_dir,
+            chapter_meta,
+            chapter_json,
+            errors=None if valid else errors,
+        )
+
+        if not valid:
+            raise ValueError(
+                f"{section.title} 章节JSON校验失败: {'; '.join(errors[:5])}"
+            )
+
+        return chapter_json
+
+    # ====== 内部方法 ======
+
+    def _build_payload(self, section: TemplateSection, context: Dict[str, Any]) -> Dict[str, Any]:
+        """Assemble the dict handed to build_chapter_user_prompt: section info, global context, reports and constraints."""
+        reports = context.get("reports", {})
+        # Per-chapter length plan (from WordBudgetNode), used to guide word counts and points of emphasis
+        chapter_plan_map = context.get("chapter_directives", {})
+        chapter_plan = chapter_plan_map.get(section.chapter_id) if chapter_plan_map else {}  # None when this chapter has no entry
+        payload = {
+            "section": {
+                "chapterId": section.chapter_id,
+                "title": section.title,
+                "slug": section.slug,
+                "order": section.order,
+                "number": section.number,
+                "outline": section.outline,
+            },
+            "globalContext": {
+                "query": context.get("query"),
+                "templateName": context.get("template_name"),
+                "themeTokens": context.get("theme_tokens", {}),
+                "styleDirectives": context.get("style_directives", {}),
+                # layout carries title / table-of-contents / hero info so chapters keep a consistent visual tone
+                "layout": context.get("layout"),
+                "templateOverview": context.get("template_overview", {}),
+            },
+            "reports": {
+                "query_engine": reports.get("query_engine", ""),
+                "media_engine": reports.get("media_engine", ""),
+                "insight_engine": reports.get("insight_engine", ""),
+            },
+            "forumLogs": context.get("forum_logs", ""),
+            "dataBundles": context.get("data_bundles", []),
+            "constraints": {
+                "language": "zh-CN",
+                "maxTokens": context.get("max_tokens", 4096),
+                "allowedBlocks": ALLOWED_BLOCK_TYPES,
+                "styleHints": {
+                    "expectWidgets": True,
+                    "forceHeadingAnchors": True,
+                    "allowInlineMix": True,
+                },
+            },
+            "chapterPlan": chapter_plan,
+            "wordPlan": context.get("word_plan"),
+        }
+        if chapter_plan:  # copy only the truthy plan fields into the constraints
+            constraints = payload["constraints"]
+            if chapter_plan.get("targetWords"):
+                constraints["wordTarget"] = chapter_plan["targetWords"]
+            if chapter_plan.get("minWords"):
+                constraints["minWords"] = chapter_plan["minWords"]
+            if chapter_plan.get("maxWords"):
+                constraints["maxWords"] = chapter_plan["maxWords"]
+            if chapter_plan.get("emphasis"):
+                constraints["emphasis"] = chapter_plan["emphasis"]
+            if chapter_plan.get("sections"):
+                constraints["sectionBudgets"] = chapter_plan["sections"]
+                payload["globalContext"]["sectionBudgets"] = chapter_plan["sections"]  # same list mirrored in globalContext
+        return payload
+
+    def _stream_llm(self, user_message: str, chapter_dir: Path, **kwargs) -> str:
+        """Stream the LLM response, writing each delta to the captured raw stream, and return the joined text."""
+        chunks: List[str] = []
+        with self.storage.capture_stream(chapter_dir) as stream_fp:
+            stream = self.llm_client.stream_invoke(
+                SYSTEM_PROMPT_CHAPTER_JSON,
+                user_message,
+                temperature=kwargs.get("temperature", 0.2),
+                top_p=kwargs.get("top_p", 0.95),
+            )
+            for delta in stream:
+                stream_fp.write(delta)  # written as it arrives, before the full response is assembled
+                chunks.append(delta)
+        return "".join(chunks)
+
+    def _parse_chapter(self, raw_text: str) -> Dict[str, Any]:
+        """Strip code fences, parse the JSON (repairing if needed) and return the chapter dict; raises ValueError on any failure."""
+        cleaned = raw_text.strip()
+        if cleaned.startswith("```json"):
+            cleaned = cleaned[7:]
+        if cleaned.startswith("```"):
+            cleaned = cleaned[3:]
+        if cleaned.endswith("```"):
+            cleaned = cleaned[:-3]
+        cleaned = cleaned.strip()
+        if not cleaned:
+            raise ValueError("LLM返回空内容")
+
+        candidate_payloads = [cleaned]  # tried in order: the cleaned text first, then the heuristic repair
+        repaired = self._repair_llm_json(cleaned)
+        if repaired != cleaned:
+            candidate_payloads.append(repaired)
+
+        try:
+            data = self._parse_with_candidates(candidate_payloads)
+        except json.JSONDecodeError as exc:
+            repaired_payload = self._attempt_json_repair(cleaned)  # last resort: the optional json_repair library
+            if repaired_payload:
+                candidate_payloads.append(repaired_payload)
+                try:
+                    data = self._parse_with_candidates(candidate_payloads[-1:])
+                except json.JSONDecodeError as inner_exc:
+                    raise ValueError(f"章节JSON解析失败: {inner_exc}") from inner_exc
+            else:
+                raise ValueError(f"章节JSON解析失败: {exc}") from exc
+
+        if isinstance(data, dict) and isinstance(data.get("chapter"), dict):  # data may be a JSON scalar or list
+            return data["chapter"]
+        if isinstance(data, dict) and all(
+            key in data for key in ("chapterId", "title", "blocks")
+        ):
+            return data
+        if isinstance(data, list):  # accept the first list element that looks like a chapter
+            for item in data:
+                if isinstance(item, dict):
+                    if "chapter" in item and isinstance(item["chapter"], dict):
+                        return item["chapter"]
+                    if all(key in item for key in ("chapterId", "title", "blocks")):
+                        return item
+        raise ValueError("章节JSON缺少chapter字段")
+
+    def _repair_llm_json(self, text: str) -> str:
+        """Apply heuristic fixes for common LLM JSON mistakes (e.g. a stray '=' after a key's colon); returns text itself if nothing changed."""
+        repaired = text
+        mutated = False
+
+        new_text = self._COLON_EQUALS_PATTERN.sub(r"\1", repaired)  # keeps the '":' part and drops the '='
+        if new_text != repaired:
+            logger.warning("检测到章节JSON中的\":=\"字符，已自动移除多余的'='号")
+            repaired = new_text
+            mutated = True
+
+        repaired, escaped = self._escape_in_string_controls(repaired)
+        if escaped:
+            logger.warning("检测到章节JSON字符串中存在未转义的控制字符，已自动转换为转义序列")
+            mutated = True
+
+        repaired, balanced = self._balance_brackets(repaired)
+        if balanced:
+            logger.warning("检测到章节JSON括号不平衡，已自动补齐/剔除异常括号")
+            mutated = True
+
+        repaired, commas_fixed = self._fix_missing_commas(repaired)
+        if commas_fixed:
+            logger.warning("检测到章节JSON对象/数组之间缺少逗号，已自动补齐")
+            mutated = True
+
+        return repaired if mutated else text
+
+    def _escape_in_string_controls(self, text: str) -> Tuple[str, bool]:
+        """
+        Replace bare newline/tab/control characters inside string literals with JSON-legal escapes; returns (text, changed).
+        """
+        if not text:
+            return text, False
+
+        result: List[str] = []
+        in_string = False
+        escaped = False
+        mutated = False
+        control_map = {"\n": "\\n", "\r": "\\n", "\t": "\\t"}  # NOTE(review): "\r" is emitted as \n, so CRLF yields two newlines — confirm intended
+
+        for ch in text:
+            if escaped:  # character right after a backslash is copied verbatim
+                result.append(ch)
+                escaped = False
+                continue
+
+            if ch == "\\":
+                result.append(ch)
+                escaped = True
+                continue
+
+            if ch == '"':  # an unescaped quote toggles the inside-string state
+                result.append(ch)
+                in_string = not in_string
+                continue
+
+            if in_string and ch in control_map:
+                result.append(control_map[ch])
+                mutated = True
+                continue
+
+            if in_string and ord(ch) < 0x20:  # any other control character becomes a \uXXXX escape
+                result.append(f"\\u{ord(ch):04x}")
+                mutated = True
+                continue
+
+            result.append(ch)
+
+        return "".join(result), mutated
+
+    def _fix_missing_commas(self, text: str) -> Tuple[str, bool]:
+        """Insert a comma after a '}' or ']' (outside strings) whose next non-whitespace character is '{' or '['; returns (text, changed)."""
+        if not text:
+            return text, False
+
+        chars: List[str] = []
+        mutated = False
+        in_string = False
+        escaped = False
+        length = len(text)
+        i = 0
+        while i < length:
+            ch = text[i]
+            chars.append(ch)  # every input character is kept; commas are only ever added
+            if escaped:
+                escaped = False
+                i += 1
+                continue
+            if ch == "\\":
+                escaped = True
+                i += 1
+                continue
+            if ch == '"':
+                in_string = not in_string
+                i += 1
+                continue
+            if not in_string and ch in "}]":
+                j = i + 1
+                while j < length and text[j] in " \t\r\n":  # look ahead past whitespace
+                    j += 1
+                if j < length:
+                    next_ch = text[j]
+                    if next_ch in "{[":
+                        chars.append(",")
+                        mutated = True
+            i += 1
+        return "".join(chars), mutated
+
+    def _balance_brackets(self, text: str) -> Tuple[str, bool]:
+        """Drop closers that do not match the innermost open bracket and append closers for brackets left open; returns (text, changed)."""
+        if not text:
+            return text, False
+
+        result: List[str] = []
+        stack: List[str] = []  # currently open '{' / '[' characters, innermost last
+        mutated = False
+        in_string = False
+        escaped = False
+
+        opener_map = {"{": "}", "[": "]"}
+
+        for ch in text:
+            if escaped:
+                result.append(ch)
+                escaped = False
+                continue
+
+            if ch == "\\":
+                result.append(ch)
+                escaped = True
+                continue
+
+            if ch == '"':
+                result.append(ch)
+                in_string = not in_string
+                continue
+
+            if in_string:  # brackets inside string literals are not structural
+                result.append(ch)
+                continue
+
+            if ch in "{[":
+                stack.append(ch)
+                result.append(ch)
+                continue
+
+            if ch in "}]":
+                if stack and ((ch == "}" and stack[-1] == "{") or (ch == "]" and stack[-1] == "[")):
+                    stack.pop()
+                    result.append(ch)
+                else:
+                    mutated = True  # unmatched closer: omitted from the output
+                continue
+
+            result.append(ch)
+
+        while stack:  # close whatever is still open, innermost first
+            opener = stack.pop()
+            result.append(opener_map[opener])
+            mutated = True
+
+        return "".join(result), mutated
+
+    def _attempt_json_repair(self, text: str) -> str | None:
+        """Run the optional json_repair library on text; returns the fixed string, or None if unavailable, failed or unchanged."""
+        if not _json_repair_fn:  # json_repair import failed at module load
+            return None
+        try:
+            fixed = _json_repair_fn(text)
+        except Exception as exc:  # pragma: no cover - library failure
+            logger.warning(f"json_repair 修复章节JSON失败: {exc}")
+            return None
+        if fixed == text:
+            return None
+        logger.warning("已使用json_repair自动修复章节JSON语法")
+        return fixed
+
+    def _sanitize_chapter_blocks(self, chapter: Dict[str, Any]):
+        """Fix common structural mistakes in chapter["blocks"] in place (e.g. over-nested list.items), skipping malformed shapes."""
+
+        def walk(blocks: List[Dict[str, Any]] | None):
+            if not isinstance(blocks, list):
+                return
+            for block in blocks:
+                if not isinstance(block, dict):
+                    continue
+                self._ensure_block_type(block)
+                block_type = block.get("type")
+                if block_type == "list":
+                    normalized = self._normalize_list_items(block.get("items"))
+                    if normalized:
+                        block["items"] = normalized
+                    for entry in normalized:  # not block["items"]: that may still be None or another non-iterable
+                        walk(entry)
+                elif block_type in {"callout", "blockquote"}:
+                    walk(block.get("blocks"))
+                elif block_type == "table":
+                    rows = block.get("rows")  # LLM output: rows / cells / cell may have the wrong type
+                    for row in rows if isinstance(rows, list) else []:
+                        cells = row.get("cells") if isinstance(row, dict) else None
+                        for cell in cells if isinstance(cells, list) else []:
+                            walk(cell.get("blocks") if isinstance(cell, dict) else None)
+                elif block_type == "widget":
+                    self._normalize_widget_block(block)
+                else:
+                    nested = block.get("blocks")
+                    if isinstance(nested, list):
+                        walk(nested)
+
+        walk(chapter.get("blocks"))
+
+    def _normalize_list_items(self, items: Any) -> List[List[Dict[str, Any]]]:
+        """Coerce a list block's items into the [[block, block], ...] shape; returns [] when items is not a list."""
+        if not isinstance(items, list):
+            return []
+        normalized: List[List[Dict[str, Any]]] = []
+        for item in items:
+            normalized.extend(self._coerce_list_item(item))  # one raw item may expand into several entries
+        return [entry for entry in normalized if entry]  # drop empty entries
+
+    def _coerce_list_item(self, item: Any) -> List[List[Dict[str, Any]]]:
+        """Flatten one items entry (dict, list, str or number) into a list of block lists; any other type yields []."""
+        result: List[List[Dict[str, Any]]] = []
+        if isinstance(item, dict):
+            self._ensure_block_type(item)
+            result.append([item])
+            return result
+        if isinstance(item, list):
+            dicts = [elem for elem in item if isinstance(elem, dict)]
+            if dicts:  # all dict elements of this list form a single entry
+                for elem in dicts:
+                    self._ensure_block_type(elem)
+                result.append(dicts)
+            for elem in item:
+                if isinstance(elem, list):
+                    result.extend(self._coerce_list_item(elem))  # nested lists are flattened recursively
+                elif isinstance(elem, dict):
+                    continue  # already grouped into `dicts` above
+                elif isinstance(elem, str):
+                    result.append([self._as_paragraph_block(elem)])
+                elif isinstance(elem, (int, float)):
+                    result.append([self._as_paragraph_block(str(elem))])
+        elif isinstance(item, str):
+            result.append([self._as_paragraph_block(item)])
+        elif isinstance(item, (int, float)):
+            result.append([self._as_paragraph_block(str(item))])
+        return result
+
+    def _normalize_widget_block(self, block: Dict[str, Any]):
+        """Ensure a widget block has top-level data or dataRef: lift props["data"] if present, else insert an empty dataset."""
+        has_data = block.get("data") is not None or block.get("dataRef") is not None
+        if has_data:
+            return
+        props = block.get("props")
+        if isinstance(props, dict) and "data" in props:
+            block["data"] = props.pop("data")  # moved, not copied: props no longer holds "data"
+            return
+        block["data"] = {"labels": [], "datasets": []}  # empty placeholder dataset
+
+    def _ensure_block_type(self, block: Dict[str, Any]):
+        """If block has no type listed in ALLOWED_BLOCK_TYPES, rewrite it in place as a paragraph block."""
+        block_type = block.get("type")
+        if isinstance(block_type, str) and block_type in ALLOWED_BLOCK_TYPES:
+            return
+        text = ""
+        for key in ("text", "content", "title"):  # first non-blank string among these keys becomes the paragraph text
+            value = block.get(key)
+            if isinstance(value, str) and value.strip():
+                text = value.strip()
+                break
+        if not text:  # otherwise fall back to a dump of the whole block
+            try:
+                text = json.dumps(block, ensure_ascii=False)
+            except Exception:
+                text = str(block)
+        block.clear()  # all original keys are discarded
+        block["type"] = "paragraph"
+        block["inlines"] = [{"text": text}]
+
+    @staticmethod
+    def _as_paragraph_block(text: str) -> Dict[str, Any]:
+        return {  # paragraph block wrapping a single text inline
+            "type": "paragraph",
+            "inlines": [{"text": text or ""}],  # falsy text becomes ""
+        }
+
+    @staticmethod
+    def _parse_with_candidates(payloads: List[str]) -> Dict[str, Any]:
+        """Try json.loads on each payload in order and return the first success; re-raise the last JSONDecodeError if all fail."""
+        last_exc: json.JSONDecodeError | None = None
+        for payload in payloads:
+            try:
+                return json.loads(payload)  # NOTE(review): may be any JSON type, not only a dict
+            except json.JSONDecodeError as exc:
+                last_exc = exc
+        assert last_exc is not None  # holds only for non-empty payloads; NOTE(review): assert is stripped under -O
+        raise last_exc
+
+
+__all__ = ["ChapterGenerationNode"]
@@ -0,0 +1,81 @@
+"""
+根据模板目录与多源报告，生成整本报告的标题/目录/主题设计。
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Dict, List
+
+from loguru import logger
+
+from ..core import TemplateSection
+from ..prompts import (
+    SYSTEM_PROMPT_DOCUMENT_LAYOUT,
+    build_document_layout_prompt,
+)
+from .base_node import BaseNode
+
+
+class DocumentLayoutNode(BaseNode):
+    """Node that asks the LLM for the document-wide title, table of contents and hero design."""
+
+    def __init__(self, llm_client):
+        super().__init__(llm_client, "DocumentLayoutNode")
+
+    def run(
+        self,
+        sections: List[TemplateSection],
+        template_markdown: str,
+        reports: Dict[str, str],
+        forum_logs: str,
+        query: str,
+        template_overview: Dict[str, Any] | None = None,
+    ) -> Dict[str, Any]:
+        """Send the template, its sections and the source reports to the LLM and return the parsed JSON design."""
+        # Feed the raw template, the sliced section structure and the multi-source reports to the LLM together
+        payload = {
+            "query": query,
+            "template": {
+                "raw": template_markdown,
+                "sections": [section.to_dict() for section in sections],
+            },
+            "templateOverview": template_overview  # a falsy overview falls back to one derived from sections
+            or {
+                "title": sections[0].title if sections else "",
+                "chapters": [section.to_dict() for section in sections],
+            },
+            "reports": reports,
+            "forumLogs": forum_logs,
+        }
+
+        user_message = build_document_layout_prompt(payload)
+        response = self.llm_client.stream_invoke_to_string(
+            SYSTEM_PROMPT_DOCUMENT_LAYOUT,
+            user_message,
+            temperature=0.3,
+            top_p=0.9,
+        )
+        design = self._parse_response(response)
+        logger.info("文档标题/目录设计已生成")
+        return design
+
+    def _parse_response(self, raw: str) -> Dict[str, Any]:
+        """Strip optional code fences and parse the LLM's JSON text; raises ValueError if it is empty or not valid JSON."""
+        cleaned = raw.strip()
+        if cleaned.startswith("```json"):
+            cleaned = cleaned[7:]
+        if cleaned.startswith("```"):
+            cleaned = cleaned[3:]
+        if cleaned.endswith("```"):
+            cleaned = cleaned[:-3]
+        cleaned = cleaned.strip()
+        if not cleaned:
+            raise ValueError("文档设计LLM返回空内容")
+        try:
+            return json.loads(cleaned)  # NOTE(review): result is not checked to be a dict
+        except json.JSONDecodeError as exc:
+            raise ValueError(f"文档设计JSON解析失败: {exc}") from exc
+
+
+__all__ = ["DocumentLayoutNode"]
@@ -1,254 +0,0 @@
-"""
-HTML生成节点
-将整合后的内容转换为美观的HTML报告
-"""
-
-import json
-from datetime import datetime
-from typing import Dict, Any
-from loguru import logger
-
-from .base_node import StateMutationNode
-from ..llms.base import LLMClient
-from ..state.state import ReportState
-from ..prompts import SYSTEM_PROMPT_HTML_GENERATION
-# 不再需要text_processing依赖
-
-
-class HTMLGenerationNode(StateMutationNode):
-    """HTML生成处理节点"""
-    
-    def __init__(self, llm_client: LLMClient):
-        """
-        初始化HTML生成节点
-        
-        Args:
-            llm_client: LLM客户端
-        """
-        super().__init__(llm_client, "HTMLGenerationNode")
-    
-    def run(self, input_data: Dict[str, Any], **kwargs) -> str:
-        """
-        执行HTML生成
-        
-        Args:
-            input_data: 包含报告数据的字典
-                - query: 原始查询
-                - query_engine_report: QueryEngine报告内容
-                - media_engine_report: MediaEngine报告内容  
-                - insight_engine_report: InsightEngine报告内容
-                - forum_logs: 论坛日志内容
-                - selected_template: 选择的模板内容
-                
-        Returns:
-            生成的HTML内容
-        """
-        logger.info("开始生成HTML报告...")
-        
-        try:
-            # 准备LLM输入数据
-            llm_input = {
-                "query": input_data.get('query', ''),
-                "query_engine_report": input_data.get('query_engine_report', ''),
-                "media_engine_report": input_data.get('media_engine_report', ''),
-                "insight_engine_report": input_data.get('insight_engine_report', ''),
-                "forum_logs": input_data.get('forum_logs', ''),
-                "selected_template": input_data.get('selected_template', '')
-            }
-            
-            # 转换为JSON格式传递给LLM
-            message = json.dumps(llm_input, ensure_ascii=False, indent=2)
-            
-            # 调用LLM生成HTML
-            response = self.llm_client.stream_invoke_to_string(SYSTEM_PROMPT_HTML_GENERATION, message)
-            
-            # 处理响应（简化版）
-            processed_response = self.process_output(response)
-            
-            logger.info("HTML报告生成完成")
-            return processed_response
-            
-        except Exception as e:
-            logger.exception(f"HTML生成失败: {str(e)}")
-            # 返回备用HTML
-            return self._generate_fallback_html(input_data)
-    
-    def mutate_state(self, input_data: Dict[str, Any], state: ReportState, **kwargs) -> ReportState:
-        """
-        修改报告状态，添加生成的HTML内容
-        
-        Args:
-            input_data: 输入数据
-            state: 当前报告状态
-            **kwargs: 额外参数
-            
-        Returns:
-            更新后的报告状态
-        """
-        # 生成HTML
-        html_content = self.run(input_data, **kwargs)
-        
-        # 更新状态
-        state.html_content = html_content
-        state.mark_completed()
-        
-        return state
-    
-    def process_output(self, output: str) -> str:
-        """
-        处理LLM输出，提取HTML内容
-        
-        Args:
-            output: LLM原始输出
-            
-        Returns:
-            HTML内容
-        """
-        try:
-            logger.info(f"处理LLM原始输出，长度: {len(output)} 字符")
-            
-            html_content = output.strip()
-            
-            # 清理markdown代码块标记（如果存在）
-            if html_content.startswith('```html'):
-                html_content = html_content[7:]  # 移除 '```html'
-                if html_content.endswith('```'):
-                    html_content = html_content[:-3]  # 移除结尾的 '```'
-            elif html_content.startswith('```') and html_content.endswith('```'):
-                html_content = html_content[3:-3]  # 移除前后的 '```'
-            
-            html_content = html_content.strip()
-            
-            # 如果内容为空，返回原始输出
-            if not html_content:
-                logger.info("处理后内容为空，返回原始输出")
-                html_content = output
-            
-            logger.info(f"HTML处理完成，最终长度: {len(html_content)} 字符")
-            return html_content
-            
-        except Exception as e:
-            logger.exception(f"处理HTML输出失败: {str(e)}，返回原始输出")
-            return output
-    
-    def _generate_fallback_html(self, input_data: Dict[str, Any]) -> str:
-        """
-        生成备用HTML报告（当LLM失败时使用）
-        
-        Args:
-            input_data: 输入数据
-            
-        Returns:
-            备用HTML内容
-        """
-        logger.info("使用备用HTML生成方法")
-        
-        query = input_data.get('query', '智能舆情分析报告')
-        query_report = input_data.get('query_engine_report', '')
-        media_report = input_data.get('media_engine_report', '')
-        insight_report = input_data.get('insight_engine_report', '')
-        forum_logs = input_data.get('forum_logs', '')
-        
-        generation_time = datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")
-        
-        html_content = f"""<!DOCTYPE html>
-<html lang="zh-CN">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>{query} - 智能舆情分析报告</title>
-    <style>
-        body {{
-            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
-            line-height: 1.6;
-            color: #333;
-            max-width: 1200px;
-            margin: 0 auto;
-            padding: 20px;
-            background: #f5f5f5;
-        }}
-        .container {{
-            background: white;
-            padding: 40px;
-            border-radius: 8px;
-            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
-        }}
-        h1 {{
-            color: #2c3e50;
-            border-bottom: 3px solid #3498db;
-            padding-bottom: 10px;
-        }}
-        h2 {{
-            color: #34495e;
-            margin-top: 30px;
-            margin-bottom: 15px;
-        }}
-        .section {{
-            margin-bottom: 30px;
-            padding: 20px;
-            border-left: 4px solid #3498db;
-            background: #f8f9fa;
-        }}
-        .meta {{
-            background: #e9ecef;
-            padding: 15px;
-            border-radius: 5px;
-            margin-bottom: 20px;
-        }}
-        .footer {{
-            margin-top: 40px;
-            padding-top: 20px;
-            border-top: 1px solid #eee;
-            text-align: center;
-            color: #666;
-        }}
-        pre {{
-            background: #f4f4f4;
-            padding: 15px;
-            border-radius: 5px;
-            overflow-x: auto;
-            white-space: pre-wrap;
-        }}
-    </style>
-</head>
-<body>
-    <div class="container">
-        <h1>{query}</h1>
-        
-        <div class="meta">
-            <strong>报告生成时间:</strong> {generation_time}<br>
-            <strong>数据来源:</strong> QueryEngine、MediaEngine、InsightEngine、ForumEngine<br>
-            <strong>报告类型:</strong> 综合舆情分析报告
-        </div>
-        
-        <h2>执行摘要</h2>
-        <div class="section">
-            本报告整合了多个分析引擎的研究结果，为您提供全面的舆情分析洞察。
-            通过对查询主题"{query}"的深度分析，我们从多个维度展现了当前的舆情态势。
-        </div>
-        
-        {f'<h2>QueryEngine分析结果</h2><div class="section"><pre>{query_report}</pre></div>' if query_report else ''}
-        
-        {f'<h2>MediaEngine分析结果</h2><div class="section"><pre>{media_report}</pre></div>' if media_report else ''}
-        
-        {f'<h2>InsightEngine分析结果</h2><div class="section"><pre>{insight_report}</pre></div>' if insight_report else ''}
-        
-        {f'<h2>论坛监控数据</h2><div class="section"><pre>{forum_logs}</pre></div>' if forum_logs else ''}
-        
-        <h2>综合结论</h2>
-        <div class="section">
-            基于多个分析引擎的综合研究，我们对"{query}"主题进行了全面分析。
-            各引擎从不同角度提供了深入洞察，为决策提供了重要参考。
-        </div>
-        
-        <div class="footer">
-            <p>本报告由智能舆情分析平台自动生成</p>
-            <p>ReportEngine v1.0 | 生成时间: {generation_time}</p>
-        </div>
-    </div>
-</body>
-</html>"""
-        
-        return html_content
-    
-
@@ -0,0 +1,78 @@
+"""
+章节篇幅规划节点。
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Dict, List
+
+from loguru import logger
+
+from ..core import TemplateSection
+from ..prompts import (
+    SYSTEM_PROMPT_WORD_BUDGET,
+    build_word_budget_prompt,
+)
+from .base_node import BaseNode
+
+
+class WordBudgetNode(BaseNode):
+    """Node that asks the LLM to plan per-chapter word counts and emphasis."""
+
+    def __init__(self, llm_client):
+        super().__init__(llm_client, "WordBudgetNode")
+
+    def run(
+        self,
+        sections: List[TemplateSection],
+        design: Dict[str, Any],
+        reports: Dict[str, str],
+        forum_logs: str,
+        query: str,
+        template_overview: Dict[str, Any] | None = None,
+    ) -> Dict[str, Any]:
+        """Send the design, sections and source material to the LLM and return the parsed JSON length plan."""
+        # Besides the chapter skeleton, the input includes the layout node's output (design) as a reference for length planning
+        payload = {
+            "query": query,
+            "design": design,
+            "sections": [section.to_dict() for section in sections],
+            "templateOverview": template_overview  # a falsy overview falls back to one derived from sections
+            or {
+                "title": sections[0].title if sections else "",
+                "chapters": [section.to_dict() for section in sections],
+            },
+            "reports": reports,
+            "forumLogs": forum_logs,
+        }
+        user = build_word_budget_prompt(payload)
+        response = self.llm_client.stream_invoke_to_string(
+            SYSTEM_PROMPT_WORD_BUDGET,
+            user,
+            temperature=0.25,
+            top_p=0.85,
+        )
+        plan = self._parse_response(response)
+        logger.info("章节字数规划已生成")
+        return plan
+
+    def _parse_response(self, raw: str) -> Dict[str, Any]:
+        """Strip optional code fences and parse the LLM's JSON text; raises ValueError if it is empty or not valid JSON."""
+        cleaned = raw.strip()
+        if cleaned.startswith("```json"):
+            cleaned = cleaned[7:]
+        if cleaned.startswith("```"):
+            cleaned = cleaned[3:]
+        if cleaned.endswith("```"):
+            cleaned = cleaned[:-3]
+        cleaned = cleaned.strip()
+        if not cleaned:
+            raise ValueError("篇幅规划LLM返回空内容")
+        try:
+            return json.loads(cleaned)  # NOTE(review): result is not checked to be a dict
+        except json.JSONDecodeError as exc:
+            raise ValueError(f"篇幅规划JSON解析失败: {exc}") from exc
+
+
+__all__ = ["WordBudgetNode"]