""" 章节装订器:负责把多个章节JSON合并为整本IR。 DocumentComposer 会注入缺失锚点、统一顺序,并补齐 IR 级元数据。 """ from __future__ import annotations from datetime import datetime from typing import Dict, List, Set from ..ir import IR_VERSION class DocumentComposer: """ 将章节拼接成Document IR的简单装订器。 作用: - 按order排序章节,补充默认chapterId; - 防止anchor重复,生成全局唯一锚点; - 注入 IR 版本与生成时间戳。 """ def __init__(self): """初始化装订器并记录已使用的锚点,避免重复""" self._seen_anchors: Set[str] = set() def build_document( self, report_id: str, metadata: Dict[str, object], chapters: List[Dict[str, object]], ) -> Dict[str, object]: """ 把所有章节按order排序并注入唯一锚点,形成整本IR。 同时合并 metadata/themeTokens/assets,供渲染器直接消费。 参数: report_id: 本次报告ID。 metadata: 全局元信息(标题、主题、toc等)。 chapters: 章节payload列表。 返回: dict: 满足渲染器需求的Document IR。 """ # 构建从chapterId到toc anchor的映射 toc_anchor_map = self._build_toc_anchor_map(metadata) ordered = sorted(chapters, key=lambda c: c.get("order", 0)) for idx, chapter in enumerate(ordered, start=1): chapter.setdefault("chapterId", f"S{idx}") # 优先级:1. 目录配置的anchor 2. 章节自带的anchor 3. 默认anchor chapter_id = chapter.get("chapterId") anchor = ( toc_anchor_map.get(chapter_id) or chapter.get("anchor") or f"section-{idx}" ) chapter["anchor"] = self._ensure_unique_anchor(anchor) chapter.setdefault("order", idx * 10) if chapter.get("errorPlaceholder"): self._ensure_heading_block(chapter) document = { "version": IR_VERSION, "reportId": report_id, "metadata": { **metadata, "generatedAt": metadata.get("generatedAt") or datetime.utcnow().isoformat() + "Z", }, "themeTokens": metadata.get("themeTokens", {}), "chapters": ordered, "assets": metadata.get("assets", {}), } return document def _ensure_unique_anchor(self, anchor: str) -> str: """若存在重复锚点则追加序号,确保全局唯一。""" base = anchor counter = 2 while anchor in self._seen_anchors: anchor = f"{base}-{counter}" counter += 1 self._seen_anchors.add(anchor) return anchor def _build_toc_anchor_map(self, metadata: Dict[str, object]) -> Dict[str, str]: """ 从metadata.toc.customEntries构建chapterId到anchor的映射。 参数: metadata: 文档元信息。 返回: dict: chapterId -> anchor 的映射。 """ toc_config = metadata.get("toc") or {} custom_entries = toc_config.get("customEntries") or [] anchor_map = {} for entry in custom_entries: if isinstance(entry, dict): chapter_id = entry.get("chapterId") anchor = entry.get("anchor") if chapter_id and anchor: anchor_map[chapter_id] = anchor return anchor_map def _ensure_heading_block(self, chapter: Dict[str, object]) -> None: """保证占位章节仍然拥有可用于目录的heading block。""" blocks = chapter.get("blocks") if isinstance(blocks, list): for block in blocks: if isinstance(block, dict) and block.get("type") == "heading": return heading = { "type": "heading", "level": 2, "text": chapter.get("title") or "占位章节", "anchor": chapter.get("anchor"), } if isinstance(blocks, list): blocks.insert(0, heading) else: chapter["blocks"] = [heading] __all__ = ["DocumentComposer"]