Files
bettafish-company/ReportEngine/core/stitcher.py
T
2025-11-15 15:23:05 +08:00

101 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
章节装订器:负责把多个章节JSON合并为整本IR。
DocumentComposer 会注入缺失锚点、统一顺序,并补齐 IR 级元数据。
"""
from __future__ import annotations
from datetime import datetime
from typing import Dict, List, Set
from ..ir import IR_VERSION
class DocumentComposer:
"""
将章节拼接成Document IR的简单装订器。
作用:
- 按order排序章节,补充默认chapterId
- 防止anchor重复,生成全局唯一锚点;
- 注入 IR 版本与生成时间戳。
"""
def __init__(self):
"""初始化装订器并记录已使用的锚点,避免重复"""
self._seen_anchors: Set[str] = set()
def build_document(
self,
report_id: str,
metadata: Dict[str, object],
chapters: List[Dict[str, object]],
) -> Dict[str, object]:
"""
把所有章节按order排序并注入唯一锚点,形成整本IR。
同时合并 metadata/themeTokens/assets,供渲染器直接消费。
参数:
report_id: 本次报告ID。
metadata: 全局元信息(标题、主题、toc等)。
chapters: 章节payload列表。
返回:
dict: 满足渲染器需求的Document IR。
"""
ordered = sorted(chapters, key=lambda c: c.get("order", 0))
for idx, chapter in enumerate(ordered, start=1):
chapter.setdefault("chapterId", f"S{idx}")
anchor = chapter.get("anchor") or f"section-{idx}"
chapter["anchor"] = self._ensure_unique_anchor(anchor)
chapter.setdefault("order", idx * 10)
if chapter.get("errorPlaceholder"):
self._ensure_heading_block(chapter)
document = {
"version": IR_VERSION,
"reportId": report_id,
"metadata": {
**metadata,
"generatedAt": metadata.get("generatedAt")
or datetime.utcnow().isoformat() + "Z",
},
"themeTokens": metadata.get("themeTokens", {}),
"chapters": ordered,
"assets": metadata.get("assets", {}),
}
return document
def _ensure_unique_anchor(self, anchor: str) -> str:
"""若存在重复锚点则追加序号,确保全局唯一。"""
base = anchor
counter = 2
while anchor in self._seen_anchors:
anchor = f"{base}-{counter}"
counter += 1
self._seen_anchors.add(anchor)
return anchor
def _ensure_heading_block(self, chapter: Dict[str, object]) -> None:
"""保证占位章节仍然拥有可用于目录的heading block。"""
blocks = chapter.get("blocks")
if isinstance(blocks, list):
for block in blocks:
if isinstance(block, dict) and block.get("type") == "heading":
return
heading = {
"type": "heading",
"level": 2,
"text": chapter.get("title") or "占位章节",
"anchor": chapter.get("anchor"),
}
if isinstance(blocks, list):
blocks.insert(0, heading)
else:
chapter["blocks"] = [heading]
__all__ = ["DocumentComposer"]