62 lines
1.8 KiB
Python
62 lines
1.8 KiB
Python
"""
|
|
章节装订器:负责把多个章节JSON合并为整本IR。
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime
|
|
from typing import Dict, List, Set
|
|
|
|
from ..ir import IR_VERSION
|
|
|
|
|
|
class DocumentComposer:
|
|
"""
|
|
将章节拼接成Document IR的简单装订器。
|
|
"""
|
|
|
|
def __init__(self):
|
|
"""初始化装订器并记录已使用的锚点,避免重复"""
|
|
self._seen_anchors: Set[str] = set()
|
|
|
|
def build_document(
|
|
self,
|
|
report_id: str,
|
|
metadata: Dict[str, object],
|
|
chapters: List[Dict[str, object]],
|
|
) -> Dict[str, object]:
|
|
"""把所有章节按order排序并注入唯一锚点,形成整本IR"""
|
|
ordered = sorted(chapters, key=lambda c: c.get("order", 0))
|
|
for idx, chapter in enumerate(ordered, start=1):
|
|
chapter.setdefault("chapterId", f"S{idx}")
|
|
anchor = chapter.get("anchor") or f"section-{idx}"
|
|
chapter["anchor"] = self._ensure_unique_anchor(anchor)
|
|
chapter.setdefault("order", idx * 10)
|
|
|
|
document = {
|
|
"version": IR_VERSION,
|
|
"reportId": report_id,
|
|
"metadata": {
|
|
**metadata,
|
|
"generatedAt": metadata.get("generatedAt")
|
|
or datetime.utcnow().isoformat() + "Z",
|
|
},
|
|
"themeTokens": metadata.get("themeTokens", {}),
|
|
"chapters": ordered,
|
|
"assets": metadata.get("assets", {}),
|
|
}
|
|
return document
|
|
|
|
def _ensure_unique_anchor(self, anchor: str) -> str:
|
|
"""若存在重复锚点则追加序号,确保全局唯一"""
|
|
base = anchor
|
|
counter = 2
|
|
while anchor in self._seen_anchors:
|
|
anchor = f"{base}-{counter}"
|
|
counter += 1
|
|
self._seen_anchors.add(anchor)
|
|
return anchor
|
|
|
|
|
|
__all__ = ["DocumentComposer"]
|