Add Comments

2025-11-14 19:44:04 +08:00
parent 52eed4d010
commit 6d0e8f4b8c
13 changed files with 655 additions and 61 deletions
@@ -75,6 +75,13 @@ class ChapterStorage:
        为本次报告创建独立的章节输出目录与manifest。

        同时把全局metadata写入 `manifest.json`，供渲染/调试查询。
+
+        参数:
+            report_id: 任务ID。
+            metadata: Report元数据（标题、主题等）。
+
+        返回:
+            Path: 新建的run目录。
        """
        run_dir = self.base_dir / report_id
        run_dir.mkdir(parents=True, exist_ok=True)
@@ -93,6 +100,13 @@ class ChapterStorage:
        创建章节子目录并在manifest中标记为streaming状态。

        会生成 `order-slug` 风格的子目录，并提前登记 raw 文件路径。
+
+        参数:
+            run_dir: 会话根目录。
+            chapter_meta: 包含 chapterId/title/slug/order 的元数据。
+
+        返回:
+            Path: 章节目录。
        """
        slug_value = str(
            chapter_meta.get("slug") or chapter_meta.get("chapterId") or "section"
@@ -124,6 +138,15 @@ class ChapterStorage:
        章节流式生成完毕后写入最终JSON并更新manifest状态。

        若校验失败，错误信息会被写入manifest，供前端展示。
+
+        参数:
+            run_dir: 会话根目录。
+            chapter_meta: 章节元信息。
+            payload: 校验通过的章节JSON。
+            errors: 可选的错误列表，用于标记invalid状态。
+
+        返回:
+            Path: 最终的 `chapter.json` 文件路径。
        """
        slug_value = str(
            chapter_meta.get("slug") or chapter_meta.get("chapterId") or "section"
@@ -159,6 +182,12 @@ class ChapterStorage:
        从指定run目录读取全部chapter.json并按order排序返回。

        常用于 DocumentComposer 将多个章节装订成整本IR。
+
+        参数:
+            run_dir: 会话根目录。
+
+        返回:
+            list[dict]: 章节payload列表。
        """
        payloads: List[Dict[str, object]] = []
        for child in sorted(run_dir.iterdir()):
@@ -183,6 +212,12 @@ class ChapterStorage:
        将流式输出实时写入raw文件。

        通过 contextmanager 暴露文件句柄，简化章节节点的写入逻辑。
+
+        参数:
+            chapter_dir: 当前章节目录。
+
+        返回:
+            Generator[TextIO]: 作为上下文管理器使用的文件对象。
        """
        raw_path = self._raw_stream_path(chapter_dir)
        raw_path.parent.mkdir(parents=True, exist_ok=True)
@@ -36,6 +36,14 @@ class DocumentComposer:
        把所有章节按order排序并注入唯一锚点，形成整本IR。

        同时合并 metadata/themeTokens/assets，供渲染器直接消费。
+
+        参数:
+            report_id: 本次报告ID。
+            metadata: 全局元信息（标题、主题、toc等）。
+            chapters: 章节payload列表。
+
+        返回:
+            dict: 满足渲染器需求的Document IR。
        """
        ordered = sorted(chapters, key=lambda c: c.get("order", 0))
        for idx, chapter in enumerate(ordered, start=1):
@@ -63,6 +63,12 @@ def parse_template_sections(template_md: str) -> List[TemplateSection]:
    返回的每个TemplateSection都携带slug/order/章节号，
    方便后续分章调用与锚点生成。解析时会同时兼容
    “# 标题”“无符号编号”“列表提纲”等不同写法。
+
+    参数:
+        template_md: 模板Markdown全文。
+
+    返回:
+        list[TemplateSection]: 结构化的章节序列。
    """

    sections: List[TemplateSection] = []
@@ -113,6 +119,13 @@ def _classify_line(stripped: str, indent: int) -> Optional[dict]:

    借助正则判断当前行是章节标题、提纲还是普通列表项，
    并衍生 depth/slug/number 等派生信息。
+
+    参数:
+        stripped: 去除前后空格后的原始行。
+        indent: 行首空格数量，用于区分层级。
+
+    返回:
+        dict | None: 识别后的元数据；无法识别时返回None。
    """

    heading_match = heading_pattern.match(stripped)
@@ -181,6 +194,12 @@ def _split_number(payload: str) -> dict:

    例如 `1.2 市场趋势` 会被拆成 number=1.2、title=市场趋势，
    并提供 display 用于回填标题。
+
+    参数:
+        payload: 原始标题字符串。
+
+    返回:
+        dict: 包含 number/title/display。
    """
    match = number_pattern.match(payload)
    number = match.group("num") if match else ""
@@ -196,7 +215,16 @@ def _split_number(payload: str) -> dict:


 def _build_slug(number: str, title: str) -> str:
-    """根据编号/标题生成锚点，优先复用编号，缺失时对标题slug化。"""
+    """
+    根据编号/标题生成锚点，优先复用编号，缺失时对标题slug化。
+
+    参数:
+        number: 章节编号。
+        title: 标题文本。
+
+    返回:
+        str: 形如 `section-1-0` 的slug。
+    """
    if number:
        token = number.replace(".", "-")
    else:
@@ -223,6 +251,13 @@ def _ensure_unique_slug(slug: str, used: set) -> str:
    若slug重复则自动追加序号，直到在used集合中唯一。

    通过 `-2/-3...` 的方式保证相同标题不会产生重复锚点。
+
+    参数:
+        slug: 初始slug。
+        used: 已使用集合。
+
+    返回:
+        str: 去重后的slug。
    """
    if slug not in used:
        used.add(slug)