diff --git a/ReportEngine/agent.py b/ReportEngine/agent.py index 6308d1b..34dc96d 100644 --- a/ReportEngine/agent.py +++ b/ReportEngine/agent.py @@ -10,6 +10,7 @@ Report Agent主类。 import json import os +from copy import deepcopy from pathlib import Path from uuid import uuid4 from datetime import datetime @@ -174,6 +175,8 @@ class ReportAgent: - 章节存储、IR装订、渲染器等产出链路; - 状态管理、日志、输入输出校验与持久化。 """ + _CONTENT_SPARSE_MIN_ATTEMPTS = 3 + _CONTENT_SPARSE_WARNING_TEXT = "本章LLM生成的内容字数可能过低,必要时可以尝试重新运行程序。" def __init__(self, config: Optional[Settings] = None): """ @@ -466,7 +469,9 @@ class ReportAgent: emit('stage', {'stage': 'storage_ready', 'run_dir': str(run_dir)}) chapters = [] - chapter_max_attempts = max(1, self.config.CHAPTER_JSON_MAX_ATTEMPTS) + chapter_max_attempts = max( + self._CONTENT_SPARSE_MIN_ATTEMPTS, self.config.CHAPTER_JSON_MAX_ATTEMPTS + ) for section in sections: logger.info(f"生成章节: {section.title}") emit('chapter_status', { @@ -492,6 +497,9 @@ class ReportAgent: chapter_payload: Dict[str, Any] | None = None attempt = 1 + best_sparse_candidate: Dict[str, Any] | None = None + best_sparse_score = -1 + fallback_used = False while attempt <= chapter_max_attempts: try: chapter_payload = self.chapter_generation_node.run( @@ -506,6 +514,19 @@ class ReportAgent: "content_sparse" if isinstance(structured_error, ChapterContentError) else "json_parse" ) readable_label = "内容密度异常" if error_kind == "content_sparse" else "JSON解析失败" + if isinstance(structured_error, ChapterContentError): + candidate = getattr(structured_error, "chapter_payload", None) + candidate_score = getattr(structured_error, "body_characters", 0) or 0 + if isinstance(candidate, dict) and candidate_score >= 0: + if candidate_score > best_sparse_score: + best_sparse_candidate = deepcopy(candidate) + best_sparse_score = candidate_score + will_fallback = ( + isinstance(structured_error, ChapterContentError) + and attempt >= chapter_max_attempts + and attempt >= self._CONTENT_SPARSE_MIN_ATTEMPTS + and best_sparse_candidate is not None + ) logger.warning( "章节 {title} {label}(第 {attempt}/{total} 次尝试): {error}", title=section.title, @@ -514,14 +535,27 @@ class ReportAgent: total=chapter_max_attempts, error=structured_error, ) - emit('chapter_status', { + status_value = 'retrying' if attempt < chapter_max_attempts or will_fallback else 'error' + status_payload = { 'chapterId': section.chapter_id, 'title': section.title, - 'status': 'retrying' if attempt < chapter_max_attempts else 'error', + 'status': status_value, 'attempt': attempt, 'error': str(structured_error), 'reason': error_kind, - }) + } + if will_fallback: + status_payload['warning'] = 'content_sparse_fallback_pending' + emit('chapter_status', status_payload) + if will_fallback: + logger.warning( + "章节 {title} 达到最大尝试次数,保留字数最多(约 {score} 字)的版本作为兜底输出", + title=section.title, + score=best_sparse_score, + ) + chapter_payload = self._finalize_sparse_chapter(best_sparse_candidate) + fallback_used = True + break if attempt >= chapter_max_attempts: raise attempt += 1 @@ -553,12 +587,16 @@ class ReportAgent: f"{section.title} 章节JSON在 {chapter_max_attempts} 次尝试后仍无法解析" ) chapters.append(chapter_payload) - emit('chapter_status', { + completion_status = { 'chapterId': section.chapter_id, 'title': section.title, 'status': 'completed', 'attempt': attempt, - }) + } + if fallback_used: + completion_status['warning'] = 'content_sparse_fallback' + completion_status['warningMessage'] = self._CONTENT_SPARSE_WARNING_TEXT + emit('chapter_status', completion_status) document_ir = self.document_composer.build_document( report_id, @@ -779,6 +817,48 @@ class ReportAgent: ] return any(keyword in normalized for keyword in keywords) + def _finalize_sparse_chapter(self, chapter: Optional[Dict[str, Any]]) -> Dict[str, Any]: + """ + 构造内容稀疏兜底章节:复制原始payload并插入温馨提示段落。 + """ + safe_chapter = deepcopy(chapter or {}) + if not isinstance(safe_chapter, dict): + safe_chapter = {} + self._ensure_sparse_warning_block(safe_chapter) + return safe_chapter + + def _ensure_sparse_warning_block(self, chapter: Dict[str, Any]) -> None: + """ + 将提示段落插在章节标题后,提醒读者该章字数偏少。 + """ + warning_block = { + "type": "paragraph", + "inlines": [ + { + "text": self._CONTENT_SPARSE_WARNING_TEXT, + "marks": [{"type": "italic"}], + } + ], + "meta": {"role": "content-sparse-warning"}, + } + blocks = chapter.get("blocks") + if isinstance(blocks, list) and blocks: + inserted = False + for idx, block in enumerate(blocks): + if isinstance(block, dict) and block.get("type") == "heading": + blocks.insert(idx + 1, warning_block) + inserted = True + break + if not inserted: + blocks.insert(0, warning_block) + else: + chapter["blocks"] = [warning_block] + meta = chapter.get("meta") + if isinstance(meta, dict): + meta["contentSparseWarning"] = True + else: + chapter["meta"] = {"contentSparseWarning": True} + def _stringify(self, value: Any) -> str: """ 安全地将对象转成字符串。 diff --git a/ReportEngine/nodes/chapter_generation_node.py b/ReportEngine/nodes/chapter_generation_node.py index 2812d04..d075875 100644 --- a/ReportEngine/nodes/chapter_generation_node.py +++ b/ReportEngine/nodes/chapter_generation_node.py @@ -55,6 +55,20 @@ class ChapterContentError(ValueError): 当LLM仅输出标题或正文不足以支撑一章时触发,驱动重试以保证报告质量。 """ + def __init__( + self, + message: str, + chapter: Optional[Dict[str, Any]] = None, + body_characters: int = 0, + narrative_characters: int = 0, + non_heading_blocks: int = 0, + ): + super().__init__(message) + self.chapter_payload: Optional[Dict[str, Any]] = chapter + self.body_characters: int = int(body_characters or 0) + self.narrative_characters: int = int(narrative_characters or 0) + self.non_heading_blocks: int = int(non_heading_blocks or 0) + class ChapterGenerationNode(BaseNode): """ @@ -897,7 +911,13 @@ class ChapterGenerationNode(BaseNode): """ blocks = chapter.get("blocks") if not isinstance(blocks, list) or not blocks: - raise ChapterContentError("章节缺少正文区块,无法输出内容") + raise ChapterContentError( + "章节缺少正文区块,无法输出内容", + chapter=chapter, + body_characters=0, + narrative_characters=0, + non_heading_blocks=0, + ) non_heading_blocks = [ block @@ -905,16 +925,21 @@ class ChapterGenerationNode(BaseNode): if isinstance(block, dict) and block.get("type") not in {"heading", "divider", "toc"} ] + valid_block_count = len(non_heading_blocks) body_characters = self._count_body_characters(blocks) narrative_characters = self._count_narrative_characters(blocks) if ( - len(non_heading_blocks) < self._MIN_NON_HEADING_BLOCKS + valid_block_count < self._MIN_NON_HEADING_BLOCKS or body_characters < self._MIN_BODY_CHARACTERS or narrative_characters < self._MIN_NARRATIVE_CHARACTERS ): raise ChapterContentError( - f"{chapter.get('title') or '该章节'} 正文不足:有效区块 {len(non_heading_blocks)} 个,估算字符数 {body_characters},叙述性字符数 {narrative_characters}" + f"{chapter.get('title') or '该章节'} 正文不足:有效区块 {valid_block_count} 个,估算字符数 {body_characters},叙述性字符数 {narrative_characters}", + chapter=chapter, + body_characters=body_characters, + narrative_characters=narrative_characters, + non_heading_blocks=valid_block_count, ) def _count_body_characters(self, blocks: Any) -> int: