def _attempt_llm_structural_repair(
    self,
    chapter: Dict[str, Any],
    validation_errors: List[str],
    raw_text: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """
    Ask the LLM to repair a chapter that failed structural validation.

    The failed chapter, the validator errors and (optionally) the raw model
    output are serialized with ``build_chapter_repair_prompt`` and sent via
    ``self.llm_client.invoke`` together with
    ``SYSTEM_PROMPT_CHAPTER_JSON_REPAIR``. Sampling is pinned to
    ``temperature=0.0`` / ``top_p=0.05`` for this call.

    Args:
        chapter: The chapter dict that did not pass validation.
        validation_errors: Error messages produced by the validator. When
            empty, no repair is attempted.
        raw_text: Raw LLM output the chapter was parsed from, if available.

    Returns:
        The repaired chapter as a non-empty dict, or ``None`` when there is
        nothing to repair, the LLM call fails, the response is empty, the
        response cannot be parsed, or the parsed value is not a usable
        chapter object. The result is NOT re-validated here; the caller is
        expected to sanitize and validate it again.
    """
    if not validation_errors:
        return None

    payload = build_chapter_repair_prompt(chapter, validation_errors, raw_text)
    try:
        response = self.llm_client.invoke(
            SYSTEM_PROMPT_CHAPTER_JSON_REPAIR,
            payload,
            temperature=0.0,
            top_p=0.05,
        )
    except Exception as exc:  # pragma: no cover - network/API errors are only logged
        logger.error(f"章节JSON LLM修复调用失败: {exc}")
        return None
    if not response:
        return None

    try:
        repaired = self._parse_chapter(response)
    except Exception as exc:
        logger.error(f"LLM修复后的章节JSON解析失败: {exc}")
        return None

    # The caller calls dict methods (``setdefault``) on the result, so a
    # parsed list/str/empty value must be treated as a failed repair rather
    # than reported as a success.
    if not isinstance(repaired, dict) or not repaired:
        logger.error("LLM修复结果不是有效的章节对象,已放弃兜底修复")
        return None

    logger.warning("章节JSON经多次本地修复仍不合规,已成功启用LLM兜底修复")
    return repaired
def build_chapter_repair_prompt(chapter: dict, errors, original_text=None) -> str:
    """
    Serialize the input payload for the chapter-repair LLM call.

    The payload always carries the failed chapter (``failedChapter``) and
    the validator errors (``validatorErrors``). When ``original_text`` is
    truthy, its last 2000 characters are added as ``rawOutputTail``.

    Returns:
        A pretty-printed (2-space indent) JSON string with non-ASCII
        characters left unescaped.
    """
    # Only the tail of the raw output is forwarded; an empty/None text adds nothing.
    tail_part = {"rawOutputTail": original_text[-2000:]} if original_text else {}
    return json.dumps(
        {"failedChapter": chapter, "validatorErrors": errors, **tail_part},
        ensure_ascii=False,
        indent=2,
    )