Optimize the Handling of Low Word Counts
This commit is contained in:
+86
-6
@@ -10,6 +10,7 @@ Report Agent主类。
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
from copy import deepcopy
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -174,6 +175,8 @@ class ReportAgent:
|
|||||||
- 章节存储、IR装订、渲染器等产出链路;
|
- 章节存储、IR装订、渲染器等产出链路;
|
||||||
- 状态管理、日志、输入输出校验与持久化。
|
- 状态管理、日志、输入输出校验与持久化。
|
||||||
"""
|
"""
|
||||||
|
_CONTENT_SPARSE_MIN_ATTEMPTS = 3
|
||||||
|
_CONTENT_SPARSE_WARNING_TEXT = "本章LLM生成的内容字数可能过低,必要时可以尝试重新运行程序。"
|
||||||
|
|
||||||
def __init__(self, config: Optional[Settings] = None):
|
def __init__(self, config: Optional[Settings] = None):
|
||||||
"""
|
"""
|
||||||
@@ -466,7 +469,9 @@ class ReportAgent:
|
|||||||
emit('stage', {'stage': 'storage_ready', 'run_dir': str(run_dir)})
|
emit('stage', {'stage': 'storage_ready', 'run_dir': str(run_dir)})
|
||||||
|
|
||||||
chapters = []
|
chapters = []
|
||||||
chapter_max_attempts = max(1, self.config.CHAPTER_JSON_MAX_ATTEMPTS)
|
chapter_max_attempts = max(
|
||||||
|
self._CONTENT_SPARSE_MIN_ATTEMPTS, self.config.CHAPTER_JSON_MAX_ATTEMPTS
|
||||||
|
)
|
||||||
for section in sections:
|
for section in sections:
|
||||||
logger.info(f"生成章节: {section.title}")
|
logger.info(f"生成章节: {section.title}")
|
||||||
emit('chapter_status', {
|
emit('chapter_status', {
|
||||||
@@ -492,6 +497,9 @@ class ReportAgent:
|
|||||||
|
|
||||||
chapter_payload: Dict[str, Any] | None = None
|
chapter_payload: Dict[str, Any] | None = None
|
||||||
attempt = 1
|
attempt = 1
|
||||||
|
best_sparse_candidate: Dict[str, Any] | None = None
|
||||||
|
best_sparse_score = -1
|
||||||
|
fallback_used = False
|
||||||
while attempt <= chapter_max_attempts:
|
while attempt <= chapter_max_attempts:
|
||||||
try:
|
try:
|
||||||
chapter_payload = self.chapter_generation_node.run(
|
chapter_payload = self.chapter_generation_node.run(
|
||||||
@@ -506,6 +514,19 @@ class ReportAgent:
|
|||||||
"content_sparse" if isinstance(structured_error, ChapterContentError) else "json_parse"
|
"content_sparse" if isinstance(structured_error, ChapterContentError) else "json_parse"
|
||||||
)
|
)
|
||||||
readable_label = "内容密度异常" if error_kind == "content_sparse" else "JSON解析失败"
|
readable_label = "内容密度异常" if error_kind == "content_sparse" else "JSON解析失败"
|
||||||
|
if isinstance(structured_error, ChapterContentError):
|
||||||
|
candidate = getattr(structured_error, "chapter_payload", None)
|
||||||
|
candidate_score = getattr(structured_error, "body_characters", 0) or 0
|
||||||
|
if isinstance(candidate, dict) and candidate_score >= 0:
|
||||||
|
if candidate_score > best_sparse_score:
|
||||||
|
best_sparse_candidate = deepcopy(candidate)
|
||||||
|
best_sparse_score = candidate_score
|
||||||
|
will_fallback = (
|
||||||
|
isinstance(structured_error, ChapterContentError)
|
||||||
|
and attempt >= chapter_max_attempts
|
||||||
|
and attempt >= self._CONTENT_SPARSE_MIN_ATTEMPTS
|
||||||
|
and best_sparse_candidate is not None
|
||||||
|
)
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"章节 {title} {label}(第 {attempt}/{total} 次尝试): {error}",
|
"章节 {title} {label}(第 {attempt}/{total} 次尝试): {error}",
|
||||||
title=section.title,
|
title=section.title,
|
||||||
@@ -514,14 +535,27 @@ class ReportAgent:
|
|||||||
total=chapter_max_attempts,
|
total=chapter_max_attempts,
|
||||||
error=structured_error,
|
error=structured_error,
|
||||||
)
|
)
|
||||||
emit('chapter_status', {
|
status_value = 'retrying' if attempt < chapter_max_attempts or will_fallback else 'error'
|
||||||
|
status_payload = {
|
||||||
'chapterId': section.chapter_id,
|
'chapterId': section.chapter_id,
|
||||||
'title': section.title,
|
'title': section.title,
|
||||||
'status': 'retrying' if attempt < chapter_max_attempts else 'error',
|
'status': status_value,
|
||||||
'attempt': attempt,
|
'attempt': attempt,
|
||||||
'error': str(structured_error),
|
'error': str(structured_error),
|
||||||
'reason': error_kind,
|
'reason': error_kind,
|
||||||
})
|
}
|
||||||
|
if will_fallback:
|
||||||
|
status_payload['warning'] = 'content_sparse_fallback_pending'
|
||||||
|
emit('chapter_status', status_payload)
|
||||||
|
if will_fallback:
|
||||||
|
logger.warning(
|
||||||
|
"章节 {title} 达到最大尝试次数,保留字数最多(约 {score} 字)的版本作为兜底输出",
|
||||||
|
title=section.title,
|
||||||
|
score=best_sparse_score,
|
||||||
|
)
|
||||||
|
chapter_payload = self._finalize_sparse_chapter(best_sparse_candidate)
|
||||||
|
fallback_used = True
|
||||||
|
break
|
||||||
if attempt >= chapter_max_attempts:
|
if attempt >= chapter_max_attempts:
|
||||||
raise
|
raise
|
||||||
attempt += 1
|
attempt += 1
|
||||||
@@ -553,12 +587,16 @@ class ReportAgent:
|
|||||||
f"{section.title} 章节JSON在 {chapter_max_attempts} 次尝试后仍无法解析"
|
f"{section.title} 章节JSON在 {chapter_max_attempts} 次尝试后仍无法解析"
|
||||||
)
|
)
|
||||||
chapters.append(chapter_payload)
|
chapters.append(chapter_payload)
|
||||||
emit('chapter_status', {
|
completion_status = {
|
||||||
'chapterId': section.chapter_id,
|
'chapterId': section.chapter_id,
|
||||||
'title': section.title,
|
'title': section.title,
|
||||||
'status': 'completed',
|
'status': 'completed',
|
||||||
'attempt': attempt,
|
'attempt': attempt,
|
||||||
})
|
}
|
||||||
|
if fallback_used:
|
||||||
|
completion_status['warning'] = 'content_sparse_fallback'
|
||||||
|
completion_status['warningMessage'] = self._CONTENT_SPARSE_WARNING_TEXT
|
||||||
|
emit('chapter_status', completion_status)
|
||||||
|
|
||||||
document_ir = self.document_composer.build_document(
|
document_ir = self.document_composer.build_document(
|
||||||
report_id,
|
report_id,
|
||||||
@@ -779,6 +817,48 @@ class ReportAgent:
|
|||||||
]
|
]
|
||||||
return any(keyword in normalized for keyword in keywords)
|
return any(keyword in normalized for keyword in keywords)
|
||||||
|
|
||||||
|
def _finalize_sparse_chapter(self, chapter: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
构造内容稀疏兜底章节:复制原始payload并插入温馨提示段落。
|
||||||
|
"""
|
||||||
|
safe_chapter = deepcopy(chapter or {})
|
||||||
|
if not isinstance(safe_chapter, dict):
|
||||||
|
safe_chapter = {}
|
||||||
|
self._ensure_sparse_warning_block(safe_chapter)
|
||||||
|
return safe_chapter
|
||||||
|
|
||||||
|
def _ensure_sparse_warning_block(self, chapter: Dict[str, Any]) -> None:
|
||||||
|
"""
|
||||||
|
将提示段落插在章节标题后,提醒读者该章字数偏少。
|
||||||
|
"""
|
||||||
|
warning_block = {
|
||||||
|
"type": "paragraph",
|
||||||
|
"inlines": [
|
||||||
|
{
|
||||||
|
"text": self._CONTENT_SPARSE_WARNING_TEXT,
|
||||||
|
"marks": [{"type": "italic"}],
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"meta": {"role": "content-sparse-warning"},
|
||||||
|
}
|
||||||
|
blocks = chapter.get("blocks")
|
||||||
|
if isinstance(blocks, list) and blocks:
|
||||||
|
inserted = False
|
||||||
|
for idx, block in enumerate(blocks):
|
||||||
|
if isinstance(block, dict) and block.get("type") == "heading":
|
||||||
|
blocks.insert(idx + 1, warning_block)
|
||||||
|
inserted = True
|
||||||
|
break
|
||||||
|
if not inserted:
|
||||||
|
blocks.insert(0, warning_block)
|
||||||
|
else:
|
||||||
|
chapter["blocks"] = [warning_block]
|
||||||
|
meta = chapter.get("meta")
|
||||||
|
if isinstance(meta, dict):
|
||||||
|
meta["contentSparseWarning"] = True
|
||||||
|
else:
|
||||||
|
chapter["meta"] = {"contentSparseWarning": True}
|
||||||
|
|
||||||
def _stringify(self, value: Any) -> str:
|
def _stringify(self, value: Any) -> str:
|
||||||
"""
|
"""
|
||||||
安全地将对象转成字符串。
|
安全地将对象转成字符串。
|
||||||
|
|||||||
@@ -55,6 +55,20 @@ class ChapterContentError(ValueError):
|
|||||||
当LLM仅输出标题或正文不足以支撑一章时触发,驱动重试以保证报告质量。
|
当LLM仅输出标题或正文不足以支撑一章时触发,驱动重试以保证报告质量。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def __init__(
    self,
    message: str,
    chapter: Optional[Dict[str, Any]] = None,
    body_characters: int = 0,
    narrative_characters: int = 0,
    non_heading_blocks: int = 0,
):
    """
    Create the error and attach the rejected chapter with its size metrics.

    Args:
        message: Human-readable description, passed to the base exception.
        chapter: The chapter payload that failed the check; stored by
            reference as ``chapter_payload`` (no copy is made here).
        body_characters: Body character count reported for the chapter.
        narrative_characters: Narrative character count reported for it.
        non_heading_blocks: Number of non-heading blocks reported for it.

    Each of the three counters is stored as ``int``; a falsy value such as
    ``None`` is stored as ``0``.
    """

    def _as_count(raw: Any) -> int:
        # Falsy input (None, 0, "") becomes 0; everything else goes through int().
        return int(raw or 0)

    super().__init__(message)
    self.chapter_payload: Optional[Dict[str, Any]] = chapter
    self.body_characters: int = _as_count(body_characters)
    self.narrative_characters: int = _as_count(narrative_characters)
    self.non_heading_blocks: int = _as_count(non_heading_blocks)
|
||||||
|
|
||||||
|
|
||||||
class ChapterGenerationNode(BaseNode):
|
class ChapterGenerationNode(BaseNode):
|
||||||
"""
|
"""
|
||||||
@@ -897,7 +911,13 @@ class ChapterGenerationNode(BaseNode):
|
|||||||
"""
|
"""
|
||||||
blocks = chapter.get("blocks")
|
blocks = chapter.get("blocks")
|
||||||
if not isinstance(blocks, list) or not blocks:
|
if not isinstance(blocks, list) or not blocks:
|
||||||
raise ChapterContentError("章节缺少正文区块,无法输出内容")
|
raise ChapterContentError(
|
||||||
|
"章节缺少正文区块,无法输出内容",
|
||||||
|
chapter=chapter,
|
||||||
|
body_characters=0,
|
||||||
|
narrative_characters=0,
|
||||||
|
non_heading_blocks=0,
|
||||||
|
)
|
||||||
|
|
||||||
non_heading_blocks = [
|
non_heading_blocks = [
|
||||||
block
|
block
|
||||||
@@ -905,16 +925,21 @@ class ChapterGenerationNode(BaseNode):
|
|||||||
if isinstance(block, dict)
|
if isinstance(block, dict)
|
||||||
and block.get("type") not in {"heading", "divider", "toc"}
|
and block.get("type") not in {"heading", "divider", "toc"}
|
||||||
]
|
]
|
||||||
|
valid_block_count = len(non_heading_blocks)
|
||||||
body_characters = self._count_body_characters(blocks)
|
body_characters = self._count_body_characters(blocks)
|
||||||
narrative_characters = self._count_narrative_characters(blocks)
|
narrative_characters = self._count_narrative_characters(blocks)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
len(non_heading_blocks) < self._MIN_NON_HEADING_BLOCKS
|
valid_block_count < self._MIN_NON_HEADING_BLOCKS
|
||||||
or body_characters < self._MIN_BODY_CHARACTERS
|
or body_characters < self._MIN_BODY_CHARACTERS
|
||||||
or narrative_characters < self._MIN_NARRATIVE_CHARACTERS
|
or narrative_characters < self._MIN_NARRATIVE_CHARACTERS
|
||||||
):
|
):
|
||||||
raise ChapterContentError(
|
raise ChapterContentError(
|
||||||
f"{chapter.get('title') or '该章节'} 正文不足:有效区块 {len(non_heading_blocks)} 个,估算字符数 {body_characters},叙述性字符数 {narrative_characters}"
|
f"{chapter.get('title') or '该章节'} 正文不足:有效区块 {valid_block_count} 个,估算字符数 {body_characters},叙述性字符数 {narrative_characters}",
|
||||||
|
chapter=chapter,
|
||||||
|
body_characters=body_characters,
|
||||||
|
narrative_characters=narrative_characters,
|
||||||
|
non_heading_blocks=valid_block_count,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _count_body_characters(self, blocks: Any) -> int:
|
def _count_body_characters(self, blocks: Any) -> int:
|
||||||
|
|||||||
Reference in New Issue
Block a user