Fixed Directory Parsing Issues and Optimized Directory Rendering
This commit is contained in:
@@ -114,16 +114,94 @@ class DocumentLayoutNode(BaseNode):
|
||||
if not isinstance(result.get("title"), str):
|
||||
logger.warning("文档设计缺少title字段或类型错误,使用默认值")
|
||||
result.setdefault("title", "未命名报告")
|
||||
if not isinstance(result.get("toc"), (list, dict)):
|
||||
logger.warning("文档设计缺少toc字段或类型错误,使用空列表")
|
||||
result.setdefault("toc", [])
|
||||
|
||||
# 处理tocPlan字段
|
||||
toc_plan = result.get("tocPlan", [])
|
||||
if not isinstance(toc_plan, list):
|
||||
logger.warning("文档设计缺少tocPlan字段或类型错误,使用空列表")
|
||||
result["tocPlan"] = []
|
||||
else:
|
||||
# 清理tocPlan中的description字段
|
||||
result["tocPlan"] = self._clean_toc_plan_descriptions(toc_plan)
|
||||
|
||||
if not isinstance(result.get("hero"), dict):
|
||||
logger.warning("文档设计缺少hero字段或类型错误,使用空对象")
|
||||
result.setdefault("hero", {})
|
||||
|
||||
return result
|
||||
except JSONParseError as exc:
|
||||
# 转换为原有的异常类型以保持向后兼容
|
||||
raise ValueError(f"文档设计JSON解析失败: {exc}") from exc
|
||||
|
||||
def _clean_toc_plan_descriptions(self, toc_plan: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
清理tocPlan中每个条目的description字段,移除可能的JSON片段。
|
||||
|
||||
参数:
|
||||
toc_plan: 原始的目录计划列表
|
||||
|
||||
返回:
|
||||
List[Dict[str, Any]]: 清理后的目录计划列表
|
||||
"""
|
||||
import re
|
||||
|
||||
def clean_text(text: Any) -> str:
|
||||
"""清理文本中的JSON片段"""
|
||||
if not text or not isinstance(text, str):
|
||||
return ""
|
||||
|
||||
cleaned = text
|
||||
|
||||
# 移除以逗号+空白+{开头的不完整JSON对象
|
||||
cleaned = re.sub(r',\s*\{[^}]*$', '', cleaned)
|
||||
|
||||
# 移除以逗号+空白+[开头的不完整JSON数组
|
||||
cleaned = re.sub(r',\s*\[[^\]]*$', '', cleaned)
|
||||
|
||||
# 移除孤立的 { 加上后续内容(如果没有匹配的 })
|
||||
open_brace_pos = cleaned.rfind('{')
|
||||
if open_brace_pos != -1:
|
||||
close_brace_pos = cleaned.rfind('}')
|
||||
if close_brace_pos < open_brace_pos:
|
||||
cleaned = cleaned[:open_brace_pos].rstrip(',,、 \t\n')
|
||||
|
||||
# 移除孤立的 [ 加上后续内容(如果没有匹配的 ])
|
||||
open_bracket_pos = cleaned.rfind('[')
|
||||
if open_bracket_pos != -1:
|
||||
close_bracket_pos = cleaned.rfind(']')
|
||||
if close_bracket_pos < open_bracket_pos:
|
||||
cleaned = cleaned[:open_bracket_pos].rstrip(',,、 \t\n')
|
||||
|
||||
# 移除看起来像JSON键值对的片段
|
||||
cleaned = re.sub(r',?\s*"[^"]+"\s*:\s*"[^"]*$', '', cleaned)
|
||||
cleaned = re.sub(r',?\s*"[^"]+"\s*:\s*[^,}\]]*$', '', cleaned)
|
||||
|
||||
# 清理末尾的逗号和空白
|
||||
cleaned = cleaned.rstrip(',,、 \t\n')
|
||||
|
||||
return cleaned.strip()
|
||||
|
||||
cleaned_plan = []
|
||||
for entry in toc_plan:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
|
||||
# 清理description字段
|
||||
if "description" in entry:
|
||||
original_desc = entry["description"]
|
||||
cleaned_desc = clean_text(original_desc)
|
||||
|
||||
if cleaned_desc != original_desc:
|
||||
logger.warning(
|
||||
f"清理目录项 '{entry.get('display', 'unknown')}' 的description字段中的JSON片段:\n"
|
||||
f" 原文: {original_desc[:100]}...\n"
|
||||
f" 清理后: {cleaned_desc[:100]}..."
|
||||
)
|
||||
entry["description"] = cleaned_desc
|
||||
|
||||
cleaned_plan.append(entry)
|
||||
|
||||
return cleaned_plan
|
||||
|
||||
|
||||
__all__ = ["DocumentLayoutNode"]
|
||||
|
||||
Reference in New Issue
Block a user