Increase DeepSeek Compatibility
This commit is contained in:
+11
-4
@@ -29,6 +29,7 @@ from .nodes import (
|
|||||||
TemplateSelectionNode,
|
TemplateSelectionNode,
|
||||||
ChapterGenerationNode,
|
ChapterGenerationNode,
|
||||||
ChapterJsonParseError,
|
ChapterJsonParseError,
|
||||||
|
ChapterContentError,
|
||||||
DocumentLayoutNode,
|
DocumentLayoutNode,
|
||||||
WordBudgetNode,
|
WordBudgetNode,
|
||||||
)
|
)
|
||||||
@@ -438,20 +439,26 @@ class ReportAgent:
|
|||||||
stream_callback=chunk_callback
|
stream_callback=chunk_callback
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
except ChapterJsonParseError as parse_error:
|
except (ChapterJsonParseError, ChapterContentError) as structured_error:
|
||||||
|
error_kind = (
|
||||||
|
"content_sparse" if isinstance(structured_error, ChapterContentError) else "json_parse"
|
||||||
|
)
|
||||||
|
readable_label = "内容密度异常" if error_kind == "content_sparse" else "JSON解析失败"
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"章节 %s JSON解析失败(第 %s/%s 次尝试): %s",
|
"章节 %s %s(第 %s/%s 次尝试): %s",
|
||||||
section.title,
|
section.title,
|
||||||
|
readable_label,
|
||||||
attempt,
|
attempt,
|
||||||
chapter_max_attempts,
|
chapter_max_attempts,
|
||||||
parse_error,
|
structured_error,
|
||||||
)
|
)
|
||||||
emit('chapter_status', {
|
emit('chapter_status', {
|
||||||
'chapterId': section.chapter_id,
|
'chapterId': section.chapter_id,
|
||||||
'title': section.title,
|
'title': section.title,
|
||||||
'status': 'retrying' if attempt < chapter_max_attempts else 'error',
|
'status': 'retrying' if attempt < chapter_max_attempts else 'error',
|
||||||
'attempt': attempt,
|
'attempt': attempt,
|
||||||
'error': str(parse_error),
|
'error': str(structured_error),
|
||||||
|
'reason': error_kind,
|
||||||
})
|
})
|
||||||
if attempt >= chapter_max_attempts:
|
if attempt >= chapter_max_attempts:
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ Report Engine节点处理模块。
|
|||||||
|
|
||||||
from .base_node import BaseNode, StateMutationNode
|
from .base_node import BaseNode, StateMutationNode
|
||||||
from .template_selection_node import TemplateSelectionNode
|
from .template_selection_node import TemplateSelectionNode
|
||||||
from .chapter_generation_node import ChapterGenerationNode, ChapterJsonParseError
|
from .chapter_generation_node import ChapterGenerationNode, ChapterJsonParseError, ChapterContentError
|
||||||
from .document_layout_node import DocumentLayoutNode
|
from .document_layout_node import DocumentLayoutNode
|
||||||
from .word_budget_node import WordBudgetNode
|
from .word_budget_node import WordBudgetNode
|
||||||
|
|
||||||
@@ -16,6 +16,7 @@ __all__ = [
|
|||||||
"TemplateSelectionNode",
|
"TemplateSelectionNode",
|
||||||
"ChapterGenerationNode",
|
"ChapterGenerationNode",
|
||||||
"ChapterJsonParseError",
|
"ChapterJsonParseError",
|
||||||
|
"ChapterContentError",
|
||||||
"DocumentLayoutNode",
|
"DocumentLayoutNode",
|
||||||
"WordBudgetNode",
|
"WordBudgetNode",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -36,6 +36,14 @@ class ChapterJsonParseError(ValueError):
|
|||||||
self.raw_text = raw_text
|
self.raw_text = raw_text
|
||||||
|
|
||||||
|
|
||||||
|
class ChapterContentError(ValueError):
    """
    Signals that a generated chapter is too sparse to be usable.

    Raised when the LLM produced only headings, or body text that is too
    short to carry a chapter, so that the caller can retry the generation
    and keep the report quality acceptable.
    """
|
||||||
|
|
||||||
|
|
||||||
class ChapterGenerationNode(BaseNode):
|
class ChapterGenerationNode(BaseNode):
|
||||||
"""
|
"""
|
||||||
负责按章节调用LLM并校验JSON结构。
|
负责按章节调用LLM并校验JSON结构。
|
||||||
@@ -71,6 +79,12 @@ class ChapterGenerationNode(BaseNode):
|
|||||||
"sub": "subscript",
|
"sub": "subscript",
|
||||||
"sup": "superscript",
|
"sup": "superscript",
|
||||||
}
|
}
|
||||||
|
# 章节若仅包含标题或字符过少则视为失败,强制LLM重新生成
|
||||||
|
_MIN_NON_HEADING_BLOCKS = 2
|
||||||
|
_MIN_BODY_CHARACTERS = 400
|
||||||
|
_PARAGRAPH_FRAGMENT_MAX_CHARS = 80
|
||||||
|
_PARAGRAPH_FRAGMENT_NO_TERMINATOR_MAX_CHARS = 240
|
||||||
|
_TERMINATION_PUNCTUATION = set("。!?!?;;……")
|
||||||
|
|
||||||
def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage):
|
def __init__(self, llm_client, validator: IRValidator, storage: ChapterStorage):
|
||||||
"""
|
"""
|
||||||
@@ -121,17 +135,32 @@ class ChapterGenerationNode(BaseNode):
|
|||||||
self._sanitize_chapter_blocks(chapter_json)
|
self._sanitize_chapter_blocks(chapter_json)
|
||||||
|
|
||||||
valid, errors = self.validator.validate_chapter(chapter_json)
|
valid, errors = self.validator.validate_chapter(chapter_json)
|
||||||
|
content_error: ChapterContentError | None = None
|
||||||
|
if valid:
|
||||||
|
try:
|
||||||
|
self._ensure_content_density(chapter_json)
|
||||||
|
except ChapterContentError as exc:
|
||||||
|
content_error = exc
|
||||||
|
|
||||||
|
error_messages: List[str] = []
|
||||||
|
if not valid and errors:
|
||||||
|
error_messages.extend(errors)
|
||||||
|
if content_error:
|
||||||
|
error_messages.append(str(content_error))
|
||||||
|
|
||||||
self.storage.persist_chapter(
|
self.storage.persist_chapter(
|
||||||
run_dir,
|
run_dir,
|
||||||
chapter_meta,
|
chapter_meta,
|
||||||
chapter_json,
|
chapter_json,
|
||||||
errors=None if valid else errors,
|
errors=None if not error_messages else error_messages,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not valid:
|
if not valid:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"{section.title} 章节JSON校验失败: {'; '.join(errors[:5])}"
|
f"{section.title} 章节JSON校验失败: {'; '.join(errors[:5])}"
|
||||||
)
|
)
|
||||||
|
if content_error:
|
||||||
|
raise content_error
|
||||||
|
|
||||||
return chapter_json
|
return chapter_json
|
||||||
|
|
||||||
@@ -488,6 +517,97 @@ class ChapterGenerationNode(BaseNode):
|
|||||||
|
|
||||||
walk(chapter.get("blocks"))
|
walk(chapter.get("blocks"))
|
||||||
|
|
||||||
|
blocks = chapter.get("blocks")
|
||||||
|
if isinstance(blocks, list):
|
||||||
|
chapter["blocks"] = self._merge_fragment_sequences(blocks)
|
||||||
|
|
||||||
|
def _ensure_content_density(self, chapter: Dict[str, Any]):
    """
    Verify that a chapter contains enough body content.

    Args:
        chapter: Chapter dict; its ``blocks`` and ``title`` keys are read.

    Raises:
        ChapterContentError: if ``blocks`` is missing, empty or not a list;
            if fewer than ``_MIN_NON_HEADING_BLOCKS`` blocks have a type
            other than heading/divider/toc; or if the estimated body
            character count is below ``_MIN_BODY_CHARACTERS``.
    """
    blocks = chapter.get("blocks")
    if not isinstance(blocks, list) or not blocks:
        raise ChapterContentError("章节缺少正文区块,无法输出内容")

    # Purely structural block types never count as body content.
    structural_types = {"heading", "divider", "toc"}
    non_heading_blocks = [
        block
        for block in blocks
        if isinstance(block, dict) and block.get("type") not in structural_types
    ]
    body_characters = self._count_body_characters(blocks)

    too_few_blocks = len(non_heading_blocks) < self._MIN_NON_HEADING_BLOCKS
    too_short = body_characters < self._MIN_BODY_CHARACTERS
    if too_few_blocks or too_short:
        raise ChapterContentError(
            f"{chapter.get('title') or '该章节'} 正文不足:有效区块 {len(non_heading_blocks)} 个,估算字符数 {body_characters}"
        )
|
||||||
|
|
||||||
|
def _count_body_characters(self, blocks: Any) -> int:
    """
    Recursively estimate how many body characters a block tree contains.

    - heading/divider/toc/widget blocks contribute nothing;
    - paragraph/list/table/blockquote/callout blocks are walked for their
      nested text;
    - the result is only a coarse size estimate.

    Malformed containers (``items``/``rows``/``cells`` that are not lists,
    rows or cells that are not dicts) are skipped instead of raising.

    Args:
        blocks: A block, a list of blocks, a string, or ``None``.

    Returns:
        The summed length of the stripped text fragments that were found.
    """
    skipped_types = {"heading", "divider", "toc", "widget"}

    def walk(node: Any) -> int:
        if node is None:
            return 0
        if isinstance(node, list):
            return sum(walk(item) for item in node)
        if isinstance(node, str):
            return len(node.strip())
        if not isinstance(node, dict):
            return 0

        block_type = node.get("type")
        if block_type in skipped_types:
            return 0

        if block_type == "paragraph":
            inlines = node.get("inlines")
            if isinstance(inlines, list):
                # An inline list is authoritative, even when it sums to 0.
                return sum(
                    len(run["text"].strip())
                    for run in inlines
                    if isinstance(run, dict) and isinstance(run.get("text"), str)
                )
            text_value = node.get("text")
            if isinstance(text_value, str):
                return len(text_value.strip())
            return len(self._extract_block_text(node).strip())

        if block_type == "list":
            items = node.get("items")
            return walk(items) if isinstance(items, list) else 0

        if block_type in {"blockquote", "callout"}:
            return walk(node.get("blocks"))

        if block_type == "table":
            total = 0
            rows = node.get("rows")
            for row in rows if isinstance(rows, list) else []:
                if not isinstance(row, dict):
                    continue
                cells = row.get("cells")
                for cell in cells if isinstance(cells, list) else []:
                    if isinstance(cell, dict):
                        total += walk(cell.get("blocks"))
            return total

        # Unknown container types: descend into nested blocks when present,
        # otherwise fall back to the generic text extractor.
        nested = node.get("blocks")
        if isinstance(nested, list):
            return walk(nested)

        return len(self._extract_block_text(node).strip())

    return walk(blocks)
|
||||||
|
|
||||||
def _sanitize_block_content(self, block: Dict[str, Any]):
|
def _sanitize_block_content(self, block: Dict[str, Any]):
|
||||||
"""根据类型做精细化修复,例如清理paragraph内的非法inline mark"""
|
"""根据类型做精细化修复,例如清理paragraph内的非法inline mark"""
|
||||||
block_type = block.get("type")
|
block_type = block.get("type")
|
||||||
@@ -505,7 +625,134 @@ class ChapterGenerationNode(BaseNode):
|
|||||||
normalized_runs = [self._as_inline_run(self._extract_block_text(block))]
|
normalized_runs = [self._as_inline_run(self._extract_block_text(block))]
|
||||||
if not normalized_runs:
|
if not normalized_runs:
|
||||||
normalized_runs = [self._as_inline_run("")]
|
normalized_runs = [self._as_inline_run("")]
|
||||||
block["inlines"] = normalized_runs
|
block["inlines"] = self._strip_inline_artifacts(normalized_runs)
|
||||||
|
|
||||||
|
def _strip_inline_artifacts(self, inlines: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Drop inline runs whose text is a stray JSON sentinel written by the LLM.

    A run is removed when its stripped text is a JSON object whose keys are
    a subset of ``{"type", "value"}``, so such text never reaches the
    rendered output. Runs that are not dicts are dropped as well.

    Args:
        inlines: Inline runs of a paragraph; ``None`` is treated as empty.

    Returns:
        The surviving runs in their original order, or a single empty run
        (built with ``_as_inline_run("")``) when nothing survives.
    """
    sentinel_keys = {"type", "value"}
    cleaned: List[Dict[str, Any]] = []
    for run in inlines or []:
        if not isinstance(run, dict):
            continue
        text = run.get("text")
        if isinstance(text, str):
            stripped = text.strip()
            if stripped.startswith("{") and stripped.endswith("}"):
                try:
                    payload = json.loads(stripped)
                except json.JSONDecodeError:
                    # Not JSON after all; keep the run as ordinary text.
                    payload = None
                if isinstance(payload, dict) and set(payload.keys()).issubset(sentinel_keys):
                    continue
        cleaned.append(run)
    return cleaned or [self._as_inline_run("")]
|
||||||
|
|
||||||
|
def _merge_fragment_sequences(self, blocks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Merge consecutive paragraph fragments into single paragraphs.

    Consecutive blocks for which ``_is_paragraph_fragment`` is true are
    collected and combined via ``_combine_paragraph_fragments``; a lone
    fragment is kept untouched. Every other dict block is passed through
    ``_merge_nested_fragments`` so nested containers are processed too.
    Entries that are not dicts are kept as-is.

    Args:
        blocks: Sibling blocks; a non-list value is returned unchanged.

    Returns:
        A new list with fragment runs merged, order preserved.
    """
    if not isinstance(blocks, list):
        return blocks

    merged: List[Dict[str, Any]] = []
    pending: List[Dict[str, Any]] = []

    def flush() -> None:
        # Emit whatever fragments are buffered, merging only when >1.
        if len(pending) == 1:
            merged.append(pending[0])
        elif pending:
            merged.append(self._combine_paragraph_fragments(list(pending)))
        pending.clear()

    for block in blocks:
        if self._is_paragraph_fragment(block):
            pending.append(block)
            continue
        flush()
        if isinstance(block, dict):
            merged.append(self._merge_nested_fragments(block))
        else:
            # Malformed entry: nothing to recurse into, keep it in place.
            merged.append(block)

    flush()
    return merged
|
||||||
|
|
||||||
|
def _merge_nested_fragments(self, block: Dict[str, Any]) -> Dict[str, Any]:
    """
    Apply fragment merging inside nested containers of a block.

    Handles callout/blockquote (``blocks``), list (each entry of ``items``
    that is itself a list, rewritten in place) and table (``blocks`` of
    every cell). Malformed parts (non-dict block, row or cell; non-list
    ``rows``/``cells``) are left untouched instead of raising.

    Args:
        block: The block to process; it is mutated in place.

    Returns:
        The same ``block`` object.
    """
    if not isinstance(block, dict):
        return block

    block_type = block.get("type")
    if block_type in {"callout", "blockquote"}:
        nested = block.get("blocks")
        if isinstance(nested, list):
            block["blocks"] = self._merge_fragment_sequences(nested)
    elif block_type == "list":
        items = block.get("items")
        if isinstance(items, list):
            for entry in items:
                if isinstance(entry, list):
                    # Slice assignment keeps the entry's identity for holders.
                    entry[:] = self._merge_fragment_sequences(entry)
    elif block_type == "table":
        rows = block.get("rows")
        for row in rows if isinstance(rows, list) else []:
            if not isinstance(row, dict):
                continue
            cells = row.get("cells")
            for cell in cells if isinstance(cells, list) else []:
                if not isinstance(cell, dict):
                    continue
                nested_blocks = cell.get("blocks")
                if isinstance(nested_blocks, list):
                    cell["blocks"] = self._merge_fragment_sequences(nested_blocks)
    return block
|
||||||
|
|
||||||
|
def _combine_paragraph_fragments(self, fragments: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Merge several paragraph fragments into one paragraph block.

    The result is a shallow copy of the first fragment whose ``inlines``
    are the concatenation of every fragment's inline runs. A fragment
    without a non-empty ``inlines`` list contributes one run built from
    its extracted text.

    Args:
        fragments: Paragraph blocks to merge, in document order.

    Returns:
        The merged paragraph. For an empty ``fragments`` list a bare
        paragraph holding a single empty run is returned.
    """
    if not fragments:
        # Previously this raised IndexError before the empty-run fallback
        # could ever run; return the intended empty paragraph instead.
        return {"type": "paragraph", "inlines": [self._as_inline_run("")]}

    combined_inlines: List[Dict[str, Any]] = []
    for fragment in fragments:
        runs = fragment.get("inlines")
        if isinstance(runs, list) and runs:
            combined_inlines.extend(runs)
        else:
            fallback_text = self._extract_block_text(fragment)
            combined_inlines.append(self._as_inline_run(fallback_text))

    # Shallow copy: the source fragment keeps its own "inlines" list.
    template = dict(fragments[0])
    template["inlines"] = combined_inlines
    return template
|
||||||
|
|
||||||
|
def _is_paragraph_fragment(self, block: Dict[str, Any]) -> bool:
    """
    Decide whether a paragraph looks like a wrongly split short fragment.

    Rules, in order:
      1. anything that is not a ``paragraph`` dict is not a fragment;
      2. a paragraph whose text is empty after stripping is a fragment;
      3. a paragraph with any non-empty ``marks`` or an embedded newline
         is not a fragment;
      4. text ending in a terminator from ``_TERMINATION_PUNCTUATION`` is a
         fragment only up to ``_PARAGRAPH_FRAGMENT_MAX_CHARS``;
      5. otherwise it is a fragment up to
         ``_PARAGRAPH_FRAGMENT_NO_TERMINATOR_MAX_CHARS`` (three times the
         short limit when that attribute is absent).

    Args:
        block: Candidate block (any value is tolerated).

    Returns:
        ``True`` when the block should be merged with its neighbours.
    """
    if not isinstance(block, dict) or block.get("type") != "paragraph":
        return False

    inlines = block.get("inlines")
    has_marks = False
    if isinstance(inlines, list) and inlines:
        parts: List[str] = []
        for run in inlines:
            if not isinstance(run, dict):
                continue
            parts.append(str(run.get("text") or ""))
            marks = run.get("marks")
            if isinstance(marks, list) and any(marks):
                has_marks = True
        text = "".join(parts)
    else:
        text = self._extract_block_text(block)

    stripped = (text or "").strip()
    if not stripped:
        return True
    if has_marks or "\n" in stripped:
        return False

    short_limit = self._PARAGRAPH_FRAGMENT_MAX_CHARS
    long_limit = getattr(
        self,
        "_PARAGRAPH_FRAGMENT_NO_TERMINATOR_MAX_CHARS",
        short_limit * 3,
    )

    if stripped[-1] in self._TERMINATION_PUNCTUATION:
        return len(stripped) <= short_limit
    return len(stripped) <= long_limit
|
||||||
|
|
||||||
def _coerce_inline_run(self, run: Any) -> List[Dict[str, Any]]:
|
def _coerce_inline_run(self, run: Any) -> List[Dict[str, Any]]:
|
||||||
"""将任意inline写法规整为合法run"""
|
"""将任意inline写法规整为合法run"""
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import ast
|
import ast
|
||||||
|
import copy
|
||||||
import html
|
import html
|
||||||
import json
|
import json
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
@@ -19,6 +20,31 @@ class HTMLRenderer:
|
|||||||
- 提供主题变量、编号映射等辅助功能。
|
- 提供主题变量、编号映射等辅助功能。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
CALLOUT_ALLOWED_TYPES = {
|
||||||
|
"paragraph",
|
||||||
|
"list",
|
||||||
|
"table",
|
||||||
|
"blockquote",
|
||||||
|
"code",
|
||||||
|
"math",
|
||||||
|
"figure",
|
||||||
|
"kpiGrid",
|
||||||
|
}
|
||||||
|
INLINE_ARTIFACT_KEYS = {
|
||||||
|
"props",
|
||||||
|
"widgetId",
|
||||||
|
"widgetType",
|
||||||
|
"data",
|
||||||
|
"dataRef",
|
||||||
|
"datasets",
|
||||||
|
"labels",
|
||||||
|
"config",
|
||||||
|
"options",
|
||||||
|
}
|
||||||
|
TABLE_COMPLEX_CHARS = set(
|
||||||
|
"@%%()(),,。;;::、??!!·…-—_+<>[]{}|\\/\"'`~$^&*#"
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(self, config: Dict[str, Any] | None = None):
|
def __init__(self, config: Dict[str, Any] | None = None):
|
||||||
"""初始化渲染器缓存并允许注入额外配置(如主题覆盖)"""
|
"""初始化渲染器缓存并允许注入额外配置(如主题覆盖)"""
|
||||||
self.config = config or {}
|
self.config = config or {}
|
||||||
@@ -72,6 +98,7 @@ class HTMLRenderer:
|
|||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
<title>{self._escape_html(title)}</title>
|
<title>{self._escape_html(title)}</title>
|
||||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/chartjs-chart-sankey@4"></script>
|
||||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/1.4.1/html2canvas.min.js"></script>
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/1.4.1/html2canvas.min.js"></script>
|
||||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js"></script>
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js"></script>
|
||||||
<script>
|
<script>
|
||||||
@@ -442,8 +469,9 @@ class HTMLRenderer:
|
|||||||
|
|
||||||
def _render_table(self, block: Dict[str, Any]) -> str:
|
def _render_table(self, block: Dict[str, Any]) -> str:
|
||||||
"""渲染表格,同时保留caption与单元格属性"""
|
"""渲染表格,同时保留caption与单元格属性"""
|
||||||
|
rows = self._normalize_table_rows(block.get("rows") or [])
|
||||||
rows_html = ""
|
rows_html = ""
|
||||||
for row in block.get("rows", []):
|
for row in rows:
|
||||||
row_cells = ""
|
row_cells = ""
|
||||||
for cell in row.get("cells", []):
|
for cell in row.get("cells", []):
|
||||||
cell_tag = "th" if cell.get("header") or cell.get("isHeader") else "td"
|
cell_tag = "th" if cell.get("header") or cell.get("isHeader") else "td"
|
||||||
@@ -462,6 +490,105 @@ class HTMLRenderer:
|
|||||||
caption_html = f"<caption>{self._escape_html(caption)}</caption>" if caption else ""
|
caption_html = f"<caption>{self._escape_html(caption)}</caption>" if caption else ""
|
||||||
return f'<div class="table-wrap"><table>{caption_html}<tbody>{rows_html}</tbody></table></div>'
|
return f'<div class="table-wrap"><table>{caption_html}<tbody>{rows_html}</tbody></table></div>'
|
||||||
|
|
||||||
|
def _normalize_table_rows(self, rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Detect a single-column "vertical" table and convert it to a grid.

    Only tables in which every row is a dict with exactly one cell are
    considered. The header span is inferred with
    ``_detect_transposed_header_span`` and the rows are regrouped with
    ``_transpose_single_cell_table``; whenever detection or transposition
    yields nothing, the input rows are returned unchanged.

    Args:
        rows: Table rows as found on the block.

    Returns:
        ``[]`` for empty input, the regrouped rows when a transposed layout
        was recognised, otherwise ``rows`` itself.
    """
    if not rows:
        return []

    single_column = all(
        isinstance(row, dict) and len(row.get("cells") or []) == 1
        for row in rows
    )
    if not single_column:
        return rows

    texts = [self._extract_row_text(row) for row in rows]
    header_span = self._detect_transposed_header_span(rows, texts)
    if not header_span:
        return rows

    normalized = self._transpose_single_cell_table(rows, header_span)
    return normalized or rows
|
||||||
|
|
||||||
|
def _detect_transposed_header_span(self, rows: List[Dict[str, Any]], texts: List[str]) -> int:
    """
    Infer how many leading rows of a vertical table are header fields.

    Leading texts are counted while they pass ``_is_potential_table_header``,
    looking at no more than ``min(8, len(rows) // 2)`` rows. The span is
    accepted only if it is at least 2, the remaining rows divide evenly
    into groups of that size, and at least one remaining text passes
    ``_looks_like_table_value``.

    Args:
        rows: The table rows (only their count is used).
        texts: Plain text of each row, in row order.

    Returns:
        The header span, or ``0`` when no transposed layout is recognised.
    """
    max_fields = min(8, len(rows) // 2)

    header_span = 0
    for text in texts[:max_fields]:
        if not self._is_potential_table_header(text):
            break
        header_span += 1

    if header_span < 2:
        return 0

    remainder = texts[header_span:]
    if not remainder or (len(rows) - header_span) % header_span != 0:
        return 0
    if not any(self._looks_like_table_value(txt) for txt in remainder):
        return 0
    return header_span
|
||||||
|
|
||||||
|
def _is_potential_table_header(self, text: str) -> bool:
    """
    Judge by length and character makeup whether text looks like a header.

    Args:
        text: Plain text of a table row.

    Returns:
        ``True`` when the stripped text is 1-12 characters long and contains
        neither digits nor any character from ``TABLE_COMPLEX_CHARS``.
    """
    if not text:
        return False
    stripped = text.strip()
    if not stripped or len(stripped) > 12:
        return False
    complex_chars = self.TABLE_COMPLEX_CHARS
    return not any(ch.isdigit() or ch in complex_chars for ch in stripped)
|
||||||
|
|
||||||
|
def _looks_like_table_value(self, text: str) -> bool:
    """
    Judge whether text looks like a data value rather than a header.

    Args:
        text: Plain text of a table row.

    Returns:
        ``True`` when the stripped text is at least 12 characters long, or
        contains a digit or any character from ``TABLE_COMPLEX_CHARS``;
        ``False`` for empty input.
    """
    if not text:
        return False
    stripped = text.strip()
    if len(stripped) >= 12:
        return True
    complex_chars = self.TABLE_COMPLEX_CHARS
    return any(ch.isdigit() or ch in complex_chars for ch in stripped)
|
||||||
|
|
||||||
|
def _transpose_single_cell_table(self, rows: List[Dict[str, Any]], span: int) -> List[Dict[str, Any]]:
    """
    Regroup a single-column table into one header row plus data rows.

    The first ``span`` rows become the header cells (each marked with
    ``header=True``); the remaining rows are grouped ``span`` at a time
    into data rows. Cells are deep-copied, so ``rows`` is never mutated.
    A row without cells contributes an empty cell dict.

    Args:
        rows: Rows that each hold (at most) one cell.
        span: Number of header fields, i.e. the width of the new grid.

    Returns:
        The regrouped rows, or ``[]`` when ``span`` is not positive, the
        rows do not divide evenly into the grid, or a first cell is not a
        dict (the caller then keeps the original rows).
    """
    total = len(rows)
    # span <= 0 would otherwise divide by zero in the modulo below.
    if span <= 0 or total <= span or (total - span) % span != 0:
        return []

    first_cells: List[Dict[str, Any]] = []
    for row in rows:
        cell = (row.get("cells") or [{}])[0]
        if not isinstance(cell, dict):
            return []
        first_cells.append(copy.deepcopy(cell))

    header_cells = first_cells[:span]
    for cell in header_cells:
        cell["header"] = True

    normalized: List[Dict[str, Any]] = [{"cells": header_cells}]
    for start in range(span, total, span):
        normalized.append({"cells": first_cells[start : start + span]})
    return normalized
|
||||||
|
|
||||||
|
def _extract_row_text(self, row: Dict[str, Any]) -> str:
    """
    Collect the plain text of a row's first cell for heuristic analysis.

    Only ``paragraph`` blocks of the first cell are read. Each inline
    contributes its ``text`` (for dict inlines) or its own string form
    (for anything else); ``None`` values are skipped. A missing or
    malformed cell or block list yields an empty string.

    Args:
        row: A table row dict.

    Returns:
        The concatenated text, without separators.
    """
    cells = row.get("cells") or []
    if not cells:
        return ""

    cell = cells[0]
    if not isinstance(cell, dict):
        return ""

    texts: List[str] = []
    for block in cell.get("blocks") or []:
        if not isinstance(block, dict) or block.get("type") != "paragraph":
            continue
        for inline in block.get("inlines") or []:
            value = inline.get("text") if isinstance(inline, dict) else inline
            if value is not None:
                texts.append(str(value))
    return "".join(texts)
|
||||||
|
|
||||||
def _render_blockquote(self, block: Dict[str, Any]) -> str:
|
def _render_blockquote(self, block: Dict[str, Any]) -> str:
|
||||||
"""渲染引用块,可嵌套其他block"""
|
"""渲染引用块,可嵌套其他block"""
|
||||||
inner = self._render_blocks(block.get("blocks", []))
|
inner = self._render_blocks(block.get("blocks", []))
|
||||||
@@ -487,9 +614,63 @@ class HTMLRenderer:
|
|||||||
"""渲染高亮提示盒,tone决定颜色"""
|
"""渲染高亮提示盒,tone决定颜色"""
|
||||||
tone = block.get("tone", "info")
|
tone = block.get("tone", "info")
|
||||||
title = block.get("title")
|
title = block.get("title")
|
||||||
inner = self._render_blocks(block.get("blocks", []))
|
safe_blocks, trailing_blocks = self._split_callout_content(block.get("blocks"))
|
||||||
|
inner = self._render_blocks(safe_blocks)
|
||||||
title_html = f"<strong>{self._escape_html(title)}</strong>" if title else ""
|
title_html = f"<strong>{self._escape_html(title)}</strong>" if title else ""
|
||||||
return f'<div class="callout tone-{tone}">{title_html}{inner}</div>'
|
callout_html = f'<div class="callout tone-{tone}">{title_html}{inner}</div>'
|
||||||
|
trailing_html = self._render_blocks(trailing_blocks) if trailing_blocks else ""
|
||||||
|
return callout_html + trailing_html
|
||||||
|
|
||||||
|
def _split_callout_content(
|
||||||
|
self, blocks: List[Dict[str, Any]] | None
|
||||||
|
) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
||||||
|
"""限定callout内部仅包含轻量内容,其余块剥离到外层"""
|
||||||
|
if not blocks:
|
||||||
|
return [], []
|
||||||
|
safe: List[Dict[str, Any]] = []
|
||||||
|
trailing: List[Dict[str, Any]] = []
|
||||||
|
for idx, child in enumerate(blocks):
|
||||||
|
child_type = child.get("type")
|
||||||
|
if child_type == "list":
|
||||||
|
sanitized, overflow = self._sanitize_callout_list(child)
|
||||||
|
if sanitized:
|
||||||
|
safe.append(sanitized)
|
||||||
|
if overflow:
|
||||||
|
trailing.extend(overflow)
|
||||||
|
trailing.extend(copy.deepcopy(blocks[idx + 1 :]))
|
||||||
|
break
|
||||||
|
elif child_type in self.CALLOUT_ALLOWED_TYPES:
|
||||||
|
safe.append(child)
|
||||||
|
else:
|
||||||
|
trailing.extend(copy.deepcopy(blocks[idx:]))
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
return safe, []
|
||||||
|
return safe, trailing
|
||||||
|
|
||||||
|
def _sanitize_callout_list(
|
||||||
|
self, block: Dict[str, Any]
|
||||||
|
) -> tuple[Dict[str, Any] | None, List[Dict[str, Any]]]:
|
||||||
|
"""当列表项包含结构型block时,将其截断移出callout"""
|
||||||
|
items = block.get("items") or []
|
||||||
|
if not items:
|
||||||
|
return block, []
|
||||||
|
sanitized_items: List[List[Dict[str, Any]]] = []
|
||||||
|
trailing: List[Dict[str, Any]] = []
|
||||||
|
for idx, item in enumerate(items):
|
||||||
|
safe, overflow = self._split_callout_content(item)
|
||||||
|
if safe:
|
||||||
|
sanitized_items.append(safe)
|
||||||
|
if overflow:
|
||||||
|
trailing.extend(overflow)
|
||||||
|
for rest in items[idx + 1 :]:
|
||||||
|
trailing.extend(copy.deepcopy(rest))
|
||||||
|
break
|
||||||
|
if not sanitized_items:
|
||||||
|
return None, trailing
|
||||||
|
new_block = copy.deepcopy(block)
|
||||||
|
new_block["items"] = sanitized_items
|
||||||
|
return new_block, trailing
|
||||||
|
|
||||||
def _render_kpi_grid(self, block: Dict[str, Any]) -> str:
|
def _render_kpi_grid(self, block: Dict[str, Any]) -> str:
|
||||||
"""渲染KPI卡片栅格,包含指标值与涨跌幅"""
|
"""渲染KPI卡片栅格,包含指标值与涨跌幅"""
|
||||||
@@ -631,6 +812,8 @@ class HTMLRenderer:
|
|||||||
nested_marks = inline_payload.get("marks")
|
nested_marks = inline_payload.get("marks")
|
||||||
if isinstance(nested_marks, list):
|
if isinstance(nested_marks, list):
|
||||||
marks.extend(nested_marks)
|
marks.extend(nested_marks)
|
||||||
|
elif any(key in payload for key in self.INLINE_ARTIFACT_KEYS):
|
||||||
|
text_value = ""
|
||||||
|
|
||||||
return text_value, marks
|
return text_value, marks
|
||||||
|
|
||||||
@@ -1281,10 +1464,11 @@ function mergeOptions(base, override) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function resolveChartTypes(payload) {
|
function resolveChartTypes(payload) {
|
||||||
|
const explicit = payload && payload.props && payload.props.type;
|
||||||
const widgetType = payload && payload.widgetType ? payload.widgetType : 'chart.js/bar';
|
const widgetType = payload && payload.widgetType ? payload.widgetType : 'chart.js/bar';
|
||||||
const primary = widgetType.includes('/') ? widgetType.split('/').pop() : widgetType;
|
const derived = widgetType && widgetType.includes('/') ? widgetType.split('/').pop() : widgetType;
|
||||||
const extra = Array.isArray(payload && payload.preferredTypes) ? payload.preferredTypes : [];
|
const extra = Array.isArray(payload && payload.preferredTypes) ? payload.preferredTypes : [];
|
||||||
const pipeline = [primary, ...extra, ...STABLE_CHART_TYPES];
|
const pipeline = [explicit, derived, ...extra, ...STABLE_CHART_TYPES].filter(Boolean);
|
||||||
const result = [];
|
const result = [];
|
||||||
pipeline.forEach(type => {
|
pipeline.forEach(type => {
|
||||||
if (type && !result.includes(type)) {
|
if (type && !result.includes(type)) {
|
||||||
@@ -1456,6 +1640,15 @@ function buildChartOptions(payload) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function instantiateChart(ctx, payload, optionsTemplate, type) {
|
function instantiateChart(ctx, payload, optionsTemplate, type) {
|
||||||
|
if (!ctx) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (ctx.canvas && typeof Chart !== 'undefined' && typeof Chart.getChart === 'function') {
|
||||||
|
const existing = Chart.getChart(ctx.canvas);
|
||||||
|
if (existing) {
|
||||||
|
existing.destroy();
|
||||||
|
}
|
||||||
|
}
|
||||||
const data = cloneDeep(payload && payload.data ? payload.data : {});
|
const data = cloneDeep(payload && payload.data ? payload.data : {});
|
||||||
const config = {
|
const config = {
|
||||||
type,
|
type,
|
||||||
|
|||||||
Reference in New Issue
Block a user