Fixed the PDF Rendering Overflow Issue and Updated the Logic for Rendering PDFs

This commit is contained in:
马一丁
2025-11-19 11:35:58 +08:00
parent a07d6c5292
commit d4f8301fd5
3 changed files with 426 additions and 43 deletions
+22 -9
View File
@@ -405,12 +405,13 @@ class HTMLRenderer:
def _render_body(self) -> str: def _render_body(self) -> str:
""" """
拼装<body>结构,包含头部、导航、章节和脚本。 拼装<body>结构,包含头部、导航、章节和脚本。
新版本:移除独立的cover section,标题合并到hero section中。
返回: 返回:
str: body片段HTML。 str: body片段HTML。
""" """
header = self._render_header() header = self._render_header()
cover = self._render_cover() # cover = self._render_cover() # 不再单独渲染cover
hero = self._render_hero() hero = self._render_hero()
toc_section = self._render_toc_section() toc_section = self._render_toc_section()
chapters = "".join(self._render_chapter(chapter) for chapter in self.chapters) chapters = "".join(self._render_chapter(chapter) for chapter in self.chapters)
@@ -433,7 +434,6 @@ class HTMLRenderer:
{header} {header}
{overlay} {overlay}
<main> <main>
{cover}
{hero} {hero}
{toc_section} {toc_section}
{chapters} {chapters}
@@ -502,6 +502,7 @@ class HTMLRenderer:
def _render_hero(self) -> str: def _render_hero(self) -> str:
""" """
根据layout中的hero字段输出摘要/KPI/亮点区。 根据layout中的hero字段输出摘要/KPI/亮点区。
新版本:将标题和总览合并在一起,去掉椭圆背景。
返回: 返回:
str: hero区HTML,若无数据则为空字符串。 str: hero区HTML,若无数据则为空字符串。
@@ -509,6 +510,11 @@ class HTMLRenderer:
hero = self.metadata.get("hero") or {} hero = self.metadata.get("hero") or {}
if not hero: if not hero:
return "" return ""
# 获取标题和副标题
title = self.metadata.get("title") or "智能舆情报告"
subtitle = self.metadata.get("subtitle") or self.metadata.get("templateName") or ""
summary = hero.get("summary") summary = hero.get("summary")
summary_html = f'<p class="hero-summary">{self._escape_html(summary)}</p>' if summary else "" summary_html = f'<p class="hero-summary">{self._escape_html(summary)}</p>' if summary else ""
highlights = hero.get("highlights") or [] highlights = hero.get("highlights") or []
@@ -535,14 +541,21 @@ class HTMLRenderer:
""" """
return f""" return f"""
<section class="hero-section"> <section class="hero-section-combined">
<div class="hero-content"> <div class="hero-header">
{summary_html} <p class="hero-hint">文章总览</p>
<ul class="hero-highlights">{highlight_html}</ul> <h1 class="hero-title">{self._escape_html(title)}</h1>
<div class="hero-actions">{actions_html}</div> <p class="hero-subtitle">{self._escape_html(subtitle)}</p>
</div> </div>
<div class="hero-side"> <div class="hero-body">
{kpi_cards} <div class="hero-content">
{summary_html}
<ul class="hero-highlights">{highlight_html}</ul>
<div class="hero-actions">{actions_html}</div>
</div>
<div class="hero-side">
{kpi_cards}
</div>
</div> </div>
</section> </section>
""".strip() """.strip()
+297 -30
View File
@@ -145,11 +145,13 @@ class PDFLayoutOptimizer:
# 字符宽度估算系数(基于常见中文字体) # 字符宽度估算系数(基于常见中文字体)
# 中文字符通常是等宽的,约等于字号的像素值 # 中文字符通常是等宽的,约等于字号的像素值
# 英文和数字约为字号的0.5-0.6倍 # 英文和数字约为字号的0.5-0.6倍
# 更新:使用更精确的系数以更好地预测溢出
CHAR_WIDTH_FACTOR = { CHAR_WIDTH_FACTOR = {
'chinese': 1.0, # 中文字符 'chinese': 1.05, # 中文字符(略微增加以确保安全边界)
'english': 0.55, # 英文字母 'english': 0.58, # 英文字母
'number': 0.6, # 数字 'number': 0.65, # 数字(数字通常比字母稍宽)
'symbol': 0.4, # 符号 'symbol': 0.45, # 符号
'percent': 0.7, # 百分号等特殊符号
} }
def __init__(self, config: Optional[PDFLayoutConfig] = None): def __init__(self, config: Optional[PDFLayoutConfig] = None):
@@ -208,6 +210,8 @@ class PDFLayoutOptimizer:
- max_kpi_value_length: 最长KPI数值长度 - max_kpi_value_length: 最长KPI数值长度
- max_table_columns: 最多表格列数 - max_table_columns: 最多表格列数
- total_content_length: 总内容长度 - total_content_length: 总内容长度
- hero_kpi_count: Hero区域的KPI数量
- max_hero_kpi_value_length: Hero区域最长KPI数值长度
""" """
stats = { stats = {
'kpi_count': 0, 'kpi_count': 0,
@@ -219,8 +223,23 @@ class PDFLayoutOptimizer:
'max_table_rows': 0, 'max_table_rows': 0,
'total_content_length': 0, 'total_content_length': 0,
'has_long_text': False, 'has_long_text': False,
'hero_kpi_count': 0,
'max_hero_kpi_value_length': 0,
} }
# 分析hero区域的KPI
metadata = document_ir.get('metadata', {})
hero = metadata.get('hero', {})
if hero:
hero_kpis = hero.get('kpis', [])
stats['hero_kpi_count'] = len(hero_kpis)
for kpi in hero_kpis:
value = str(kpi.get('value', ''))
stats['max_hero_kpi_value_length'] = max(
stats['max_hero_kpi_value_length'],
len(value)
)
# 优先使用chaptersfallback到sections # 优先使用chaptersfallback到sections
chapters = document_ir.get('chapters', []) chapters = document_ir.get('chapters', [])
if not chapters: if not chapters:
@@ -353,6 +372,8 @@ class PDFLayoutOptimizer:
width += font_size * self.CHAR_WIDTH_FACTOR['english'] width += font_size * self.CHAR_WIDTH_FACTOR['english']
elif char.isdigit(): elif char.isdigit():
width += font_size * self.CHAR_WIDTH_FACTOR['number'] width += font_size * self.CHAR_WIDTH_FACTOR['number']
elif char in '%': # 百分号
width += font_size * self.CHAR_WIDTH_FACTOR['percent']
else: else:
width += font_size * self.CHAR_WIDTH_FACTOR['symbol'] width += font_size * self.CHAR_WIDTH_FACTOR['symbol']
@@ -460,52 +481,77 @@ class PDFLayoutOptimizer:
for issue in overflow_issues: for issue in overflow_issues:
logger.warning(f"检测到布局问题: {issue}") logger.warning(f"检测到布局问题: {issue}")
# KPI卡片宽度(像素) # KPI卡片宽度(像素)- 更保守的计算,留出更多安全边界
kpi_card_width = (800 - 20) // 2 - 40 # 2列布局 kpi_card_width = (800 - 20) // 2 - 60 # 2列布局,增加边距以防溢出
# 优先处理Hero区域的KPI(如果有的话)
if stats['hero_kpi_count'] > 0 and stats['max_hero_kpi_value_length'] > 0:
# Hero区域的KPI卡片宽度通常更窄
hero_kpi_width = 250 # Hero侧边栏的典型宽度
sample_text = '9' * stats['max_hero_kpi_value_length'] + ''
safe_font_size, needs_adjustment = self._calculate_safe_font_size(
sample_text,
hero_kpi_width,
min_font_size=14,
max_font_size=24 # Hero KPI字号通常较小
)
if needs_adjustment or stats['max_hero_kpi_value_length'] > 6:
# Hero KPI需要更保守的字号
config.kpi_card.font_size_value = max(14, safe_font_size - 2)
self.optimization_log.append(
f"Hero KPI数值较长({stats['max_hero_kpi_value_length']}字符)"
f"字号调整为{config.kpi_card.font_size_value}px"
)
# 根据KPI数值长度智能调整字号 # 根据KPI数值长度智能调整字号
if stats['max_kpi_value_length'] > 0: if stats['max_kpi_value_length'] > 0:
# 创建示例文本进行测试 # 创建示例文本进行测试 - 使用实际可能的字符组合
sample_text = '9' * stats['max_kpi_value_length'] sample_text = '9' * stats['max_kpi_value_length'] + '亿' # 加上可能的单位
safe_font_size, needs_adjustment = self._calculate_safe_font_size( safe_font_size, needs_adjustment = self._calculate_safe_font_size(
sample_text, sample_text,
kpi_card_width, kpi_card_width,
min_font_size=18, min_font_size=16, # 降低最小字号以确保不溢出
max_font_size=32 max_font_size=28 # 降低最大字号以更保守
) )
if needs_adjustment: if needs_adjustment:
config.kpi_card.font_size_value = safe_font_size config.kpi_card.font_size_value = safe_font_size
# 进一步降低以留出安全边界
config.kpi_card.font_size_value = max(16, safe_font_size - 2)
self.optimization_log.append( self.optimization_log.append(
f"KPI数值过长({stats['max_kpi_value_length']}字符)" f"KPI数值过长({stats['max_kpi_value_length']}字符)"
f"字号自动调整为{safe_font_size}px以防止溢出" f"字号自动调整为{config.kpi_card.font_size_value}px以防止溢出"
) )
elif stats['max_kpi_value_length'] > 10: elif stats['max_kpi_value_length'] > 8:
# 即使不溢出,也适当缩小以留出更多空间 # 对于较长文本,更保守地调整
config.kpi_card.font_size_value = min(28, safe_font_size) config.kpi_card.font_size_value = min(24, safe_font_size)
self.optimization_log.append( self.optimization_log.append(
f"KPI数值较长({stats['max_kpi_value_length']}字符)" f"KPI数值较长({stats['max_kpi_value_length']}字符)"
f"预防性调整字号为{config.kpi_card.font_size_value}px" f"预防性调整字号为{config.kpi_card.font_size_value}px"
) )
# 根据KPI数量调整网格布局 # 根据KPI数量调整网格布局和间距
if stats['kpi_count'] > 6: if stats['kpi_count'] > 6:
config.grid.columns = 3 config.grid.columns = 3
config.kpi_card.min_height = 100 config.kpi_card.min_height = 100
config.kpi_card.padding = 16 # 缩小padding以节省空间 config.kpi_card.padding = 14 # 缩小padding以节省空间
config.grid.gap = 16 # 减小间距
self.optimization_log.append( self.optimization_log.append(
f"KPI卡片较多({stats['kpi_count']}个)" f"KPI卡片较多({stats['kpi_count']}个)"
f"调整为3列布局并缩小内边距" f"调整为3列布局并缩小内边距和间距"
) )
elif stats['kpi_count'] > 4: elif stats['kpi_count'] > 4:
config.grid.columns = 2 config.grid.columns = 2
config.kpi_card.padding = 18 config.kpi_card.padding = 16
config.grid.gap = 18
self.optimization_log.append( self.optimization_log.append(
f"KPI卡片适中({stats['kpi_count']}个),使用2列布局" f"KPI卡片适中({stats['kpi_count']}个),使用2列布局"
) )
elif stats['kpi_count'] <= 2: elif stats['kpi_count'] <= 2:
config.grid.columns = 1 config.grid.columns = 1
config.kpi_card.padding = 24 # 较少卡片时增加padding config.kpi_card.padding = 22 # 较少卡片时增加padding
config.grid.gap = 20
self.optimization_log.append( self.optimization_log.append(
f"KPI卡片较少({stats['kpi_count']}个)" f"KPI卡片较少({stats['kpi_count']}个)"
f"使用1列布局并增加内边距" f"使用1列布局并增加内边距"
@@ -539,11 +585,19 @@ class PDFLayoutOptimizer:
# 如果有长文本,增加行高和段落间距 # 如果有长文本,增加行高和段落间距
if stats['has_long_text']: if stats['has_long_text']:
config.page.line_height = 1.8 config.page.line_height = 1.75 # 稍微降低以节省空间
config.callout.line_height = 1.8 config.callout.line_height = 1.75
config.page.paragraph_spacing = 18 config.page.paragraph_spacing = 16 # 适度间距
self.optimization_log.append( self.optimization_log.append(
"检测到长文本,增加行高至1.8和段落间距以提高可读性" "检测到长文本,增加行高至1.75和段落间距以提高可读性"
)
else:
# 没有长文本时使用更紧凑的间距
config.page.line_height = 1.5
config.callout.line_height = 1.6
config.page.paragraph_spacing = 14
self.optimization_log.append(
"文本长度适中,使用标准行高和段落间距"
) )
# 如果内容较多,减小整体字号 # 如果内容较多,减小整体字号
@@ -643,6 +697,16 @@ class PDFLayoutOptimizer:
css = f""" css = f"""
/* PDF布局优化样式 - 由PDFLayoutOptimizer自动生成 */ /* PDF布局优化样式 - 由PDFLayoutOptimizer自动生成 */
/* 隐藏独立的封面section,已合并到hero */
.cover {{
display: none !important;
}}
/* PDF中隐藏hero actions(深蓝色的三个按钮) */
.hero-actions {{
display: none !important;
}}
/* 页面基础样式 */ /* 页面基础样式 */
body {{ body {{
font-size: {cfg.page.font_size_base}px; font-size: {cfg.page.font_size_base}px;
@@ -731,12 +795,14 @@ p {{
font-size: {cfg.callout.font_size_title}px !important; font-size: {cfg.callout.font_size_title}px !important;
margin-bottom: 10px; margin-bottom: 10px;
word-break: break-word; word-break: break-word;
line-height: 1.4;
}} }}
.callout-content {{ .callout-content {{
font-size: {cfg.callout.font_size_content}px !important; font-size: {cfg.callout.font_size_content}px !important;
word-break: break-word; word-break: break-word;
overflow-wrap: break-word; overflow-wrap: break-word;
line-height: {cfg.callout.line_height};
}} }}
/* 表格优化 - 严格防止溢出 */ /* 表格优化 - 严格防止溢出 */
@@ -790,24 +856,196 @@ td {{
word-break: break-word; word-break: break-word;
}} }}
/* Hero区域的KPI卡片 */ /* Hero区域合并版本 - 包含标题和内容,保留蓝色椭圆背景 */
.hero-kpi {{ .hero-section-combined {{
padding: {cfg.kpi_card.padding}px !important; padding: 45px 55px !important;
margin: 0 auto 40px auto !important;
min-height: 500px;
/* 使用100%宽度,填满整个页面 */
width: 100% !important;
max-width: 100% !important;
box-sizing: border-box;
overflow: visible;
border-radius: 40px !important;
background: linear-gradient(135deg, #e8f4f8 0%, #d4e9f7 100%);
page-break-after: always !important;
}}
/* Hero标题区域 */
.hero-header {{
text-align: center;
margin-bottom: 25px;
padding-bottom: 18px;
border-bottom: 1px solid rgba(100, 150, 200, 0.2);
}}
.hero-hint {{
font-size: {max(cfg.page.font_size_base - 2, 11)}px !important;
color: #d32f2f;
margin: 0 0 6px 0;
font-weight: 500;
}}
.hero-title {{
font-size: {max(cfg.page.font_size_base + 5, 19)}px !important; /* 稍微减小标题字号 */
font-weight: 600;
margin: 6px 0;
color: #1a1a1a;
line-height: 1.3;
}}
.hero-subtitle {{
font-size: {max(cfg.page.font_size_base - 1, 12)}px !important;
color: #d32f2f;
margin: 6px 0 0 0;
font-weight: 400;
}}
/* Hero主体区域 - 左右分栏 */
.hero-body {{
display: flex;
gap: 28px; /* 左右间距 */
align-items: flex-start;
}}
/* Hero左侧内容区 - 占蓝色背景的70% */
.hero-content {{
flex: 7; /* 左侧占70% */
min-width: 0;
padding-right: 25px;
box-sizing: border-box;
overflow: hidden;
}}
/* Hero右侧KPI区域 - 占蓝色背景的30% */
.hero-side {{
flex: 3; /* 右侧占30% */
min-width: 0;
display: flex;
flex-direction: column;
gap: {max(cfg.grid.gap - 2, 10)}px;
overflow: hidden; overflow: hidden;
box-sizing: border-box; box-sizing: border-box;
}} }}
/* Hero区域的KPI卡片 - 横向拉长,每行显示一个内容 */
.hero-kpi {{
padding: 12px 18px !important; /* 增加横向padding */
overflow: hidden;
box-sizing: border-box;
max-width: 100%;
min-height: 85px; /* 增加高度以容纳三行 */
display: flex;
flex-direction: column;
justify-content: space-between;
}}
.hero-kpi .label {{ .hero-kpi .label {{
font-size: {cfg.kpi_card.font_size_label}px !important; font-size: {max(cfg.kpi_card.font_size_label - 3, 9)}px !important; /* 减小标签字号 */
word-break: break-word; word-break: break-word;
max-width: 100%; max-width: 100%;
line-height: 1.2;
margin-bottom: 4px;
overflow: hidden;
text-overflow: ellipsis;
display: block; /* 独占一行 */
}} }}
.hero-kpi .value {{ .hero-kpi .value {{
font-size: {cfg.kpi_card.font_size_value}px !important; font-size: {max(cfg.kpi_card.font_size_value - 12, 14)}px !important; /* 减小数值字号 */
word-break: break-word; word-break: break-word;
overflow-wrap: break-word; overflow-wrap: break-word;
max-width: 100%; max-width: 100%;
line-height: 1.1;
display: block; /* 独占一行 */
hyphens: auto;
overflow: hidden;
text-overflow: ellipsis;
margin-bottom: 3px;
}}
.hero-kpi .delta {{
font-size: {max(cfg.kpi_card.font_size_change - 3, 9)}px !important; /* 减小变化值字号 */
word-break: break-word;
margin-top: 3px;
display: block; /* 独占一行 */
max-width: 100%;
overflow: hidden;
text-overflow: ellipsis;
line-height: 1.2;
}}
/* Hero summary文本 */
.hero-summary {{
font-size: {cfg.page.font_size_base}px !important;
line-height: 1.65;
margin-top: 0;
margin-bottom: 18px; /* 增加底部边距,与badges保持一致 */
word-break: break-word;
max-width: 98%; /* 与badges宽度一致 */
overflow: hidden;
}}
/* Hero highlights列表 - 横向排列,宽度与summary一致 */
.hero-highlights {{
list-style: none;
padding: 0;
margin: 16px 0; /* 增加上下边距 */
display: flex;
flex-direction: column;
gap: 12px; /* 增加间距,让椭圆之间有更多空间 */
max-width: 100%;
overflow: hidden;
}}
.hero-highlights li {{
margin: 0;
max-width: 100%;
flex-shrink: 0;
flex-grow: 0;
}}
/* hero highlights中的badge - 拉长加宽的椭圆形背景,与上方文本对齐 */
.hero-highlights .badge {{
font-size: {max(cfg.callout.font_size_content - 3, 10)}px !important;
padding: 10px 20px !important; /* 增加padding,更好的视觉效果 */
max-width: 100%;
width: 98%; /* 占满宽度,与summary文本对齐 */
display: flex;
align-items: center; /* 垂直居中文字 */
justify-content: flex-start; /* 文字左对齐 */
word-wrap: break-word;
white-space: normal;
overflow: hidden;
text-overflow: ellipsis;
box-sizing: border-box;
line-height: 1.5; /* 增加行高,更好的可读性 */
min-height: 40px; /* 增加最小高度 */
/* 拉长的椭圆形背景 */
background: rgba(100, 120, 150, 0.15) !important;
border-radius: 22px !important; /* 稍微增加圆角 */
border: 1px solid rgba(100, 120, 150, 0.25);
}}
/* Hero actions按钮 - 确保不溢出椭圆 */
.hero-actions {{
margin-top: 12px;
display: flex;
flex-wrap: wrap;
gap: 6px;
max-width: 100%;
overflow: hidden;
}}
.hero-actions button {{
font-size: {max(cfg.page.font_size_base - 2, 11)}px !important;
padding: 5px 10px !important;
max-width: 200px; /* 限制按钮最大宽度 */
word-break: break-word;
white-space: normal;
overflow: hidden;
text-overflow: ellipsis;
box-sizing: border-box;
}} }}
/* 防止标题孤行 */ /* 防止标题孤行 */
@@ -818,6 +1056,19 @@ h1, h2, h3, h4, h5, h6 {{
overflow-wrap: break-word; overflow-wrap: break-word;
}} }}
/* ===== 强制页面分离规则 ===== */
/* 目录section强制开始新页并在之后强制分页 */
.toc-section {{
page-break-before: always !important;
page-break-after: always !important;
}}
/* 第一个章节强制开始新页(正文从第三页开始) */
main > .chapter:first-of-type {{
page-break-before: always !important;
}}
/* 确保内容块不被分页且不溢出 */ /* 确保内容块不被分页且不溢出 */
.content-block {{ .content-block {{
break-inside: avoid; break-inside: avoid;
@@ -838,13 +1089,29 @@ h1, h2, h3, h4, h5, h6 {{
letter-spacing: -0.02em; /* 稍微紧缩间距以节省空间 */ letter-spacing: -0.02em; /* 稍微紧缩间距以节省空间 */
}} }}
/* 色块(badge)样式控制 */ /* 色块(badge)样式控制 - 防止过大 */
.badge, .callout {{ .badge {{
display: inline-block; display: inline-block;
max-width: 100%; max-width: 100%;
overflow: hidden; overflow: hidden;
text-overflow: ellipsis; text-overflow: ellipsis;
white-space: normal; white-space: normal;
/* 限制badge的最大尺寸 */
padding: 4px 12px !important;
font-size: {max(cfg.page.font_size_base - 2, 12)}px !important;
line-height: 1.4 !important;
/* 防止badge异常过大 */
word-break: break-word;
hyphens: auto;
}}
/* 确保callout不会过大 */
.callout {{
max-width: 100% !important;
margin: 16px 0 !important;
padding: {cfg.callout.padding}px !important;
box-sizing: border-box;
overflow: hidden;
}} }}
/* 响应式调整 */ /* 响应式调整 */
+107 -4
View File
@@ -6,6 +6,7 @@ PDF渲染器 - 使用WeasyPrint从HTML生成PDF
from __future__ import annotations from __future__ import annotations
import base64 import base64
import copy
from pathlib import Path from pathlib import Path
from typing import Any, Dict from typing import Any, Dict
from datetime import datetime from datetime import datetime
@@ -86,6 +87,102 @@ class PDFRenderer:
raise FileNotFoundError(f"未找到字体文件,请检查 {fonts_dir} 目录") raise FileNotFoundError(f"未找到字体文件,请检查 {fonts_dir} 目录")
def _preprocess_charts(self, document_ir: Dict[str, Any]) -> Dict[str, Any]:
"""
预处理图表:验证和修复所有图表数据
这个方法确保在转换为SVG之前,所有图表数据都是有效的。
使用与HTMLRenderer相同的验证和修复逻辑,保证PDF和HTML的一致性。
参数:
document_ir: Document IR数据
返回:
Dict[str, Any]: 修复后的Document IR(深拷贝)
"""
# 深拷贝以避免修改原始IR
ir_copy = copy.deepcopy(document_ir)
repair_stats = {
'total': 0,
'repaired': 0,
'failed': 0
}
def repair_widgets_in_blocks(blocks: list) -> None:
"""递归修复blocks中的所有widget"""
for block in blocks:
if not isinstance(block, dict):
continue
# 处理widget类型
if block.get('type') == 'widget':
widget_type = block.get('widgetType', '')
if widget_type.startswith('chart.js'):
repair_stats['total'] += 1
# 使用HTMLRenderer的验证器和修复器
validation = self.html_renderer.chart_validator.validate(block)
if not validation.is_valid:
logger.debug(f"图表 {block.get('widgetId')} 需要修复: {validation.errors}")
# 尝试修复
repair_result = self.html_renderer.chart_repairer.repair(block, validation)
if repair_result.success and repair_result.repaired_block:
# 更新block内容(在副本中)
block.update(repair_result.repaired_block)
repair_stats['repaired'] += 1
logger.debug(
f"图表 {block.get('widgetId')} 已修复 "
f"(方法: {repair_result.method})"
)
else:
repair_stats['failed'] += 1
logger.warning(
f"图表 {block.get('widgetId')} 修复失败,将使用原始数据"
)
# 递归处理嵌套的blocks
nested_blocks = block.get('blocks')
if isinstance(nested_blocks, list):
repair_widgets_in_blocks(nested_blocks)
# 处理列表项
if block.get('type') == 'list':
items = block.get('items', [])
for item in items:
if isinstance(item, list):
repair_widgets_in_blocks(item)
# 处理表格单元格
if block.get('type') == 'table':
rows = block.get('rows', [])
for row in rows:
cells = row.get('cells', [])
for cell in cells:
cell_blocks = cell.get('blocks', [])
if isinstance(cell_blocks, list):
repair_widgets_in_blocks(cell_blocks)
# 处理所有章节
chapters = ir_copy.get('chapters', [])
for chapter in chapters:
blocks = chapter.get('blocks', [])
repair_widgets_in_blocks(blocks)
# 输出统计信息
if repair_stats['total'] > 0:
logger.info(
f"PDF图表预处理完成: "
f"总计 {repair_stats['total']} 个图表, "
f"修复 {repair_stats['repaired']} 个, "
f"失败 {repair_stats['failed']}"
)
return ir_copy
def _convert_charts_to_svg(self, document_ir: Dict[str, Any]) -> Dict[str, str]: def _convert_charts_to_svg(self, document_ir: Dict[str, Any]) -> Dict[str, str]:
""" """
将document_ir中的所有图表转换为SVG 将document_ir中的所有图表转换为SVG
@@ -260,11 +357,17 @@ class PDFRenderer:
else: else:
layout_config = self.layout_optimizer.config layout_config = self.layout_optimizer.config
# 转换图表为SVG # 关键修复:先预处理图表,确保数据有效
logger.info("开始转换图表为SVG矢量图形...") logger.info("预处理图表数据...")
svg_map = self._convert_charts_to_svg(document_ir) preprocessed_ir = self._preprocess_charts(document_ir)
# 使用HTML渲染器生成基础HTML # 转换图表为SVG(使用预处理后的IR
logger.info("开始转换图表为SVG矢量图形...")
svg_map = self._convert_charts_to_svg(preprocessed_ir)
# 使用HTML渲染器生成基础HTML(使用原始IR,因为HTMLRenderer会自己修复)
# 注意:这里仍使用原始document_ir,因为HTMLRenderer内部会进行相同的修复
# 这确保了HTML和SVG使用相同的修复逻辑
html = self.html_renderer.render(document_ir) html = self.html_renderer.render(document_ir)
# 注入SVG # 注入SVG