From d4f8301fd572f43a56f4dc65aee0ffd9e7ebb45f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E4=B8=80=E4=B8=81?= <1769123563@qq.com> Date: Wed, 19 Nov 2025 11:35:58 +0800 Subject: [PATCH] Fixed the PDF Rendering Overflow Issue and Updated the Logic for Rendering PDFs --- ReportEngine/renderers/html_renderer.py | 31 +- .../renderers/pdf_layout_optimizer.py | 327 ++++++++++++++++-- ReportEngine/renderers/pdf_renderer.py | 111 +++++- 3 files changed, 426 insertions(+), 43 deletions(-) diff --git a/ReportEngine/renderers/html_renderer.py b/ReportEngine/renderers/html_renderer.py index 83c20ee..7c5679f 100644 --- a/ReportEngine/renderers/html_renderer.py +++ b/ReportEngine/renderers/html_renderer.py @@ -405,12 +405,13 @@ class HTMLRenderer: def _render_body(self) -> str: """ 拼装结构,包含头部、导航、章节和脚本。 + 新版本:移除独立的cover section,标题合并到hero section中。 返回: str: body片段HTML。 """ header = self._render_header() - cover = self._render_cover() + # cover = self._render_cover() # 不再单独渲染cover hero = self._render_hero() toc_section = self._render_toc_section() chapters = "".join(self._render_chapter(chapter) for chapter in self.chapters) @@ -433,7 +434,6 @@ class HTMLRenderer: {header} {overlay}
-{cover} {hero} {toc_section} {chapters} @@ -502,6 +502,7 @@ class HTMLRenderer: def _render_hero(self) -> str: """ 根据layout中的hero字段输出摘要/KPI/亮点区。 + 新版本:将标题和总览合并在一起,去掉椭圆背景。 返回: str: hero区HTML,若无数据则为空字符串。 @@ -509,6 +510,11 @@ class HTMLRenderer: hero = self.metadata.get("hero") or {} if not hero: return "" + + # 获取标题和副标题 + title = self.metadata.get("title") or "智能舆情报告" + subtitle = self.metadata.get("subtitle") or self.metadata.get("templateName") or "" + summary = hero.get("summary") summary_html = f'

{self._escape_html(summary)}

' if summary else "" highlights = hero.get("highlights") or [] @@ -535,14 +541,21 @@ class HTMLRenderer: """ return f""" -
-
- {summary_html} -
    {highlight_html}
-
{actions_html}
+
+
+

文章总览

+

{self._escape_html(title)}

+

{self._escape_html(subtitle)}

-
- {kpi_cards} +
+
+ {summary_html} +
    {highlight_html}
+
{actions_html}
+
+
+ {kpi_cards} +
""".strip() diff --git a/ReportEngine/renderers/pdf_layout_optimizer.py b/ReportEngine/renderers/pdf_layout_optimizer.py index c3898a2..b70294c 100644 --- a/ReportEngine/renderers/pdf_layout_optimizer.py +++ b/ReportEngine/renderers/pdf_layout_optimizer.py @@ -145,11 +145,13 @@ class PDFLayoutOptimizer: # 字符宽度估算系数(基于常见中文字体) # 中文字符通常是等宽的,约等于字号的像素值 # 英文和数字约为字号的0.5-0.6倍 + # 更新:使用更精确的系数以更好地预测溢出 CHAR_WIDTH_FACTOR = { - 'chinese': 1.0, # 中文字符 - 'english': 0.55, # 英文字母 - 'number': 0.6, # 数字 - 'symbol': 0.4, # 符号 + 'chinese': 1.05, # 中文字符(略微增加以确保安全边界) + 'english': 0.58, # 英文字母 + 'number': 0.65, # 数字(数字通常比字母稍宽) + 'symbol': 0.45, # 符号 + 'percent': 0.7, # 百分号等特殊符号 } def __init__(self, config: Optional[PDFLayoutConfig] = None): @@ -208,6 +210,8 @@ class PDFLayoutOptimizer: - max_kpi_value_length: 最长KPI数值长度 - max_table_columns: 最多表格列数 - total_content_length: 总内容长度 + - hero_kpi_count: Hero区域的KPI数量 + - max_hero_kpi_value_length: Hero区域最长KPI数值长度 """ stats = { 'kpi_count': 0, @@ -219,8 +223,23 @@ class PDFLayoutOptimizer: 'max_table_rows': 0, 'total_content_length': 0, 'has_long_text': False, + 'hero_kpi_count': 0, + 'max_hero_kpi_value_length': 0, } + # 分析hero区域的KPI + metadata = document_ir.get('metadata', {}) + hero = metadata.get('hero', {}) + if hero: + hero_kpis = hero.get('kpis', []) + stats['hero_kpi_count'] = len(hero_kpis) + for kpi in hero_kpis: + value = str(kpi.get('value', '')) + stats['max_hero_kpi_value_length'] = max( + stats['max_hero_kpi_value_length'], + len(value) + ) + # 优先使用chapters,fallback到sections chapters = document_ir.get('chapters', []) if not chapters: @@ -353,6 +372,8 @@ class PDFLayoutOptimizer: width += font_size * self.CHAR_WIDTH_FACTOR['english'] elif char.isdigit(): width += font_size * self.CHAR_WIDTH_FACTOR['number'] + elif char in '%%': # 百分号 + width += font_size * self.CHAR_WIDTH_FACTOR['percent'] else: width += font_size * self.CHAR_WIDTH_FACTOR['symbol'] @@ -460,52 +481,77 @@ class PDFLayoutOptimizer: for issue in overflow_issues: logger.warning(f"检测到布局问题: {issue}") - # KPI卡片宽度(像素) - kpi_card_width = (800 - 20) // 2 - 40 # 2列布局 + # KPI卡片宽度(像素)- 更保守的计算,留出更多安全边界 + kpi_card_width = (800 - 20) // 2 - 60 # 2列布局,增加边距以防溢出 + + # 优先处理Hero区域的KPI(如果有的话) + if stats['hero_kpi_count'] > 0 and stats['max_hero_kpi_value_length'] > 0: + # Hero区域的KPI卡片宽度通常更窄 + hero_kpi_width = 250 # Hero侧边栏的典型宽度 + sample_text = '9' * stats['max_hero_kpi_value_length'] + '元' + safe_font_size, needs_adjustment = self._calculate_safe_font_size( + sample_text, + hero_kpi_width, + min_font_size=14, + max_font_size=24 # Hero KPI字号通常较小 + ) + + if needs_adjustment or stats['max_hero_kpi_value_length'] > 6: + # Hero KPI需要更保守的字号 + config.kpi_card.font_size_value = max(14, safe_font_size - 2) + self.optimization_log.append( + f"Hero KPI数值较长({stats['max_hero_kpi_value_length']}字符)," + f"字号调整为{config.kpi_card.font_size_value}px" + ) # 根据KPI数值长度智能调整字号 if stats['max_kpi_value_length'] > 0: - # 创建示例文本进行测试 - sample_text = '9' * stats['max_kpi_value_length'] + # 创建示例文本进行测试 - 使用实际可能的字符组合 + sample_text = '9' * stats['max_kpi_value_length'] + '亿' # 加上可能的单位 safe_font_size, needs_adjustment = self._calculate_safe_font_size( sample_text, kpi_card_width, - min_font_size=18, - max_font_size=32 + min_font_size=16, # 降低最小字号以确保不溢出 + max_font_size=28 # 降低最大字号以更保守 ) if needs_adjustment: config.kpi_card.font_size_value = safe_font_size + # 进一步降低以留出安全边界 + config.kpi_card.font_size_value = max(16, safe_font_size - 2) self.optimization_log.append( f"KPI数值过长({stats['max_kpi_value_length']}字符)," - f"字号自动调整为{safe_font_size}px以防止溢出" + f"字号自动调整为{config.kpi_card.font_size_value}px以防止溢出" ) - elif stats['max_kpi_value_length'] > 10: - # 即使不溢出,也适当缩小以留出更多空间 - config.kpi_card.font_size_value = min(28, safe_font_size) + elif stats['max_kpi_value_length'] > 8: + # 对于较长文本,更保守地调整 + config.kpi_card.font_size_value = min(24, safe_font_size) self.optimization_log.append( f"KPI数值较长({stats['max_kpi_value_length']}字符)," f"预防性调整字号为{config.kpi_card.font_size_value}px" ) - # 根据KPI数量调整网格布局 + # 根据KPI数量调整网格布局和间距 if stats['kpi_count'] > 6: config.grid.columns = 3 config.kpi_card.min_height = 100 - config.kpi_card.padding = 16 # 缩小padding以节省空间 + config.kpi_card.padding = 14 # 缩小padding以节省空间 + config.grid.gap = 16 # 减小间距 self.optimization_log.append( f"KPI卡片较多({stats['kpi_count']}个)," - f"调整为3列布局并缩小内边距" + f"调整为3列布局并缩小内边距和间距" ) elif stats['kpi_count'] > 4: config.grid.columns = 2 - config.kpi_card.padding = 18 + config.kpi_card.padding = 16 + config.grid.gap = 18 self.optimization_log.append( f"KPI卡片适中({stats['kpi_count']}个),使用2列布局" ) elif stats['kpi_count'] <= 2: config.grid.columns = 1 - config.kpi_card.padding = 24 # 较少卡片时增加padding + config.kpi_card.padding = 22 # 较少卡片时增加padding + config.grid.gap = 20 self.optimization_log.append( f"KPI卡片较少({stats['kpi_count']}个)," f"使用1列布局并增加内边距" @@ -539,11 +585,19 @@ class PDFLayoutOptimizer: # 如果有长文本,增加行高和段落间距 if stats['has_long_text']: - config.page.line_height = 1.8 - config.callout.line_height = 1.8 - config.page.paragraph_spacing = 18 + config.page.line_height = 1.75 # 稍微降低以节省空间 + config.callout.line_height = 1.75 + config.page.paragraph_spacing = 16 # 适度间距 self.optimization_log.append( - "检测到长文本,增加行高至1.8和段落间距以提高可读性" + "检测到长文本,增加行高至1.75和段落间距以提高可读性" + ) + else: + # 没有长文本时使用更紧凑的间距 + config.page.line_height = 1.5 + config.callout.line_height = 1.6 + config.page.paragraph_spacing = 14 + self.optimization_log.append( + "文本长度适中,使用标准行高和段落间距" ) # 如果内容较多,减小整体字号 @@ -643,6 +697,16 @@ class PDFLayoutOptimizer: css = f""" /* PDF布局优化样式 - 由PDFLayoutOptimizer自动生成 */ +/* 隐藏独立的封面section,已合并到hero */ +.cover {{ + display: none !important; +}} + +/* PDF中隐藏hero actions(深蓝色的三个按钮) */ +.hero-actions {{ + display: none !important; +}} + /* 页面基础样式 */ body {{ font-size: {cfg.page.font_size_base}px; @@ -731,12 +795,14 @@ p {{ font-size: {cfg.callout.font_size_title}px !important; margin-bottom: 10px; word-break: break-word; + line-height: 1.4; }} .callout-content {{ font-size: {cfg.callout.font_size_content}px !important; word-break: break-word; overflow-wrap: break-word; + line-height: {cfg.callout.line_height}; }} /* 表格优化 - 严格防止溢出 */ @@ -790,24 +856,196 @@ td {{ word-break: break-word; }} -/* Hero区域的KPI卡片 */ -.hero-kpi {{ - padding: {cfg.kpi_card.padding}px !important; +/* Hero区域合并版本 - 包含标题和内容,保留蓝色椭圆背景 */ +.hero-section-combined {{ + padding: 45px 55px !important; + margin: 0 auto 40px auto !important; + min-height: 500px; + /* 使用100%宽度,填满整个页面 */ + width: 100% !important; + max-width: 100% !important; + box-sizing: border-box; + overflow: visible; + border-radius: 40px !important; + background: linear-gradient(135deg, #e8f4f8 0%, #d4e9f7 100%); + page-break-after: always !important; +}} + +/* Hero标题区域 */ +.hero-header {{ + text-align: center; + margin-bottom: 25px; + padding-bottom: 18px; + border-bottom: 1px solid rgba(100, 150, 200, 0.2); +}} + +.hero-hint {{ + font-size: {max(cfg.page.font_size_base - 2, 11)}px !important; + color: #d32f2f; + margin: 0 0 6px 0; + font-weight: 500; +}} + +.hero-title {{ + font-size: {max(cfg.page.font_size_base + 5, 19)}px !important; /* 稍微减小标题字号 */ + font-weight: 600; + margin: 6px 0; + color: #1a1a1a; + line-height: 1.3; +}} + +.hero-subtitle {{ + font-size: {max(cfg.page.font_size_base - 1, 12)}px !important; + color: #d32f2f; + margin: 6px 0 0 0; + font-weight: 400; +}} + +/* Hero主体区域 - 左右分栏 */ +.hero-body {{ + display: flex; + gap: 28px; /* 左右间距 */ + align-items: flex-start; +}} + +/* Hero左侧内容区 - 占蓝色背景的70% */ +.hero-content {{ + flex: 7; /* 左侧占70% */ + min-width: 0; + padding-right: 25px; + box-sizing: border-box; + overflow: hidden; +}} + +/* Hero右侧KPI区域 - 占蓝色背景的30% */ +.hero-side {{ + flex: 3; /* 右侧占30% */ + min-width: 0; + display: flex; + flex-direction: column; + gap: {max(cfg.grid.gap - 2, 10)}px; overflow: hidden; box-sizing: border-box; }} +/* Hero区域的KPI卡片 - 横向拉长,每行显示一个内容 */ +.hero-kpi {{ + padding: 12px 18px !important; /* 增加横向padding */ + overflow: hidden; + box-sizing: border-box; + max-width: 100%; + min-height: 85px; /* 增加高度以容纳三行 */ + display: flex; + flex-direction: column; + justify-content: space-between; +}} + .hero-kpi .label {{ - font-size: {cfg.kpi_card.font_size_label}px !important; + font-size: {max(cfg.kpi_card.font_size_label - 3, 9)}px !important; /* 减小标签字号 */ word-break: break-word; max-width: 100%; + line-height: 1.2; + margin-bottom: 4px; + overflow: hidden; + text-overflow: ellipsis; + display: block; /* 独占一行 */ }} .hero-kpi .value {{ - font-size: {cfg.kpi_card.font_size_value}px !important; + font-size: {max(cfg.kpi_card.font_size_value - 12, 14)}px !important; /* 减小数值字号 */ word-break: break-word; overflow-wrap: break-word; max-width: 100%; + line-height: 1.1; + display: block; /* 独占一行 */ + hyphens: auto; + overflow: hidden; + text-overflow: ellipsis; + margin-bottom: 3px; +}} + +.hero-kpi .delta {{ + font-size: {max(cfg.kpi_card.font_size_change - 3, 9)}px !important; /* 减小变化值字号 */ + word-break: break-word; + margin-top: 3px; + display: block; /* 独占一行 */ + max-width: 100%; + overflow: hidden; + text-overflow: ellipsis; + line-height: 1.2; +}} + +/* Hero summary文本 */ +.hero-summary {{ + font-size: {cfg.page.font_size_base}px !important; + line-height: 1.65; + margin-top: 0; + margin-bottom: 18px; /* 增加底部边距,与badges保持一致 */ + word-break: break-word; + max-width: 98%; /* 与badges宽度一致 */ + overflow: hidden; +}} + +/* Hero highlights列表 - 横向排列,宽度与summary一致 */ +.hero-highlights {{ + list-style: none; + padding: 0; + margin: 16px 0; /* 增加上下边距 */ + display: flex; + flex-direction: column; + gap: 12px; /* 增加间距,让椭圆之间有更多空间 */ + max-width: 100%; + overflow: hidden; +}} + +.hero-highlights li {{ + margin: 0; + max-width: 100%; + flex-shrink: 0; + flex-grow: 0; +}} + +/* hero highlights中的badge - 拉长加宽的椭圆形背景,与上方文本对齐 */ +.hero-highlights .badge {{ + font-size: {max(cfg.callout.font_size_content - 3, 10)}px !important; + padding: 10px 20px !important; /* 增加padding,更好的视觉效果 */ + max-width: 100%; + width: 98%; /* 占满宽度,与summary文本对齐 */ + display: flex; + align-items: center; /* 垂直居中文字 */ + justify-content: flex-start; /* 文字左对齐 */ + word-wrap: break-word; + white-space: normal; + overflow: hidden; + text-overflow: ellipsis; + box-sizing: border-box; + line-height: 1.5; /* 增加行高,更好的可读性 */ + min-height: 40px; /* 增加最小高度 */ + /* 拉长的椭圆形背景 */ + background: rgba(100, 120, 150, 0.15) !important; + border-radius: 22px !important; /* 稍微增加圆角 */ + border: 1px solid rgba(100, 120, 150, 0.25); +}} + +/* Hero actions按钮 - 确保不溢出椭圆 */ +.hero-actions {{ + margin-top: 12px; + display: flex; + flex-wrap: wrap; + gap: 6px; + max-width: 100%; + overflow: hidden; +}} + +.hero-actions button {{ + font-size: {max(cfg.page.font_size_base - 2, 11)}px !important; + padding: 5px 10px !important; + max-width: 200px; /* 限制按钮最大宽度 */ + word-break: break-word; + white-space: normal; + overflow: hidden; + text-overflow: ellipsis; + box-sizing: border-box; }} /* 防止标题孤行 */ @@ -818,6 +1056,19 @@ h1, h2, h3, h4, h5, h6 {{ overflow-wrap: break-word; }} +/* ===== 强制页面分离规则 ===== */ + +/* 目录section强制开始新页并在之后强制分页 */ +.toc-section {{ + page-break-before: always !important; + page-break-after: always !important; +}} + +/* 第一个章节强制开始新页(正文从第三页开始) */ +main > .chapter:first-of-type {{ + page-break-before: always !important; +}} + /* 确保内容块不被分页且不溢出 */ .content-block {{ break-inside: avoid; @@ -838,13 +1089,29 @@ h1, h2, h3, h4, h5, h6 {{ letter-spacing: -0.02em; /* 稍微紧缩间距以节省空间 */ }} -/* 色块(badge)样式控制 */ -.badge, .callout {{ +/* 色块(badge)样式控制 - 防止过大 */ +.badge {{ display: inline-block; max-width: 100%; overflow: hidden; text-overflow: ellipsis; white-space: normal; + /* 限制badge的最大尺寸 */ + padding: 4px 12px !important; + font-size: {max(cfg.page.font_size_base - 2, 12)}px !important; + line-height: 1.4 !important; + /* 防止badge异常过大 */ + word-break: break-word; + hyphens: auto; +}} + +/* 确保callout不会过大 */ +.callout {{ + max-width: 100% !important; + margin: 16px 0 !important; + padding: {cfg.callout.padding}px !important; + box-sizing: border-box; + overflow: hidden; }} /* 响应式调整 */ diff --git a/ReportEngine/renderers/pdf_renderer.py b/ReportEngine/renderers/pdf_renderer.py index 8b22e7e..8972f77 100644 --- a/ReportEngine/renderers/pdf_renderer.py +++ b/ReportEngine/renderers/pdf_renderer.py @@ -6,6 +6,7 @@ PDF渲染器 - 使用WeasyPrint从HTML生成PDF from __future__ import annotations import base64 +import copy from pathlib import Path from typing import Any, Dict from datetime import datetime @@ -86,6 +87,102 @@ class PDFRenderer: raise FileNotFoundError(f"未找到字体文件,请检查 {fonts_dir} 目录") + def _preprocess_charts(self, document_ir: Dict[str, Any]) -> Dict[str, Any]: + """ + 预处理图表:验证和修复所有图表数据 + + 这个方法确保在转换为SVG之前,所有图表数据都是有效的。 + 使用与HTMLRenderer相同的验证和修复逻辑,保证PDF和HTML的一致性。 + + 参数: + document_ir: Document IR数据 + + 返回: + Dict[str, Any]: 修复后的Document IR(深拷贝) + """ + # 深拷贝以避免修改原始IR + ir_copy = copy.deepcopy(document_ir) + + repair_stats = { + 'total': 0, + 'repaired': 0, + 'failed': 0 + } + + def repair_widgets_in_blocks(blocks: list) -> None: + """递归修复blocks中的所有widget""" + for block in blocks: + if not isinstance(block, dict): + continue + + # 处理widget类型 + if block.get('type') == 'widget': + widget_type = block.get('widgetType', '') + if widget_type.startswith('chart.js'): + repair_stats['total'] += 1 + + # 使用HTMLRenderer的验证器和修复器 + validation = self.html_renderer.chart_validator.validate(block) + + if not validation.is_valid: + logger.debug(f"图表 {block.get('widgetId')} 需要修复: {validation.errors}") + + # 尝试修复 + repair_result = self.html_renderer.chart_repairer.repair(block, validation) + + if repair_result.success and repair_result.repaired_block: + # 更新block内容(在副本中) + block.update(repair_result.repaired_block) + repair_stats['repaired'] += 1 + logger.debug( + f"图表 {block.get('widgetId')} 已修复 " + f"(方法: {repair_result.method})" + ) + else: + repair_stats['failed'] += 1 + logger.warning( + f"图表 {block.get('widgetId')} 修复失败,将使用原始数据" + ) + + # 递归处理嵌套的blocks + nested_blocks = block.get('blocks') + if isinstance(nested_blocks, list): + repair_widgets_in_blocks(nested_blocks) + + # 处理列表项 + if block.get('type') == 'list': + items = block.get('items', []) + for item in items: + if isinstance(item, list): + repair_widgets_in_blocks(item) + + # 处理表格单元格 + if block.get('type') == 'table': + rows = block.get('rows', []) + for row in rows: + cells = row.get('cells', []) + for cell in cells: + cell_blocks = cell.get('blocks', []) + if isinstance(cell_blocks, list): + repair_widgets_in_blocks(cell_blocks) + + # 处理所有章节 + chapters = ir_copy.get('chapters', []) + for chapter in chapters: + blocks = chapter.get('blocks', []) + repair_widgets_in_blocks(blocks) + + # 输出统计信息 + if repair_stats['total'] > 0: + logger.info( + f"PDF图表预处理完成: " + f"总计 {repair_stats['total']} 个图表, " + f"修复 {repair_stats['repaired']} 个, " + f"失败 {repair_stats['failed']} 个" + ) + + return ir_copy + def _convert_charts_to_svg(self, document_ir: Dict[str, Any]) -> Dict[str, str]: """ 将document_ir中的所有图表转换为SVG @@ -260,11 +357,17 @@ class PDFRenderer: else: layout_config = self.layout_optimizer.config - # 转换图表为SVG - logger.info("开始转换图表为SVG矢量图形...") - svg_map = self._convert_charts_to_svg(document_ir) + # 关键修复:先预处理图表,确保数据有效 + logger.info("预处理图表数据...") + preprocessed_ir = self._preprocess_charts(document_ir) - # 使用HTML渲染器生成基础HTML + # 转换图表为SVG(使用预处理后的IR) + logger.info("开始转换图表为SVG矢量图形...") + svg_map = self._convert_charts_to_svg(preprocessed_ir) + + # 使用HTML渲染器生成基础HTML(使用原始IR,因为HTMLRenderer会自己修复) + # 注意:这里仍使用原始document_ir,因为HTMLRenderer内部会进行相同的修复 + # 这确保了HTML和SVG使用相同的修复逻辑 html = self.html_renderer.render(document_ir) # 注入SVG