From d397b98d2b17909b70c817a82d85e54a11ee2cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E4=B8=80=E4=B8=81?= <1769123563@qq.com> Date: Tue, 18 Nov 2025 23:57:08 +0800 Subject: [PATCH] Preventing Errors and Overflow During PDF Rendering --- .../renderers/pdf_layout_optimizer.py | 456 +++++++++++++++--- 1 file changed, 389 insertions(+), 67 deletions(-) diff --git a/ReportEngine/renderers/pdf_layout_optimizer.py b/ReportEngine/renderers/pdf_layout_optimizer.py index b913580..c3898a2 100644 --- a/ReportEngine/renderers/pdf_layout_optimizer.py +++ b/ReportEngine/renderers/pdf_layout_optimizer.py @@ -8,13 +8,16 @@ PDF布局优化器 - 调整色块大小 - 智能排列信息块 - 保存和加载优化方案 +- 文本宽度检测和溢出预防 +- 色块边界检测和自动调整 """ from __future__ import annotations import json +import re from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple from dataclasses import dataclass, asdict from datetime import datetime from loguru import logger @@ -139,6 +142,16 @@ class PDFLayoutOptimizer: 根据内容特征自动优化PDF布局,防止溢出和排版问题。 """ + # 字符宽度估算系数(基于常见中文字体) + # 中文字符通常是等宽的,约等于字号的像素值 + # 英文和数字约为字号的0.5-0.6倍 + CHAR_WIDTH_FACTOR = { + 'chinese': 1.0, # 中文字符 + 'english': 0.55, # 英文字母 + 'number': 0.6, # 数字 + 'symbol': 0.4, # 符号 + } + def __init__(self, config: Optional[PDFLayoutConfig] = None): """ 初始化优化器 @@ -208,66 +221,220 @@ class PDFLayoutOptimizer: 'has_long_text': False, } + # 优先使用chapters,fallback到sections + chapters = document_ir.get('chapters', []) + if not chapters: + chapters = document_ir.get('sections', []) + # 遍历章节 - sections = document_ir.get('sections', []) - for section in sections: - self._analyze_section(section, stats) + for chapter in chapters: + self._analyze_chapter(chapter, stats) logger.info(f"文档分析完成: {stats}") return stats - def _analyze_section(self, section: Dict[str, Any], stats: Dict[str, Any]): - """递归分析章节""" - children = section.get('children', []) + def _analyze_chapter(self, chapter: Dict[str, Any], stats: Dict[str, Any]): + """分析单个章节""" + # 分析章节中的blocks + blocks = chapter.get('blocks', []) + for block in blocks: + self._analyze_block(block, stats) + # 递归处理子章节(如果有) + children = chapter.get('children', []) for child in children: - node_type = child.get('type') + if isinstance(child, dict): + self._analyze_chapter(child, stats) - if node_type == 'kpi_grid': - kpis = child.get('kpis', []) - stats['kpi_count'] += len(kpis) + def _analyze_block(self, block: Dict[str, Any], stats: Dict[str, Any]): + """分析单个block节点""" + if not isinstance(block, dict): + return - # 检查KPI数值长度 - for kpi in kpis: - value = str(kpi.get('value', '')) - stats['max_kpi_value_length'] = max( - stats['max_kpi_value_length'], - len(value) - ) + node_type = block.get('type') - elif node_type == 'table': - stats['table_count'] += 1 + if node_type == 'kpiGrid': + kpis = block.get('items', []) + stats['kpi_count'] += len(kpis) - # 分析表格结构 - headers = child.get('headers', []) - rows = child.get('rows', []) + # 检查KPI数值长度 + for kpi in kpis: + value = str(kpi.get('value', '')) + stats['max_kpi_value_length'] = max( + stats['max_kpi_value_length'], + len(value) + ) + + elif node_type == 'table': + stats['table_count'] += 1 + + # 分析表格结构 + headers = block.get('headers', []) + rows = block.get('rows', []) + if rows and isinstance(rows[0], dict): + # 从第一行的cells计算列数 + cells = rows[0].get('cells', []) + stats['max_table_columns'] = max( + stats['max_table_columns'], + len(cells) + ) + else: stats['max_table_columns'] = max( stats['max_table_columns'], len(headers) ) - stats['max_table_rows'] = max( - stats['max_table_rows'], - len(rows) + stats['max_table_rows'] = max( + stats['max_table_rows'], + len(rows) + ) + + elif node_type == 'chart' or node_type == 'widget': + stats['chart_count'] += 1 + + elif node_type == 'callout': + stats['callout_count'] += 1 + # 检查callout中的blocks + callout_blocks = block.get('blocks', []) + for cb in callout_blocks: + if isinstance(cb, dict) and cb.get('type') == 'paragraph': + text = self._extract_text_from_paragraph(cb) + if len(text) > 200: + stats['has_long_text'] = True + + elif node_type == 'paragraph': + text = self._extract_text_from_paragraph(block) + stats['total_content_length'] += len(text) + if len(text) > 500: + stats['has_long_text'] = True + + # 递归处理嵌套的blocks + nested_blocks = block.get('blocks', []) + if nested_blocks: + for nested in nested_blocks: + self._analyze_block(nested, stats) + + def _extract_text_from_paragraph(self, paragraph: Dict[str, Any]) -> str: + """从paragraph block中提取纯文本""" + text_parts = [] + inlines = paragraph.get('inlines', []) + for inline in inlines: + if isinstance(inline, dict): + text = inline.get('text', '') + if text: + text_parts.append(str(text)) + elif isinstance(inline, str): + text_parts.append(inline) + return ''.join(text_parts) + + def _analyze_section(self, section: Dict[str, Any], stats: Dict[str, Any]): + """递归分析章节(保留用于向后兼容)""" + # 这个方法保留用于向后兼容,实际上调用_analyze_chapter + self._analyze_chapter(section, stats) + + def _estimate_text_width(self, text: str, font_size: int) -> float: + """ + 估算文本的像素宽度 + + 参数: + text: 要测量的文本 + font_size: 字号(像素) + + 返回: + float: 估算的宽度(像素) + """ + if not text: + return 0.0 + + width = 0.0 + for char in text: + if '\u4e00' <= char <= '\u9fff': # 中文字符范围 + width += font_size * self.CHAR_WIDTH_FACTOR['chinese'] + elif char.isalpha(): + width += font_size * self.CHAR_WIDTH_FACTOR['english'] + elif char.isdigit(): + width += font_size * self.CHAR_WIDTH_FACTOR['number'] + else: + width += font_size * self.CHAR_WIDTH_FACTOR['symbol'] + + return width + + def _check_text_overflow(self, text: str, font_size: int, max_width: int) -> bool: + """ + 检查文本是否会溢出 + + 参数: + text: 要检查的文本 + font_size: 字号(像素) + max_width: 最大宽度(像素) + + 返回: + bool: True表示会溢出 + """ + estimated_width = self._estimate_text_width(text, font_size) + return estimated_width > max_width + + def _calculate_safe_font_size( + self, + text: str, + max_width: int, + min_font_size: int = 10, + max_font_size: int = 32 + ) -> Tuple[int, bool]: + """ + 计算安全的字号以避免溢出 + + 参数: + text: 要显示的文本 + max_width: 最大宽度(像素) + min_font_size: 最小字号 + max_font_size: 最大字号 + + 返回: + Tuple[int, bool]: (建议字号, 是否需要调整) + """ + if not text: + return max_font_size, False + + # 从最大字号开始尝试 + for font_size in range(max_font_size, min_font_size - 1, -1): + if not self._check_text_overflow(text, font_size, max_width): + # 如果需要缩小字号 + needs_adjustment = font_size < max_font_size + return font_size, needs_adjustment + + # 如果连最小字号都溢出,返回最小字号并标记需要调整 + return min_font_size, True + + def _detect_kpi_overflow_issues(self, stats: Dict[str, Any]) -> List[str]: + """ + 检测KPI卡片可能的溢出问题 + + 参数: + stats: 文档统计信息 + + 返回: + List[str]: 检测到的问题列表 + """ + issues = [] + + # KPI卡片的典型宽度(像素) + # 基于2列布局,容器宽度800px,间距20px + kpi_card_width = (800 - 20) // 2 - 40 # 减去padding + + # 检查最长KPI数值 + max_kpi_length = stats.get('max_kpi_value_length', 0) + if max_kpi_length > 0: + # 假设一个很长的数值 + sample_text = '1' * max_kpi_length + '亿元' + current_font_size = self.config.kpi_card.font_size_value + + if self._check_text_overflow(sample_text, current_font_size, kpi_card_width): + issues.append( + f"KPI数值过长({max_kpi_length}字符)," + f"字号{current_font_size}px可能导致溢出" ) - elif node_type == 'chart': - stats['chart_count'] += 1 - - elif node_type == 'callout': - stats['callout_count'] += 1 - content = child.get('content', '') - if len(content) > 200: - stats['has_long_text'] = True - - elif node_type == 'paragraph': - text = child.get('text', '') - stats['total_content_length'] += len(text) - if len(text) > 500: - stats['has_long_text'] = True - - # 递归处理子章节 - if node_type == 'section': - self._analyze_section(child, stats) + return issues def _adjust_config_based_on_stats( self, @@ -287,37 +454,73 @@ class PDFLayoutOptimizer: optimize_for_print=self.config.optimize_for_print, ) - # 根据KPI数值长度调整字号 - if stats['max_kpi_value_length'] > 10: - config.kpi_card.font_size_value = 28 - self.optimization_log.append( - f"KPI数值过长({stats['max_kpi_value_length']}字符)," - f"字号从32调整为28" - ) - elif stats['max_kpi_value_length'] > 15: - config.kpi_card.font_size_value = 24 - self.optimization_log.append( - f"KPI数值很长({stats['max_kpi_value_length']}字符)," - f"字号从32调整为24" + # 检测KPI溢出问题 + overflow_issues = self._detect_kpi_overflow_issues(stats) + if overflow_issues: + for issue in overflow_issues: + logger.warning(f"检测到布局问题: {issue}") + + # KPI卡片宽度(像素) + kpi_card_width = (800 - 20) // 2 - 40 # 2列布局 + + # 根据KPI数值长度智能调整字号 + if stats['max_kpi_value_length'] > 0: + # 创建示例文本进行测试 + sample_text = '9' * stats['max_kpi_value_length'] + safe_font_size, needs_adjustment = self._calculate_safe_font_size( + sample_text, + kpi_card_width, + min_font_size=18, + max_font_size=32 ) - # 根据KPI数量调整网格列数 + if needs_adjustment: + config.kpi_card.font_size_value = safe_font_size + self.optimization_log.append( + f"KPI数值过长({stats['max_kpi_value_length']}字符)," + f"字号自动调整为{safe_font_size}px以防止溢出" + ) + elif stats['max_kpi_value_length'] > 10: + # 即使不溢出,也适当缩小以留出更多空间 + config.kpi_card.font_size_value = min(28, safe_font_size) + self.optimization_log.append( + f"KPI数值较长({stats['max_kpi_value_length']}字符)," + f"预防性调整字号为{config.kpi_card.font_size_value}px" + ) + + # 根据KPI数量调整网格布局 if stats['kpi_count'] > 6: config.grid.columns = 3 config.kpi_card.min_height = 100 + config.kpi_card.padding = 16 # 缩小padding以节省空间 self.optimization_log.append( f"KPI卡片较多({stats['kpi_count']}个)," - f"每行列数从2调整为3" + f"调整为3列布局并缩小内边距" + ) + elif stats['kpi_count'] > 4: + config.grid.columns = 2 + config.kpi_card.padding = 18 + self.optimization_log.append( + f"KPI卡片适中({stats['kpi_count']}个),使用2列布局" ) elif stats['kpi_count'] <= 2: config.grid.columns = 1 + config.kpi_card.padding = 24 # 较少卡片时增加padding self.optimization_log.append( f"KPI卡片较少({stats['kpi_count']}个)," - f"每行列数从2调整为1" + f"使用1列布局并增加内边距" ) - # 根据表格列数调整字号 - if stats['max_table_columns'] > 6: + # 根据表格列数调整字号和间距 + if stats['max_table_columns'] > 8: + config.table.font_size_header = 10 + config.table.font_size_body = 9 + config.table.cell_padding = 6 + self.optimization_log.append( + f"表格列数很多({stats['max_table_columns']}列)," + f"大幅缩小字号和内边距" + ) + elif stats['max_table_columns'] > 6: config.table.font_size_header = 11 config.table.font_size_body = 10 config.table.cell_padding = 8 @@ -325,13 +528,34 @@ class PDFLayoutOptimizer: f"表格列数较多({stats['max_table_columns']}列)," f"缩小字号和内边距" ) + elif stats['max_table_columns'] > 4: + config.table.font_size_header = 12 + config.table.font_size_body = 11 + config.table.cell_padding = 10 + self.optimization_log.append( + f"表格列数适中({stats['max_table_columns']}列)," + f"适度调整字号" + ) - # 如果有长文本,增加行高 + # 如果有长文本,增加行高和段落间距 if stats['has_long_text']: config.page.line_height = 1.8 config.callout.line_height = 1.8 + config.page.paragraph_spacing = 18 self.optimization_log.append( - "检测到长文本,增加行高至1.8提高可读性" + "检测到长文本,增加行高至1.8和段落间距以提高可读性" + ) + + # 如果内容较多,减小整体字号 + total_blocks = (stats['kpi_count'] + stats['table_count'] + + stats['chart_count'] + stats['callout_count']) + if total_blocks > 20: + config.page.font_size_base = 13 + config.page.font_size_h2 = 22 + config.page.font_size_h3 = 18 + self.optimization_log.append( + f"内容块较多({total_blocks}个)," + f"适度缩小整体字号以优化排版" ) return config @@ -446,7 +670,7 @@ p {{ margin-bottom: {cfg.page.section_spacing}px; }} -/* KPI卡片优化 */ +/* KPI卡片优化 - 防止溢出 */ .kpi-grid {{ display: grid; grid-template-columns: repeat({cfg.grid.columns}, 1fr); @@ -459,58 +683,93 @@ p {{ min-height: {cfg.kpi_card.min_height}px; break-inside: avoid; page-break-inside: avoid; + /* 防止溢出的关键设置 */ + overflow: hidden; + box-sizing: border-box; + max-width: 100%; }} .kpi-card .value {{ font-size: {cfg.kpi_card.font_size_value}px !important; line-height: 1.2; + /* 强制换行和溢出控制 */ word-break: break-word; + overflow-wrap: break-word; + hyphens: auto; + max-width: 100%; + overflow: hidden; + text-overflow: ellipsis; }} .kpi-card .label {{ font-size: {cfg.kpi_card.font_size_label}px !important; + /* 防止标签溢出 */ + word-break: break-word; + overflow-wrap: break-word; + max-width: 100%; }} .kpi-card .change {{ font-size: {cfg.kpi_card.font_size_change}px !important; + word-break: break-word; }} -/* 提示框优化 */ +/* 提示框优化 - 防止溢出 */ .callout {{ padding: {cfg.callout.padding}px !important; margin: 20px 0; line-height: {cfg.callout.line_height}; break-inside: avoid; page-break-inside: avoid; + /* 防止溢出 */ + overflow: hidden; + box-sizing: border-box; + max-width: 100%; }} .callout-title {{ font-size: {cfg.callout.font_size_title}px !important; margin-bottom: 10px; + word-break: break-word; }} .callout-content {{ font-size: {cfg.callout.font_size_content}px !important; + word-break: break-word; + overflow-wrap: break-word; }} -/* 表格优化 */ +/* 表格优化 - 严格防止溢出 */ table {{ width: 100%; break-inside: avoid; page-break-inside: avoid; + /* 表格布局固定 */ + table-layout: fixed; + max-width: 100%; + overflow: hidden; }} th {{ font-size: {cfg.table.font_size_header}px !important; padding: {cfg.table.cell_padding}px !important; + /* 表头文字控制 */ + word-break: break-word; + overflow-wrap: break-word; + hyphens: auto; + max-width: 100%; }} td {{ font-size: {cfg.table.font_size_body}px !important; padding: {cfg.table.cell_padding}px !important; max-width: {cfg.table.max_cell_width}px; + /* 强制换行,防止溢出 */ word-wrap: break-word; overflow-wrap: break-word; + word-break: break-word; + hyphens: auto; + white-space: normal; }} /* 图表优化 */ @@ -520,22 +779,85 @@ td {{ padding: {cfg.chart.padding}px; break-inside: avoid; page-break-inside: avoid; + /* 防止图表溢出 */ + overflow: hidden; + max-width: 100%; + box-sizing: border-box; }} .chart-title {{ font-size: {cfg.chart.font_size_title}px !important; + word-break: break-word; +}} + +/* Hero区域的KPI卡片 */ +.hero-kpi {{ + padding: {cfg.kpi_card.padding}px !important; + overflow: hidden; + box-sizing: border-box; +}} + +.hero-kpi .label {{ + font-size: {cfg.kpi_card.font_size_label}px !important; + word-break: break-word; + max-width: 100%; +}} + +.hero-kpi .value {{ + font-size: {cfg.kpi_card.font_size_value}px !important; + word-break: break-word; + overflow-wrap: break-word; + max-width: 100%; }} /* 防止标题孤行 */ h1, h2, h3, h4, h5, h6 {{ break-after: avoid; page-break-after: avoid; + word-break: break-word; + overflow-wrap: break-word; }} -/* 确保内容块不被分页 */ +/* 确保内容块不被分页且不溢出 */ .content-block {{ break-inside: avoid; page-break-inside: avoid; + overflow: hidden; + max-width: 100%; +}} + +/* 全局溢出防护 */ +* {{ + box-sizing: border-box; + max-width: 100%; +}} + +/* 特别控制数字和长单词 */ +.kpi-value, .value, .delta {{ + font-variant-numeric: tabular-nums; + letter-spacing: -0.02em; /* 稍微紧缩间距以节省空间 */ +}} + +/* 色块(badge)样式控制 */ +.badge, .callout {{ + display: inline-block; + max-width: 100%; + overflow: hidden; + text-overflow: ellipsis; + white-space: normal; +}} + +/* 响应式调整 */ +@media print {{ + /* 打印时更严格的控制 */ + * {{ + overflow: visible !important; + max-width: 100% !important; + }} + + .kpi-card, .callout, .chart-card {{ + overflow: hidden !important; + }} }} """