From 5e82185bee14f3876d09821ec4a3d93c14883ac3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E4=B8=80=E4=B8=81?= <1769123563@qq.com>
Date: Tue, 18 Nov 2025 20:10:11 +0800
Subject: [PATCH] Modify the Logic for "Export as PDF"

---
 ReportEngine/flask_interface.py               | 156 +++++
 ReportEngine/renderers/__init__.py            |  26 +-
 ReportEngine/renderers/html_renderer.py       |   6 +-
 .../renderers/pdf_layout_optimizer.py         | 554 ++++++++++++++++++
 ReportEngine/renderers/pdf_renderer.py        | 250 ++++++++
 ReportEngine/scripts/export_to_pdf.py         |  80 +++
 templates/index.html                          |  96 ++-
 7 files changed, 1106 insertions(+), 62 deletions(-)
 create mode 100644 ReportEngine/renderers/pdf_layout_optimizer.py
 create mode 100644 ReportEngine/renderers/pdf_renderer.py
 create mode 100644 ReportEngine/scripts/export_to_pdf.py

diff --git a/ReportEngine/flask_interface.py b/ReportEngine/flask_interface.py
index 71955e3..79449f7 100644
--- a/ReportEngine/flask_interface.py
+++ b/ReportEngine/flask_interface.py
@@ -1008,3 +1008,159 @@ def clear_log():
             'success': False,
             'error': f'清空日志失败: {str(e)}'
         }), 500
+
+
+def _build_pdf_response(pdf_bytes, document_ir):
+    """
+    Wrap rendered PDF bytes in a file-download response.
+
+    The report topic is free text and is usually non-ASCII, while HTTP header
+    values must stay latin-1 encodable. The header therefore carries an
+    ASCII-only ``filename`` fallback plus an RFC 5987 ``filename*`` holding
+    the percent-encoded UTF-8 name.
+
+    Args:
+        pdf_bytes: The rendered PDF content.
+        document_ir: Document IR; ``metadata.topic`` is used in the file name
+            when present.
+
+    Returns:
+        Response: ``application/pdf`` attachment response.
+    """
+    from urllib.parse import quote
+
+    metadata = document_ir.get('metadata') if isinstance(document_ir, dict) else None
+    topic = metadata.get('topic') if isinstance(metadata, dict) else None
+    # Replace path separators, quotes and control characters so the topic
+    # cannot break out of the quoted header value or the file name.
+    topic = ''.join(
+        '_' if ch in '\\/:*?"<>|' or ch < ' ' else ch
+        for ch in str(topic or 'report')[:80]
+    )
+    pdf_filename = f"report_{topic}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
+    ascii_filename = ''.join(ch if ch.isascii() else '_' for ch in pdf_filename)
+
+    return Response(
+        pdf_bytes,
+        mimetype='application/pdf',
+        headers={
+            'Content-Disposition': (
+                f'attachment; filename="{ascii_filename}"; '
+                f"filename*=UTF-8''{quote(pdf_filename, safe='')}"
+            )
+        }
+    )
+
+
+@report_bp.route('/export/pdf/<task_id>', methods=['GET'])
+def export_pdf(task_id: str):
+    """
+    Export a finished report as a PDF download.
+
+    The PDF is rendered from the task's Document IR JSON file.
+
+    Args:
+        task_id: ID of the report task, taken from the URL path.
+
+    Query parameters:
+        optimize: ``true`` (default) enables layout optimization; any other
+            value disables it.
+
+    Returns:
+        Response: the PDF attachment, or a JSON error (400/404/500).
+    """
+    try:
+        # Look up the task
+        task = tasks_registry.get(task_id)
+        if not task:
+            return jsonify({
+                'success': False,
+                'error': '任务不存在'
+            }), 404
+
+        # Only completed tasks can be exported
+        if task.status != 'completed':
+            return jsonify({
+                'success': False,
+                'error': f'任务未完成,当前状态: {task.status}'
+            }), 400
+
+        # The IR file must have been written to disk
+        if not task.ir_file_path or not os.path.exists(task.ir_file_path):
+            return jsonify({
+                'success': False,
+                'error': 'IR文件不存在'
+            }), 404
+
+        # Load the Document IR
+        with open(task.ir_file_path, 'r', encoding='utf-8') as f:
+            document_ir = json.load(f)
+
+        # Layout optimization is on unless the caller passes optimize != 'true'
+        optimize = request.args.get('optimize', 'true').lower() == 'true'
+
+        # Create the renderer and produce the PDF bytes
+        from .renderers import PDFRenderer
+        renderer = PDFRenderer()
+
+        logger.info(f"开始导出PDF,任务ID: {task_id},布局优化: {optimize}")
+
+        pdf_bytes = renderer.render_to_bytes(document_ir, optimize_layout=optimize)
+
+        return _build_pdf_response(pdf_bytes, document_ir)
+
+    except Exception as e:
+        logger.exception(f"导出PDF失败: {str(e)}")
+        return jsonify({
+            'success': False,
+            'error': f'导出PDF失败: {str(e)}'
+        }), 500
+
+
+@report_bp.route('/export/pdf-from-ir', methods=['POST'])
+def export_pdf_from_ir():
+    """
+    Export a PDF directly from a Document IR payload (no task ID needed).
+
+    Intended for a front end that sends the IR data itself.
+
+    Request body:
+        {
+            "document_ir": {...},  // Document IR JSON
+            "optimize": true       // enable layout optimization (optional)
+        }
+
+    Returns:
+        Response: the PDF attachment, or a JSON error (400/500).
+    """
+    try:
+        # silent=True makes a missing or malformed JSON body return None, so it
+        # is reported by the 400 below rather than by the generic 500 handler.
+        data = request.get_json(silent=True)
+
+        if not isinstance(data, dict) or 'document_ir' not in data:
+            return jsonify({
+                'success': False,
+                'error': '缺少document_ir参数'
+            }), 400
+
+        document_ir = data['document_ir']
+        optimize = data.get('optimize', True)
+
+        # Create the renderer and produce the PDF bytes
+        from .renderers import PDFRenderer
+        renderer = PDFRenderer()
+
+        logger.info(f"从IR直接导出PDF,布局优化: {optimize}")
+
+        pdf_bytes = renderer.render_to_bytes(document_ir, optimize_layout=optimize)
+
+        return _build_pdf_response(pdf_bytes, document_ir)
+
+    except Exception as e:
+        logger.exception(f"从IR导出PDF失败: {str(e)}")
+        return jsonify({
+            'success': False,
+            'error': f'导出PDF失败: {str(e)}'
+        }), 500
+
diff --git a/ReportEngine/renderers/__init__.py 
b/ReportEngine/renderers/__init__.py index bcc897e..931dc2a 100644 --- a/ReportEngine/renderers/__init__.py +++ b/ReportEngine/renderers/__init__.py @@ -1,9 +1,31 @@ """ Report Engine渲染器集合。 -目前仅提供 HTMLRenderer,未来可扩展为PDF/Markdown等输出。 +提供 HTMLRenderer 和 PDFRenderer,支持HTML和PDF输出。 """ from .html_renderer import HTMLRenderer +from .pdf_renderer import PDFRenderer +from .pdf_layout_optimizer import ( + PDFLayoutOptimizer, + PDFLayoutConfig, + PageLayout, + KPICardLayout, + CalloutLayout, + TableLayout, + ChartLayout, + GridLayout, +) -__all__ = ["HTMLRenderer"] +__all__ = [ + "HTMLRenderer", + "PDFRenderer", + "PDFLayoutOptimizer", + "PDFLayoutConfig", + "PageLayout", + "KPICardLayout", + "CalloutLayout", + "TableLayout", + "ChartLayout", + "GridLayout", +] diff --git a/ReportEngine/renderers/html_renderer.py b/ReportEngine/renderers/html_renderer.py index 0cdf3f0..1575fb1 100644 --- a/ReportEngine/renderers/html_renderer.py +++ b/ReportEngine/renderers/html_renderer.py @@ -102,7 +102,7 @@ class HTMLRenderer: @staticmethod def _get_font_path() -> Path: """返回PDF导出所需字体的路径(使用优化后的子集字体)""" - return Path(__file__).parent / "assets" / "fonts" / "SourceHanSerifSC-Medium-Subset.otf" + return Path(__file__).parent / "assets" / "fonts" / "SourceHanSerifSC-Medium-Subset.ttf" def _load_lib(self, filename: str) -> str: """ @@ -2881,8 +2881,8 @@ function exportPdf() { const pdf = new jspdf.jsPDF('p', 'mm', 'a4'); try { if (window.pdfFontData) { - pdf.addFileToVFS('SourceHanSerifSC-Medium.otf', window.pdfFontData); - pdf.addFont('SourceHanSerifSC-Medium.otf', 'SourceHanSerif', 'normal'); + pdf.addFileToVFS('SourceHanSerifSC-Medium.ttf', window.pdfFontData); + pdf.addFont('SourceHanSerifSC-Medium.ttf', 'SourceHanSerif', 'normal'); pdf.setFont('SourceHanSerif'); console.log('PDF字体已成功加载'); } else { diff --git a/ReportEngine/renderers/pdf_layout_optimizer.py b/ReportEngine/renderers/pdf_layout_optimizer.py new file mode 100644 index 0000000..b913580 --- /dev/null +++ 
b/ReportEngine/renderers/pdf_layout_optimizer.py @@ -0,0 +1,554 @@ +""" +PDF布局优化器 + +自动分析和优化PDF布局,确保内容不溢出、排版美观。 +支持: +- 自动调整字号 +- 优化行间距 +- 调整色块大小 +- 智能排列信息块 +- 保存和加载优化方案 +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Dict, List, Optional +from dataclasses import dataclass, asdict +from datetime import datetime +from loguru import logger + + +@dataclass +class KPICardLayout: + """KPI卡片布局配置""" + font_size_value: int = 32 # 数值字号 + font_size_label: int = 14 # 标签字号 + font_size_change: int = 13 # 变化值字号 + padding: int = 20 # 内边距 + min_height: int = 120 # 最小高度 + value_max_length: int = 10 # 数值最大字符数(超过则缩小字号) + + +@dataclass +class CalloutLayout: + """提示框布局配置""" + font_size_title: int = 16 # 标题字号 + font_size_content: int = 14 # 内容字号 + padding: int = 20 # 内边距 + line_height: float = 1.6 # 行高倍数 + max_width: str = "100%" # 最大宽度 + + +@dataclass +class TableLayout: + """表格布局配置""" + font_size_header: int = 13 # 表头字号 + font_size_body: int = 12 # 表体字号 + cell_padding: int = 12 # 单元格内边距 + max_cell_width: int = 200 # 最大单元格宽度(像素) + overflow_strategy: str = "wrap" # 溢出策略:wrap(换行) / ellipsis(省略号) + + +@dataclass +class ChartLayout: + """图表布局配置""" + font_size_title: int = 16 # 图表标题字号 + font_size_label: int = 12 # 标签字号 + min_height: int = 300 # 最小高度 + max_height: int = 600 # 最大高度 + padding: int = 20 # 内边距 + + +@dataclass +class GridLayout: + """网格布局配置""" + columns: int = 2 # 每行列数 + gap: int = 20 # 间距 + responsive_breakpoint: int = 768 # 响应式断点(宽度) + + +@dataclass +class PageLayout: + """页面整体布局配置""" + font_size_base: int = 14 # 基础字号 + font_size_h1: int = 28 # 一级标题 + font_size_h2: int = 24 # 二级标题 + font_size_h3: int = 20 # 三级标题 + font_size_h4: int = 16 # 四级标题 + line_height: float = 1.6 # 行高倍数 + paragraph_spacing: int = 16 # 段落间距 + section_spacing: int = 32 # 章节间距 + page_padding: int = 40 # 页面边距 + max_content_width: int = 800 # 最大内容宽度 + + +@dataclass +class PDFLayoutConfig: + """完整的PDF布局配置""" + page: PageLayout + kpi_card: 
KPICardLayout + callout: CalloutLayout + table: TableLayout + chart: ChartLayout + grid: GridLayout + + # 优化策略配置 + auto_adjust_font_size: bool = True # 自动调整字号 + auto_adjust_grid_columns: bool = True # 自动调整网格列数 + prevent_orphan_headers: bool = True # 防止标题孤行 + optimize_for_print: bool = True # 打印优化 + + def to_dict(self) -> Dict[str, Any]: + """转换为字典""" + return { + 'page': asdict(self.page), + 'kpi_card': asdict(self.kpi_card), + 'callout': asdict(self.callout), + 'table': asdict(self.table), + 'chart': asdict(self.chart), + 'grid': asdict(self.grid), + 'auto_adjust_font_size': self.auto_adjust_font_size, + 'auto_adjust_grid_columns': self.auto_adjust_grid_columns, + 'prevent_orphan_headers': self.prevent_orphan_headers, + 'optimize_for_print': self.optimize_for_print, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> PDFLayoutConfig: + """从字典创建配置""" + return cls( + page=PageLayout(**data['page']), + kpi_card=KPICardLayout(**data['kpi_card']), + callout=CalloutLayout(**data['callout']), + table=TableLayout(**data['table']), + chart=ChartLayout(**data['chart']), + grid=GridLayout(**data['grid']), + auto_adjust_font_size=data.get('auto_adjust_font_size', True), + auto_adjust_grid_columns=data.get('auto_adjust_grid_columns', True), + prevent_orphan_headers=data.get('prevent_orphan_headers', True), + optimize_for_print=data.get('optimize_for_print', True), + ) + + +class PDFLayoutOptimizer: + """ + PDF布局优化器 + + 根据内容特征自动优化PDF布局,防止溢出和排版问题。 + """ + + def __init__(self, config: Optional[PDFLayoutConfig] = None): + """ + 初始化优化器 + + 参数: + config: 布局配置,如果为None则使用默认配置 + """ + self.config = config or self._create_default_config() + self.optimization_log = [] + + @staticmethod + def _create_default_config() -> PDFLayoutConfig: + """创建默认配置""" + return PDFLayoutConfig( + page=PageLayout(), + kpi_card=KPICardLayout(), + callout=CalloutLayout(), + table=TableLayout(), + chart=ChartLayout(), + grid=GridLayout(), + ) + + def optimize_for_document(self, document_ir: 
Dict[str, Any]) -> PDFLayoutConfig: + """ + 根据文档IR内容优化布局配置 + + 参数: + document_ir: Document IR数据 + + 返回: + PDFLayoutConfig: 优化后的布局配置 + """ + logger.info("开始分析文档并优化布局...") + + # 分析文档结构 + stats = self._analyze_document(document_ir) + + # 根据分析结果调整配置 + optimized_config = self._adjust_config_based_on_stats(stats) + + # 记录优化日志 + self._log_optimization(stats, optimized_config) + + return optimized_config + + def _analyze_document(self, document_ir: Dict[str, Any]) -> Dict[str, Any]: + """ + 分析文档内容特征 + + 返回统计信息: + - kpi_count: KPI卡片数量 + - table_count: 表格数量 + - chart_count: 图表数量 + - max_kpi_value_length: 最长KPI数值长度 + - max_table_columns: 最多表格列数 + - total_content_length: 总内容长度 + """ + stats = { + 'kpi_count': 0, + 'table_count': 0, + 'chart_count': 0, + 'callout_count': 0, + 'max_kpi_value_length': 0, + 'max_table_columns': 0, + 'max_table_rows': 0, + 'total_content_length': 0, + 'has_long_text': False, + } + + # 遍历章节 + sections = document_ir.get('sections', []) + for section in sections: + self._analyze_section(section, stats) + + logger.info(f"文档分析完成: {stats}") + return stats + + def _analyze_section(self, section: Dict[str, Any], stats: Dict[str, Any]): + """递归分析章节""" + children = section.get('children', []) + + for child in children: + node_type = child.get('type') + + if node_type == 'kpi_grid': + kpis = child.get('kpis', []) + stats['kpi_count'] += len(kpis) + + # 检查KPI数值长度 + for kpi in kpis: + value = str(kpi.get('value', '')) + stats['max_kpi_value_length'] = max( + stats['max_kpi_value_length'], + len(value) + ) + + elif node_type == 'table': + stats['table_count'] += 1 + + # 分析表格结构 + headers = child.get('headers', []) + rows = child.get('rows', []) + stats['max_table_columns'] = max( + stats['max_table_columns'], + len(headers) + ) + stats['max_table_rows'] = max( + stats['max_table_rows'], + len(rows) + ) + + elif node_type == 'chart': + stats['chart_count'] += 1 + + elif node_type == 'callout': + stats['callout_count'] += 1 + content = child.get('content', '') + if 
len(content) > 200:
+                    stats['has_long_text'] = True
+
+            elif node_type == 'paragraph':
+                text = child.get('text', '')
+                stats['total_content_length'] += len(text)
+                if len(text) > 500:
+                    stats['has_long_text'] = True
+
+            # Recurse into nested sections
+            if node_type == 'section':
+                self._analyze_section(child, stats)
+
+    def _adjust_config_based_on_stats(
+        self,
+        stats: Dict[str, Any]
+    ) -> PDFLayoutConfig:
+        """Return a copy of self.config adjusted to the stats; appends a note per change to self.optimization_log."""
+        config = PDFLayoutConfig(
+            page=PageLayout(**asdict(self.config.page)),
+            kpi_card=KPICardLayout(**asdict(self.config.kpi_card)),
+            callout=CalloutLayout(**asdict(self.config.callout)),
+            table=TableLayout(**asdict(self.config.table)),
+            chart=ChartLayout(**asdict(self.config.chart)),
+            grid=GridLayout(**asdict(self.config.grid)),
+            auto_adjust_font_size=self.config.auto_adjust_font_size,
+            auto_adjust_grid_columns=self.config.auto_adjust_grid_columns,
+            prevent_orphan_headers=self.config.prevent_orphan_headers,
+            optimize_for_print=self.config.optimize_for_print,
+        )
+
+        # Shrink the KPI value font for long values; test the larger threshold first so it is reachable
+        if stats['max_kpi_value_length'] > 15:
+            config.kpi_card.font_size_value = 24
+            self.optimization_log.append(
+                f"KPI数值很长({stats['max_kpi_value_length']}字符),"
+                f"字号从32调整为24"
+            )
+        elif stats['max_kpi_value_length'] > 10:
+            config.kpi_card.font_size_value = 28
+            self.optimization_log.append(
+                f"KPI数值过长({stats['max_kpi_value_length']}字符),"
+                f"字号从32调整为28"
+            )
+
+        # Choose the grid column count from the number of KPI cards
+        if stats['kpi_count'] > 6:
+            config.grid.columns = 3
+            config.kpi_card.min_height = 100
+            self.optimization_log.append(
+                f"KPI卡片较多({stats['kpi_count']}个),"
+                f"每行列数从2调整为3"
+            )
+        elif stats['kpi_count'] <= 2:
+            config.grid.columns = 1
+            self.optimization_log.append(
+                f"KPI卡片较少({stats['kpi_count']}个),"
+                f"每行列数从2调整为1"
+            )
+
+        # Wide tables get smaller fonts and tighter cell padding
+        if stats['max_table_columns'] > 6:
+            config.table.font_size_header = 11
+            config.table.font_size_body = 10
+            config.table.cell_padding = 8
+            self.optimization_log.append(
+                f"表格列数较多({stats['max_table_columns']}列),"
+                f"缩小字号和内边距"
+            )
+
+        # Long text gets a larger line height
+        if 
stats['has_long_text']: + config.page.line_height = 1.8 + config.callout.line_height = 1.8 + self.optimization_log.append( + "检测到长文本,增加行高至1.8提高可读性" + ) + + return config + + def _log_optimization( + self, + stats: Dict[str, Any], + config: PDFLayoutConfig + ): + """记录优化过程""" + log_entry = { + 'timestamp': datetime.now().isoformat(), + 'document_stats': stats, + 'optimizations': self.optimization_log.copy(), + 'final_config': config.to_dict(), + } + + logger.info(f"布局优化完成,应用了{len(self.optimization_log)}项优化") + for opt in self.optimization_log: + logger.info(f" - {opt}") + + # 清空日志供下次使用 + self.optimization_log.clear() + + return log_entry + + def save_config(self, path: str | Path, log_entry: Optional[Dict] = None): + """ + 保存配置到文件 + + 参数: + path: 保存路径 + log_entry: 优化日志条目(可选) + """ + path = Path(path) + path.parent.mkdir(parents=True, exist_ok=True) + + data = { + 'config': self.config.to_dict(), + } + + if log_entry: + data['optimization_log'] = log_entry + + with open(path, 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + logger.info(f"布局配置已保存: {path}") + + @classmethod + def load_config(cls, path: str | Path) -> PDFLayoutOptimizer: + """ + 从文件加载配置 + + 参数: + path: 配置文件路径 + + 返回: + PDFLayoutOptimizer: 加载了配置的优化器实例 + """ + path = Path(path) + + if not path.exists(): + logger.warning(f"配置文件不存在: {path},使用默认配置") + return cls() + + with open(path, 'r', encoding='utf-8') as f: + data = json.load(f) + + config = PDFLayoutConfig.from_dict(data['config']) + optimizer = cls(config) + + logger.info(f"布局配置已加载: {path}") + return optimizer + + def generate_pdf_css(self) -> str: + """ + 根据当前配置生成PDF专用CSS + + 返回: + str: CSS样式字符串 + """ + cfg = self.config + + css = f""" +/* PDF布局优化样式 - 由PDFLayoutOptimizer自动生成 */ + +/* 页面基础样式 */ +body {{ + font-size: {cfg.page.font_size_base}px; + line-height: {cfg.page.line_height}; +}} + +main {{ + padding: {cfg.page.page_padding}px !important; + max-width: {cfg.page.max_content_width}px; + margin: 0 auto; +}} + +/* 
标题样式 */ +h1 {{ font-size: {cfg.page.font_size_h1}px !important; }} +h2 {{ font-size: {cfg.page.font_size_h2}px !important; }} +h3 {{ font-size: {cfg.page.font_size_h3}px !important; }} +h4 {{ font-size: {cfg.page.font_size_h4}px !important; }} + +/* 段落间距 */ +p {{ + margin-bottom: {cfg.page.paragraph_spacing}px; +}} + +.chapter {{ + margin-bottom: {cfg.page.section_spacing}px; +}} + +/* KPI卡片优化 */ +.kpi-grid {{ + display: grid; + grid-template-columns: repeat({cfg.grid.columns}, 1fr); + gap: {cfg.grid.gap}px; + margin: 20px 0; +}} + +.kpi-card {{ + padding: {cfg.kpi_card.padding}px !important; + min-height: {cfg.kpi_card.min_height}px; + break-inside: avoid; + page-break-inside: avoid; +}} + +.kpi-card .value {{ + font-size: {cfg.kpi_card.font_size_value}px !important; + line-height: 1.2; + word-break: break-word; +}} + +.kpi-card .label {{ + font-size: {cfg.kpi_card.font_size_label}px !important; +}} + +.kpi-card .change {{ + font-size: {cfg.kpi_card.font_size_change}px !important; +}} + +/* 提示框优化 */ +.callout {{ + padding: {cfg.callout.padding}px !important; + margin: 20px 0; + line-height: {cfg.callout.line_height}; + break-inside: avoid; + page-break-inside: avoid; +}} + +.callout-title {{ + font-size: {cfg.callout.font_size_title}px !important; + margin-bottom: 10px; +}} + +.callout-content {{ + font-size: {cfg.callout.font_size_content}px !important; +}} + +/* 表格优化 */ +table {{ + width: 100%; + break-inside: avoid; + page-break-inside: avoid; +}} + +th {{ + font-size: {cfg.table.font_size_header}px !important; + padding: {cfg.table.cell_padding}px !important; +}} + +td {{ + font-size: {cfg.table.font_size_body}px !important; + padding: {cfg.table.cell_padding}px !important; + max-width: {cfg.table.max_cell_width}px; + word-wrap: break-word; + overflow-wrap: break-word; +}} + +/* 图表优化 */ +.chart-card {{ + min-height: {cfg.chart.min_height}px; + max-height: {cfg.chart.max_height}px; + padding: {cfg.chart.padding}px; + break-inside: avoid; + page-break-inside: 
avoid;
+}}
+
+.chart-title {{
+    font-size: {cfg.chart.font_size_title}px !important;
+}}
+
+/* 防止标题孤行 */
+h1, h2, h3, h4, h5, h6 {{
+    break-after: avoid;
+    page-break-after: avoid;
+}}
+
+/* 确保内容块不被分页 */
+.content-block {{
+    break-inside: avoid;
+    page-break-inside: avoid;
+}}
+"""
+
+        return css
+
+
+__all__ = [
+    'PDFLayoutOptimizer',
+    'PDFLayoutConfig',
+    'PageLayout',
+    'KPICardLayout',
+    'CalloutLayout',
+    'TableLayout',
+    'ChartLayout',
+    'GridLayout',
+]
diff --git a/ReportEngine/renderers/pdf_renderer.py b/ReportEngine/renderers/pdf_renderer.py
new file mode 100644
index 0000000..68b0566
--- /dev/null
+++ b/ReportEngine/renderers/pdf_renderer.py
@@ -0,0 +1,250 @@
+"""
+PDF renderer - generates PDF from HTML with WeasyPrint.
+Supports full CSS styling and Chinese fonts.
+"""
+
+from __future__ import annotations
+
+import base64
+from pathlib import Path
+from typing import Any, Dict
+from datetime import datetime
+from loguru import logger
+
+try:
+    from weasyprint import HTML, CSS
+    from weasyprint.text.fonts import FontConfiguration
+    WEASYPRINT_AVAILABLE = True
+except (ImportError, OSError):
+    # ImportError: package missing. OSError: package present but its native
+    # libraries (Pango etc.) could not be loaded.
+    WEASYPRINT_AVAILABLE = False
+    logger.warning("WeasyPrint未安装,PDF导出功能将不可用")
+
+from .html_renderer import HTMLRenderer
+from .pdf_layout_optimizer import PDFLayoutOptimizer, PDFLayoutConfig
+
+
+class PDFRenderer:
+    """
+    PDF renderer built on WeasyPrint.
+
+    Renders a Document IR to HTML with HTMLRenderer, injects an embedded
+    font and the layout optimizer's CSS, and lets WeasyPrint paginate it.
+    """
+
+    def __init__(
+        self,
+        config: Dict[str, Any] | None = None,
+        layout_optimizer: PDFLayoutOptimizer | None = None
+    ):
+        """
+        Initialize the PDF renderer.
+
+        Args:
+            config: Renderer configuration, passed on to HTMLRenderer.
+            layout_optimizer: PDF layout optimizer; a default one is created
+                when omitted.
+
+        Raises:
+            RuntimeError: If WeasyPrint could not be imported.
+        """
+        # Fail fast before building any collaborators
+        if not WEASYPRINT_AVAILABLE:
+            raise RuntimeError("WeasyPrint未安装,请运行: pip install weasyprint")
+
+        self.config = config or {}
+        self.html_renderer = HTMLRenderer(config)
+        self.layout_optimizer = layout_optimizer or PDFLayoutOptimizer()
+
+    @staticmethod
+    def _get_font_path() -> Path:
+        """
+        Return the first available font file.
+
+        Raises:
+            FileNotFoundError: If none of the candidate fonts exists.
+        """
+        fonts_dir = Path(__file__).parent / "assets" / "fonts"
+
+        # Prefer the full font for complete glyph coverage
+        full_font = fonts_dir / "SourceHanSerifSC-Medium.otf"
+        if full_font.exists():
+            logger.info(f"使用完整字体: {full_font}")
+            return full_font
+
+        # Then the TTF subset
+        subset_ttf = fonts_dir / "SourceHanSerifSC-Medium-Subset.ttf"
+        if subset_ttf.exists():
+            logger.info(f"使用TTF子集字体: {subset_ttf}")
+            return subset_ttf
+
+        # Finally the OTF subset
+        subset_otf = fonts_dir / "SourceHanSerifSC-Medium-Subset.otf"
+        if subset_otf.exists():
+            logger.info(f"使用OTF子集字体: {subset_otf}")
+            return subset_otf
+
+        raise FileNotFoundError(f"未找到字体文件,请检查 {fonts_dir} 目录")
+
+    def _get_pdf_html(
+        self,
+        document_ir: Dict[str, Any],
+        optimize_layout: bool = True
+    ) -> str:
+        """
+        Build the HTML document that is handed to WeasyPrint.
+
+        - renders the base HTML with HTMLRenderer
+        - embeds the font file as a base64 @font-face
+        - appends the CSS produced by the layout optimizer
+
+        Args:
+            document_ir: Document IR data.
+            optimize_layout: When True, analyse the document and use a layout
+                tuned for it; when False, use the optimizer's current config.
+
+        Returns:
+            str: HTML with the PDF-specific style block injected.
+        """
+        optimizer = self.layout_optimizer
+        base_config = optimizer.config
+
+        try:
+            if optimize_layout:
+                logger.info("启用PDF布局优化...")
+
+                # Analyse once and log once: _log_optimization() clears the
+                # optimizer's pending list, so a second call would record
+                # no optimizations.
+                stats = optimizer._analyze_document(document_ir)
+                optimizer.config = optimizer._adjust_config_based_on_stats(stats)
+                optimization_log = optimizer._log_optimization(stats, optimizer.config)
+
+                # Saving the layout log is best-effort: an unwritable working
+                # directory must not make the export itself fail.
+                log_file = (
+                    Path('logs/pdf_layouts')
+                    / f"layout_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.json"
+                )
+                try:
+                    optimizer.save_config(log_file, optimization_log)
+                except OSError as exc:
+                    logger.warning(f"保存布局优化日志失败: {exc}")
+
+            # generate_pdf_css() reads optimizer.config, so it has to run
+            # while the tuned config is installed.
+            optimized_css = optimizer.generate_pdf_css()
+        finally:
+            # Restore the baseline so adjustments made for this document do
+            # not accumulate into the next render.
+            optimizer.config = base_config
+
+        # Render the base HTML
+        html = self.html_renderer.render(document_ir)
+
+        # Embed the font as base64 so the PDF needs no external font file
+        font_path = self._get_font_path()
+        font_base64 = base64.b64encode(font_path.read_bytes()).decode('ascii')
+        is_otf = font_path.suffix == '.otf'
+        font_format = 'opentype' if is_otf else 'truetype'
+        font_mime = 'font/otf' if is_otf else 'font/ttf'
+
+        # NOTE(review): the content of this style block is missing from this
+        # copy of the patch. The rules below are a minimal reconstruction from
+        # the values computed above - confirm the font-family name and any
+        # further print rules against the original commit.
+        pdf_css = f"""
+<style>
+@font-face {{
+    font-family: 'SourceHanSerif';
+    src: url(data:{font_mime};base64,{font_base64}) format('{font_format}');
+}}
+
+body {{
+    font-family: 'SourceHanSerif', serif;
+}}
+
+{optimized_css}
+</style>
+"""
+
+        # Insert the PDF-specific CSS right before </head>
+        if '</head>' in html:
+            html = html.replace('</head>', f'{pdf_css}\n</head>', 1)
+        else:
+            # No closing head tag to anchor on: put the styles in front
+            html = pdf_css + html
+
+        return html
+
+    def render_to_pdf(
+        self,
+        document_ir: 
Dict[str, Any], + output_path: str | Path, + optimize_layout: bool = True + ) -> Path: + """ + 将Document IR渲染为PDF文件 + + 参数: + document_ir: Document IR数据 + output_path: PDF输出路径 + optimize_layout: 是否启用布局优化(默认True) + + 返回: + Path: 生成的PDF文件路径 + """ + output_path = Path(output_path) + + logger.info(f"开始生成PDF: {output_path}") + + # 生成HTML内容 + html_content = self._get_pdf_html(document_ir, optimize_layout) + + # 配置字体 + font_config = FontConfiguration() + + # 从HTML字符串创建WeasyPrint HTML对象 + html_doc = HTML(string=html_content, base_url=str(Path.cwd())) + + # 生成PDF + try: + html_doc.write_pdf( + output_path, + font_config=font_config, + presentational_hints=True # 保留HTML的呈现提示 + ) + logger.info(f"✓ PDF生成成功: {output_path}") + return output_path + + except Exception as e: + logger.error(f"PDF生成失败: {e}") + raise + + def render_to_bytes( + self, + document_ir: Dict[str, Any], + optimize_layout: bool = True + ) -> bytes: + """ + 将Document IR渲染为PDF字节流 + + 参数: + document_ir: Document IR数据 + optimize_layout: 是否启用布局优化(默认True) + + 返回: + bytes: PDF文件的字节内容 + """ + html_content = self._get_pdf_html(document_ir, optimize_layout) + font_config = FontConfiguration() + html_doc = HTML(string=html_content, base_url=str(Path.cwd())) + + return html_doc.write_pdf( + font_config=font_config, + presentational_hints=True + ) + + +__all__ = ["PDFRenderer"] diff --git a/ReportEngine/scripts/export_to_pdf.py b/ReportEngine/scripts/export_to_pdf.py new file mode 100644 index 0000000..1afb701 --- /dev/null +++ b/ReportEngine/scripts/export_to_pdf.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +""" +PDF导出工具 - 使用Python直接生成PDF,无乱码 + +用法: + python ReportEngine/scripts/export_to_pdf.py <报告IR JSON文件> [输出PDF路径] + +示例: + python ReportEngine/scripts/export_to_pdf.py final_reports/ir/report_ir_xxx.json output.pdf + python ReportEngine/scripts/export_to_pdf.py final_reports/ir/report_ir_xxx.json +""" + +import sys +import json +from pathlib import Path +from loguru import logger + +from ReportEngine.renderers import 
PDFRenderer
+
+
+def export_to_pdf(ir_json_path: str, output_pdf_path: str = None) -> bool:
+    """
+    Render a Document IR JSON file to a PDF file.
+
+    Args:
+        ir_json_path: Path of the Document IR JSON file.
+        output_pdf_path: Output PDF path; defaults to the IR file's name with a .pdf suffix.
+
+    Returns:
+        bool: True when the PDF was written, False on any failure (logged).
+    """
+    ir_path = Path(ir_json_path)
+
+    if not ir_path.exists():
+        logger.error(f"文件不存在: {ir_path}")
+        return False
+
+    if output_pdf_path is None:
+        output_pdf_path = ir_path.parent / f"{ir_path.stem}.pdf"
+    else:
+        output_pdf_path = Path(output_pdf_path)
+
+    # Reading/parsing the IR and creating the renderer can fail too, so they
+    # sit inside the try and the function always returns a bool.
+    try:
+        logger.info(f"读取报告: {ir_path}")
+        with open(ir_path, 'r', encoding='utf-8') as f:
+            document_ir = json.load(f)
+        logger.info("开始生成PDF...")
+        renderer = PDFRenderer()
+        renderer.render_to_pdf(document_ir, output_pdf_path)
+        logger.success(f"✓ PDF已生成: {output_pdf_path}")
+        return True
+    except Exception as e:
+        logger.error(f"✗ PDF生成失败: {e}")
+        logger.exception("详细错误信息:")
+        return False
+
+
+def main():
+    """Command-line entry point: IR_JSON [OUTPUT_PDF]; exits 0 on success, 1 otherwise."""
+    if len(sys.argv) < 2:
+        print(__doc__)
+        sys.exit(1)
+
+    ir_json_path = sys.argv[1]
+    output_pdf_path = sys.argv[2] if len(sys.argv) > 2 else None
+
+    # The import at the top of this script has already loaded WeasyPrint, so
+    # changing DYLD_LIBRARY_PATH here would come too late; only warn (macOS).
+    import os
+    if sys.platform == 'darwin' and 'DYLD_LIBRARY_PATH' not in os.environ:
+        logger.warning("未设置DYLD_LIBRARY_PATH,如系统库加载失败,请在启动前设置(例如 /opt/homebrew/lib)")
+
+    success = export_to_pdf(ir_json_path, output_pdf_path)
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/templates/index.html b/templates/index.html
index aee3cc6..0b78983 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -3702,71 +3702,53 @@
         }
 
         async function downloadPdfFromPreview() {
-            const iframe = document.getElementById('report-iframe');
             const btn = document.getElementById('downloadPdfButton');
-            if (!iframe || !iframe.contentDocument) {
-                showMessage('请先加载报告预览再下载PDF', 'error');
-                return;
-            }
-            const target = iframe.contentDocument.documentElement;
-            if (!target) {
-                showMessage('报告内容未就绪', 'error');
+            const taskId = btn?.dataset.taskId;
+
+            if (!taskId) {
+                showMessage('无可用的报告任务,请先生成报告', 
'error'); return; } + if (btn) btn.disabled = true; - showMessage('正在生成PDF,请稍候...', 'info'); + showMessage('正在生成优化的PDF,请稍候...', 'info'); + try { - const { jsPDF } = window.jspdf || {}; - if (!jsPDF) { - throw new Error('PDF依赖未加载'); - } - const pdf = new jsPDF('p', 'mm', 'a4'); - - // 添加中文字体支持 - try { - const fontData = iframe.contentWindow.pdfFontData || window.pdfFontData; - if (fontData) { - pdf.addFileToVFS('SourceHanSerifSC-Medium.otf', fontData); - pdf.addFont('SourceHanSerifSC-Medium.otf', 'SourceHanSerif', 'normal'); - pdf.setFont('SourceHanSerif'); - console.log('PDF字体已加载:SourceHanSerif'); - } else { - console.warn('PDF字体数据未找到,将使用默认字体'); - } - } catch (fontErr) { - console.warn('PDF字体加载失败:', fontErr); - } - - const pageWidth = pdf.internal.pageSize.getWidth(); - const pxWidth = Math.max(target.scrollWidth || 0, Math.round(pageWidth * 3.78)); - const renderTask = pdf.html(target, { - x: 10, - y: 10, - width: pageWidth - 20, - windowWidth: pxWidth, - margin: [10, 10, 16, 10], - autoPaging: 'text', - html2canvas: { - scale: Math.min(1.5, Math.max(1.0, pageWidth / (target.clientWidth || pageWidth))), - useCORS: true, - scrollX: 0, - scrollY: -iframe.contentWindow.scrollY, - logging: false, - allowTaint: true, - backgroundColor: '#ffffff' - }, - pagebreak: { - mode: ['css', 'legacy'], - avoid: ['.chapter', '.callout', '.chart-card', '.table-wrap', '.kpi-grid', '.hero-section'], - before: '.chapter-divider' - } + // 调用后端PDF导出API + const response = await fetch(`/api/report/export/pdf/${taskId}?optimize=true`, { + method: 'GET' }); - await (renderTask && typeof renderTask.then === 'function' ? 
renderTask : Promise.resolve()); - pdf.save('report.pdf'); + + if (!response.ok) { + const error = await response.json(); + throw new Error(error.error || 'PDF导出失败'); + } + + // 获取PDF文件名(从响应头) + const contentDisposition = response.headers.get('Content-Disposition'); + let filename = 'report.pdf'; + if (contentDisposition) { + const matches = /filename="?([^"]+)"?/.exec(contentDisposition); + if (matches && matches[1]) { + filename = matches[1]; + } + } + + // 下载PDF + const blob = await response.blob(); + const url = window.URL.createObjectURL(blob); + const link = document.createElement('a'); + link.href = url; + link.download = filename; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + window.URL.revokeObjectURL(url); + showMessage('PDF生成完成,已开始下载', 'success'); } catch (err) { - console.error('生成PDF失败:', err); - showMessage('生成PDF失败: ' + err.message, 'error'); + console.error('导出PDF失败:', err); + showMessage('导出PDF失败: ' + err.message, 'error'); } finally { if (btn) btn.disabled = false; }