""" PDF渲染器 - 使用WeasyPrint从HTML生成PDF 支持完整的CSS样式和中文字体 """ from __future__ import annotations import base64 from pathlib import Path from typing import Any, Dict from datetime import datetime from loguru import logger try: from weasyprint import HTML, CSS from weasyprint.text.fonts import FontConfiguration WEASYPRINT_AVAILABLE = True except ImportError: WEASYPRINT_AVAILABLE = False logger.warning("WeasyPrint未安装,PDF导出功能将不可用") from .html_renderer import HTMLRenderer from .pdf_layout_optimizer import PDFLayoutOptimizer, PDFLayoutConfig from .chart_to_svg import create_chart_converter class PDFRenderer: """ 基于WeasyPrint的PDF渲染器 - 直接从HTML生成PDF,保留所有CSS样式 - 完美支持中文字体 - 自动处理分页和布局 """ def __init__( self, config: Dict[str, Any] | None = None, layout_optimizer: PDFLayoutOptimizer | None = None ): """ 初始化PDF渲染器 参数: config: 渲染器配置 layout_optimizer: PDF布局优化器(可选) """ self.config = config or {} self.html_renderer = HTMLRenderer(config) self.layout_optimizer = layout_optimizer or PDFLayoutOptimizer() if not WEASYPRINT_AVAILABLE: raise RuntimeError("WeasyPrint未安装,请运行: pip install weasyprint") # 初始化图表转换器 try: font_path = self._get_font_path() self.chart_converter = create_chart_converter(font_path=str(font_path)) logger.info("图表SVG转换器初始化成功") except Exception as e: logger.warning(f"图表SVG转换器初始化失败: {e},将使用表格降级") @staticmethod def _get_font_path() -> Path: """获取字体文件路径""" # 优先使用完整字体以确保字符覆盖 fonts_dir = Path(__file__).parent / "assets" / "fonts" # 检查完整字体 full_font = fonts_dir / "SourceHanSerifSC-Medium.otf" if full_font.exists(): logger.info(f"使用完整字体: {full_font}") return full_font # 检查TTF子集字体 subset_ttf = fonts_dir / "SourceHanSerifSC-Medium-Subset.ttf" if subset_ttf.exists(): logger.info(f"使用TTF子集字体: {subset_ttf}") return subset_ttf # 检查OTF子集字体 subset_otf = fonts_dir / "SourceHanSerifSC-Medium-Subset.otf" if subset_otf.exists(): logger.info(f"使用OTF子集字体: {subset_otf}") return subset_otf raise FileNotFoundError(f"未找到字体文件,请检查 {fonts_dir} 目录") def _convert_charts_to_svg(self, document_ir: Dict[str, Any]) -> Dict[str, str]: """ 将document_ir中的所有图表转换为SVG 参数: document_ir: Document IR数据 返回: Dict[str, str]: widgetId到SVG字符串的映射 """ svg_map = {} if not hasattr(self, 'chart_converter') or not self.chart_converter: logger.warning("图表转换器未初始化,跳过图表转换") return svg_map # 遍历所有章节 chapters = document_ir.get('chapters', []) for chapter in chapters: blocks = chapter.get('blocks', []) self._extract_and_convert_widgets(blocks, svg_map) logger.info(f"成功转换 {len(svg_map)} 个图表为SVG") return svg_map def _extract_and_convert_widgets( self, blocks: list, svg_map: Dict[str, str] ) -> None: """ 递归遍历blocks,找到所有widget并转换为SVG 参数: blocks: block列表 svg_map: 用于存储转换结果的字典 """ for block in blocks: if not isinstance(block, dict): continue block_type = block.get('type') # 处理widget类型 if block_type == 'widget': widget_id = block.get('widgetId') widget_type = block.get('widgetType', '') # 只处理chart.js类型的widget if widget_id and widget_type.startswith('chart.js'): try: svg_content = self.chart_converter.convert_widget_to_svg( block, width=800, height=500, dpi=100 ) if svg_content: svg_map[widget_id] = svg_content logger.debug(f"图表 {widget_id} 转换为SVG成功") else: logger.warning(f"图表 {widget_id} 转换为SVG失败") except Exception as e: logger.error(f"转换图表 {widget_id} 时出错: {e}") # 递归处理嵌套的blocks nested_blocks = block.get('blocks') if isinstance(nested_blocks, list): self._extract_and_convert_widgets(nested_blocks, svg_map) # 处理列表项 if block_type == 'list': items = block.get('items', []) for item in items: if isinstance(item, list): self._extract_and_convert_widgets(item, svg_map) # 处理表格单元格 if block_type == 'table': rows = block.get('rows', []) for row in rows: cells = row.get('cells', []) for cell in cells: cell_blocks = cell.get('blocks', []) if isinstance(cell_blocks, list): self._extract_and_convert_widgets(cell_blocks, svg_map) def _inject_svg_into_html(self, html: str, svg_map: Dict[str, str]) -> str: """ 将SVG内容直接注入到HTML中(不使用JavaScript) 参数: html: 原始HTML内容 svg_map: widgetId到SVG内容的映射 返回: str: 注入SVG后的HTML """ if not svg_map: return html import re # 为每个widgetId查找对应的canvas并替换为SVG for widget_id, svg_content in svg_map.items(): # 清理SVG内容(移除XML声明,因为SVG将嵌入HTML) svg_content = re.sub(r'<\?xml[^>]+\?>', '', svg_content) svg_content = re.sub(r']+>', '', svg_content) svg_content = svg_content.strip() # 创建SVG容器HTML svg_html = f'