Files
bettafish-company/ReportEngine/renderers/pdf_renderer.py
T
2025-11-18 20:10:11 +08:00

251 lines
7.0 KiB
Python

"""
PDF渲染器 - 使用WeasyPrint从HTML生成PDF
支持完整的CSS样式和中文字体
"""
from __future__ import annotations
import base64
from pathlib import Path
from typing import Any, Dict
from datetime import datetime
from loguru import logger
try:
from weasyprint import HTML, CSS
from weasyprint.text.fonts import FontConfiguration
WEASYPRINT_AVAILABLE = True
except ImportError:
WEASYPRINT_AVAILABLE = False
logger.warning("WeasyPrint未安装,PDF导出功能将不可用")
from .html_renderer import HTMLRenderer
from .pdf_layout_optimizer import PDFLayoutOptimizer, PDFLayoutConfig
class PDFRenderer:
"""
基于WeasyPrint的PDF渲染器
- 直接从HTML生成PDF,保留所有CSS样式
- 完美支持中文字体
- 自动处理分页和布局
"""
def __init__(
self,
config: Dict[str, Any] | None = None,
layout_optimizer: PDFLayoutOptimizer | None = None
):
"""
初始化PDF渲染器
参数:
config: 渲染器配置
layout_optimizer: PDF布局优化器(可选)
"""
self.config = config or {}
self.html_renderer = HTMLRenderer(config)
self.layout_optimizer = layout_optimizer or PDFLayoutOptimizer()
if not WEASYPRINT_AVAILABLE:
raise RuntimeError("WeasyPrint未安装,请运行: pip install weasyprint")
@staticmethod
def _get_font_path() -> Path:
"""获取字体文件路径"""
# 优先使用完整字体以确保字符覆盖
fonts_dir = Path(__file__).parent / "assets" / "fonts"
# 检查完整字体
full_font = fonts_dir / "SourceHanSerifSC-Medium.otf"
if full_font.exists():
logger.info(f"使用完整字体: {full_font}")
return full_font
# 检查TTF子集字体
subset_ttf = fonts_dir / "SourceHanSerifSC-Medium-Subset.ttf"
if subset_ttf.exists():
logger.info(f"使用TTF子集字体: {subset_ttf}")
return subset_ttf
# 检查OTF子集字体
subset_otf = fonts_dir / "SourceHanSerifSC-Medium-Subset.otf"
if subset_otf.exists():
logger.info(f"使用OTF子集字体: {subset_otf}")
return subset_otf
raise FileNotFoundError(f"未找到字体文件,请检查 {fonts_dir} 目录")
def _get_pdf_html(
self,
document_ir: Dict[str, Any],
optimize_layout: bool = True
) -> str:
"""
生成适用于PDF的HTML内容
- 移除交互式元素(按钮、导航等)
- 添加PDF专用样式
- 嵌入字体文件
- 应用布局优化
参数:
document_ir: Document IR数据
optimize_layout: 是否启用布局优化
返回:
str: 优化后的HTML内容
"""
# 如果启用布局优化,先分析文档并生成优化配置
if optimize_layout:
logger.info("启用PDF布局优化...")
layout_config = self.layout_optimizer.optimize_for_document(document_ir)
# 保存优化日志
log_dir = Path('logs/pdf_layouts')
log_dir.mkdir(parents=True, exist_ok=True)
log_file = log_dir / f"layout_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
# 保存配置和优化日志
optimization_log = self.layout_optimizer._log_optimization(
self.layout_optimizer._analyze_document(document_ir),
layout_config
)
self.layout_optimizer.config = layout_config
self.layout_optimizer.save_config(log_file, optimization_log)
else:
layout_config = self.layout_optimizer.config
# 使用HTML渲染器生成基础HTML
html = self.html_renderer.render(document_ir)
# 获取字体路径并转换为base64(用于嵌入)
font_path = self._get_font_path()
font_data = font_path.read_bytes()
font_base64 = base64.b64encode(font_data).decode('ascii')
# 判断字体格式
font_format = 'opentype' if font_path.suffix == '.otf' else 'truetype'
# 生成优化后的CSS
optimized_css = self.layout_optimizer.generate_pdf_css()
# 添加PDF专用CSS
pdf_css = f"""
<style>
/* PDF专用字体嵌入 */
@font-face {{
font-family: 'SourceHanSerif';
src: url(data:font/{font_format};base64,{font_base64}) format('{font_format}');
font-weight: normal;
font-style: normal;
}}
/* 强制所有文本使用思源宋体 */
body, h1, h2, h3, h4, h5, h6, p, li, td, th, div, span {{
font-family: 'SourceHanSerif', serif !important;
}}
/* PDF专用样式调整 */
.report-header {{
display: none !important;
}}
.no-print {{
display: none !important;
}}
body {{
background: white !important;
}}
/* 隐藏图表canvas,显示fallback表格 */
.chart-container {{
display: none !important;
}}
.chart-fallback {{
display: block !important;
}}
{optimized_css}
</style>
"""
# 在</head>前插入PDF专用CSS
html = html.replace('</head>', f'{pdf_css}\n</head>')
return html
def render_to_pdf(
self,
document_ir: Dict[str, Any],
output_path: str | Path,
optimize_layout: bool = True
) -> Path:
"""
将Document IR渲染为PDF文件
参数:
document_ir: Document IR数据
output_path: PDF输出路径
optimize_layout: 是否启用布局优化(默认True)
返回:
Path: 生成的PDF文件路径
"""
output_path = Path(output_path)
logger.info(f"开始生成PDF: {output_path}")
# 生成HTML内容
html_content = self._get_pdf_html(document_ir, optimize_layout)
# 配置字体
font_config = FontConfiguration()
# 从HTML字符串创建WeasyPrint HTML对象
html_doc = HTML(string=html_content, base_url=str(Path.cwd()))
# 生成PDF
try:
html_doc.write_pdf(
output_path,
font_config=font_config,
presentational_hints=True # 保留HTML的呈现提示
)
logger.info(f"✓ PDF生成成功: {output_path}")
return output_path
except Exception as e:
logger.error(f"PDF生成失败: {e}")
raise
def render_to_bytes(
self,
document_ir: Dict[str, Any],
optimize_layout: bool = True
) -> bytes:
"""
将Document IR渲染为PDF字节流
参数:
document_ir: Document IR数据
optimize_layout: 是否启用布局优化(默认True)
返回:
bytes: PDF文件的字节内容
"""
html_content = self._get_pdf_html(document_ir, optimize_layout)
font_config = FontConfiguration()
html_doc = HTML(string=html_content, base_url=str(Path.cwd()))
return html_doc.write_pdf(
font_config=font_config,
presentational_hints=True
)
__all__ = ["PDFRenderer"]