"""HTML template manager.

Converts Markdown reports to HTML using either the built-in template or an
external HTML template file.
"""

import os
import re
from typing import Optional

import markdown
from bs4 import BeautifulSoup
from loguru import logger


class HTMLTemplateManager:
    """Render Markdown content into HTML via a built-in or external template."""

    # Placeholders recognised in external templates, tried in this order.
    # The double-brace form comes first so that ``{{content}}`` is consumed
    # whole instead of having only its inner ``{content}`` replaced.
    # The last form is an HTML comment, e.g. ``<!-- content -->``.
    _PLACEHOLDER_PATTERNS = (
        r'\{\{content\}\}',
        r'\{content\}',
        r'<!--\s*content\s*-->',
    )

    def __init__(self):
        # Bind the module name so log records can be attributed to this class.
        self.logger = logger.bind(module="HTMLTemplateManager")

    def markdown_to_html(self, markdown_content: str) -> str:
        """Convert Markdown text to an HTML fragment.

        Args:
            markdown_content: Markdown source text.

        Returns:
            The rendered HTML fragment (no ``<html>``/``<body>`` wrapper),
            produced with the ``tables``, ``fenced_code`` and ``codehilite``
            extensions enabled.
        """
        return markdown.markdown(
            markdown_content,
            extensions=['tables', 'fenced_code', 'codehilite'],
        )

    def render_builtin_template(self, markdown_content: str) -> str:
        """Render Markdown into a complete HTML document.

        Args:
            markdown_content: Markdown source text.

        Returns:
            A standalone HTML5 document whose body is the rendered and
            enhanced Markdown content.
        """
        html_body = self.markdown_to_html(markdown_content)

        # Add CSS classes / link attributes to the rendered fragment.
        soup = BeautifulSoup(html_body, 'html.parser')
        self._enhance_html_structure(soup)

        # Emit a real document skeleton: the explicit charset keeps the
        # Chinese title and content from being mis-decoded by browsers.
        return (
            '<!DOCTYPE html>\n'
            '<html lang="zh-CN">\n'
            '<head>\n'
            '<meta charset="utf-8">\n'
            '<title>汽车后市场情报报告</title>\n'
            '</head>\n'
            '<body>\n'
            + str(soup) + '\n'
            '</body>\n'
            '</html>\n'
        )

    def render_external_template(self, template_path: str, markdown_content: str) -> str:
        """Render Markdown into an external HTML template.

        The template may mark the insertion point with ``{{content}}``,
        ``{content}`` or an HTML comment ``<!-- content -->`` (all matched
        case-insensitively). If no placeholder is present, the content is
        appended inside ``<body>``, or to the end of the template when it
        has no ``<body>`` tag.

        Args:
            template_path: Path to the external template file (UTF-8).
            markdown_content: Markdown source text.

        Returns:
            The rendered HTML. If anything fails (unreadable file, rendering
            error), the error is logged and the built-in template output is
            returned instead.
        """
        try:
            with open(template_path, 'r', encoding='utf-8') as f:
                template = f.read()

            html_body = self.markdown_to_html(markdown_content)

            for pattern in self._PLACEHOLDER_PATTERNS:
                # A callable replacement inserts html_body verbatim; a plain
                # string would have its backslashes parsed as regex escapes.
                template, count = re.subn(
                    pattern,
                    lambda _match: html_body,
                    template,
                    flags=re.IGNORECASE,
                )
                if count:
                    break
            else:
                # No placeholder found: append the content inside <body>.
                soup = BeautifulSoup(template, 'html.parser')
                body = soup.find('body')
                if body is not None:
                    body.append(BeautifulSoup(html_body, 'html.parser'))
                    template = str(soup)
                else:
                    # No <body> either: append to the raw template text and
                    # keep that text, rather than re-serializing the soup
                    # (which does not contain the appended content).
                    template += html_body

            self.logger.info(f"使用外部模板渲染: {template_path}")
            return template

        except Exception as e:
            # Deliberate best-effort boundary: any failure falls back to the
            # built-in template. Loguru has no ``exc_info`` keyword; use
            # ``opt(exception=True)`` to attach the traceback, and pass the
            # error text as a format argument so braces in it are harmless.
            self.logger.opt(exception=True).error(
                "使用外部模板失败: {},回退到内置模板", str(e)
            )
            return self.render_builtin_template(markdown_content)

    def _enhance_html_structure(self, soup: BeautifulSoup) -> None:
        """Add presentation hooks to a parsed HTML fragment, in place.

        Args:
            soup: Parsed HTML to modify.
        """
        # Tables without a class get the default data-table styling hook.
        for table in soup.find_all('table'):
            if not table.get('class'):
                table['class'] = 'data-table'

        # Lists with any item mentioning "新闻" (news) are marked as news lists.
        for ul in soup.find_all('ul'):
            if any('新闻' in str(item) for item in ul.find_all('li')):
                ul['class'] = 'news-list'

        # Links without an explicit target open in a new tab; rel prevents
        # the opened page from reaching back through window.opener.
        for a in soup.find_all('a'):
            if not a.get('target'):
                a['target'] = '_blank'
                a['rel'] = 'noopener noreferrer'