"""
HTML模板管理器
支持内置模板和外部HTML模板
"""
import os
import markdown
from bs4 import BeautifulSoup
import re
from typing import Optional
from loguru import logger
class HTMLTemplateManager:
    """Render Markdown reports to HTML with a built-in or external template.

    The built-in template wraps the converted Markdown in a minimal HTML
    document. An external template is an HTML file in which a content
    placeholder is replaced by the converted Markdown; when the template
    has no placeholder the content is appended to its ``<body>``.
    """

    # Placeholder spellings recognised in external templates, in priority
    # order. Only the first spelling found in a template is substituted
    # (all of its occurrences), matching case-insensitively.
    _PLACEHOLDER_PATTERNS = (
        re.compile(r'\{\{content\}\}', re.IGNORECASE),
        re.compile(r'\{content\}', re.IGNORECASE),
        re.compile(r'<!--\s*content\s*-->', re.IGNORECASE),
    )

    def __init__(self):
        """Bind a logger tagged with this component's name."""
        self.logger = logger.bind(module="HTMLTemplateManager")

    def markdown_to_html(self, markdown_content: str) -> str:
        """Convert Markdown text to an HTML fragment.

        Args:
            markdown_content: Markdown source; ``None`` or an empty string
                is treated as empty text.

        Returns:
            The HTML fragment produced with the ``tables``, ``fenced_code``
            and ``codehilite`` extensions enabled.
        """
        return markdown.markdown(
            markdown_content or "",
            extensions=['tables', 'fenced_code', 'codehilite'],
        )

    def render_builtin_template(self, markdown_content: str) -> str:
        """Render Markdown into a complete HTML document.

        Args:
            markdown_content: Markdown source of the report.

        Returns:
            A standalone HTML document whose body is the converted and
            enhanced Markdown.
        """
        html_body = self.markdown_to_html(markdown_content)

        # Post-process the fragment (table/list classes, link targets).
        soup = BeautifulSoup(html_body, 'html.parser')
        self._enhance_html_structure(soup)
        body_html = str(soup)

        # Emit a real document with an explicit charset so that non-ASCII
        # text is decoded correctly by browsers.
        return f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<title>汽车后市场情报报告</title>
</head>
<body>
{body_html}
</body>
</html>
"""

    def render_external_template(self, template_path: str, markdown_content: str) -> str:
        """Render Markdown using an external HTML template file.

        The template may contain ``{{content}}``, ``{content}`` or
        ``<!-- content -->`` as a placeholder (case-insensitive). If none is
        present, the content is appended to the template's ``<body>``, or to
        the end of the template when it has no ``<body>`` tag.

        Args:
            template_path: Path of the external template file (UTF-8).
            markdown_content: Markdown source of the report.

        Returns:
            The rendered HTML. If anything fails (unreadable file, parse
            error, ...) the error is logged and the built-in template is
            used instead.
        """
        try:
            with open(template_path, 'r', encoding='utf-8') as f:
                template = f.read()

            html_body = self.markdown_to_html(markdown_content)

            rendered = self._substitute_placeholder(template, html_body)
            if rendered is None:
                rendered = self._append_to_body(template, html_body)

            self.logger.info("使用外部模板渲染: {}", template_path)
            return rendered

        except Exception as e:
            # Deliberate catch-all: any failure falls back to the built-in
            # template. The values are passed as format arguments so braces
            # in the exception text cannot break loguru's message formatting.
            self.logger.opt(exception=True).error(
                "使用外部模板失败: {},回退到内置模板", e
            )
            return self.render_builtin_template(markdown_content)

    @staticmethod
    def _substitute_placeholder(template: str, html_body: str) -> Optional[str]:
        """Replace the first recognised placeholder spelling in a template.

        Returns:
            The template with every occurrence of that placeholder replaced
            by ``html_body``, or ``None`` when no placeholder is present.
        """
        for pattern in HTMLTemplateManager._PLACEHOLDER_PATTERNS:
            if pattern.search(template):
                # A callable replacement inserts html_body verbatim; a plain
                # string would have its backslashes parsed as regex escapes.
                return pattern.sub(lambda _match: html_body, template)
        return None

    @staticmethod
    def _append_to_body(template: str, html_body: str) -> str:
        """Append content to the template's <body>, or to its end if none."""
        soup = BeautifulSoup(template, 'html.parser')
        body = soup.find('body')
        if body is None:
            # No <body> tag: keep the template text as-is and append.
            return template + html_body
        body.append(BeautifulSoup(html_body, 'html.parser'))
        return str(soup)

    def _enhance_html_structure(self, soup: "BeautifulSoup"):
        """Add presentation classes and link attributes to the parsed HTML.

        Modifies ``soup`` in place:
        - tables without a class get ``data-table``;
        - lists with an item whose markup contains '新闻' get ``news-list``;
        - links without a target open in a new tab with a safe ``rel``.
        """
        # Tables: only set a class when none is present.
        for table in soup.find_all('table'):
            if not table.get('class'):
                table['class'] = 'data-table'

        # Lists: mark as a news list when any item mentions '新闻'.
        for ul in soup.find_all('ul'):
            if any('新闻' in str(item) for item in ul.find_all('li')):
                ul['class'] = 'news-list'

        # Links: open in a new tab; noopener/noreferrer keeps the opened
        # page from reaching back to this one through window.opener.
        for a in soup.find_all('a'):
            if not a.get('target'):
                a['target'] = '_blank'
                a['rel'] = 'noopener noreferrer'