From 5e9da9cfbf7d5bba68403398084d70b51cf430ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E4=B8=80=E4=B8=81?= <1769123563@qq.com> Date: Thu, 27 Nov 2025 10:29:27 +0800 Subject: [PATCH] Add Support for Rendering Various Inline and Block-level Mathematical Formulas --- ReportEngine/renderers/html_renderer.py | 74 +++++++++++++- ReportEngine/renderers/math_to_svg.py | 19 +++- ReportEngine/renderers/pdf_renderer.py | 123 +++++++++++++++++++++--- 3 files changed, 195 insertions(+), 21 deletions(-) diff --git a/ReportEngine/renderers/html_renderer.py b/ReportEngine/renderers/html_renderer.py index 4e7c8b9..405b811 100644 --- a/ReportEngine/renderers/html_renderer.py +++ b/ReportEngine/renderers/html_renderer.py @@ -1262,7 +1262,8 @@ class HTMLRenderer: def _render_math(self, block: Dict[str, Any]) -> str: """渲染数学公式,占位符交给外部MathJax或后处理""" - latex = self._escape_html(block.get("latex", "")) + latex_raw = block.get("latex", "") + latex = self._escape_html(self._normalize_latex_string(latex_raw)) math_id = self._escape_attr(block.get("mathId", "")) if block.get("mathId") else "" id_attr = f' data-math-id="{math_id}"' if math_id else "" return f'
$$ {latex} $$
' @@ -1988,6 +1989,66 @@ class HTMLRenderer: return text_value, marks + @staticmethod + def _normalize_latex_string(raw: Any) -> str: + """去除外层数学定界符,兼容 $...$、$$...$$、\\(\\)、\\[\\] 等格式""" + if not isinstance(raw, str): + return "" + latex = raw.strip() + patterns = [ + r'^\$\$(.*)\$\$$', + r'^\$(.*)\$$', + r'^\\\[(.*)\\\]$', + r'^\\\((.*)\\\)$', + ] + for pat in patterns: + m = re.match(pat, latex, re.DOTALL) + if m: + latex = m.group(1).strip() + break + return latex + + def _render_text_with_inline_math(self, text: Any, math_id: str | list | None = None) -> str | None: + """ + 识别纯文本中的数学定界符并渲染为math-inline/math-block,提升兼容性。 + + - 支持 $...$、$$...$$、\\(\\)、\\[\\]。 + - 若未检测到公式,返回None。 + """ + if not isinstance(text, str) or not text: + return None + + pattern = re.compile(r'(\$\$(.+?)\$\$|\$(.+?)\$|\\\((.+?)\\\)|\\\[(.+?)\\\])', re.S) + cursor = 0 + parts: List[str] = [] + idx = 0 + id_iter = iter(math_id) if isinstance(math_id, list) else None + for m in pattern.finditer(text): + start, end = m.span() + if start > cursor: + parts.append(self._escape_html(text[cursor:start])) + raw = next(g for g in m.groups()[1:] if g is not None) + latex = self._normalize_latex_string(raw) + idx += 1 + # 若已有math_id,直接使用,避免与SVG注入ID不一致;否则按局部序号生成 + if id_iter: + mid = next(id_iter, f"auto-math-{idx}") + else: + mid = math_id or f"auto-math-{idx}" + id_attr = f' data-math-id="{self._escape_attr(mid)}"' + is_display = m.group(1).startswith('$$') or m.group(1).startswith('\\[') + if is_display: + parts.append(f'
$$ {self._escape_html(latex)} $$
') + else: + parts.append(f'\\( {self._escape_html(latex)} \\)') + cursor = end + + if cursor == 0: + return None + if cursor < len(text): + parts.append(self._escape_html(text[cursor:])) + return "".join(parts) + @staticmethod def _coerce_inline_payload(payload: Dict[str, Any]) -> Dict[str, Any] | None: """尽力将字符串里的内联节点恢复为dict,修复渲染遗漏""" @@ -2013,12 +2074,19 @@ class HTMLRenderer: text_value, marks = self._normalize_inline_payload(run) math_mark = next((mark for mark in marks if mark.get("type") == "math"), None) if math_mark: - latex = math_mark.get("value") + latex = self._normalize_latex_string(math_mark.get("value")) if not isinstance(latex, str) or not latex.strip(): - latex = text_value + latex = self._normalize_latex_string(text_value) math_id = self._escape_attr(run.get("mathId", "")) if run.get("mathId") else "" id_attr = f' data-math-id="{math_id}"' if math_id else "" return f'\\( {self._escape_html(latex)} \\)' + + # 尝试从纯文本中提取数学公式(即便没有math mark) + math_id_hint = run.get("mathIds") or run.get("mathId") + mathified = self._render_text_with_inline_math(text_value, math_id_hint) + if mathified is not None: + return mathified + text = self._escape_html(text_value) styles: List[str] = [] prefix: List[str] = [] diff --git a/ReportEngine/renderers/math_to_svg.py b/ReportEngine/renderers/math_to_svg.py index 2acbc0e..c801b0a 100644 --- a/ReportEngine/renderers/math_to_svg.py +++ b/ReportEngine/renderers/math_to_svg.py @@ -4,6 +4,7 @@ LaTeX 数学公式转 SVG 渲染器 """ import io +import re from typing import Optional import matplotlib import matplotlib.pyplot as plt @@ -40,8 +41,22 @@ class MathToSVG: SVG 字符串,如果转换失败则返回 None """ try: - # 清理 LaTeX 字符串 - latex = latex.strip() + # 清理 LaTeX 字符串,去除外层定界符,兼容 $...$ / $$...$$ / \\( \\) / \\[ \\] + latex = (latex or "").strip() + patterns = [ + r'^\$\$(.*)\$\$$', + r'^\$(.*)\$$', + r'^\\\[(.*)\\\]$', + r'^\\\((.*)\\\)$', + ] + for pat in patterns: + m = re.match(pat, latex, re.DOTALL) + if m: + latex = m.group(1).strip() + break + # 清理控制字符并做常见兼容 + latex = re.sub(r'[\x00-\x1f\x7f]', '', latex) + latex = latex.replace(r'\\tfrac', r'\\frac').replace(r'\\dfrac', r'\\frac') if not latex: logger.warning("空的 LaTeX 公式") return None diff --git a/ReportEngine/renderers/pdf_renderer.py b/ReportEngine/renderers/pdf_renderer.py index 1347677..e7197bc 100644 --- a/ReportEngine/renderers/pdf_renderer.py +++ b/ReportEngine/renderers/pdf_renderer.py @@ -10,6 +10,7 @@ import copy import os import sys import io +import re from pathlib import Path from typing import Any, Dict from datetime import datetime @@ -544,23 +545,62 @@ class PDFRenderer: continue marks = run.get('marks') or [] math_mark = next((m for m in marks if m.get('type') == 'math'), None) - if not math_mark: + + if math_mark: + # 仅单个math mark + raw = math_mark.get('value') or run.get('text') or '' + latex = self._normalize_latex(raw) + is_display = bool(re.match(r'^\s*(\$\$|\\\[)', str(raw))) + if not latex: + continue + block_counter[0] += 1 + math_id = run.get('mathId') or f"math-inline-{block_counter[0]}" + run['mathId'] = math_id + try: + svg_content = ( + self.math_converter.convert_display_to_svg(latex) + if is_display else + self.math_converter.convert_inline_to_svg(latex) + ) + if svg_content: + svg_map[math_id] = svg_content + logger.debug(f"公式 {math_id} 转换为SVG成功") + else: + logger.warning(f"公式 {math_id} 转换为SVG失败: {latex[:50]}...") + except Exception as exc: + logger.error(f"转换内联公式 {latex[:50]}... 时出错: {exc}") continue - latex = (math_mark.get('value') or run.get('text') or '').strip() - if not latex: + + # 无math mark,尝试解析文本中的多个公式 + text_val = run.get('text') + if not isinstance(text_val, str): continue - block_counter[0] += 1 - math_id = f"math-inline-{block_counter[0]}" - try: - svg_content = self.math_converter.convert_inline_to_svg(latex) - if svg_content: - svg_map[math_id] = svg_content - run['mathId'] = math_id - logger.debug(f"公式 {math_id} 转换为SVG成功") - else: - logger.warning(f"公式 {math_id} 转换为SVG失败: {latex[:50]}...") - except Exception as exc: - logger.error(f"转换内联公式 {latex[:50]}... 时出错: {exc}") + segments = self._find_all_math_in_text(text_val) + if not segments: + continue + ids_for_html: list[str] = [] + for idx, (latex, is_display) in enumerate(segments, start=1): + if not latex: + continue + block_counter[0] += 1 + math_id = f"auto-math-{block_counter[0]}" + ids_for_html.append(math_id) + try: + svg_content = ( + self.math_converter.convert_display_to_svg(latex) + if is_display else + self.math_converter.convert_inline_to_svg(latex) + ) + if svg_content: + svg_map[math_id] = svg_content + logger.debug(f"公式 {math_id} 转换为SVG成功") + else: + logger.warning(f"公式 {math_id} 转换为SVG失败: {latex[:50]}...") + except Exception as exc: + logger.error(f"转换内联公式 {latex[:50]}... 时出错: {exc}") + if ids_for_html: + # 将ID列表写回run,便于HTML渲染时使用相同ID(顺序对应segments) + run['mathIds'] = ids_for_html for block in blocks: if not isinstance(block, dict): @@ -570,7 +610,7 @@ class PDFRenderer: # 处理math类型 if block_type == 'math': - latex = block.get('latex', '').strip() + latex = self._normalize_latex(block.get('latex', '')) if latex: block_counter[0] += 1 math_id = f"math-block-{block_counter[0]}" @@ -679,6 +719,57 @@ class PDFRenderer: return html + @staticmethod + def _normalize_latex(raw: Any) -> str: + """去除外层数学定界符,兼容 $...$、$$...$$、\\(\\)、\\[\\] 等格式""" + if not isinstance(raw, str): + return "" + latex = raw.strip() + patterns = [ + r'^\$\$(.*)\$\$$', + r'^\$(.*)\$$', + r'^\\\[(.*)\\\]$', + r'^\\\((.*)\\\)$', + ] + for pat in patterns: + m = re.match(pat, latex, re.DOTALL) + if m: + latex = m.group(1).strip() + break + # 清理控制字符、防止mathtext解析失败 + latex = re.sub(r'[\x00-\x1f\x7f]', '', latex) + # 常见兼容:\tfrac/\dfrac -> \frac + latex = latex.replace(r'\tfrac', r'\frac').replace(r'\dfrac', r'\frac') + return latex + + @staticmethod + def _find_first_math_in_text(text: Any) -> tuple[str, bool] | None: + """从纯文本中提取首个数学片段,返回(内容, 是否display)""" + if not isinstance(text, str): + return None + pattern = re.compile(r'\$\$(.+?)\$\$|\$(.+?)\$|\\\((.+?)\\\)|\\\[(.+?)\\\]', re.S) + m = pattern.search(text) + if not m: + return None + raw = next(g for g in m.groups() if g is not None) + latex = raw.strip() + is_display = bool(m.group(1) or m.group(4)) # $$ or \[ \] + return latex, is_display + + @staticmethod + def _find_all_math_in_text(text: Any) -> list[tuple[str, bool]]: + """从纯文本中提取所有数学片段,返回[(内容, 是否display)]""" + if not isinstance(text, str): + return [] + pattern = re.compile(r'\$\$(.+?)\$\$|\$(.+?)\$|\\\((.+?)\\\)|\\\[(.+?)\\\]', re.S) + results = [] + for m in pattern.finditer(text): + raw = next(g for g in m.groups() if g is not None) + latex = raw.strip() + is_display = bool(m.group(1) or m.group(4)) + results.append((latex, is_display)) + return results + def _inject_wordcloud_images(self, html: str, img_map: Dict[str, str]) -> str: """ 将词云PNG data URI注入HTML,替换对应canvas