Improve the Rendering of Inline Formulas
This commit is contained in:
@@ -1094,9 +1094,37 @@ class HTMLRenderer:
|
|||||||
|
|
||||||
def _render_paragraph(self, block: Dict[str, Any]) -> str:
|
def _render_paragraph(self, block: Dict[str, Any]) -> str:
|
||||||
"""渲染段落,内部通过inline run保持混排样式"""
|
"""渲染段落,内部通过inline run保持混排样式"""
|
||||||
inlines = "".join(self._render_inline(run) for run in block.get("inlines", []))
|
inlines_data = block.get("inlines", [])
|
||||||
|
# 仅包含单个display公式时直接渲染为块,避免<p>内嵌<div>
|
||||||
|
if len(inlines_data) == 1:
|
||||||
|
standalone = self._render_standalone_math_inline(inlines_data[0])
|
||||||
|
if standalone:
|
||||||
|
return standalone
|
||||||
|
|
||||||
|
inlines = "".join(self._render_inline(run) for run in inlines_data)
|
||||||
return f"<p>{inlines}</p>"
|
return f"<p>{inlines}</p>"
|
||||||
|
|
||||||
|
def _render_standalone_math_inline(self, run: Dict[str, Any] | str) -> str | None:
|
||||||
|
"""当段落只包含单个display公式时,转为math-block避免破坏行内布局"""
|
||||||
|
if isinstance(run, dict):
|
||||||
|
text_value, marks = self._normalize_inline_payload(run)
|
||||||
|
if marks:
|
||||||
|
return None
|
||||||
|
math_id_hint = run.get("mathIds") or run.get("mathId")
|
||||||
|
else:
|
||||||
|
text_value = "" if run is None else str(run)
|
||||||
|
math_id_hint = None
|
||||||
|
marks = []
|
||||||
|
|
||||||
|
rendered = self._render_text_with_inline_math(
|
||||||
|
text_value,
|
||||||
|
math_id_hint,
|
||||||
|
allow_display_block=True
|
||||||
|
)
|
||||||
|
if rendered and rendered.strip().startswith('<div class="math-block"'):
|
||||||
|
return rendered
|
||||||
|
return None
|
||||||
|
|
||||||
def _render_list(self, block: Dict[str, Any]) -> str:
|
def _render_list(self, block: Dict[str, Any]) -> str:
|
||||||
"""渲染有序/无序/任务列表"""
|
"""渲染有序/无序/任务列表"""
|
||||||
list_type = block.get("listType", "bullet")
|
list_type = block.get("listType", "bullet")
|
||||||
@@ -2034,7 +2062,12 @@ class HTMLRenderer:
|
|||||||
break
|
break
|
||||||
return latex
|
return latex
|
||||||
|
|
||||||
def _render_text_with_inline_math(self, text: Any, math_id: str | list | None = None) -> str | None:
|
def _render_text_with_inline_math(
|
||||||
|
self,
|
||||||
|
text: Any,
|
||||||
|
math_id: str | list | None = None,
|
||||||
|
allow_display_block: bool = False
|
||||||
|
) -> str | None:
|
||||||
"""
|
"""
|
||||||
识别纯文本中的数学定界符并渲染为math-inline/math-block,提升兼容性。
|
识别纯文本中的数学定界符并渲染为math-inline/math-block,提升兼容性。
|
||||||
|
|
||||||
@@ -2045,17 +2078,19 @@ class HTMLRenderer:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
pattern = re.compile(r'(\$\$(.+?)\$\$|\$(.+?)\$|\\\((.+?)\\\)|\\\[(.+?)\\\])', re.S)
|
pattern = re.compile(r'(\$\$(.+?)\$\$|\$(.+?)\$|\\\((.+?)\\\)|\\\[(.+?)\\\])', re.S)
|
||||||
|
matches = list(pattern.finditer(text))
|
||||||
|
if not matches:
|
||||||
|
return None
|
||||||
|
|
||||||
cursor = 0
|
cursor = 0
|
||||||
parts: List[str] = []
|
parts: List[str] = []
|
||||||
idx = 0
|
|
||||||
id_iter = iter(math_id) if isinstance(math_id, list) else None
|
id_iter = iter(math_id) if isinstance(math_id, list) else None
|
||||||
for m in pattern.finditer(text):
|
|
||||||
|
for idx, m in enumerate(matches, start=1):
|
||||||
start, end = m.span()
|
start, end = m.span()
|
||||||
if start > cursor:
|
prefix = text[cursor:start]
|
||||||
parts.append(self._escape_html(text[cursor:start]))
|
|
||||||
raw = next(g for g in m.groups()[1:] if g is not None)
|
raw = next(g for g in m.groups()[1:] if g is not None)
|
||||||
latex = self._normalize_latex_string(raw)
|
latex = self._normalize_latex_string(raw)
|
||||||
idx += 1
|
|
||||||
# 若已有math_id,直接使用,避免与SVG注入ID不一致;否则按局部序号生成
|
# 若已有math_id,直接使用,避免与SVG注入ID不一致;否则按局部序号生成
|
||||||
if id_iter:
|
if id_iter:
|
||||||
mid = next(id_iter, f"auto-math-{idx}")
|
mid = next(id_iter, f"auto-math-{idx}")
|
||||||
@@ -2063,14 +2098,23 @@ class HTMLRenderer:
|
|||||||
mid = math_id or f"auto-math-{idx}"
|
mid = math_id or f"auto-math-{idx}"
|
||||||
id_attr = f' data-math-id="{self._escape_attr(mid)}"'
|
id_attr = f' data-math-id="{self._escape_attr(mid)}"'
|
||||||
is_display = m.group(1).startswith('$$') or m.group(1).startswith('\\[')
|
is_display = m.group(1).startswith('$$') or m.group(1).startswith('\\[')
|
||||||
if is_display:
|
is_standalone = (
|
||||||
|
len(matches) == 1 and
|
||||||
|
not text[:start].strip() and
|
||||||
|
not text[end:].strip()
|
||||||
|
)
|
||||||
|
use_block = allow_display_block and is_display and is_standalone
|
||||||
|
if use_block:
|
||||||
|
# 独立display公式,跳过两侧空白,直接渲染块级
|
||||||
parts.append(f'<div class="math-block"{id_attr}>$$ {self._escape_html(latex)} $$</div>')
|
parts.append(f'<div class="math-block"{id_attr}>$$ {self._escape_html(latex)} $$</div>')
|
||||||
|
cursor = len(text)
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
|
if prefix:
|
||||||
|
parts.append(self._escape_html(prefix))
|
||||||
parts.append(f'<span class="math-inline"{id_attr}>\\( {self._escape_html(latex)} \\)</span>')
|
parts.append(f'<span class="math-inline"{id_attr}>\\( {self._escape_html(latex)} \\)</span>')
|
||||||
cursor = end
|
cursor = end
|
||||||
|
|
||||||
if cursor == 0:
|
|
||||||
return None
|
|
||||||
if cursor < len(text):
|
if cursor < len(text):
|
||||||
parts.append(self._escape_html(text[cursor:]))
|
parts.append(self._escape_html(text[cursor:]))
|
||||||
return "".join(parts)
|
return "".join(parts)
|
||||||
|
|||||||
@@ -550,7 +550,8 @@ class PDFRenderer:
|
|||||||
# 仅单个math mark
|
# 仅单个math mark
|
||||||
raw = math_mark.get('value') or run.get('text') or ''
|
raw = math_mark.get('value') or run.get('text') or ''
|
||||||
latex = self._normalize_latex(raw)
|
latex = self._normalize_latex(raw)
|
||||||
is_display = bool(re.match(r'^\s*(\$\$|\\\[)', str(raw)))
|
# 行内mark统一按inline处理,避免误将行内公式当成display
|
||||||
|
is_display = False
|
||||||
if not latex:
|
if not latex:
|
||||||
continue
|
continue
|
||||||
block_counter[0] += 1
|
block_counter[0] += 1
|
||||||
@@ -748,13 +749,19 @@ class PDFRenderer:
|
|||||||
if not isinstance(text, str):
|
if not isinstance(text, str):
|
||||||
return None
|
return None
|
||||||
pattern = re.compile(r'\$\$(.+?)\$\$|\$(.+?)\$|\\\((.+?)\\\)|\\\[(.+?)\\\]', re.S)
|
pattern = re.compile(r'\$\$(.+?)\$\$|\$(.+?)\$|\\\((.+?)\\\)|\\\[(.+?)\\\]', re.S)
|
||||||
m = pattern.search(text)
|
matches = list(pattern.finditer(text))
|
||||||
if not m:
|
if not matches:
|
||||||
return None
|
return None
|
||||||
|
m = matches[0]
|
||||||
raw = next(g for g in m.groups() if g is not None)
|
raw = next(g for g in m.groups() if g is not None)
|
||||||
latex = raw.strip()
|
latex = raw.strip()
|
||||||
is_display = bool(m.group(1) or m.group(4)) # $$ or \[ \]
|
is_display_raw = bool(m.group(1) or m.group(4)) # $$ or \[ \]
|
||||||
return latex, is_display
|
is_standalone = (
|
||||||
|
len(matches) == 1 and
|
||||||
|
not text[:m.start()].strip() and
|
||||||
|
not text[m.end():].strip()
|
||||||
|
)
|
||||||
|
return latex, bool(is_display_raw and is_standalone)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _find_all_math_in_text(text: Any) -> list[tuple[str, bool]]:
|
def _find_all_math_in_text(text: Any) -> list[tuple[str, bool]]:
|
||||||
@@ -763,10 +770,21 @@ class PDFRenderer:
|
|||||||
return []
|
return []
|
||||||
pattern = re.compile(r'\$\$(.+?)\$\$|\$(.+?)\$|\\\((.+?)\\\)|\\\[(.+?)\\\]', re.S)
|
pattern = re.compile(r'\$\$(.+?)\$\$|\$(.+?)\$|\\\((.+?)\\\)|\\\[(.+?)\\\]', re.S)
|
||||||
results = []
|
results = []
|
||||||
for m in pattern.finditer(text):
|
matches = list(pattern.finditer(text))
|
||||||
|
if not matches:
|
||||||
|
return results
|
||||||
|
total = len(matches)
|
||||||
|
|
||||||
|
for m in matches:
|
||||||
raw = next(g for g in m.groups() if g is not None)
|
raw = next(g for g in m.groups() if g is not None)
|
||||||
latex = raw.strip()
|
latex = raw.strip()
|
||||||
is_display = bool(m.group(1) or m.group(4))
|
is_display_raw = bool(m.group(1) or m.group(4))
|
||||||
|
is_standalone = (
|
||||||
|
total == 1 and
|
||||||
|
not text[:m.start()].strip() and
|
||||||
|
not text[m.end():].strip()
|
||||||
|
)
|
||||||
|
is_display = is_display_raw and is_standalone
|
||||||
results.append((latex, is_display))
|
results.append((latex, is_display))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user