202 lines
5.9 KiB
Python
202 lines
5.9 KiB
Python
"""
|
||
JRXML 元素自动排序 — 按 JasperReports XSD <xs:sequence> 要求重排子元素。
|
||
|
||
XSD 要求 jasperReport 子元素严格按以下顺序:
|
||
property, propertyExpression, import, template, reportFont,
|
||
style, subDataset, scriptlet, parameter, queryString, field,
|
||
sortField, variable, filterExpression, group, background, title,
|
||
pageHeader, columnHeader, detail, columnFooter, pageFooter,
|
||
lastPageFooter, summary, noData
|
||
|
||
以及 band 内部的 reportElement 必须在其他元素之前。
|
||
"""
|
||
import re
|
||
import xml.etree.ElementTree as ET
|
||
from typing import Optional
|
||
|
||
# Position of each jasperReport child in the XSD sequence (a lower index
# sorts earlier). The tuple below is written in sequence order and the
# index is derived from it, so inserting a tag only needs one new line.
JASPERREPORT_ORDER = {
    tag: position
    for position, tag in enumerate(
        (
            "property",
            "propertyExpression",
            "import",
            "template",
            "reportFont",
            "style",
            "subDataset",
            "scriptlet",
            "parameter",
            "queryString",
            "field",
            "sortField",
            "variable",
            "filterExpression",
            "group",
            "background",
            "title",
            "pageHeader",
            "columnHeader",
            "detail",
            "columnFooter",
            "pageFooter",
            "lastPageFooter",
            "summary",
            "noData",
        )
    )
}

# JasperReports JRXML namespace URI. Tag lookups in this module are done on
# the local name, with any namespace stripped.
NS = "http://jasperreports.sourceforge.net/jasperreports"
|
||
|
||
|
||
def _tag_local(tag: str) -> str:
|
||
"""提取标签本地名(去掉命名空间前缀)。"""
|
||
return tag.split("}")[-1] if "}" in tag else tag
|
||
|
||
|
||
def _sort_key(elem: ET.Element) -> int:
    """Return the sort position of *elem* according to ``JASPERREPORT_ORDER``.

    Tags that are not listed in the table get 999 so they sort after every
    known tag.
    """
    try:
        return JASPERREPORT_ORDER[_tag_local(elem.tag)]
    except KeyError:
        return 999
|
||
|
||
|
||
def reorder_jrxml_elements(xml_string: str) -> str:
    """Reorder the child elements of a JRXML string to match the XSD sequence.

    The document is parsed, passed through ``_reorder_children`` and
    ``_reorder_bands`` (``reportElement`` first inside every ``band``),
    serialised again and finally handed to ``_restore_formatting`` together
    with the original text.

    Args:
        xml_string: The JRXML document as text.

    Returns:
        The reordered document, or *xml_string* unchanged when it cannot be
        parsed.

    Note:
        The namespace prefixes declared in *xml_string* are registered with
        ``ET.register_namespace``, which updates ElementTree's process-wide
        prefix registry.
    """
    ns_scanner = ET.XMLPullParser(events=("start-ns",))
    try:
        root = ET.fromstring(xml_string)
        ns_scanner.feed(xml_string)
        ns_scanner.close()
    except ET.ParseError:
        return xml_string  # not well-formed: hand the input back untouched

    # ElementTree does not remember the prefixes used in the source and would
    # serialise a default-namespace document as <ns0:jasperReport ...>.
    # Registering the declared prefixes keeps the tags as they were written.
    # An empty prefix is only safe when no element is outside a namespace,
    # otherwise such elements would be pulled into the default namespace.
    every_tag_qualified = all(node.tag.startswith("{") for node in root.iter())
    for _event, (prefix, uri) in ns_scanner.read_events():
        if not uri or (not prefix and not every_tag_qualified):
            continue
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # Prefixes of the form ns<digits> are reserved by ElementTree.
            continue

    _reorder_children(root)
    _reorder_bands(root)

    # Serialise back to text, then restore the XML declaration and CDATA.
    result = ET.tostring(root, encoding="unicode")
    return _restore_formatting(xml_string, result)
|
||
|
||
|
||
def _reorder_children(parent: ET.Element) -> None:
    """Sort children into XSD sequence order, walking the whole subtree.

    Only the children of ``jasperReport`` and ``subDataset`` elements are
    reordered, because ``JASPERREPORT_ORDER`` describes their sequence.
    Other parents are left alone: applying the same table to them would
    move, for example, ``style`` in front of ``conditionExpression`` inside
    ``conditionalStyle``, or ``columnHeader`` in front of ``tableHeader``
    inside a table column, both of which violate their own schemas.

    The sort is stable, so elements with the same position (and all unknown
    tags, which sort last) keep their relative order.

    Args:
        parent: Element whose subtree is processed in place.
    """
    if _tag_local(parent.tag) in ("jasperReport", "subDataset"):
        current = list(parent)
        ordered = sorted(current, key=_sort_key)

        # Tails hold the whitespace between siblings. They are kept by
        # position rather than by element so the indentation and the closing
        # line break of the parent survive the reordering.
        tails = [child.tail for child in current]
        parent[:] = ordered
        for child, tail in zip(ordered, tails):
            child.tail = tail

    for child in parent:
        _reorder_children(child)
|
||
|
||
|
||
def _reorder_bands(root: ET.Element):
    """Put ``reportElement`` first inside every ``band`` found under *root*.

    *root* itself is included in the search; each matching element is handed
    to ``_ensure_reportelement_first``.
    """
    bands = (node for node in root.iter() if _tag_local(node.tag) == "band")
    for band in bands:
        _ensure_reportelement_first(band)
|
||
|
||
|
||
def _ensure_reportelement_first(band: ET.Element) -> None:
    """Move the ``reportElement`` children of *band* in front of the others.

    The relative order inside each of the two groups is preserved. A band
    without any ``reportElement`` child is left untouched.

    Args:
        band: The element whose direct children are rearranged in place.
    """
    children = list(band)
    report_elements = [c for c in children if _tag_local(c.tag) == "reportElement"]
    if not report_elements:
        return
    other_elements = [c for c in children if _tag_local(c.tag) != "reportElement"]

    # Tails carry the whitespace between siblings. Keeping them by position
    # (not by element) preserves the band's indentation and its final line
    # break instead of collapsing every child onto one line.
    tails = [c.tail for c in children]
    reordered = report_elements + other_elements
    band[:] = reordered
    for child, tail in zip(reordered, tails):
        child.tail = tail
|
||
|
||
|
||
def _restore_formatting(original: str, reordered: str) -> str:
|
||
"""恢复 XML 声明和 CDATA 段。"""
|
||
# 保留原始声明
|
||
decl = ""
|
||
if original.strip().startswith("<?xml"):
|
||
m = re.match(r'<\?xml[^?]*\?>', original)
|
||
if m:
|
||
decl = m.group()
|
||
if decl and not reordered.strip().startswith("<?xml"):
|
||
reordered = decl + "\n" + reordered
|
||
|
||
# 恢复 CDATA(ET 会把 CDATA 转成普通文本)
|
||
# 从原始 XML 提取所有 CDATA 块
|
||
cdata_pattern = re.compile(r'<!\[CDATA\[(.*?)\]\]>', re.DOTALL)
|
||
cdata_blocks = cdata_pattern.findall(original)
|
||
|
||
if cdata_blocks:
|
||
# 在重排后的 XML 中,对应位置的文本用 CDATA 包裹
|
||
def _restore_cdata(match):
|
||
nonlocal cdata_blocks
|
||
text = match.group(1)
|
||
for cdata in cdata_blocks:
|
||
if cdata.strip() == text.strip():
|
||
return f"<![CDATA[{cdata}]]>"
|
||
return match.group(0)
|
||
|
||
# 替换已转义的文本为 CDATA
|
||
reordered = re.sub(
|
||
r'(<queryString[^>]*>)\s*(.*?)\s*(</queryString>)',
|
||
lambda m: m.group(1) + f"\n <![CDATA[{m.group(2).strip()}]]>\n " + m.group(3),
|
||
reordered,
|
||
flags=re.DOTALL
|
||
)
|
||
|
||
return reordered
|
||
|
||
|
||
def normalize_jrxml(jrxml_text: str) -> str:
    """Normalise a JRXML document by running it through ``reorder_jrxml_elements``.

    Empty, ``None`` or whitespace-only input is returned as it was given.
    """
    if jrxml_text and jrxml_text.strip():
        return reorder_jrxml_elements(jrxml_text)
    return jrxml_text
|