bd5bfbac2d
Root cause: LLM receiving full 34k-char JRXML would regenerate from scratch
instead of modifying coordinates in-place, shrinking output to ~3k chars.
Solution (programmatic node control, not prompt engineering):
- New agent/jrxml_windower.py: decompose JRXML into header (never sent to
LLM) + individual bands. Split bands >4000 chars at element boundaries.
Reassemble with element count validation (>10% change = rollback).
- Rewrite refine_layout: per-band windowed LLM processing (~2-4k chars
each). LLM cannot "reimagine" the entire report.
- Rewrite map_fields: 100% programmatic regex $F{field_N} -> real name
replacement. Zero LLM calls, zero content loss.
- _sanitize_field_name: non-ASCII chars escaped to _uXXXX_ format for
valid JRXML identifiers.
- Tests: 48 new unit tests (windower 28 + map_fields 20). All passing.
Full suite 385 tests, zero regressions.
326 lines
13 KiB
Python
326 lines
13 KiB
Python
"""JRXML 窗口化模块单元测试。
|
|
|
|
测试 decompose → split → reassemble 往返链路,
|
|
以及元素计数和校验逻辑。
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
from agent.jrxml_windower import (
|
|
decompose_jrxml, reassemble_jrxml,
|
|
split_band_into_windows, reassemble_band_windows,
|
|
count_elements, validate_element_count,
|
|
BandInfo,
|
|
)
|
|
|
|
# ── 最小 JRXML 测试夹具 ──────────────────────────────────────────────
|
|
|
|
# Smallest report used by most tests: one band in each of four sections
# (title, columnHeader, detail, pageFooter). The markup carries 2 <field>,
# 2 <staticText> and 4 <textField> tags.
# The literal must begin with the XML declaration and contain nothing but
# markup; stray text ahead of <jasperReport> makes the document ill-formed.
MINIMAL_JRXML = """<?xml version="1.0" encoding="UTF-8"?>
<jasperReport name="test" pageWidth="595" pageHeight="842" columnCount="3">
  <property name="test.prop" value="1"/>
  <field name="name" class="java.lang.String"/>
  <field name="amount" class="java.math.BigDecimal"/>
  <queryString><![CDATA[SELECT * FROM t]]></queryString>
  <title>
    <band height="50">
      <staticText>
        <reportElement x="0" y="0" width="100" height="20"/>
        <text><![CDATA[Title]]></text>
      </staticText>
      <textField>
        <reportElement x="200" y="0" width="80" height="20"/>
        <textFieldExpression><![CDATA[$F{name}]]></textFieldExpression>
      </textField>
    </band>
  </title>
  <columnHeader>
    <band height="30">
      <staticText>
        <reportElement x="0" y="0" width="100" height="30"/>
        <text><![CDATA[Header]]></text>
      </staticText>
    </band>
  </columnHeader>
  <detail>
    <band height="40">
      <textField>
        <reportElement x="0" y="0" width="100" height="20"/>
        <textFieldExpression><![CDATA[$F{name}]]></textFieldExpression>
      </textField>
      <textField>
        <reportElement x="200" y="0" width="80" height="20"/>
        <textFieldExpression><![CDATA[$F{amount}]]></textFieldExpression>
      </textField>
    </band>
  </detail>
  <pageFooter>
    <band height="30">
      <textField>
        <reportElement x="0" y="0" width="100" height="20"/>
        <textFieldExpression><![CDATA["Page " + $V{PAGE_NUMBER}]]></textFieldExpression>
      </textField>
    </band>
  </pageFooter>
</jasperReport>"""
|
|
|
|
|
|
# ── Decompose 测试 ────────────────────────────────────────────────────
|
|
|
|
class TestDecompose:
    """Checks on the parts object that decompose_jrxml returns."""

    @staticmethod
    def _minimal_parts():
        """Decompose the shared minimal fixture."""
        return decompose_jrxml(MINIMAL_JRXML)

    def test_parses_minimal_jrxml(self):
        decomposed = self._minimal_parts()
        assert decomposed is not None
        # One band each for title, columnHeader, detail, pageFooter.
        assert decomposed.band_count == 4
        # Per-band element tally: 2 + 1 + 2 + 1.
        assert decomposed.total_elements == 6

    def test_declaration_preserved(self):
        declaration = self._minimal_parts().declaration
        assert '<?xml' in declaration

    def test_root_open_has_jasperreport(self):
        root_open = self._minimal_parts().root_open
        assert 'jasperReport' in root_open

    def test_header_children_separated(self):
        header = self._minimal_parts().header_xml
        expected_fragments = (
            'field name="name"',
            'field name="amount"',
            'queryString',
            'property name',
        )
        for fragment in expected_fragments:
            assert fragment in header

    def test_band_labels(self):
        decomposed = self._minimal_parts()
        seen = [band.label for band in decomposed.bands]
        assert seen == ["title", "columnHeader", "detail", "pageFooter"]

    def test_footer_closes_jasperreport(self):
        footer = self._minimal_parts().footer
        assert 'jasperReport' in footer
        assert footer.strip().endswith('>')

    def test_returns_none_for_non_jrxml(self):
        assert decompose_jrxml("<html><body></body></html>") is None

    def test_returns_none_for_malformed_xml(self):
        assert decompose_jrxml("not xml at all <<<") is None
|
|
|
|
|
|
# ── Roundtrip 测试 ────────────────────────────────────────────────────
|
|
|
|
class TestRoundtrip:
    """Decompose followed by reassemble with untouched bands keeps content."""

    @staticmethod
    def _roundtrip(jrxml):
        """Decompose *jrxml* and reassemble it from its own, unmodified bands.

        Returns a ``(parts, reassembled_xml)`` pair. Fails with a clear
        message, rather than an AttributeError on ``None``, when the
        document is rejected by decompose_jrxml.
        """
        parts = decompose_jrxml(jrxml)
        assert parts is not None, "decompose_jrxml rejected the fixture"
        untouched = {band.label: band.band_xml for band in parts.bands}
        return parts, reassemble_jrxml(parts, untouched)

    def test_decompose_reassemble_element_count_unchanged(self):
        _, result = self._roundtrip(MINIMAL_JRXML)

        orig = count_elements(MINIMAL_JRXML)
        reassembled = count_elements(result)
        assert orig == reassembled, f"Elements: {orig} -> {reassembled}"

    def test_roundtrip_preserves_text_content(self):
        _, result = self._roundtrip(MINIMAL_JRXML)

        for needle in ('Title', 'Header', '$F{name}', '$F{amount}'):
            assert needle in result

    def test_empty_bands_preserved(self):
        """Empty bands (no elements) are not lost in the roundtrip."""
        # The literal holds markup only: any stray text between the XML
        # declaration and the root element would make it ill-formed.
        jrxml = """<?xml version="1.0" encoding="UTF-8"?>
<jasperReport name="t" pageWidth="595" pageHeight="842">
  <queryString><![CDATA[]]></queryString>
  <background>
    <band height="10"/>
  </background>
  <title>
    <band height="50">
      <staticText>
        <reportElement x="0" y="0" width="100" height="20"/>
        <text><![CDATA[T]]></text>
      </staticText>
    </band>
  </title>
</jasperReport>"""
        parts, result = self._roundtrip(jrxml)
        # background (self-closing band) + title
        assert parts.band_count == 2
        assert count_elements(jrxml) == count_elements(result)
|
|
|
|
|
|
# ── Window Split 测试 ─────────────────────────────────────────────────
|
|
|
|
class TestWindowSplit:
    """split_band_into_windows / reassemble_band_windows behaviour."""

    @staticmethod
    def _repeated_band(element_xml, open_tag, close_tag, copies=80):
        """Build a detail band holding *copies* repetitions of *element_xml*.

        Returns ``(band_xml, BandInfo)`` so tests can compare against the
        raw markup as well as feed the BandInfo to the splitter.
        """
        xml = f'{open_tag}{element_xml * copies}{close_tag}'
        info = BandInfo(
            section_name="detail",
            band_index=0,
            band_xml=xml,
            element_count=copies,
            char_length=len(xml),
        )
        return xml, info

    @classmethod
    def _plain_band(cls):
        """80 un-prefixed staticText elements inside ``<band height="50">``."""
        element = (
            '<staticText><reportElement x="0" y="0" width="100" height="20"/>'
            '<text><![CDATA[A]]></text></staticText>\n'
        )
        return cls._repeated_band(element, '<band height="50">', '</band>')

    def test_small_band_not_split(self):
        """A small band is left as a single window."""
        tiny = BandInfo(
            section_name="title",
            band_index=0,
            band_xml='<band height="50"><staticText><reportElement x="0" y="0" width="1" height="1"/><text><![CDATA[X]]></text></staticText></band>',
            element_count=1,
            char_length=150,
        )
        pieces = split_band_into_windows(tiny, max_chars=4000)
        assert len(pieces) == 1

    def test_large_band_split_at_element_boundaries(self):
        """A band above the character threshold is split into several windows."""
        _, band = self._plain_band()
        windows = split_band_into_windows(band, max_chars=4000)
        assert len(windows) > 1, f"Expected multiple windows, got {len(windows)}"

    def test_split_preserves_element_count(self):
        """Splitting and reassembling keeps the element count."""
        band_xml, band = self._plain_band()
        pieces = split_band_into_windows(band, max_chars=4000)
        rebuilt = reassemble_band_windows(pieces)
        assert count_elements(band_xml) == count_elements(rebuilt)

    def test_no_empty_windows(self):
        """Every window is non-empty and carries a band tag."""
        _, band = self._plain_band()
        pieces = split_band_into_windows(band, max_chars=4000)
        for i, w in enumerate(pieces):
            assert len(w.strip()) > 0, f"Window {i} is empty"
            assert '<band' in w, f"Window {i} missing <band>"

    def test_namespaced_band_split(self):
        """A band using a namespace prefix is split as well."""
        element = (
            '<ns0:staticText><ns0:reportElement x="0" y="0" width="100" height="20"/>'
            '<ns0:text><![CDATA[A]]></ns0:text></ns0:staticText>\n'
        )
        _, band = self._repeated_band(
            element,
            '<ns0:band xmlns:ns0="http://jasperreports.sourceforge.net/jasperreports" height="50">',
            '</ns0:band>',
        )
        windows = split_band_into_windows(band, max_chars=4000)
        assert len(windows) > 1, f"Expected multiple, got {len(windows)}"
        for piece in windows:
            assert '</ns0:band>' in piece or piece.startswith('<ns0:band')
|
|
|
|
|
|
# ── Element Count 测试 ────────────────────────────────────────────────
|
|
|
|
class TestElementCount:
    """Expected tallies from count_elements for small inputs."""

    def test_counts_textfield_statictext(self):
        tally = count_elements('<textField/><staticText/>')
        assert tally == 2

    def test_counts_field_declarations(self):
        tally = count_elements('<field name="a" class="java.lang.String"/>')
        assert tally == 1

    def test_counts_namespaced_elements(self):
        prefixed = '<ns0:textField/><ns0:staticText/><ns0:field name="x"/>'
        assert count_elements(prefixed) == 3

    def test_minimal_jrxml_count(self):
        whole_document = count_elements(MINIMAL_JRXML)
        assert whole_document == 8

    def test_empty_string_zero(self):
        nothing = count_elements("")
        assert nothing == 0
|
|
|
|
|
|
# ── Validate 测试 ─────────────────────────────────────────────────────
|
|
|
|
class TestValidateElementCount:
    """Verdicts returned by validate_element_count.

    The result is read as a mapping with at least the keys ``ok``,
    ``change_pct`` and ``original``.
    """

    def test_no_change_ok(self):
        report = validate_element_count(MINIMAL_JRXML, MINIMAL_JRXML, "test")
        assert report["ok"] is True
        assert report["change_pct"] == 0

    def test_small_change_ok(self):
        """Adding XML comments only still yields ok=True."""
        commented = MINIMAL_JRXML.replace('<staticText>', '<staticText><!-- comment -->')
        report = validate_element_count(MINIMAL_JRXML, commented, "test")
        # Expected to be a 0% change: comments are not counted as elements.
        assert report["ok"] is True

    def test_large_change_not_ok(self):
        """A change above 10% returns ok=False."""
        # A 500-character prefix ends before the columnHeader section, so
        # several of the counted elements are missing from it.
        truncated = MINIMAL_JRXML[:500]
        report = validate_element_count(MINIMAL_JRXML, truncated, "test")
        # Assert the preconditions instead of guarding on them: a guarded
        # assertion passes silently when the guard is false and then
        # verifies nothing.
        assert report["original"] > 0
        # abs() keeps the check valid whether the change is reported as a
        # signed or an unsigned value.
        assert abs(report["change_pct"]) > 0.10
        assert report["ok"] is False

    def test_zero_original_always_ok(self):
        report = validate_element_count("", MINIMAL_JRXML, "test")
        assert report["ok"] is True
|
|
|
|
|
|
# ── 多 section 多 band 测试 ──────────────────────────────────────────
|
|
|
|
# Report whose <detail> section holds two bands, followed by a one-band
# <summary>. The markup carries 1 <field>, 2 <textField> and 1 <staticText>.
# The literal must begin with the XML declaration and contain nothing but
# markup; stray text ahead of <jasperReport> makes the document ill-formed.
MULTI_BAND_JRXML = """<?xml version="1.0" encoding="UTF-8"?>
<jasperReport name="multi" pageWidth="595" pageHeight="842">
  <field name="f1" class="java.lang.String"/>
  <queryString><![CDATA[SELECT 1]]></queryString>
  <detail>
    <band height="30">
      <textField>
        <reportElement x="0" y="0" width="100" height="20"/>
        <textFieldExpression><![CDATA[$F{f1}]]></textFieldExpression>
      </textField>
    </band>
    <band height="20">
      <staticText>
        <reportElement x="0" y="0" width="100" height="15"/>
        <text><![CDATA[Sub]]></text>
      </staticText>
    </band>
  </detail>
  <summary>
    <band height="40">
      <textField>
        <reportElement x="0" y="0" width="200" height="30"/>
        <textFieldExpression><![CDATA["Total"]]></textFieldExpression>
      </textField>
    </band>
  </summary>
</jasperReport>"""
|
|
|
|
|
|
class TestMultiBand:
    """Reports with more than one band inside a single section."""

    @staticmethod
    def _reassembled_unchanged():
        """Decompose MULTI_BAND_JRXML and rebuild it from its own bands."""
        decomposed = decompose_jrxml(MULTI_BAND_JRXML)
        own_bands = {band.label: band.band_xml for band in decomposed.bands}
        return reassemble_jrxml(decomposed, own_bands)

    def test_multiple_bands_same_section(self):
        """Several bands within one section are handled separately."""
        decomposed = decompose_jrxml(MULTI_BAND_JRXML)
        # Two detail bands plus one summary band.
        assert decomposed.band_count == 3
        seen = [band.label for band in decomposed.bands]
        assert seen == ["detail", "detail_band1", "summary"]

    def test_multi_band_roundtrip(self):
        rebuilt = self._reassembled_unchanged()
        assert count_elements(MULTI_BAND_JRXML) == count_elements(rebuilt)

    def test_reassemble_opens_closes_sections(self):
        rebuilt = self._reassembled_unchanged()
        # Each section wrapper must appear exactly once, even for the
        # section that contains two bands.
        for tag in ('<detail>', '</detail>', '<summary>', '</summary>'):
            assert rebuilt.count(tag) == 1
|