"""Unit tests for programmatic field mapping.

Exercises the deterministic replacement behaviour of
``_programmatic_map_fields`` and ``_sanitize_field_name``, plus the
``validate_element_count`` check.
"""

from __future__ import annotations

import pytest

from agent.nodes import _programmatic_map_fields, _sanitize_field_name
from agent.jrxml_windower import count_elements, validate_element_count

# ── Minimal JRXML template (with placeholder fields) ─────────────────
# Built from adjacent literals so each element sits on its own source line;
# the resulting value separates the tags with single spaces.
JRXML_WITH_PLACEHOLDERS = (
    ' <band height="50">'
    ' <staticText>'
    ' <reportElement x="0" y="0" width="100" height="20"/>'
    ' <text><![CDATA[$F{field_1}]]></text>'
    ' </staticText>'
    ' <textField>'
    ' <reportElement x="100" y="0" width="80" height="20"/>'
    ' <textFieldExpression><![CDATA[$F{field_2}]]></textFieldExpression>'
    ' </textField>'
    ' <textField>'
    ' <reportElement x="200" y="0" width="80" height="20"/>'
    ' <textFieldExpression><![CDATA[$F{field_3}]]></textFieldExpression>'
    ' </textField>'
    ' </band> '
)


def _ocr_fields(*names: str) -> list[dict[str, str]]:
    """Build a fresh OCR field list, one ``{"field_name": name}`` per name."""
    return [{"field_name": name} for name in names]


def _map_template(*names: str) -> str:
    """Run ``_programmatic_map_fields`` on the shared template for *names*."""
    return _programmatic_map_fields(JRXML_WITH_PLACEHOLDERS, _ocr_fields(*names))


# ── _sanitize_field_name tests ───────────────────────────────────────
class TestSanitizeFieldName:
    """Checks on the output of ``_sanitize_field_name`` for assorted inputs."""

    def test_ascii_name_passes_through(self) -> None:
        assert _sanitize_field_name("customer_name") == "customer_name"

    def test_uppercase_lowered(self) -> None:
        assert _sanitize_field_name("CustomerName") == "customername"

    def test_spaces_replaced(self) -> None:
        assert _sanitize_field_name("customer name") == "customer_name"

    def test_chinese_characters_escaped(self) -> None:
        sanitized = _sanitize_field_name("发票代码")
        assert "发票" not in sanitized
        # Each escaped code point is expected to show up as "u<hex>_".
        for escaped in ("u53d1_", "u7968_"):
            assert escaped in sanitized

    def test_mixed_ascii_chinese(self) -> None:
        sanitized = _sanitize_field_name("发票_code")
        for fragment in ("_code", "u53d1_"):
            assert fragment in sanitized

    def test_empty_returns_unnamed(self) -> None:
        assert _sanitize_field_name("") == "unnamed_field"

    def test_all_special_chars_returns_unnamed(self) -> None:
        assert _sanitize_field_name("!!!") == "unnamed_field"

    def test_leading_digit_prefixed(self) -> None:
        assert _sanitize_field_name("123abc") == "f_123abc"

    def test_consecutive_underscores_collapsed(self) -> None:
        assert _sanitize_field_name("a__b___c") == "a_b_c"

    def test_japanese_characters_escaped(self) -> None:
        assert "請求" not in _sanitize_field_name("請求書")


# ── _programmatic_map_fields tests ───────────────────────────────────
class TestProgrammaticMapFields:
    """Checks on ``_programmatic_map_fields`` applied to the shared template."""

    def test_replaces_field_declarations(self) -> None:
        # NOTE(review): the shared template holds only $F{} references and no
        # <field name=...> declarations -- confirm the mapper is expected to
        # emit the declarations asserted here.
        mapped = _map_template("customer_name", "total_amount", "invoice_date")
        for name in ("customer_name", "total_amount", "invoice_date"):
            assert f'field name="{name}"' in mapped
        assert 'field name="field_1"' not in mapped

    def test_replaces_field_references(self) -> None:
        mapped = _map_template("customer_name", "total_amount", "invoice_date")
        assert "$F{field_1}" not in mapped
        for reference in (
            "$F{customer_name}",
            "$F{total_amount}",
            "$F{invoice_date}",
        ):
            assert reference in mapped

    def test_preserves_element_count(self) -> None:
        mapped = _map_template("customer_name", "total_amount", "invoice_date")
        orig = count_elements(JRXML_WITH_PLACEHOLDERS)
        mod = count_elements(mapped)
        assert orig == mod, f"Elements: {orig} -> {mod}"

    def test_preserves_coordinates(self) -> None:
        mapped = _map_template("customer_name", "total_amount", "invoice_date")
        for attribute in (
            'x="0"',
            'x="100"',
            'x="200"',
            'y="0"',
            'width="100"',
            'height="20"',
        ):
            assert attribute in mapped

    def test_partial_fields_preserved(self) -> None:
        """With fewer OCR fields than placeholders, extra placeholders stay."""
        # NOTE(review): the template has no literal 'field name="field_3"'
        # text -- confirm where the mapper is expected to produce it.
        mapped = _map_template("customer_name", "total_amount")
        assert 'field name="field_3"' in mapped
        assert "$F{field_3}" in mapped

    def test_empty_field_name_skipped(self) -> None:
        """OCR entries with an empty field_name do not trigger a replacement."""
        mapped = _map_template("", "total_amount", "")
        for reference in ("$F{field_1}", "$F{total_amount}", "$F{field_3}"):
            assert reference in mapped

    def test_no_ocr_fields_no_change(self) -> None:
        assert _map_template() == JRXML_WITH_PLACEHOLDERS

    def test_chinese_field_names_sanitized(self) -> None:
        mapped = _map_template("发票代码", "发票号码", "金额")
        assert "发票代码" not in mapped

    def test_validate_element_count_passes(self) -> None:
        mapped = _map_template("customer_name", "total_amount", "invoice_date")
        validation = validate_element_count(
            JRXML_WITH_PLACEHOLDERS, mapped, "map_fields"
        )
        assert validation["ok"] is True
        assert validation["modified"] == validation["original"]

    def test_expression_with_multiple_fields(self) -> None:
        """Expressions containing multiple $F{} references are replaced."""
        # NOTE(review): every expression in the shared template holds a single
        # $F{} reference, so this does not yet exercise a multi-field
        # expression -- consider a dedicated template.
        mapped = _map_template("unit_price", "quantity")
        for reference in ("$F{unit_price}", "$F{quantity}"):
            assert reference in mapped
        for placeholder in ("$F{field_1}", "$F{field_2}"):
            assert placeholder not in mapped