"""续写 + JRXML 提取单元测试。 测试 _strip_continuation_wrapper、_extract_jrxml 在 多轮续写场景下的鲁棒性,以及 _generate_with_continuation 的完成检测。 """ from __future__ import annotations import pytest from agent.nodes import _strip_continuation_wrapper, _extract_jrxml # ── 完整 JRXML ───────────────────────────────────────────────────── COMPLETE_JRXML = """ <band height="50"> <staticText> <reportElement x="0" y="0" width="100" height="20"/> <text><![CDATA[$F{field_1}]]></text> </staticText> </band> """ # 第一轮输出:完整开头但缺少 (模拟截断) ROUND1_TRUNCATED = """ <band height="50"> <staticText> <reportElement x="0" y="0" width="100" height="20"/> <text><![CDATA[$F{field_1}]]></text> </staticText> </band> ```""" # 第二轮续写变体:用 关闭(另一种常见 LLM 错误) ROUND2_REPORT_CLOSE = """继续输出: ``` ```""" # 第二轮续写变体:只用 ``` 开头,无结尾(不完整代码块) ROUND2_PARTIAL_MARKDOWN = """ ```xml ```""" # ── _strip_continuation_wrapper 测试 ─────────────────────────────── class TestStripContinuationWrapper: def test_removes_complete_markdown_block(self): text = '继续输出:\n\n```\ntest\n```' result = _strip_continuation_wrapper(text) assert result == 'test' def test_removes_xml_fenced_block(self): text = '```xml\ntest\n```' result = _strip_continuation_wrapper(text) assert result == 'test' def test_removes_opening_fence_only(self): text = '```xml\ntest' result = _strip_continuation_wrapper(text) assert 'test' in result assert '```' not in result def test_removes_closing_fence_only(self): text = 'test\n```' result = _strip_continuation_wrapper(text) assert 'test' in result assert '```' not in result def test_removes_continuation_prefix_chinese(self): text = '继续输出剩余的 JRXML 内容:\ntest' result = _strip_continuation_wrapper(text) assert result == 'test' def test_pure_xml_passes_through(self): text = 'test' result = _strip_continuation_wrapper(text) assert result == 'test' def test_empty_becomes_empty(self): assert _strip_continuation_wrapper('') == '' assert _strip_continuation_wrapper(' ') == '' def test_empty_markdown_block_returns_empty(self): text = '```xml\n```' result = _strip_continuation_wrapper(text) assert result == '' def test_multiple_backtick_pairs_extracts_first_valid(self): text = '```\nfragment\n```\n```xml\ncomplete" in result assert '$F{field_1}' in result assert '$F{field_2}' in result def test_extracts_with_report_close_tag(self): """第二轮用 而非 关闭。""" combined = ROUND1_TRUNCATED + ROUND2_REPORT_CLOSE result = _extract_jrxml(combined) assert result.startswith("" in result assert '$F{field_2}' in result def test_extracts_with_partial_markdown(self): """第二轮用 ```xml 开头,``` 结尾。""" combined = ROUND1_TRUNCATED + ROUND2_PARTIAL_MARKDOWN result = _extract_jrxml(combined) assert result.startswith("" in result def test_single_round_complete_jrxml_in_markdown(self): """单轮输出:完整的 JRXML 在 markdown 代码块中。""" text = '```xml\n' + COMPLETE_JRXML + '\n```' result = _extract_jrxml(text) assert result == COMPLETE_JRXML def test_single_round_pure_jrxml(self): """单轮输出:纯 JRXML 无 markdown。""" result = _extract_jrxml(COMPLETE_JRXML) assert result == COMPLETE_JRXML def test_jrxml_with_leading_explanation(self): """JRXML 前有自然语言解释。""" text = '这是生成的报表模板:\n' + COMPLETE_JRXML result = _extract_jrxml(text) assert result == COMPLETE_JRXML def test_two_markdown_blocks_skips_fragment(self): """文本中有两个 markdown 块,第一个是片段,第二个是完整 JRXML。""" text = ( '```\nsome fragment\n```\n' '```xml\n' + COMPLETE_JRXML + '\n```' ) result = _extract_jrxml(text) assert result == COMPLETE_JRXML def test_two_markdown_blocks_first_is_complete(self): """文本中有两个 markdown 块,第一个是完整 JRXML。""" text = ( '```xml\n' + COMPLETE_JRXML + '\n```\n' '```\nsome other stuff\n```' ) result = _extract_jrxml(text) assert result == COMPLETE_JRXML def test_no_xml_passes_through(self): """无 XML 内容的文本原样返回。""" text = 'Hello, this has no XML at all.' result = _extract_jrxml(text) assert result == text # ── 完成检测测试 ─────────────────────────────────────────────────── class TestCompletionDetection: def test_jasperreport_close_detected(self): """以 结尾的 JRXML 应被识别为完成。""" import re jrxml = COMPLETE_JRXML.strip() _jrxml_end = r"\s*$" assert re.search(_jrxml_end, jrxml, re.IGNORECASE) def test_report_close_detected(self): """以 结尾的 JRXML 也应被识别为完成。""" import re jrxml = COMPLETE_JRXML.replace('', '').strip() _jrxml_end = r"\s*$" assert re.search(_jrxml_end, jrxml, re.IGNORECASE) def test_namespaced_jasperreport_close_detected(self): """以 结尾的 JRXML 也应被识别。""" import re jrxml = COMPLETE_JRXML.replace('', '').strip() _jrxml_end = r"\s*$" assert re.search(_jrxml_end, jrxml, re.IGNORECASE) def test_truncated_jrxml_not_detected(self): """截断的 JRXML(无关闭标签)不应被识别为完成。""" import re _jrxml_end = r"\s*$" assert not re.search(_jrxml_end, ROUND1_TRUNCATED.strip(), re.IGNORECASE)