"""续写 + JRXML 提取单元测试。
测试 _strip_continuation_wrapper、_extract_jrxml 在
多轮续写场景下的鲁棒性,以及 _generate_with_continuation 的完成检测。
"""
from __future__ import annotations
import pytest
from agent.nodes import _strip_continuation_wrapper, _extract_jrxml
# ── Complete JRXML ─────────────────────────────────────────────────
# Reference document that the extraction tests compare their results against.
# NOTE(review): as shown here, this literal and the fixtures below contain no
# XML markup, although later tests look for '$F{field_1}' / '$F{field_2}' in
# results built from them — presumably the markup was lost upstream; confirm
# against the original file.
COMPLETE_JRXML = """
"""
# Round 1 output: complete opening but missing the closing part (simulated truncation).
ROUND1_TRUNCATED = """
"""
# Round 2 continuation: wrapped in markdown + wrong closing tag (real LLM behavior).
ROUND2_MARKDOWN_CONTINUATION = """继续输出剩余的 JRXML 内容:
```
```"""
# Round 2 continuation variant: closed with a different tag (another common LLM mistake).
ROUND2_REPORT_CLOSE = """继续输出:
```
```"""
# Round 2 continuation variant: starts with ``` only, no ending (incomplete code block).
# NOTE(review): the literal below does contain a closing fence — confirm the intent.
ROUND2_PARTIAL_MARKDOWN = """
```xml
```"""
# ── _strip_continuation_wrapper 测试 ───────────────────────────────
class TestStripContinuationWrapper:
def test_removes_complete_markdown_block(self):
    """A prompt prefix followed by a fully fenced block reduces to the fenced content."""
    wrapped = '继续输出:\n\n```\ntest\n```'
    assert _strip_continuation_wrapper(wrapped) == 'test'
def test_removes_xml_fenced_block(self):
    """A block fenced with an ``xml`` language tag is unwrapped to its content."""
    fenced = '```xml\ntest\n```'
    assert _strip_continuation_wrapper(fenced) == 'test'
def test_removes_opening_fence_only(self):
    """An opening fence with no closing partner is dropped; the payload survives."""
    cleaned = _strip_continuation_wrapper('```xml\ntest')
    assert 'test' in cleaned
    assert '```' not in cleaned
def test_removes_closing_fence_only(self):
    """A trailing fence with no opening partner is dropped; the payload survives."""
    cleaned = _strip_continuation_wrapper('test\n```')
    assert 'test' in cleaned
    assert '```' not in cleaned
def test_removes_continuation_prefix_chinese(self):
    """A Chinese "continuing output" lead-in line is removed before the payload."""
    prefixed = '继续输出剩余的 JRXML 内容:\ntest'
    assert _strip_continuation_wrapper(prefixed) == 'test'
def test_pure_xml_passes_through(self):
    """Input without any wrapper is returned unchanged."""
    bare = 'test'
    assert _strip_continuation_wrapper(bare) == 'test'
def test_empty_becomes_empty(self):
    """Empty and whitespace-only inputs both yield the empty string."""
    for blank in ('', ' '):
        assert _strip_continuation_wrapper(blank) == ''
def test_empty_markdown_block_returns_empty(self):
    """A fenced block with nothing between the fences yields the empty string."""
    hollow = '```xml\n```'
    assert _strip_continuation_wrapper(hollow) == ''
# NOTE(review): the string literal on the `text = ...` line below is never
# closed and runs straight into what looks like the tail of an assertion
# (`" in result`). Presumably the remainder of this test, and the header of the
# class owning the `_extract_jrxml` tests that follow, were lost — restore this
# span from the original file before relying on it.
def test_multiple_backtick_pairs_extracts_first_valid(self):
text = '```\nfragment\n```\n```xml\ncomplete" in result
assert '$F{field_1}' in result
assert '$F{field_2}' in result
def test_extracts_with_report_close_tag(self):
"""Round two closes with one tag rather than another (the tag names are absent from the original docstring)."""
# Simulates a truncated first round followed by the ROUND2_REPORT_CLOSE continuation.
combined = ROUND1_TRUNCATED + ROUND2_REPORT_CLOSE
result = _extract_jrxml(combined)
# NOTE(review): the next line has an unbalanced parenthesis and looks like two
# assertions fused together; presumably text was lost — confirm and restore.
assert result.startswith("" in result
assert '$F{field_2}' in result
def test_extracts_with_partial_markdown(self):
"""Round two opens with ```xml and ends with ```."""
# Simulates a truncated first round followed by the ROUND2_PARTIAL_MARKDOWN continuation.
combined = ROUND1_TRUNCATED + ROUND2_PARTIAL_MARKDOWN
result = _extract_jrxml(combined)
# NOTE(review): the next line has an unbalanced parenthesis and looks like two
# assertions fused together; presumably text was lost — confirm and restore.
assert result.startswith("" in result
def test_single_round_complete_jrxml_in_markdown(self):
    """Single-round output: a complete JRXML inside a markdown code block."""
    fenced = ''.join(('```xml\n', COMPLETE_JRXML, '\n```'))
    assert _extract_jrxml(fenced) == COMPLETE_JRXML
def test_single_round_pure_jrxml(self):
    """Single-round output: bare JRXML with no markdown around it."""
    assert _extract_jrxml(COMPLETE_JRXML) == COMPLETE_JRXML
def test_jrxml_with_leading_explanation(self):
    """Natural-language text placed before the JRXML is discarded."""
    explanation = '这是生成的报表模板:\n'
    extracted = _extract_jrxml(explanation + COMPLETE_JRXML)
    assert extracted == COMPLETE_JRXML
def test_two_markdown_blocks_skips_fragment(self):
    """Two markdown blocks: the first is a fragment, the second the complete JRXML."""
    fragment_block = '```\nsome fragment\n```\n'
    jrxml_block = '```xml\n' + COMPLETE_JRXML + '\n```'
    assert _extract_jrxml(fragment_block + jrxml_block) == COMPLETE_JRXML
def test_two_markdown_blocks_first_is_complete(self):
    """Two markdown blocks: the first one already holds the complete JRXML."""
    jrxml_block = '```xml\n' + COMPLETE_JRXML + '\n```\n'
    trailing_block = '```\nsome other stuff\n```'
    assert _extract_jrxml(jrxml_block + trailing_block) == COMPLETE_JRXML
def test_no_xml_passes_through(self):
    """Text containing no XML at all is returned exactly as given."""
    plain = 'Hello, this has no XML at all.'
    assert _extract_jrxml(plain) == plain
# ── 完成检测测试 ───────────────────────────────────────────────────
class TestCompletionDetection:
    """Checks the end-of-text pattern used to decide whether a JRXML is complete."""

    # Optional namespace prefix, then a jasperReport/report tag name and '>',
    # anchored at the end of the text with trailing whitespace allowed.
    _JRXML_END = r"(?:[\w:]+:)?(?:jasperReport|report)>\s*$"

    @classmethod
    def _looks_complete(cls, text):
        """Return True when *text* ends with the closing pattern (case-insensitive)."""
        import re  # local import: the module does not import re at file level

        return re.search(cls._JRXML_END, text, re.IGNORECASE) is not None

    def test_jasperreport_close_detected(self):
        """A JRXML ending in </jasperReport> is recognised as complete."""
        assert self._looks_complete(COMPLETE_JRXML.strip())

    def test_report_close_detected(self):
        """A JRXML ending in </report> is also recognised as complete."""
        variant = COMPLETE_JRXML.replace('</jasperReport>', '</report>').strip()
        # Guard against a silent no-op replace, which would merely re-test
        # the default closing tag.
        assert variant.endswith('</report>')
        assert self._looks_complete(variant)

    def test_namespaced_jasperreport_close_detected(self):
        """A JRXML ending in a namespace-prefixed closing tag is also recognised."""
        variant = COMPLETE_JRXML.replace(
            '</jasperReport>', '</jr:jasperReport>'
        ).strip()
        # Same guard: make sure the prefixed tag is really what ends the text.
        assert variant.endswith('</jr:jasperReport>')
        assert self._looks_complete(variant)

    def test_truncated_jrxml_not_detected(self):
        """A truncated JRXML (no closing tag) is not recognised as complete."""
        assert not self._looks_complete(ROUND1_TRUNCATED.strip())