4e14334030
- backend/llm.py: per-node max_tokens via get_llm(max_tokens=N), LLM_MAX_TOKENS env var (default 8192) - agent/nodes.py: 5 generation nodes use max_tokens=32768, generate_skeleton retries at 65536 - agent/nodes.py: fix ns:field regex (<field → <[\w:]*field) to handle namespace prefixes - agent/nodes.py: fix correct_jrxml never writing back to state["current_jrxml"] - agent/nodes.py: correct_jrxml rejects non-JRXML output (no <jasperReport tag) - agent/nodes.py: _strip_continuation_wrapper strips markdown/prefixes from continuation rounds - agent/nodes.py: _extract_jrxml iterates multiple markdown code blocks, skips fragments - agent/graph.py: route_after_validate skips correction loop when service_unavailable - agent/graph.py: route_after_save skips validation for empty JRXML - backend/validation.py: returns service_unavailable: True for ConnectError and HTTP 5xx - Docs: CLAUDE.md v14 changelog, README.md LLM_MAX_TOKENS, .env.example LLM_MAX_TOKENS
229 lines
8.8 KiB
Python
229 lines
8.8 KiB
Python
"""续写 + JRXML 提取单元测试。
|
|
|
|
测试 _strip_continuation_wrapper、_extract_jrxml 在
|
|
多轮续写场景下的鲁棒性,以及 _generate_with_continuation 的完成检测。
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
from agent.nodes import _strip_continuation_wrapper, _extract_jrxml
|
|
|
|
# ── Complete JRXML ─────────────────────────────────────────────────
# Small report whose root <jasperReport> element is both opened and closed:
# one field, a query string, and a title band holding a single staticText.
# Several tests in this module compare extraction results against it.
|
|
|
|
COMPLETE_JRXML = """<?xml version="1.0" encoding="UTF-8"?>
|
|
<jasperReport name="test" pageWidth="595" pageHeight="842">
|
|
<field name="field_1" class="java.lang.String"/>
|
|
<queryString><![CDATA[SELECT * FROM t]]></queryString>
|
|
<title>
|
|
<band height="50">
|
|
<staticText>
|
|
<reportElement x="0" y="0" width="100" height="20"/>
|
|
<text><![CDATA[$F{field_1}]]></text>
|
|
</staticText>
|
|
</band>
|
|
</title>
|
|
</jasperReport>"""
|
|
|
|
# Round-1 output: a complete opening but no </jasperReport> (simulates truncation).
# The text stops right after the "</" of an unfinished closing tag inside the
# detail band's <textFieldExpression>.
|
|
ROUND1_TRUNCATED = """<?xml version="1.0" encoding="UTF-8"?>
|
|
<jasperReport name="test" pageWidth="595" pageHeight="842">
|
|
<field name="field_1" class="java.lang.String"/>
|
|
<field name="field_2" class="java.lang.String"/>
|
|
<queryString><![CDATA[SELECT * FROM t]]></queryString>
|
|
<title>
|
|
<band height="50">
|
|
<staticText>
|
|
<reportElement x="0" y="0" width="100" height="20"/>
|
|
<text><![CDATA[$F{field_1}]]></text>
|
|
</staticText>
|
|
</band>
|
|
</title>
|
|
<detail>
|
|
<band height="30">
|
|
<textField>
|
|
<reportElement x="0" y="0" width="100" height="20"/>
|
|
<textFieldExpression><![CDATA[$F{field_1}]]></"""
|
|
|
|
# Round-2 continuation: wrapped in markdown + a mismatched closing tag (real LLM behaviour).
# The chunk opens with a Chinese lead-in sentence, then a bare ``` fence whose
# content restarts a whole <textFieldExpression> element and closes the report
# with </jasperReport>.
|
|
ROUND2_MARKDOWN_CONTINUATION = """继续输出剩余的 JRXML 内容:
|
|
|
|
```
|
|
<textFieldExpression><![CDATA[$F{field_2}]]></textFieldExpression>
|
|
</textField>
|
|
</band>
|
|
</detail>
|
|
</jasperReport>
|
|
```"""
|
|
|
|
# Round-2 continuation variant: closes with </report> (another common LLM mistake).
# Same shape as a fenced continuation with a Chinese lead-in line, but the root
# closing tag is </report> instead of </jasperReport>.
|
|
ROUND2_REPORT_CLOSE = """继续输出:
|
|
|
|
```
|
|
<textFieldExpression><![CDATA[$F{field_2}]]></textFieldExpression>
|
|
</textField>
|
|
</band>
|
|
</detail>
|
|
</report>
|
|
```"""
|
|
|
|
# Round-2 continuation variant: no lead-in sentence, content wrapped in a ```xml fence.
# NOTE(review): despite the name, the literal below DOES end with a closing ```
# fence, so it is not an unterminated code block. If an unterminated fence was
# the intended scenario, the fixture itself needs changing — confirm.
|
|
ROUND2_PARTIAL_MARKDOWN = """
|
|
```xml
|
|
<textFieldExpression><![CDATA[$F{field_2}]]></textFieldExpression>
|
|
</textField>
|
|
</band>
|
|
</detail>
|
|
</jasperReport>
|
|
```"""
|
|
|
|
|
|
# ── _strip_continuation_wrapper 测试 ───────────────────────────────
|
|
|
|
class TestStripContinuationWrapper:
    """Expectations for ``_strip_continuation_wrapper`` on a single continuation chunk."""

    def test_removes_complete_markdown_block(self):
        """Lead-in line plus a bare fenced block yields only the fenced content."""
        wrapped = '继续输出:\n\n```\n<band>test</band>\n```'
        assert _strip_continuation_wrapper(wrapped) == '<band>test</band>'

    def test_removes_xml_fenced_block(self):
        """A ```xml fenced block yields only the fenced content."""
        wrapped = '```xml\n<band>test</band>\n```'
        assert _strip_continuation_wrapper(wrapped) == '<band>test</band>'

    def test_removes_opening_fence_only(self):
        """An opening fence without a closing one leaves no backticks behind."""
        cleaned = _strip_continuation_wrapper('```xml\n<band>test</band>')
        assert '<band>test</band>' in cleaned
        assert '```' not in cleaned

    def test_removes_closing_fence_only(self):
        """A trailing fence without an opening one leaves no backticks behind."""
        cleaned = _strip_continuation_wrapper('<band>test</band>\n```')
        assert '<band>test</band>' in cleaned
        assert '```' not in cleaned

    def test_removes_continuation_prefix_chinese(self):
        """A Chinese lead-in line before the XML is dropped."""
        prefixed = '继续输出剩余的 JRXML 内容:\n<band>test</band>'
        assert _strip_continuation_wrapper(prefixed) == '<band>test</band>'

    def test_pure_xml_passes_through(self):
        """XML with no wrapper comes back unchanged."""
        bare = '<band>test</band>'
        assert _strip_continuation_wrapper(bare) == '<band>test</band>'

    def test_empty_becomes_empty(self):
        """Empty and whitespace-only inputs both come back as the empty string."""
        for blank in ('', ' '):
            assert _strip_continuation_wrapper(blank) == ''

    def test_empty_markdown_block_returns_empty(self):
        """A fenced block with nothing inside yields the empty string."""
        assert _strip_continuation_wrapper('```xml\n```') == ''

    def test_multiple_backtick_pairs_extracts_first_valid(self):
        """With two fenced blocks, the content of the first one is returned."""
        two_blocks = '```\nfragment\n```\n```xml\ncomplete<?xml ...\n```'
        assert _strip_continuation_wrapper(two_blocks) == 'fragment'
|
|
|
|
|
|
# ── _extract_jrxml 多轮续写场景测试 ─────────────────────────────────
|
|
|
|
class TestExtractJrxmlMultiRound:
    """Expectations for ``_extract_jrxml`` on single-round and stitched multi-round text."""

    def test_extracts_from_mixed_multi_round_output(self):
        """Round 1 without markdown followed by a round 2 wrapped in markdown."""
        extracted = _extract_jrxml(ROUND1_TRUNCATED + ROUND2_MARKDOWN_CONTINUATION)
        assert extracted.startswith("<?xml")
        assert "</jasperReport>" in extracted
        assert '$F{field_1}' in extracted
        assert '$F{field_2}' in extracted

    def test_extracts_with_report_close_tag(self):
        """Round 2 closes with </report> rather than </jasperReport>."""
        extracted = _extract_jrxml(ROUND1_TRUNCATED + ROUND2_REPORT_CLOSE)
        assert extracted.startswith("<?xml")
        assert "</report>" in extracted
        assert '$F{field_2}' in extracted

    def test_extracts_with_partial_markdown(self):
        """Round 2 starts with ```xml and ends with ```."""
        extracted = _extract_jrxml(ROUND1_TRUNCATED + ROUND2_PARTIAL_MARKDOWN)
        assert extracted.startswith("<?xml")
        assert "</jasperReport>" in extracted

    def test_single_round_complete_jrxml_in_markdown(self):
        """Single round: the complete JRXML sits inside one markdown code block."""
        fenced = "".join(['```xml\n', COMPLETE_JRXML, '\n```'])
        assert _extract_jrxml(fenced) == COMPLETE_JRXML

    def test_single_round_pure_jrxml(self):
        """Single round: plain JRXML with no markdown at all."""
        assert _extract_jrxml(COMPLETE_JRXML) == COMPLETE_JRXML

    def test_jrxml_with_leading_explanation(self):
        """A natural-language sentence precedes the JRXML."""
        explained = "".join(['这是生成的报表模板:\n', COMPLETE_JRXML])
        assert _extract_jrxml(explained) == COMPLETE_JRXML

    def test_two_markdown_blocks_skips_fragment(self):
        """Two markdown blocks: the first is a fragment, the second the complete JRXML."""
        pieces = [
            '```\nsome fragment\n```\n',
            '```xml\n',
            COMPLETE_JRXML,
            '\n```',
        ]
        assert _extract_jrxml("".join(pieces)) == COMPLETE_JRXML

    def test_two_markdown_blocks_first_is_complete(self):
        """Two markdown blocks: the first one already holds the complete JRXML."""
        pieces = [
            '```xml\n',
            COMPLETE_JRXML,
            '\n```\n',
            '```\nsome other stuff\n```',
        ]
        assert _extract_jrxml("".join(pieces)) == COMPLETE_JRXML

    def test_no_xml_passes_through(self):
        """Text containing no XML is returned as-is."""
        plain = 'Hello, this has no XML at all.'
        assert _extract_jrxml(plain) == plain
|
|
|
|
|
|
# ── 完成检测测试 ───────────────────────────────────────────────────
|
|
|
|
class TestCompletionDetection:
    """Checks the end-of-text pattern that marks a JRXML document as complete.

    The pattern is defined once here so the four tests cannot drift apart.

    NOTE(review): this is a copy kept inside the test module; it presumably
    mirrors the pattern used by the continuation logic in ``agent.nodes`` —
    confirm the two stay in sync.
    """

    # Closing root tag (optionally namespace-prefixed), followed only by
    # whitespace up to the end of the text.
    _JRXML_END = r"</(?:[\w:]+:)?(?:jasperReport|report)>\s*$"

    @classmethod
    def _ends_with_root_close(cls, text):
        """Search *text* for ``_JRXML_END``, ignoring case.

        Returns the ``re.Match`` object when the text ends with a closing
        root tag, otherwise ``None``.
        """
        # Imported locally so this class does not rely on a module-level import.
        import re

        return re.search(cls._JRXML_END, text, re.IGNORECASE)

    def test_jasperreport_close_detected(self):
        """JRXML ending with </jasperReport> is recognised as complete."""
        assert self._ends_with_root_close(COMPLETE_JRXML.strip())

    def test_report_close_detected(self):
        """JRXML ending with </report> is also recognised as complete."""
        closed_with_report = COMPLETE_JRXML.replace('</jasperReport>', '</report>').strip()
        assert self._ends_with_root_close(closed_with_report)

    def test_namespaced_jasperreport_close_detected(self):
        """JRXML ending with </ns0:jasperReport> is also recognised."""
        namespaced = COMPLETE_JRXML.replace('</jasperReport>', '</ns0:jasperReport>').strip()
        assert self._ends_with_root_close(namespaced)

    def test_truncated_jrxml_not_detected(self):
        """Truncated JRXML (no closing root tag) is not recognised as complete."""
        assert not self._ends_with_root_close(ROUND1_TRUNCATED.strip())
|