feat: v3 robustness upgrade — EasyOCR, failure recovery, minimum content check

- OCR: EasyOCR (primary, ch_sim+en) with PaddleOCR fallback for Windows compatibility
- Validation: _check_minimum_content() rejects empty-shell JRXML (no band/textField)
- Retry: MAX_RETRY 3→5, exhaustion records pending_failure_context for next-turn auto-injection
- Finalize: only saves jrxml_versions on pass, preserves last good final_jrxml on fail
- Extract JRXML: improved empty markdown block handling and XML fragment fallback
- UI: real-time node progress via placeholder updates, initial "analyzing" feedback
- UI: use agent_state (full) instead of node_state (partial) for summary card routing
- UI: unknown template_type now gives LLM meaningful image context instead of metadata
- Docs: updated CLAUDE.md and CODE_GUIDE.md to reflect all v3 changes

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-19 19:15:30 +08:00
parent 70614dff5e
commit 6467fd4ae5
9 changed files with 1297 additions and 51 deletions
+33
View File
@@ -82,6 +82,35 @@ def _check_structural_issues(jrxml: str) -> list[str]:
return issues
def _check_minimum_content(jrxml: str) -> list[str]:
"""检查 JRXML 是否包含最基本的报表内容(至少要有 band 和文本元素)。"""
issues = []
try:
root = ET.fromstring(jrxml)
except ET.ParseError:
return [] # 结构性检查已捕获
# 统计各类元素
bands = 0
text_fields = 0
static_texts = 0
for elem in root.iter():
tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
if tag == "band":
bands += 1
elif tag == "textField":
text_fields += 1
elif tag == "staticText":
static_texts += 1
if bands == 0:
issues.append("报表没有任何 <band> 元素,无法渲染内容")
if text_fields == 0 and static_texts == 0:
issues.append("报表没有任何 <textField> 或 <staticText> 元素,输出将是一片空白")
return issues
def _validate_xsd(jrxml: str) -> tuple[bool, str]:
"""根据 JasperReports XSD schema 验证 JRXML。"""
if not SCHEMA_FILE.exists():
@@ -111,6 +140,10 @@ async def validate_jrxml(req: ValidationRequest):
if structural_issues:
return ValidationResponse(valid=False, error="; ".join(structural_issues))
content_issues = _check_minimum_content(jrxml)
if content_issues:
return ValidationResponse(valid=False, error="; ".join(content_issues))
valid, xsd_error = _validate_xsd(jrxml)
if not valid:
return ValidationResponse(valid=False, error=xsd_error)