6467fd4ae5
- OCR: EasyOCR (primary, ch_sim+en) with PaddleOCR fallback for Windows compatibility - Validation: _check_minimum_content() rejects empty-shell JRXML (no band/textField) - Retry: MAX_RETRY 3→5, exhaustion records pending_failure_context for next-turn auto-injection - Finalize: only saves jrxml_versions on pass, preserves last good final_jrxml on fail - Extract JRXML: improved empty markdown block handling and XML fragment fallback - UI: real-time node progress via placeholder updates, initial "analyzing" feedback - UI: use agent_state (full) instead of node_state (partial) for summary card routing - UI: unknown template_type now gives LLM meaningful image context instead of metadata - Docs: updated CLAUDE.md and CODE_GUIDE.md to reflect all v3 changes Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
163 lines
5.1 KiB
Python
163 lines
5.1 KiB
Python
"""JRXML 文件验证服务(FastAPI)。
|
|
|
|
使用 lxml XML Schema 验证作为 JasperReports 7.0.6 编译验证的第一阶段后备方案。
|
|
要进行完整的编译验证,需要基于 Java 的验证器以及 JasperReports 7.0.6 + JDK 21。
|
|
|
|
启动: uvicorn validation_service.main:app --port 8001
|
|
"""
|
|
|
|
import re
|
|
import xml.etree.ElementTree as ET
|
|
from pathlib import Path
|
|
|
|
from fastapi import FastAPI
|
|
from lxml import etree
|
|
from pydantic import BaseModel
|
|
|
|
# FastAPI application instance; the route decorators in this module
# (@app.post / @app.get) register their handlers on it.
app = FastAPI(title="JRXML 验证服务")
|
|
|
|
# Directory named "schemas" located next to this module.
SCHEMA_DIR = Path(__file__).parent.joinpath("schemas")
# XSD file consulted by the schema-validation step; that step and the health
# endpoint both check whether this file exists before relying on it.
SCHEMA_FILE = SCHEMA_DIR.joinpath("jasperreport_7_0_6.xsd")
|
|
|
|
|
|
class ValidationRequest(BaseModel):
    """Request body for the validation endpoint."""

    # Raw JRXML document text to validate.
    jrxml: str
|
|
|
|
|
|
class ValidationResponse(BaseModel):
    """Response body returned by the validation endpoint."""

    # True when every check passed.
    valid: bool
    # Failure description; the endpoint sends an empty string on success.
    error: str
|
|
|
|
|
|
def _check_structural_issues(jrxml: str) -> list[str]:
|
|
"""检查 JRXML 中常见的结构性问题。"""
|
|
issues = []
|
|
root = None
|
|
|
|
try:
|
|
root = ET.fromstring(jrxml)
|
|
except ET.ParseError as e:
|
|
issues.append(f"XML 解析错误:{e}")
|
|
return issues
|
|
|
|
# 同时处理带命名空间和不带命名空间的元素名
|
|
ns = "http://jasperreports.sourceforge.net/jasperreports"
|
|
|
|
declared_fields = set()
|
|
for elem in root.iter():
|
|
tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
|
|
if tag == "field":
|
|
name = elem.get("name")
|
|
if name:
|
|
declared_fields.add(name)
|
|
|
|
field_expr_pattern = re.compile(r'\$F\{(\w+)\}')
|
|
for m in field_expr_pattern.finditer(jrxml):
|
|
field_name = m.group(1)
|
|
if field_name not in declared_fields:
|
|
issues.append(
|
|
f"字段 '{field_name}' 在表达式中使用但未在 <field> 部分声明"
|
|
)
|
|
|
|
query = None
|
|
for elem in root.iter():
|
|
tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
|
|
if tag == "queryString":
|
|
query = elem
|
|
break
|
|
if query is not None:
|
|
query_text = "".join(query.itertext()).strip()
|
|
if not query_text:
|
|
issues.append("<queryString> 为空 - 请在 CDATA 中添加 SQL 查询")
|
|
elif not any(kw in query_text.upper() for kw in ["SELECT"]):
|
|
issues.append("<queryString> 似乎不包含 SQL SELECT 查询")
|
|
|
|
if not root.get("pageWidth"):
|
|
issues.append("缺少 <jasperReport> 上的 pageWidth 属性")
|
|
if not root.get("pageHeight"):
|
|
issues.append("缺少 <jasperReport> 上的 pageHeight 属性")
|
|
if not root.get("name"):
|
|
issues.append("缺少 <jasperReport> 上的 'name' 属性")
|
|
|
|
return issues
|
|
|
|
|
|
def _check_minimum_content(jrxml: str) -> list[str]:
|
|
"""检查 JRXML 是否包含最基本的报表内容(至少要有 band 和文本元素)。"""
|
|
issues = []
|
|
try:
|
|
root = ET.fromstring(jrxml)
|
|
except ET.ParseError:
|
|
return [] # 结构性检查已捕获
|
|
|
|
# 统计各类元素
|
|
bands = 0
|
|
text_fields = 0
|
|
static_texts = 0
|
|
for elem in root.iter():
|
|
tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
|
|
if tag == "band":
|
|
bands += 1
|
|
elif tag == "textField":
|
|
text_fields += 1
|
|
elif tag == "staticText":
|
|
static_texts += 1
|
|
|
|
if bands == 0:
|
|
issues.append("报表没有任何 <band> 元素,无法渲染内容")
|
|
if text_fields == 0 and static_texts == 0:
|
|
issues.append("报表没有任何 <textField> 或 <staticText> 元素,输出将是一片空白")
|
|
|
|
return issues
|
|
|
|
|
|
def _validate_xsd(jrxml: str) -> tuple[bool, str]:
    """Validate a JRXML document against the JasperReports XSD schema.

    When the schema file does not exist the check is skipped and the
    document is reported as valid.

    Args:
        jrxml: The JRXML document as text.

    Returns:
        ``(True, "")`` when the document validates or the schema file is
        missing; otherwise ``(False, message)`` describing the failure.
    """
    if not SCHEMA_FILE.exists():
        return True, ""

    try:
        schema_doc = etree.parse(str(SCHEMA_FILE))
        xmlschema = etree.XMLSchema(schema_doc)
        # The document comes from a request body, so parse it with entity
        # expansion and network access disabled (guards against XXE-style
        # payloads leaking file contents through error messages).
        parser = etree.XMLParser(resolve_entities=False, no_network=True)
        # lxml rejects str input that carries an encoding declaration, so the
        # text is handed over as bytes.
        doc = etree.fromstring(jrxml.encode("utf-8"), parser=parser)
        xmlschema.assertValid(doc)
        return True, ""
    except etree.DocumentInvalid as e:
        return False, str(e)
    except etree.XMLSchemaError as e:
        return False, f"Schema 错误:{e}"
    except Exception as e:
        # Last-resort boundary: any other failure (e.g. malformed XML) is
        # reported to the caller as a validation error instead of propagating.
        return False, f"XML 验证错误:{e}"
|
|
|
|
|
|
@app.post("/validate", response_model=ValidationResponse)
async def validate_jrxml(req: ValidationRequest):
    """Validate the JRXML text carried by the request.

    Runs the structural check, then the minimum-content check, then XSD
    validation, and stops at the first stage that reports a problem.

    Args:
        req: Request body holding the JRXML text.

    Returns:
        A ``ValidationResponse`` with ``valid=True`` and an empty error when
        every stage passes; otherwise ``valid=False`` and the failing stage's
        message(s), joined with ``"; "`` for the list-based checks.
    """
    content = req.jrxml.strip()
    if content == "":
        return ValidationResponse(valid=False, error="JRXML 内容为空")

    # Both pre-checks return a list of messages; the first non-empty list wins.
    for check in (_check_structural_issues, _check_minimum_content):
        problems = check(content)
        if problems:
            return ValidationResponse(valid=False, error="; ".join(problems))

    ok, message = _validate_xsd(content)
    if ok:
        return ValidationResponse(valid=True, error="")
    return ValidationResponse(valid=False, error=message)
|
|
|
|
|
|
@app.get("/health")
async def health():
    """Report service status and which validation mode is available.

    Returns:
        A dict with a fixed ``"ok"`` status, whether the XSD schema file
        exists, the resulting validation type, and a fixed note about full
        compile validation.
    """
    has_schema = SCHEMA_FILE.exists()
    if has_schema:
        mode = "XSD"
    else:
        mode = "仅结构检查"
    return dict(
        status="ok",
        schema_available=has_schema,
        validation_type=mode,
        note="如需完整的 JasperReports 7.0.6 编译验证,请使用基于 Java 的验证器",
    )
|