diff --git a/agent/nodes.py b/agent/nodes.py index d95d44a..f9529fa 100644 --- a/agent/nodes.py +++ b/agent/nodes.py @@ -1266,9 +1266,9 @@ def _check_ocr_fidelity(jrxml: str, state: dict) -> dict: issues = [] - # 1. 元素数量对比 - text_fields = len(re.findall(r") + text_fields = len(re.findall(r"<[a-zA-Z0-9_-]+:textField| dict: element_coverage = 1.0 # 2. 字段名覆盖(英文字段名 vs OCR 中文字段名天然不匹配,权重降低) - jrxml_fields = set(re.findall(r') + raw_fields = re.findall(r'(?:<[a-zA-Z0-9_-]+:)?field\s+name="([^"]+)"', jrxml) + jrxml_fields = set(raw_fields) ocr_field_names = set() ocr_fields = ocr_result.get("fields", []) if isinstance(ocr_result, dict) else [] for f in ocr_fields: