"""Tests for pixel-level JRXML-to-image comparison pipeline.""" import os import sys import tempfile import pytest import numpy as np sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from dotenv import load_dotenv load_dotenv() import cv2 from agent.nodes import _render_jrxml_to_png, _compute_pixel_similarity, _check_ocr_fidelity # ── Test JRXML fixture ───────────────────────────────────────────────── VALID_JRXML = """ <band height="50"> <staticText> <reportElement x="0" y="0" width="555" height="30"/> <text><![CDATA[HELLO]]></text> </staticText> </band> """ INVALID_JRXML = "" # ── _render_jrxml_to_png ──────────────────────────────────────────────── class TestRenderJrxmlToPng: def test_valid_jrxml_renders_successfully(self, tmp_path): output_png = str(tmp_path / "valid_output.png") result = _render_jrxml_to_png(VALID_JRXML, output_png, scale=1.0) assert result, "Valid JRXML should render successfully" assert os.path.exists(output_png), f"Output PNG should exist at {output_png}" assert os.path.getsize(output_png) > 1000, "Output PNG should be non-trivial" def test_invalid_jrxml_returns_false(self, tmp_path): output_png = str(tmp_path / "invalid_output.png") result = _render_jrxml_to_png(INVALID_JRXML, output_png) assert not result, "Invalid JRXML should return False" def test_render_output_is_readable_image(self, tmp_path): output_png = str(tmp_path / "readable.png") _render_jrxml_to_png(VALID_JRXML, output_png, scale=1.0) img = cv2.imread(output_png) assert img is not None, "Rendered PNG should be readable by OpenCV" assert img.shape[0] > 0 and img.shape[1] > 0, "Image should have non-zero dimensions" assert img.shape[2] == 3, "Should be a 3-channel (BGR) image" def test_high_scale_produces_larger_image(self, tmp_path): png1 = str(tmp_path / "scale1.png") png2 = str(tmp_path / "scale2.png") _render_jrxml_to_png(VALID_JRXML, png1, scale=1.0) _render_jrxml_to_png(VALID_JRXML, png2, scale=2.0) img1 = cv2.imread(png1) img2 = cv2.imread(png2) assert img2.shape[0] >= img1.shape[0], "Higher scale should produce >= height" assert img2.shape[1] >= img1.shape[1], "Higher scale should produce >= width" # ── _compute_pixel_similarity ─────────────────────────────────────────── class TestComputePixelSimilarity: def test_identical_images_have_high_ssim(self, tmp_path): img_path = str(tmp_path / "test.png") white = np.full((100, 200, 3), 255, dtype=np.uint8) cv2.imwrite(img_path, white) result = _compute_pixel_similarity(img_path, img_path) assert result["error"] is None, f"Should have no error: {result['error']}" assert result["ssim"] == 1.0, f"SSIM of identical images should be 1.0, got {result['ssim']}" assert result["diff_pct"] == 0.0, f"Diff% of identical images should be 0, got {result['diff_pct']}" def test_completely_different_images_have_low_ssim(self, tmp_path): white_path = str(tmp_path / "white.png") black_path = str(tmp_path / "black.png") cv2.imwrite(white_path, np.full((100, 200, 3), 255, dtype=np.uint8)) cv2.imwrite(black_path, np.full((100, 200, 3), 0, dtype=np.uint8)) result = _compute_pixel_similarity(white_path, black_path) assert result["error"] is None, f"Should have no error: {result['error']}" assert result["ssim"] < 0.3, f"Different images should have low SSIM, got {result['ssim']}" assert result["diff_pct"] > 0.9, f"Different images should have high diff%, got {result['diff_pct']}" def test_different_size_images_are_resized(self, tmp_path): img1_path = str(tmp_path / "img1.png") img2_path = str(tmp_path / "img2.png") cv2.imwrite(img1_path, np.full((50, 100, 3), 128, dtype=np.uint8)) cv2.imwrite(img2_path, np.full((100, 200, 3), 128, dtype=np.uint8)) result = _compute_pixel_similarity(img1_path, img2_path) assert result["error"] is None, f"Resize should work: {result['error']}" def test_missing_file_returns_error(self, tmp_path): result = _compute_pixel_similarity( str(tmp_path / "does_not_exist.png"), str(tmp_path / "also_missing.png"), ) assert result["error"] is not None, "Missing files should set error" assert result["ssim"] == 0.0 def test_non_image_file_returns_error(self, tmp_path): text_path = str(tmp_path / "text.txt") with open(text_path, "w") as f: f.write("not an image") png_path = str(tmp_path / "ref.png") cv2.imwrite(png_path, np.full((100, 100, 3), 255, dtype=np.uint8)) result = _compute_pixel_similarity(text_path, png_path) assert result["error"] is not None, "Non-image should set error" # ── _check_ocr_fidelity ───────────────────────────────────────────────── class TestCheckOcrFidelity: def test_no_ocr_data_returns_full_score(self): state = {} result = _check_ocr_fidelity(VALID_JRXML, state) assert result["score"] == 1.0 assert result["issues"] == [] def test_missing_ocr_fields_flagged(self): state = { "ocr_extraction_result": { "fields": [ {"name": "invoice_code"}, {"name": "invoice_number"}, {"name": "amount"}, ] } } result = _check_ocr_fidelity(VALID_JRXML, state) assert result["field_coverage"] < 1.0, "Missing fields should reduce coverage" assert len(result["issues"]) > 0, "Should have issues about missing fields" def test_matched_fields_increase_coverage(self): state = { "ocr_extraction_result": { "fields": [ {"name": "invoice_code"}, {"name": "invoice_number"}, ] } } jrxml_with_fields = """ """ result = _check_ocr_fidelity(jrxml_with_fields, state) assert result["field_coverage"] == 1.0, f"All fields matched, got {result['field_coverage']}" assert len(result["issues"]) == 0, f"No issues expected, got {result['issues']}" def test_element_count_mismatch_flagged(self): state = { "ocr_elements": [ {"text": "a"}, {"text": "b"}, {"text": "c"}, {"text": "d"}, {"text": "e"}, ] } result = _check_ocr_fidelity(VALID_JRXML, state) assert result["element_coverage"] < 1.0, \ "Fewer JRXML elements than OCR should reduce coverage" # ── validate node integration ─────────────────────────────────────────── class TestValidatePixelIntegration: def test_validate_skips_pixel_when_no_image(self): from agent.nodes import validate state = { "current_jrxml": VALID_JRXML, "uploaded_file_path": None, "ocr_elements": [], "layout_schema": {}, "conversation_history": [], } result = validate(state) assert result.get("pixel_fidelity") is None, \ "Should not set pixel_fidelity without uploaded_file_path" assert result["status"] == "pass", "Valid JRXML should pass XSD" def test_validate_skips_pixel_when_xsd_fails(self): from agent.nodes import validate state = { "current_jrxml": INVALID_JRXML, "uploaded_file_path": os.path.join( os.path.dirname(os.path.dirname(__file__)), "tmp", "test_output.png" ), "ocr_elements": [], "layout_schema": {}, "conversation_history": [], } result = validate(state) assert result["status"] == "fail", "Invalid JRXML should fail XSD"