diff --git a/ReportEngine/renderers/html_renderer.py b/ReportEngine/renderers/html_renderer.py index 0dbce63..b5db79d 100644 --- a/ReportEngine/renderers/html_renderer.py +++ b/ReportEngine/renderers/html_renderer.py @@ -11,6 +11,15 @@ import json import os from pathlib import Path from typing import Any, Dict, List +from loguru import logger + +from ReportEngine.utils.chart_validator import ( + ChartValidator, + ChartRepairer, + create_chart_validator, + create_chart_repairer +) +from ReportEngine.utils.chart_repair_api import create_llm_repair_functions class HTMLRenderer: @@ -65,6 +74,23 @@ class HTMLRenderer: self.hero_kpi_signature: tuple | None = None self._lib_cache: Dict[str, str] = {} + # 初始化图表验证和修复器 + self.chart_validator = create_chart_validator() + llm_repair_fns = create_llm_repair_functions() + self.chart_repairer = create_chart_repairer( + validator=self.chart_validator, + llm_repair_fns=llm_repair_fns + ) + + # 统计信息 + self.chart_validation_stats = { + 'total': 0, + 'valid': 0, + 'repaired_locally': 0, + 'repaired_api': 0, + 'failed': 0 + } + @staticmethod def _get_lib_path() -> Path: """获取第三方库文件的目录路径""" @@ -124,6 +150,15 @@ class HTMLRenderer: self.heading_label_map = self._compute_heading_labels(self.chapters) self.toc_entries = self._collect_toc_entries(self.chapters) + # 重置图表验证统计 + self.chart_validation_stats = { + 'total': 0, + 'valid': 0, + 'repaired_locally': 0, + 'repaired_api': 0, + 'failed': 0 + } + metadata = self.metadata theme_tokens = metadata.get("themeTokens") or self.document.get("themeTokens", {}) title = metadata.get("title") or metadata.get("query") or "智能舆情报告" @@ -132,6 +167,10 @@ class HTMLRenderer: head = self._render_head(title, theme_tokens) body = self._render_body() + + # 输出图表验证统计 + self._log_chart_validation_stats() + return f"\n\n{head}\n{body}\n" # ====== 头部 / 正文 ====== @@ -1150,12 +1189,66 @@ class HTMLRenderer: """ 渲染Chart.js等交互组件的占位容器,并记录配置JSON。 + 在渲染前进行图表验证和修复: + 1. 验证图表数据格式 + 2. 如果无效,尝试本地修复 + 3. 
如果本地修复失败,尝试API修复 + 4. 如果所有修复都失败,使用原始数据(前端会降级处理) + 参数: block: widget类型的block,包含widgetId/props/data。 返回: str: 含canvas与配置脚本的HTML。 """ + # 统计 + widget_type = block.get('widgetType', '') + is_chart = isinstance(widget_type, str) and widget_type.startswith('chart.js') + + if is_chart: + self.chart_validation_stats['total'] += 1 + + # 验证图表数据 + validation_result = self.chart_validator.validate(block) + + if not validation_result.is_valid: + logger.warning( + f"图表 {block.get('widgetId', 'unknown')} 验证失败: {validation_result.errors}" + ) + + # 尝试修复 + repair_result = self.chart_repairer.repair(block, validation_result) + + if repair_result.success and repair_result.repaired_block: + # 修复成功,使用修复后的数据 + block = repair_result.repaired_block + logger.info( + f"图表 {block.get('widgetId', 'unknown')} 修复成功 " + f"(方法: {repair_result.method}): {repair_result.changes}" + ) + + # 更新统计 + if repair_result.method == 'local': + self.chart_validation_stats['repaired_locally'] += 1 + elif repair_result.method == 'api': + self.chart_validation_stats['repaired_api'] += 1 + else: + # 修复失败,使用原始数据,前端会尝试降级渲染 + logger.warning( + f"图表 {block.get('widgetId', 'unknown')} 修复失败," + f"将使用原始数据(前端会尝试降级渲染或显示fallback)" + ) + self.chart_validation_stats['failed'] += 1 + else: + # 验证通过 + self.chart_validation_stats['valid'] += 1 + if validation_result.warnings: + logger.info( + f"图表 {block.get('widgetId', 'unknown')} 验证通过," + f"但有警告: {validation_result.warnings}" + ) + + # 渲染图表HTML self.chart_counter += 1 canvas_id = f"chart-{self.chart_counter}" config_id = f"chart-config-{self.chart_counter}" @@ -1220,6 +1313,39 @@ class HTMLRenderer: """ return table_html + def _log_chart_validation_stats(self): + """输出图表验证统计信息""" + stats = self.chart_validation_stats + if stats['total'] == 0: + return + + logger.info("=" * 60) + logger.info("图表验证统计") + logger.info("=" * 60) + logger.info(f"总图表数量: {stats['total']}") + logger.info(f" ✓ 验证通过: {stats['valid']} ({stats['valid']/stats['total']*100:.1f}%)") + + if 
stats['repaired_locally'] > 0: + logger.info( + f" ⚠ 本地修复: {stats['repaired_locally']} " + f"({stats['repaired_locally']/stats['total']*100:.1f}%)" + ) + + if stats['repaired_api'] > 0: + logger.info( + f" ⚠ API修复: {stats['repaired_api']} " + f"({stats['repaired_api']/stats['total']*100:.1f}%)" + ) + + if stats['failed'] > 0: + logger.warning( + f" ✗ 修复失败: {stats['failed']} " + f"({stats['failed']/stats['total']*100:.1f}%) - " + f"这些图表将使用降级渲染或显示fallback表格" + ) + + logger.info("=" * 60) + # ====== 前置信息防护 ====== def _kpi_signature_from_items(self, items: Any) -> tuple | None: @@ -2317,6 +2443,80 @@ function buildChartOptions(payload) { return mergeOptions(baseOptions, overrideOptions); } +function validateChartData(payload, type) { + /** + * 前端验证图表数据 + * 返回: { valid: boolean, errors: string[] } + */ + const errors = []; + + if (!payload || typeof payload !== 'object') { + errors.push('无效的payload'); + return { valid: false, errors }; + } + + const data = payload.data; + if (!data || typeof data !== 'object') { + errors.push('缺少data字段'); + return { valid: false, errors }; + } + + // 特殊图表类型(scatter, bubble) + const specialTypes = { 'scatter': true, 'bubble': true }; + if (specialTypes[type]) { + // 这些类型需要特殊的数据格式 {x, y} 或 {x, y, r} + // 跳过标准验证 + return { valid: true, errors }; + } + + // 标准图表类型验证 + const datasets = data.datasets; + if (!Array.isArray(datasets)) { + errors.push('datasets必须是数组'); + return { valid: false, errors }; + } + + if (datasets.length === 0) { + errors.push('datasets数组为空'); + return { valid: false, errors }; + } + + // 验证每个dataset + for (let i = 0; i < datasets.length; i++) { + const dataset = datasets[i]; + if (!dataset || typeof dataset !== 'object') { + errors.push(`datasets[${i}]不是对象`); + continue; + } + + if (!Array.isArray(dataset.data)) { + errors.push(`datasets[${i}].data不是数组`); + } else if (dataset.data.length === 0) { + errors.push(`datasets[${i}].data为空`); + } + } + + // 需要labels的图表类型 + const labelRequiredTypes = { + 'line': true, 'bar': 
true, 'radar': true, + 'polarArea': true, 'pie': true, 'doughnut': true + }; + + if (labelRequiredTypes[type]) { + const labels = data.labels; + if (!Array.isArray(labels)) { + errors.push('缺少labels数组'); + } else if (labels.length === 0) { + errors.push('labels数组为空'); + } + } + + return { + valid: errors.length === 0, + errors + }; +} + function instantiateChart(ctx, payload, optionsTemplate, type) { if (!ctx) { return null; @@ -2358,9 +2558,17 @@ function hydrateCharts() { renderChartFallback(canvas, payload, 'Canvas 初始化失败'); return; } + + // 前端数据验证 + const desiredType = chartTypes[0]; + const validation = validateChartData(payload, desiredType); + if (!validation.valid) { + console.warn('图表数据验证失败:', validation.errors); + // 验证失败但仍然尝试渲染,因为可能会降级成功 + } + const card = canvas.closest('.chart-card') || canvas.parentElement; const optionsTemplate = buildChartOptions(payload); - const desiredType = chartTypes[0]; let chartInstance = null; let selectedType = null; let lastError; diff --git a/ReportEngine/utils/chart_repair_api.py b/ReportEngine/utils/chart_repair_api.py new file mode 100644 index 0000000..861914d --- /dev/null +++ b/ReportEngine/utils/chart_repair_api.py @@ -0,0 +1,283 @@ +""" +图表API修复模块。 + +提供调用4个Engine(ReportEngine, ForumEngine, InsightEngine, MediaEngine)的LLM API +来修复图表数据的功能。 +""" + +from __future__ import annotations + +import json +from typing import Any, Dict, List, Optional +from loguru import logger + +from ReportEngine.utils.config import settings + + +# 图表修复提示词 +CHART_REPAIR_SYSTEM_PROMPT = """你是一个专业的图表数据修复助手。你的任务是修复Chart.js图表数据中的格式错误,确保图表能够正常渲染。 + +**Chart.js标准数据格式:** + +1. 
def build_chart_repair_prompt(
    widget_block: Dict[str, Any],
    validation_errors: List[str]
) -> str:
    """
    Build the user prompt asking an LLM to repair a chart widget block.

    Args:
        widget_block: The original widget block; embedded as pretty-printed
            JSON (non-ASCII characters are kept readable).
        validation_errors: Validation error messages, rendered one bullet
            per message.

    Returns:
        str: The prompt text.
    """
    block_json = json.dumps(widget_block, ensure_ascii=False, indent=2)
    errors_text = "\n".join(f"- {error}" for error in validation_errors)

    prompt = f"""请修复以下图表数据中的错误:

**原始数据:**
```json
{block_json}
```

**检测到的错误:**
{errors_text}

**要求:**
1. 返回修复后的完整widget block(JSON格式)
2. 只修复明确的错误,保持其他数据不变
3. 确保修复后的数据符合Chart.js格式要求
4. 如果无法确定如何修复,保持原始数据

**重要的输出格式要求:**
1. 只返回纯JSON对象,不要添加任何说明文字
2. 不要使用```json```标记包裹
3. 确保JSON语法完全正确
4. 所有字符串使用双引号
"""
    return prompt


# (log label, settings attribute prefix, closure name) for every engine,
# listed in the order the repair functions are tried.
_REPAIR_ENGINES = (
    ("ReportEngine", "REPORT_ENGINE", "repair_with_report_engine"),
    ("ForumEngine", "FORUM_HOST", "repair_with_forum_engine"),
    ("InsightEngine", "INSIGHT_ENGINE", "repair_with_insight_engine"),
    ("MediaEngine", "MEDIA_ENGINE", "repair_with_media_engine"),
)


def _parse_repair_response(response: Any) -> Optional[Dict[str, Any]]:
    """
    Turn a raw LLM reply into a widget block dict.

    The prompt asks for bare JSON, but models frequently wrap the answer in
    a Markdown code fence anyway; the fence is removed before parsing.

    Args:
        response: Raw reply returned by the LLM client.

    Returns:
        The parsed dict, or None when the reply is empty or the JSON is not
        an object.

    Raises:
        ValueError: The reply is not valid JSON (json.JSONDecodeError).
    """
    if not response:
        return None

    text = response.strip() if isinstance(response, str) else response
    if isinstance(text, str) and text.startswith("```"):
        lines = text.splitlines()[1:]  # drop the opening ``` / ```json line
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        text = "\n".join(lines)

    parsed = json.loads(text)
    return parsed if isinstance(parsed, dict) else None


def _make_engine_repair_fn(label: str, prefix: str, fn_name: str):
    """
    Build the repair callable for one engine.

    Args:
        label: Engine name used in the error log message.
        prefix: Prefix of the `settings` attributes holding the engine's
            API key, base URL and model name.
        fn_name: Value assigned to the callable's `__name__`.

    Returns:
        A callable `(widget_block, errors) -> Optional[dict]`.
    """
    def repair(widget_block: Dict[str, Any], errors: List[str]) -> Optional[Dict[str, Any]]:
        try:
            from llm_client import LLMClient

            # Settings are read at call time, not when the closure is built.
            client = LLMClient(
                api_key=getattr(settings, f"{prefix}_API_KEY"),
                base_url=getattr(settings, f"{prefix}_BASE_URL"),
                model_name=getattr(settings, f"{prefix}_MODEL_NAME") or "gpt-4",
                provider="openai"
            )

            prompt = build_chart_repair_prompt(widget_block, errors)
            response = client.invoke(
                CHART_REPAIR_SYSTEM_PROMPT,
                prompt,
                temperature=0.0,
                top_p=0.05
            )
            return _parse_repair_response(response)

        except Exception as e:
            # Deliberate best-effort boundary: a failing engine must not
            # abort rendering, the caller simply tries the next engine.
            logger.error(f"{label}图表修复失败: {e}")
            return None

    repair.__name__ = fn_name
    repair.__doc__ = f"Repair a chart widget block with the {label} LLM."
    return repair


def create_llm_repair_functions() -> List:
    """
    Create the list of LLM-backed chart repair functions.

    One function is created per engine whose API key and base URL are both
    set in `settings`, in this order: ReportEngine, ForumEngine (ForumHost
    settings), InsightEngine, MediaEngine. A warning is logged when no
    engine is configured.

    Returns:
        List[Callable]: Callables `(widget_block, errors) -> Optional[dict]`
        that return the repaired block, or None on any failure.
    """
    repair_functions = []

    for label, prefix, fn_name in _REPAIR_ENGINES:
        if getattr(settings, f"{prefix}_API_KEY") and getattr(settings, f"{prefix}_BASE_URL"):
            repair_functions.append(_make_engine_repair_fn(label, prefix, fn_name))

    if not repair_functions:
        logger.warning("未配置任何Engine API,图表API修复功能将不可用")

    return repair_functions
@dataclass
class ValidationResult:
    """Outcome of validating one widget block."""
    # True when no error was recorded.
    is_valid: bool
    # Problems that make the block invalid.
    errors: List[str]
    # Non-fatal findings; they never affect is_valid.
    warnings: List[str]

    def has_critical_errors(self) -> bool:
        """Return True when the block is invalid and at least one error is recorded."""
        return not self.is_valid and len(self.errors) > 0


@dataclass
class RepairResult:
    """Outcome of one repair attempt."""
    # True when the attempt produced a usable block.
    success: bool
    # The block to use afterwards (may be None when an API repair failed).
    repaired_block: Optional[Dict[str, Any]]
    # 'none', 'local' or 'api'.
    method: str
    # Human-readable list of modifications that were applied.
    changes: List[str]

    def has_changes(self) -> bool:
        """Return True when at least one modification was recorded."""
        return len(self.changes) > 0


class ChartValidator:
    """
    Validate that a widget block carries well-formed Chart.js data.

    Checks performed:
    1. Basic structure: widgetType and data fields.
    2. Chart type: can it be determined, and is it a known type.
    3. Data format: labels and datasets structure.
    4. Consistency: dataset length against labels length (warning only).
    5. Value types: data values are numbers (or null).
    """

    # Known chart types, spelled the way this module expects them.
    SUPPORTED_CHART_TYPES = {
        'line', 'bar', 'pie', 'doughnut', 'radar', 'polarArea', 'scatter',
        'bubble', 'horizontalBar'
    }

    # Chart types that must come with a labels array.
    LABEL_REQUIRED_TYPES = {
        'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
    }

    # Chart types whose dataset values must be plain numbers.
    NUMERIC_DATA_TYPES = {
        'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
    }

    # Chart types whose data points are objects, with the keys each needs.
    SPECIAL_DATA_TYPES = {
        'scatter': {'x', 'y'},
        'bubble': {'x', 'y', 'r'}
    }

    # Case-insensitive lookup back to the canonical spelling. Without it a
    # lower-cased 'polararea' never matches the case-sensitive sets above.
    _CANONICAL_TYPES = {name.lower(): name for name in SUPPORTED_CHART_TYPES}

    def __init__(self):
        """The validator keeps no state."""
        pass

    def validate(self, widget_block: Dict[str, Any]) -> ValidationResult:
        """
        Validate a widget block.

        Widgets whose widgetType does not start with 'chart.js' are not
        charts and are reported as valid without further checks.

        Args:
            widget_block: Widget block with widgetId/widgetType/props/data.

        Returns:
            ValidationResult: Errors and warnings found.
        """
        errors: List[str] = []
        warnings: List[str] = []

        # 1. Basic structure
        if not isinstance(widget_block, dict):
            errors.append("widget_block必须是字典类型")
            return ValidationResult(False, errors, warnings)

        # 2. widgetType
        widget_type = widget_block.get('widgetType', '')
        if not widget_type or not isinstance(widget_type, str):
            errors.append("缺少widgetType字段或类型不正确")
            return ValidationResult(False, errors, warnings)

        if not widget_type.startswith('chart.js'):
            return ValidationResult(True, errors, warnings)

        # 3. Chart type
        chart_type = self._extract_chart_type(widget_block)
        if not chart_type:
            errors.append("无法确定图表类型")
            return ValidationResult(False, errors, warnings)

        # 4. Unknown types only produce a warning
        if chart_type not in self.SUPPORTED_CHART_TYPES:
            warnings.append(f"图表类型 '{chart_type}' 可能不被支持,将尝试降级渲染")

        # 5. Data container
        data = widget_block.get('data')
        if not isinstance(data, dict):
            errors.append("data字段必须是字典类型")
            return ValidationResult(False, errors, warnings)

        # 6. Type-specific data checks
        if chart_type in self.SPECIAL_DATA_TYPES:
            self._validate_special_data(data, chart_type, errors, warnings)
        else:
            self._validate_standard_data(data, chart_type, errors, warnings)

        # 7. props is optional but must be a dict when present
        props = widget_block.get('props')
        if props is not None and not isinstance(props, dict):
            warnings.append("props字段应该是字典类型")

        return ValidationResult(len(errors) == 0, errors, warnings)

    def _normalize_chart_type(self, raw: str) -> str:
        """Map a type name to its canonical spelling; unknown names are lower-cased."""
        key = raw.strip().lower()
        return self._CANONICAL_TYPES.get(key, key)

    def _extract_chart_type(self, widget_block: Dict[str, Any]) -> Optional[str]:
        """
        Determine the chart type of a widget block.

        Sources, in priority order:
        1. props.type
        2. the suffix of widgetType ('chart.js/bar' -> 'bar')
        3. data.type

        Returns:
            The canonical type name (e.g. 'polarArea'), the lower-cased name
            for unknown types, or None when no source provides one.
        """
        candidates: List[Any] = []

        props = widget_block.get('props') or {}
        if isinstance(props, dict):
            candidates.append(props.get('type'))

        # widgetType may be missing or not a string when called from the repairer.
        widget_type = widget_block.get('widgetType', '')
        if isinstance(widget_type, str) and '/' in widget_type:
            candidates.append(widget_type.split('/')[-1])

        data = widget_block.get('data') or {}
        if isinstance(data, dict):
            candidates.append(data.get('type'))

        for candidate in candidates:
            if isinstance(candidate, str) and candidate.strip():
                return self._normalize_chart_type(candidate)
        return None

    @staticmethod
    def _check_datasets_container(datasets: Any, errors: List[str]) -> bool:
        """Record an error and return False unless datasets is a non-empty list."""
        if datasets is None:
            errors.append("缺少datasets字段")
            return False
        if not isinstance(datasets, list):
            errors.append("datasets必须是数组类型")
            return False
        if not datasets:
            errors.append("datasets数组为空")
            return False
        return True

    @staticmethod
    def _dataset_values(
        idx: int,
        dataset: Any,
        errors: List[str],
        warnings: List[str]
    ) -> Optional[List[Any]]:
        """Return a dataset's non-empty data list, or None after recording why not."""
        if not isinstance(dataset, dict):
            errors.append(f"datasets[{idx}]必须是对象类型")
            return None
        values = dataset.get('data')
        if values is None:
            errors.append(f"datasets[{idx}]缺少data字段")
            return None
        if not isinstance(values, list):
            errors.append(f"datasets[{idx}].data必须是数组类型")
            return None
        if not values:
            warnings.append(f"datasets[{idx}].data数组为空")
            return None
        return values

    def _validate_standard_data(
        self,
        data: Dict[str, Any],
        chart_type: str,
        errors: List[str],
        warnings: List[str]
    ):
        """Validate the standard format (labels + datasets); findings are appended in place."""
        labels = data.get('labels')
        datasets = data.get('datasets')

        if chart_type in self.LABEL_REQUIRED_TYPES:
            # Missing, wrong type and empty are told apart explicitly. A
            # plain truthiness test would report all three as "missing".
            if labels is None:
                errors.append(f"{chart_type}类型图表必须包含labels字段")
            elif not isinstance(labels, list):
                errors.append("labels必须是数组类型")
            elif not labels:
                errors.append("labels数组为空,图表可能无法正常显示")

        if not self._check_datasets_container(datasets, errors):
            return

        for idx, dataset in enumerate(datasets):
            values = self._dataset_values(idx, dataset, errors, warnings)
            if values is None:
                continue

            # A length mismatch is only a warning.
            if labels and isinstance(labels, list) and len(values) != len(labels):
                warnings.append(
                    f"datasets[{idx}].data长度({len(values)})与labels长度({len(labels)})不匹配"
                )

            if chart_type in self.NUMERIC_DATA_TYPES:
                for data_idx, value in enumerate(values):
                    if value is not None and not isinstance(value, (int, float)):
                        errors.append(
                            f"datasets[{idx}].data[{data_idx}]的值'{value}'不是有效的数值类型"
                        )
                        break  # report only the first bad value per dataset

    @staticmethod
    def _point_problem(point: Any, required_keys: Any) -> Optional[str]:
        """Return the message suffix describing what is wrong with a data point, or None."""
        # Sorted so that messages are identical from run to run.
        ordered = sorted(required_keys)
        if not isinstance(point, dict):
            return f"必须是对象类型(包含{ordered}字段)"
        missing = [key for key in ordered if key not in point]
        if missing:
            return f"缺少必需字段: {missing}"
        for key in ordered:
            value = point[key]
            if value is not None and not isinstance(value, (int, float)):
                return f".{key}的值'{value}'不是有效的数值类型"
        return None

    def _validate_special_data(
        self,
        data: Dict[str, Any],
        chart_type: str,
        errors: List[str],
        warnings: List[str]
    ):
        """Validate the point-object format (scatter, bubble); findings are appended in place."""
        datasets = data.get('datasets')
        if not self._check_datasets_container(datasets, errors):
            return

        required_keys = self.SPECIAL_DATA_TYPES.get(chart_type, set())

        for idx, dataset in enumerate(datasets):
            values = self._dataset_values(idx, dataset, errors, warnings)
            if values is None:
                continue

            for data_idx, point in enumerate(values):
                problem = self._point_problem(point, required_keys)
                if problem:
                    errors.append(f"datasets[{idx}].data[{data_idx}]{problem}")
                    break  # report only the first bad point per dataset

    def can_render(self, widget_block: Dict[str, Any]) -> bool:
        """
        Quick check whether the block passes validation.

        Args:
            widget_block: Widget block to check.

        Returns:
            bool: True when validate() reports no errors.
        """
        return self.validate(widget_block).is_valid


class ChartRepairer:
    """
    Try to repair chart data.

    Strategy:
    1. Local rules for common problems.
    2. LLM-backed repair functions for the rest.
    3. Every candidate is re-validated before it is accepted.
    """

    def __init__(
        self,
        validator: ChartValidator,
        llm_repair_fns: Optional[List[Callable]] = None
    ):
        """
        Args:
            validator: Validator used to judge repaired blocks.
            llm_repair_fns: Callables `(widget_block, errors) -> dict | None`,
                tried in order by repair_with_api.
        """
        self.validator = validator
        self.llm_repair_fns = llm_repair_fns or []

    def repair(
        self,
        widget_block: Dict[str, Any],
        validation_result: Optional[ValidationResult] = None
    ) -> RepairResult:
        """
        Try to repair a widget block, locally first and then via the API.

        Args:
            widget_block: Widget block to repair; it is never mutated.
            validation_result: Result of a previous validation; computed
                here when omitted.

        Returns:
            RepairResult: On failure the original block is returned with
            method 'none' and success False.
        """
        # 1. Validate when the caller did not
        if validation_result is None:
            validation_result = self.validator.validate(widget_block)

        # 2. Local repair is attempted even for valid blocks (warnings)
        logger.info("尝试本地修复图表")
        local_result = self.repair_locally(widget_block, validation_result)

        # 3. Accept the local repair only if it validates
        if local_result.has_changes():
            repaired_validation = self.validator.validate(local_result.repaired_block)
            if repaired_validation.is_valid:
                logger.info(f"本地修复成功: {local_result.changes}")
                return RepairResult(True, local_result.repaired_block, 'local', local_result.changes)
            logger.warning(f"本地修复后仍然无效: {repaired_validation.errors}")

        # 4. Fall back to the API for blocks with real errors
        if validation_result.has_critical_errors() and len(self.llm_repair_fns) > 0:
            logger.info("本地修复失败,尝试API修复")
            api_result = self.repair_with_api(widget_block, validation_result)

            if api_result.success:
                repaired_validation = self.validator.validate(api_result.repaired_block)
                if repaired_validation.is_valid:
                    logger.info(f"API修复成功: {api_result.changes}")
                    return api_result
                logger.warning(f"API修复后仍然无效: {repaired_validation.errors}")

        # 5. The input was valid to begin with. Any local changes reaching
        #    this point failed re-validation in step 3, so the untouched
        #    original is returned rather than the rejected copy.
        if validation_result.is_valid:
            return RepairResult(True, widget_block, 'none', [])

        # 6. Nothing worked: keep the original data
        logger.warning("所有修复尝试失败,保持原始数据")
        return RepairResult(False, widget_block, 'none', [])

    def repair_locally(
        self,
        widget_block: Dict[str, Any],
        validation_result: ValidationResult
    ) -> RepairResult:
        """
        Repair a block with local rules, working on a deep copy.

        Rules:
        1. Add missing props/data containers.
        2. Make sure props.type is set (default 'bar').
        3. Build datasets from data.values / data.series, or a zero-filled
           dataset from labels; generate default labels when required.
        4. Per dataset: add data/label, align length with labels, convert
           numeric strings.

        Args:
            widget_block: Widget block to repair.
            validation_result: Validation result for the block (not read
                by the local rules).

        Returns:
            RepairResult: method 'local'; success is True when at least one
            change was made. The result is NOT re-validated here.
        """
        import math  # only needed for the finite-number check below

        if not isinstance(widget_block, dict):
            # Nothing can be repaired on a non-dict block.
            return RepairResult(False, widget_block, 'local', [])

        repaired = copy.deepcopy(widget_block)
        changes: List[str] = []

        # 1. Containers
        if not isinstance(repaired.get('props'), dict):
            repaired['props'] = {}
            changes.append("添加缺失的props字段")

        if not isinstance(repaired.get('data'), dict):
            repaired['data'] = {}
            changes.append("添加缺失的data字段")

        # 2. Chart type. _extract_chart_type already looks at widgetType, so
        #    a missing result means no source provided a usable name.
        props = repaired['props']
        chart_type = self.validator._extract_chart_type(repaired)

        if not chart_type:
            chart_type = 'bar'
            props['type'] = chart_type
            changes.append("设置默认图表类型: bar")
        elif not props.get('type'):
            props['type'] = chart_type
            changes.append(f"将推断的图表类型添加到props: {chart_type}")

        # 3. Data structure
        data = repaired['data']

        if not isinstance(data.get('datasets'), list):
            data['datasets'] = []
            changes.append("添加缺失的datasets字段")

        if not data['datasets']:
            constructed = self._try_construct_datasets(data, chart_type)
            if constructed:
                data['datasets'] = constructed
                changes.append("从data中构造datasets")
            elif isinstance(data.get('labels'), list) and data['labels']:
                # Labels without any data: create a zero-filled dataset.
                data['datasets'] = [{
                    'label': '数据',
                    'data': [0] * len(data['labels'])
                }]
                changes.append("根据labels创建默认dataset(使用零值)")

        # Default labels are generated when they are missing, malformed or
        # empty; an empty labels list is as unusable as a missing one.
        labels = data.get('labels')
        has_labels = isinstance(labels, list) and len(labels) > 0
        if chart_type in ChartValidator.LABEL_REQUIRED_TYPES and not has_labels:
            first_ds = data['datasets'][0] if data['datasets'] else None
            if isinstance(first_ds, dict) and isinstance(first_ds.get('data'), list) and first_ds['data']:
                data_len = len(first_ds['data'])
                data['labels'] = [f"项目 {i+1}" for i in range(data_len)]
                changes.append(f"生成{data_len}个默认labels")

        # Lengths are aligned only against a non-empty labels list. Aligning
        # against missing/empty labels would truncate every dataset to [].
        # Point-object charts are skipped because labels do not index them.
        labels = data.get('labels')
        align_to_labels = (
            isinstance(labels, list)
            and len(labels) > 0
            and chart_type not in ChartValidator.SPECIAL_DATA_TYPES
        )

        # 4. Datasets
        for idx, dataset in enumerate(data.get('datasets', [])):
            if not isinstance(dataset, dict):
                continue

            if not isinstance(dataset.get('data'), list):
                dataset['data'] = []
                changes.append(f"为datasets[{idx}]添加空data数组")

            if 'label' not in dataset:
                dataset['label'] = f"系列 {idx + 1}"
                changes.append(f"为datasets[{idx}]添加默认label")

            if align_to_labels:
                values = dataset['data']
                if len(values) < len(labels):
                    dataset['data'] = values + [None] * (len(labels) - len(values))
                    changes.append(f"datasets[{idx}]数据长度不足,补充null")
                elif len(values) > len(labels):
                    dataset['data'] = values[:len(labels)]
                    changes.append(f"datasets[{idx}]数据长度过长,截断")

            if chart_type in ChartValidator.NUMERIC_DATA_TYPES:
                values = dataset['data']
                converted = False
                for i, value in enumerate(values):
                    # Only strings are converted; numbers and None are fine
                    # and other types are left for the validator to report.
                    if not isinstance(value, str):
                        continue
                    try:
                        number = float(value)
                    except ValueError:
                        number = None
                    # "nan"/"inf" parse successfully but are not valid JSON
                    # numbers, so they become null as well.
                    if number is not None and not math.isfinite(number):
                        number = None
                    values[i] = number
                    converted = True
                if converted:
                    changes.append(f"datasets[{idx}]包含非数值数据,已尝试转换")

        return RepairResult(len(changes) > 0, repaired, 'local', changes)

    def _try_construct_datasets(
        self,
        data: Dict[str, Any],
        chart_type: str
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Build datasets from alternative fields of data.

        Supported shapes: data.values (one list of values) and data.series
        (list of {name, data} dicts or of plain lists).

        Returns:
            The constructed datasets, or None when neither field is usable.
        """
        values = data.get('values')
        if isinstance(values, list):
            return [{
                'label': '数据',
                'data': values
            }]

        series_list = data.get('series')
        if isinstance(series_list, list):
            datasets = []
            for idx, series in enumerate(series_list):
                if isinstance(series, dict):
                    datasets.append({
                        'label': series.get('name', f'系列 {idx + 1}'),
                        'data': series.get('data', [])
                    })
                elif isinstance(series, list):
                    datasets.append({
                        'label': f'系列 {idx + 1}',
                        'data': series
                    })
            if datasets:
                return datasets

        return None

    def repair_with_api(
        self,
        widget_block: Dict[str, Any],
        validation_result: ValidationResult
    ) -> RepairResult:
        """
        Repair a block with the LLM repair functions, tried in order.

        The first reply that is a chart.js widget dict and passes validation
        wins. A failing function is logged and skipped.

        Returns:
            RepairResult: method 'api'; repaired_block is None on failure.
        """
        if not self.llm_repair_fns:
            return RepairResult(False, None, 'api', [])

        for idx, repair_fn in enumerate(self.llm_repair_fns):
            try:
                logger.info(f"尝试使用Engine {idx + 1}修复图表")
                repaired = repair_fn(widget_block, validation_result.errors)

                if not isinstance(repaired, dict):
                    continue

                # The validator reports every non-chart widget as valid, so
                # a reply that changed widgetType must be rejected here.
                repaired_type = repaired.get('widgetType')
                if not isinstance(repaired_type, str) or not repaired_type.startswith('chart.js'):
                    continue

                if self.validator.validate(repaired).is_valid:
                    return RepairResult(
                        True,
                        repaired,
                        'api',
                        [f"使用Engine {idx + 1}修复成功"]
                    )
            except Exception as e:
                # Best effort: one broken engine must not stop the others.
                logger.error(f"Engine {idx + 1}修复失败: {e}")
                continue

        return RepairResult(False, None, 'api', [])


def create_chart_validator() -> ChartValidator:
    """Create a chart validator instance."""
    return ChartValidator()


def create_chart_repairer(
    validator: Optional[ChartValidator] = None,
    llm_repair_fns: Optional[List[Callable]] = None
) -> ChartRepairer:
    """Create a chart repairer, building a default validator when none is given."""
    if validator is None:
        validator = create_chart_validator()
    return ChartRepairer(validator, llm_repair_fns)
llm_repair_fns)
diff --git a/ReportEngine/utils/test_chart_validator.py b/ReportEngine/utils/test_chart_validator.py
new file mode 100644
index 0000000..85e9844
--- /dev/null
+++ b/ReportEngine/utils/test_chart_validator.py
@@ -0,0 +1,456 @@
+"""
+Test cases for the chart validator and the chart repairer.
+
+Run the tests (or execute this file directly; the pytest exit code is propagated):
+    python -m pytest ReportEngine/utils/test_chart_validator.py -v
+"""
+
+import pytest
+from ReportEngine.utils.chart_validator import (
+    ChartValidator,
+    ChartRepairer,
+    ValidationResult,
+    RepairResult,
+    create_chart_validator,
+    create_chart_repairer
+)
+
+
+class TestChartValidator:
+    """Tests for the validator returned by create_chart_validator()."""
+
+    def setup_method(self):
+        """Build a fresh validator before every test."""
+        self.validator = create_chart_validator()
+
+    def test_valid_bar_chart(self):
+        """A complete bar chart block validates with an empty error list."""
+        widget_block = {
+            "type": "widget",
+            "widgetType": "chart.js/bar",
+            "widgetId": "chart-001",
+            "props": {
+                "type": "bar",
+                "title": "销售数据"
+            },
+            "data": {
+                "labels": ["一月", "二月", "三月"],
+                "datasets": [
+                    {
+                        "label": "销售额",
+                        "data": [100, 200, 150]
+                    }
+                ]
+            }
+        }
+
+        result = self.validator.validate(widget_block)
+        assert result.is_valid
+        assert len(result.errors) == 0
+
+    def test_valid_line_chart(self):
+        """A complete line chart block validates."""
+        widget_block = {
+            "type": "widget",
+            "widgetType": "chart.js/line",
+            "widgetId": "chart-002",
+            "props": {
+                "type": "line"
+            },
+            "data": {
+                "labels": ["周一", "周二", "周三"],
+                "datasets": [
+                    {
+                        "label": "访问量",
+                        "data": [50, 75, 60]
+                    }
+                ]
+            }
+        }
+
+        result = self.validator.validate(widget_block)
+        assert result.is_valid
+
+    def test_valid_pie_chart(self):
+        """A pie chart whose single dataset has no label still validates."""
+        widget_block = {
+            "widgetType": "chart.js/pie",
+            "props": {"type": "pie"},
+            "data": {
+                "labels": ["A", "B", "C"],
+                "datasets": [
+                    {
+                        "data": [30, 40, 30]
+                    }
+                ]
+            }
+        }
+
+        result = self.validator.validate(widget_block)
+        assert result.is_valid
+
+    def test_missing_widgetType(self):
+        """A block without widgetType is invalid and the first error names it."""
+        widget_block = {
+            "props": {},
+            "data": {}
+        }
+
+        result = self.validator.validate(widget_block)
+        assert not result.is_valid
+        assert "widgetType" in result.errors[0]
+
+    def test_missing_data_field(self):
+        """A chart block without a data field is invalid."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"}
+        }
+
+        result = self.validator.validate(widget_block)
+        assert not result.is_valid
+        assert "data" in result.errors[0]
+
+    def test_missing_datasets(self):
+        """A chart whose data has labels but no datasets is invalid."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "labels": ["A", "B"]
+            }
+        }
+
+        result = self.validator.validate(widget_block)
+        assert not result.is_valid
+        assert "datasets" in result.errors[0]
+
+    def test_empty_datasets(self):
+        """An empty datasets list is invalid; the error text must mention emptiness."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "labels": ["A", "B"],
+                "datasets": []
+            }
+        }
+
+        result = self.validator.validate(widget_block)
+        assert not result.is_valid
+        assert "空" in result.errors[0]
+
+    def test_missing_labels_for_bar_chart(self):
+        """A bar chart without labels is invalid."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "datasets": [
+                    {
+                        "label": "系列1",
+                        "data": [10, 20, 30]
+                    }
+                ]
+            }
+        }
+
+        result = self.validator.validate(widget_block)
+        assert not result.is_valid
+        assert "labels" in result.errors[0]
+
+    def test_invalid_data_type(self):
+        """Non-numeric strings in a bar dataset are reported as a type error."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "labels": ["A", "B"],
+                "datasets": [
+                    {
+                        "label": "系列1",
+                        "data": ["abc", "def"]  # should be numeric values
+                    }
+                ]
+            }
+        }
+
+        result = self.validator.validate(widget_block)
+        assert not result.is_valid
+        assert "数值类型" in result.errors[0]
+
+    def test_data_length_mismatch_warning(self):
+        """A labels/data length mismatch produces a warning."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "labels": ["A", "B", "C"],
+                "datasets": [
+                    {
+                        "label": "系列1",
+                        "data": [10, 20]  # shorter than labels
+                    }
+                ]
+            }
+        }
+
+        result = self.validator.validate(widget_block)
+        # A length mismatch is expected as a warning, not an error, so only warnings are checked
+        assert len(result.warnings) > 0
+        assert "不匹配" in result.warnings[0]
+
+    def test_scatter_chart(self):
+        """A scatter chart (x/y point dicts, no labels) validates."""
+        widget_block = {
+            "widgetType": "chart.js/scatter",
+            "props": {"type": "scatter"},
+            "data": {
+                "datasets": [
+                    {
+                        "label": "数据点",
+                        "data": [
+                            {"x": 10, "y": 20},
+                            {"x": 15, "y": 25}
+                        ]
+                    }
+                ]
+            }
+        }
+
+        result = self.validator.validate(widget_block)
+        assert result.is_valid
+
+    def test_non_chart_widget(self):
+        """A widget whose type is not chart.js/* is expected to pass validation."""
+        widget_block = {
+            "widgetType": "custom/widget",
+            "props": {},
+            "data": {}
+        }
+
+        result = self.validator.validate(widget_block)
+        # Non-chart.js widget types are expected to be skipped and reported as valid
+        assert result.is_valid
+
+
+class TestChartRepairer:
+    """Tests for the repairer returned by create_chart_repairer()."""
+
+    def setup_method(self):
+        """Build a fresh validator and a repairer bound to it before every test."""
+        self.validator = create_chart_validator()
+        self.repairer = create_chart_repairer(validator=self.validator)
+
+    def test_repair_missing_props(self):
+        """A block without props is repaired locally and gains a props field."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "data": {
+                "labels": ["A", "B"],
+                "datasets": [
+                    {
+                        "label": "系列1",
+                        "data": [10, 20]
+                    }
+                ]
+            }
+        }
+
+        result = self.repairer.repair(widget_block)
+        assert result.success
+        assert "props" in result.repaired_block
+        assert result.method == "local"
+
+    def test_repair_missing_chart_type(self):
+        """An empty props dict gets its chart type filled in as "bar"."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {},
+            "data": {
+                "labels": ["A", "B"],
+                "datasets": [
+                    {
+                        "label": "系列1",
+                        "data": [10, 20]
+                    }
+                ]
+            }
+        }
+
+        result = self.repairer.repair(widget_block)
+        assert result.success
+        assert result.repaired_block["props"]["type"] == "bar"
+        assert "图表类型" in str(result.changes)
+
+    def test_repair_missing_datasets(self):
+        """Data without datasets is repaired so that datasets is a list."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "labels": ["A", "B"]
+            }
+        }
+
+        result = self.repairer.repair(widget_block)
+        assert result.success
+        assert "datasets" in result.repaired_block["data"]
+        assert isinstance(result.repaired_block["data"]["datasets"], list)
+
+    def test_repair_missing_labels(self):
+        """Missing labels are generated, one per data point (three here)."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "datasets": [
+                    {
+                        "label": "系列1",
+                        "data": [10, 20, 30]
+                    }
+                ]
+            }
+        }
+
+        result = self.repairer.repair(widget_block)
+        assert result.success
+        assert "labels" in result.repaired_block["data"]
+        assert len(result.repaired_block["data"]["labels"]) == 3
+
+    def test_repair_data_length_mismatch(self):
+        """A dataset shorter than labels is extended to the labels' length."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "labels": ["A", "B", "C", "D"],
+                "datasets": [
+                    {
+                        "label": "系列1",
+                        "data": [10, 20]  # too short for four labels
+                    }
+                ]
+            }
+        }
+
+        result = self.repairer.repair(widget_block)
+        assert result.success
+        # The data is expected to be padded to 4 elements
+        assert len(result.repaired_block["data"]["datasets"][0]["data"]) == 4
+
+    def test_repair_string_to_number(self):
+        """Numeric strings in a dataset are converted to float."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "labels": ["A", "B"],
+                "datasets": [
+                    {
+                        "label": "系列1",
+                        "data": ["10", "20"]  # numbers given as strings
+                    }
+                ]
+            }
+        }
+
+        result = self.repairer.repair(widget_block)
+        assert result.success
+        # The values are expected to be converted to numbers (float)
+        assert isinstance(result.repaired_block["data"]["datasets"][0]["data"][0], float)
+
+    def test_repair_construct_datasets_from_values(self):
+        """A non-empty datasets list is built when data carries values instead."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "labels": ["A", "B"],
+                "values": [10, 20]  # uses values instead of datasets
+            }
+        }
+
+        result = self.repairer.repair(widget_block)
+        assert result.success
+        assert "datasets" in result.repaired_block["data"]
+        assert len(result.repaired_block["data"]["datasets"]) > 0
+
+    def test_no_repair_needed(self):
+        """An already valid block reports method "none" and no changes."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "labels": ["A", "B"],
+                "datasets": [
+                    {
+                        "label": "系列1",
+                        "data": [10, 20]
+                    }
+                ]
+            }
+        }
+
+        result = self.repairer.repair(widget_block)
+        assert result.success
+        assert result.method == "none"
+        assert len(result.changes) == 0
+
+    def test_repair_adds_default_label(self):
+        """A dataset without a label gets one after repair."""
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "props": {"type": "bar"},
+            "data": {
+                "labels": ["A", "B"],
+                "datasets": [
+                    {
+                        # label intentionally omitted
+                        "data": [10, 20]
+                    }
+                ]
+            }
+        }
+
+        result = self.repairer.repair(widget_block)
+        assert result.success
+        assert "label" in result.repaired_block["data"]["datasets"][0]
+
+
+class TestValidatorIntegration:
+    """Integration test combining validator and repairer."""
+
+    def test_full_validation_and_repair_workflow(self):
+        """Validate a broken chart, repair it, then validate the repaired block."""
+        validator = create_chart_validator()
+        repairer = create_chart_repairer(validator=validator)
+
+        # A chart with several problems at once: no props, no labels, string data
+        widget_block = {
+            "widgetType": "chart.js/bar",
+            "data": {
+                "datasets": [
+                    {
+                        "data": ["10", "20", "30"]  # numbers given as strings
+                    }
+                ]
+            }
+        }
+
+        # 1. Validate (expected to fail)
+        validation = validator.validate(widget_block)
+        assert not validation.is_valid
+
+        # 2. Repair, passing in the validation result from step 1
+        repair_result = repairer.repair(widget_block, validation)
+        assert repair_result.success
+
+        # 3. Validate again (expected to pass)
+        final_validation = validator.validate(repair_result.repaired_block)
+        assert final_validation.is_valid
+
+
+if __name__ == "__main__":
+    # Run the tests; exit with pytest's status so a failing run is not reported as success
+    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))