Repair and Optimize the Chart Rendering

2025-11-17 16:34:44 +08:00
parent 50b6ab403e
commit c20cc24c78
4 changed files with 1579 additions and 1 deletions
@@ -0,0 +1,631 @@
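+"""
+Chart validation and repair utilities.
+
+Provides validation and repair of Chart.js chart data:
+1. Validate that chart data matches the format Chart.js requires
+2. Repair common problems with local rules
+3. Use an LLM API to help repair complex problems
+4. Follow the principle "better to leave data unchanged than to change it wrongly"
+
+Supported chart types:
+- line
+- bar
+- pie
+- doughnut
+- radar
+- polarArea
+- scatter
+- bubble
+- horizontalBar
+"""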
+
+from __future__ import annotations
+
+import copy
+import json
+from typing import Any, Dict, List, Optional, Tuple, Callable
+from dataclasses import dataclass
+from loguru import logger
+
+
+@dataclass
+class ValidationResult:
+    """验证结果"""
+    is_valid: bool
+    errors: List[str]
+    warnings: List[str]
+
+    def has_critical_errors(self) -> bool:
+        """是否有严重错误（会导致渲染失败）"""
+        return not self.is_valid and len(self.errors) > 0
+
+
+@dataclass
+class RepairResult:
+    """修复结果"""
+    success: bool
+    repaired_block: Optional[Dict[str, Any]]
+    method: str  # 'none', 'local', 'api'
+    changes: List[str]
+
+    def has_changes(self) -> bool:
+        """是否有修改"""
+        return len(self.changes) > 0
+
+
+class ChartValidator:
+    """
+    图表验证器 - 验证Chart.js图表数据格式是否正确。
+
+    验证规则：
+    1. 基本结构验证：widgetType, props, data字段
+    2. 图表类型验证：支持的图表类型
+    3. 数据格式验证：labels和datasets结构
+    4. 数据一致性验证：labels和datasets长度匹配
+    5. 数值类型验证：数据值类型正确
+    """
+
+    # 支持的图表类型
+    SUPPORTED_CHART_TYPES = {
+        'line', 'bar', 'pie', 'doughnut', 'radar', 'polarArea', 'scatter',
+        'bubble', 'horizontalBar'
+    }
+
+    # 需要labels的图表类型
+    LABEL_REQUIRED_TYPES = {
+        'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
+    }
+
+    # 需要数值数据的图表类型
+    NUMERIC_DATA_TYPES = {
+        'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
+    }
+
+    # 需要特殊数据格式的图表类型
+    SPECIAL_DATA_TYPES = {
+        'scatter': {'x', 'y'},
+        'bubble': {'x', 'y', 'r'}
+    }
+
+    def __init__(self):
+        pass
+
+    def validate(self, widget_block: Dict[str, Any]) -> ValidationResult:
+        """
+        验证图表格式。
+
+        Args:
+            widget_block: widget类型的block，包含widgetId/widgetType/props/data
+
+        Returns:
+            ValidationResult: 验证结果
+        """
+        errors = []
+        warnings = []
+
+        # 1. 基本结构验证
+        if not isinstance(widget_block, dict):
+            errors.append("widget_block必须是字典类型")
+            return ValidationResult(False, errors, warnings)
+
+        # 2. 检查widgetType
+        widget_type = widget_block.get('widgetType', '')
+        if not widget_type or not isinstance(widget_type, str):
+            errors.append("缺少widgetType字段或类型不正确")
+            return ValidationResult(False, errors, warnings)
+
+        # 检查是否是chart.js类型
+        if not widget_type.startswith('chart.js'):
+            # 不是图表类型，跳过验证
+            return ValidationResult(True, errors, warnings)
+
+        # 3. 提取图表类型
+        chart_type = self._extract_chart_type(widget_block)
+        if not chart_type:
+            errors.append("无法确定图表类型")
+            return ValidationResult(False, errors, warnings)
+
+        # 4. 检查是否支持该图表类型
+        if chart_type not in self.SUPPORTED_CHART_TYPES:
+            warnings.append(f"图表类型 '{chart_type}' 可能不被支持，将尝试降级渲染")
+
+        # 5. 验证数据结构
+        data = widget_block.get('data')
+        if not isinstance(data, dict):
+            errors.append("data字段必须是字典类型")
+            return ValidationResult(False, errors, warnings)
+
+        # 6. 根据图表类型验证数据
+        if chart_type in self.SPECIAL_DATA_TYPES:
+            # 特殊数据格式（scatter, bubble）
+            self._validate_special_data(data, chart_type, errors, warnings)
+        else:
+            # 标准数据格式（labels + datasets）
+            self._validate_standard_data(data, chart_type, errors, warnings)
+
+        # 7. 验证props
+        props = widget_block.get('props')
+        if props is not None and not isinstance(props, dict):
+            warnings.append("props字段应该是字典类型")
+
+        is_valid = len(errors) == 0
+        return ValidationResult(is_valid, errors, warnings)
+
+    def _extract_chart_type(self, widget_block: Dict[str, Any]) -> Optional[str]:
+        """
+        提取图表类型。
+
+        优先级：
+        1. props.type
+        2. widgetType中的类型（chart.js/bar -> bar）
+        3. data.type
+        """
+        # 1. 从props中获取
+        props = widget_block.get('props') or {}
+        if isinstance(props, dict):
+            chart_type = props.get('type')
+            if chart_type and isinstance(chart_type, str):
+                return chart_type.lower()
+
+        # 2. 从widgetType中提取
+        widget_type = widget_block.get('widgetType', '')
+        if '/' in widget_type:
+            chart_type = widget_type.split('/')[-1]
+            if chart_type:
+                return chart_type.lower()
+
+        # 3. 从data中获取
+        data = widget_block.get('data') or {}
+        if isinstance(data, dict):
+            chart_type = data.get('type')
+            if chart_type and isinstance(chart_type, str):
+                return chart_type.lower()
+
+        return None
+
+    def _validate_standard_data(
+        self,
+        data: Dict[str, Any],
+        chart_type: str,
+        errors: List[str],
+        warnings: List[str]
+    ):
+        """验证标准数据格式（labels + datasets）"""
+        labels = data.get('labels')
+        datasets = data.get('datasets')
+
+        # 验证labels
+        if chart_type in self.LABEL_REQUIRED_TYPES:
+            if not labels:
+                errors.append(f"{chart_type}类型图表必须包含labels字段")
+            elif not isinstance(labels, list):
+                errors.append("labels必须是数组类型")
+            elif len(labels) == 0:
+                warnings.append("labels数组为空，图表可能无法正常显示")
+
+        # 验证datasets
+        if datasets is None:
+            errors.append("缺少datasets字段")
+            return
+
+        if not isinstance(datasets, list):
+            errors.append("datasets必须是数组类型")
+            return
+
+        if len(datasets) == 0:
+            errors.append("datasets数组为空")
+            return
+
+        # 验证每个dataset
+        for idx, dataset in enumerate(datasets):
+            if not isinstance(dataset, dict):
+                errors.append(f"datasets[{idx}]必须是对象类型")
+                continue
+
+            # 验证data字段
+            ds_data = dataset.get('data')
+            if ds_data is None:
+                errors.append(f"datasets[{idx}]缺少data字段")
+                continue
+
+            if not isinstance(ds_data, list):
+                errors.append(f"datasets[{idx}].data必须是数组类型")
+                continue
+
+            if len(ds_data) == 0:
+                warnings.append(f"datasets[{idx}].data数组为空")
+                continue
+
+            # 验证数据长度一致性
+            if labels and isinstance(labels, list):
+                if len(ds_data) != len(labels):
+                    warnings.append(
+                        f"datasets[{idx}].data长度({len(ds_data)})与labels长度({len(labels)})不匹配"
+                    )
+
+            # 验证数值类型
+            if chart_type in self.NUMERIC_DATA_TYPES:
+                for data_idx, value in enumerate(ds_data):
+                    if value is not None and not isinstance(value, (int, float)):
+                        errors.append(
+                            f"datasets[{idx}].data[{data_idx}]的值'{value}'不是有效的数值类型"
+                        )
+                        break  # 只报告第一个错误
+
+    def _validate_special_data(
+        self,
+        data: Dict[str, Any],
+        chart_type: str,
+        errors: List[str],
+        warnings: List[str]
+    ):
+        """验证特殊数据格式（scatter, bubble）"""
+        datasets = data.get('datasets')
+
+        if not datasets:
+            errors.append("缺少datasets字段")
+            return
+
+        if not isinstance(datasets, list):
+            errors.append("datasets必须是数组类型")
+            return
+
+        if len(datasets) == 0:
+            errors.append("datasets数组为空")
+            return
+
+        required_keys = self.SPECIAL_DATA_TYPES.get(chart_type, set())
+
+        # 验证每个dataset
+        for idx, dataset in enumerate(datasets):
+            if not isinstance(dataset, dict):
+                errors.append(f"datasets[{idx}]必须是对象类型")
+                continue
+
+            ds_data = dataset.get('data')
+            if ds_data is None:
+                errors.append(f"datasets[{idx}]缺少data字段")
+                continue
+
+            if not isinstance(ds_data, list):
+                errors.append(f"datasets[{idx}].data必须是数组类型")
+                continue
+
+            if len(ds_data) == 0:
+                warnings.append(f"datasets[{idx}].data数组为空")
+                continue
+
+            # 验证数据点格式
+            for data_idx, point in enumerate(ds_data):
+                if not isinstance(point, dict):
+                    errors.append(
+                        f"datasets[{idx}].data[{data_idx}]必须是对象类型（包含{required_keys}字段）"
+                    )
+                    break
+
+                # 检查必需的键
+                missing_keys = required_keys - set(point.keys())
+                if missing_keys:
+                    errors.append(
+                        f"datasets[{idx}].data[{data_idx}]缺少必需字段: {missing_keys}"
+                    )
+                    break
+
+                # 验证数值类型
+                for key in required_keys:
+                    value = point.get(key)
+                    if value is not None and not isinstance(value, (int, float)):
+                        errors.append(
+                            f"datasets[{idx}].data[{data_idx}].{key}的值'{value}'不是有效的数值类型"
+                        )
+                        break
+
+    def can_render(self, widget_block: Dict[str, Any]) -> bool:
+        """
+        判断图表是否能正常渲染（快速检查）。
+
+        Args:
+            widget_block: widget类型的block
+
+        Returns:
+            bool: 是否能正常渲染
+        """
+        result = self.validate(widget_block)
+        return result.is_valid
+
+
+class ChartRepairer:
+    """
+    图表修复器 - 尝试修复图表数据。
+
+    修复策略：
+    1. 本地规则修复：修复常见问题
+    2. API修复：使用LLM修复复杂问题
+    3. 验证修复结果：确保修复后能正常渲染
+    """
+
+    def __init__(
+        self,
+        validator: ChartValidator,
+        llm_repair_fns: Optional[List[Callable]] = None
+    ):
+        """
+        初始化修复器。
+
+        Args:
+            validator: 图表验证器实例
+            llm_repair_fns: LLM修复函数列表（对应4个Engine）
+        """
+        self.validator = validator
+        self.llm_repair_fns = llm_repair_fns or []
+
+    def repair(
+        self,
+        widget_block: Dict[str, Any],
+        validation_result: Optional[ValidationResult] = None
+    ) -> RepairResult:
+        """
+        尝试修复图表数据。
+
+        Args:
+            widget_block: widget类型的block
+            validation_result: 验证结果（可选，如果没有会先进行验证）
+
+        Returns:
+            RepairResult: 修复结果
+        """
+        # 1. 如果没有验证结果，先验证
+        if validation_result is None:
+            validation_result = self.validator.validate(widget_block)
+
+        # 2. 尝试本地修复（即使验证通过也尝试，因为可能有警告）
+        logger.info(f"尝试本地修复图表")
+        local_result = self.repair_locally(widget_block, validation_result)
+
+        # 3. 验证修复结果
+        if local_result.has_changes():
+            repaired_validation = self.validator.validate(local_result.repaired_block)
+            if repaired_validation.is_valid:
+                logger.info(f"本地修复成功: {local_result.changes}")
+                return RepairResult(True, local_result.repaired_block, 'local', local_result.changes)
+            else:
+                logger.warning(f"本地修复后仍然无效: {repaired_validation.errors}")
+
+        # 4. 如果本地修复失败且有严重错误，尝试API修复
+        if validation_result.has_critical_errors() and len(self.llm_repair_fns) > 0:
+            logger.info("本地修复失败，尝试API修复")
+            api_result = self.repair_with_api(widget_block, validation_result)
+
+            if api_result.success:
+                # 验证修复结果
+                repaired_validation = self.validator.validate(api_result.repaired_block)
+                if repaired_validation.is_valid:
+                    logger.info(f"API修复成功: {api_result.changes}")
+                    return api_result
+                else:
+                    logger.warning(f"API修复后仍然无效: {repaired_validation.errors}")
+
+        # 5. 如果验证通过，返回原始或修复后的数据
+        if validation_result.is_valid:
+            if local_result.has_changes():
+                return RepairResult(True, local_result.repaired_block, 'local', local_result.changes)
+            else:
+                return RepairResult(True, widget_block, 'none', [])
+
+        # 6. 所有修复都失败，返回原始数据
+        logger.warning("所有修复尝试失败，保持原始数据")
+        return RepairResult(False, widget_block, 'none', [])
+
+    def repair_locally(
+        self,
+        widget_block: Dict[str, Any],
+        validation_result: ValidationResult
+    ) -> RepairResult:
+        """
+        使用本地规则修复。
+
+        修复规则：
+        1. 补全缺失的基本字段
+        2. 修复数据类型错误
+        3. 修复数据长度不匹配
+        4. 清理无效数据
+        5. 添加默认值
+        """
+        repaired = copy.deepcopy(widget_block)
+        changes = []
+
+        # 1. 确保基本结构存在
+        if 'props' not in repaired or not isinstance(repaired.get('props'), dict):
+            repaired['props'] = {}
+            changes.append("添加缺失的props字段")
+
+        if 'data' not in repaired or not isinstance(repaired.get('data'), dict):
+            repaired['data'] = {}
+            changes.append("添加缺失的data字段")
+
+        # 2. 确保图表类型存在
+        chart_type = self.validator._extract_chart_type(repaired)
+        props = repaired['props']
+
+        if not chart_type:
+            # 尝试从widgetType推断
+            widget_type = repaired.get('widgetType', '')
+            if '/' in widget_type:
+                chart_type = widget_type.split('/')[-1].lower()
+                props['type'] = chart_type
+                changes.append(f"从widgetType推断图表类型: {chart_type}")
+            else:
+                # 默认使用bar类型
+                chart_type = 'bar'
+                props['type'] = chart_type
+                changes.append("设置默认图表类型: bar")
+        elif 'type' not in props or not props['type']:
+            # chart_type存在但props中没有type字段，需要添加
+            props['type'] = chart_type
+            changes.append(f"将推断的图表类型添加到props: {chart_type}")
+
+        # 3. 修复数据结构
+        data = repaired['data']
+
+        # 确保datasets存在
+        if 'datasets' not in data or not isinstance(data.get('datasets'), list):
+            data['datasets'] = []
+            changes.append("添加缺失的datasets字段")
+
+        # 如果datasets为空但data中有其他数据，尝试构造datasets
+        if len(data['datasets']) == 0:
+            constructed = self._try_construct_datasets(data, chart_type)
+            if constructed:
+                data['datasets'] = constructed
+                changes.append("从data中构造datasets")
+            elif 'labels' in data and isinstance(data.get('labels'), list) and len(data['labels']) > 0:
+                # 如果有labels但没有数据，创建一个空dataset
+                data['datasets'] = [{
+                    'label': '数据',
+                    'data': [0] * len(data['labels'])
+                }]
+                changes.append("根据labels创建默认dataset（使用零值）")
+
+        # 确保labels存在（如果需要）
+        if chart_type in ChartValidator.LABEL_REQUIRED_TYPES:
+            if 'labels' not in data or not isinstance(data.get('labels'), list):
+                # 尝试根据datasets长度生成labels
+                if data['datasets'] and len(data['datasets']) > 0:
+                    first_ds = data['datasets'][0]
+                    if isinstance(first_ds, dict) and isinstance(first_ds.get('data'), list):
+                        data_len = len(first_ds['data'])
+                        data['labels'] = [f"项目 {i+1}" for i in range(data_len)]
+                        changes.append(f"生成{data_len}个默认labels")
+
+        # 4. 修复datasets中的数据
+        for idx, dataset in enumerate(data.get('datasets', [])):
+            if not isinstance(dataset, dict):
+                continue
+
+            # 确保有data字段
+            if 'data' not in dataset or not isinstance(dataset.get('data'), list):
+                dataset['data'] = []
+                changes.append(f"为datasets[{idx}]添加空data数组")
+
+            # 确保有label
+            if 'label' not in dataset:
+                dataset['label'] = f"系列 {idx + 1}"
+                changes.append(f"为datasets[{idx}]添加默认label")
+
+            # 修复数据长度不匹配
+            labels = data.get('labels', [])
+            ds_data = dataset.get('data', [])
+            if isinstance(labels, list) and isinstance(ds_data, list):
+                if len(ds_data) < len(labels):
+                    # 数据不够，补null
+                    dataset['data'] = ds_data + [None] * (len(labels) - len(ds_data))
+                    changes.append(f"datasets[{idx}]数据长度不足，补充null")
+                elif len(ds_data) > len(labels):
+                    # 数据过多，截断
+                    dataset['data'] = ds_data[:len(labels)]
+                    changes.append(f"datasets[{idx}]数据长度过长，截断")
+
+            # 转换非数值数据为数值（如果可能）
+            if chart_type in ChartValidator.NUMERIC_DATA_TYPES:
+                ds_data = dataset.get('data', [])
+                converted = False
+                for i, value in enumerate(ds_data):
+                    if value is None:
+                        continue
+                    if not isinstance(value, (int, float)):
+                        # 尝试转换
+                        try:
+                            if isinstance(value, str):
+                                # 尝试转换字符串
+                                ds_data[i] = float(value)
+                                converted = True
+                        except (ValueError, TypeError):
+                            # 转换失败，设为null
+                            ds_data[i] = None
+                            converted = True
+                if converted:
+                    changes.append(f"datasets[{idx}]包含非数值数据，已尝试转换")
+
+        # 5. 验证修复结果
+        success = len(changes) > 0
+
+        return RepairResult(success, repaired, 'local', changes)
+
+    def _try_construct_datasets(
+        self,
+        data: Dict[str, Any],
+        chart_type: str
+    ) -> Optional[List[Dict[str, Any]]]:
+        """尝试从data中构造datasets"""
+        # 如果data直接包含数据数组，尝试构造
+        if 'values' in data and isinstance(data['values'], list):
+            return [{
+                'label': '数据',
+                'data': data['values']
+            }]
+
+        # 如果data包含series字段
+        if 'series' in data and isinstance(data['series'], list):
+            datasets = []
+            for idx, series in enumerate(data['series']):
+                if isinstance(series, dict):
+                    datasets.append({
+                        'label': series.get('name', f'系列 {idx + 1}'),
+                        'data': series.get('data', [])
+                    })
+                elif isinstance(series, list):
+                    datasets.append({
+                        'label': f'系列 {idx + 1}',
+                        'data': series
+                    })
+            if datasets:
+                return datasets
+
+        return None
+
+    def repair_with_api(
+        self,
+        widget_block: Dict[str, Any],
+        validation_result: ValidationResult
+    ) -> RepairResult:
+        """
+        使用API修复（调用4个Engine的LLM）。
+
+        策略：按顺序尝试不同的Engine，直到修复成功
+        """
+        if not self.llm_repair_fns:
+            return RepairResult(False, None, 'api', [])
+
+        for idx, repair_fn in enumerate(self.llm_repair_fns):
+            try:
+                logger.info(f"尝试使用Engine {idx + 1}修复图表")
+                repaired = repair_fn(widget_block, validation_result.errors)
+
+                if repaired and isinstance(repaired, dict):
+                    # 验证修复结果
+                    repaired_validation = self.validator.validate(repaired)
+                    if repaired_validation.is_valid:
+                        return RepairResult(
+                            True,
+                            repaired,
+                            'api',
+                            [f"使用Engine {idx + 1}修复成功"]
+                        )
+            except Exception as e:
+                logger.error(f"Engine {idx + 1}修复失败: {e}")
+                continue
+
+        return RepairResult(False, None, 'api', [])
+
+
+def create_chart_validator() -> ChartValidator:
+    """创建图表验证器实例"""
+    return ChartValidator()
+
+
+def create_chart_repairer(
+    validator: Optional[ChartValidator] = None,
+    llm_repair_fns: Optional[List[Callable]] = None
+) -> ChartRepairer:
+    """创建图表修复器实例"""
+    if validator is None:
+        validator = create_chart_validator()
+    return ChartRepairer(validator, llm_repair_fns)