Repair and Optimize the Chart Rendering

This commit is contained in:
马一丁
2025-11-17 16:34:44 +08:00
parent 50b6ab403e
commit c20cc24c78
4 changed files with 1579 additions and 1 deletions
+631
View File
@@ -0,0 +1,631 @@
"""
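Chart validation and repair utilities.
Provides validation and repair of Chart.js chart data:
1. Validate that chart data matches the format Chart.js requires
2. Repair common problems with local rules
3. Repair complex problems with help from an LLM API
4. Follow the principle "better to leave data unchanged than to change it wrongly"
Supported chart types:
- line
- bar
- pie
- doughnut
- radar
- polarArea
- scatter
- bubble
- horizontalBar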
"""
from __future__ import annotations
import copy
import json
from typing import Any, Dict, List, Optional, Tuple, Callable
from dataclasses import dataclass
from loguru import logger
@dataclass
class ValidationResult:
    """Outcome of validating one widget block."""

    # Overall verdict of the validation.
    is_valid: bool
    # Messages describing problems found in the block.
    errors: List[str]
    # Messages describing non-fatal observations.
    warnings: List[str]

    def has_critical_errors(self) -> bool:
        """Tell whether the block is invalid and at least one error was recorded."""
        if self.is_valid:
            return False
        return len(self.errors) > 0
@dataclass
class RepairResult:
    """Outcome of one repair attempt on a widget block."""

    # Whether the attempt is considered successful.
    success: bool
    # The block produced by the attempt, or None when nothing was produced.
    repaired_block: Optional[Dict[str, Any]]
    # How the block was obtained: 'none', 'local' or 'api'.
    method: str
    # Human-readable descriptions of the modifications that were applied.
    changes: List[str]

    def has_changes(self) -> bool:
        """Tell whether at least one modification was recorded."""
        return len(self.changes) != 0
class ChartValidator:
    """
    Validate that a widget block carries well-formed Chart.js data.

    Checks performed by :meth:`validate`:
    1. Basic structure: ``widgetType``, ``props`` and ``data`` fields.
    2. Chart type: resolved from the block and compared with the supported set.
    3. Data format: structure of ``labels`` and ``datasets``.
    4. Consistency: each dataset length versus the ``labels`` length.
    5. Value types: data values must be numeric.
    """

    # Chart types accepted without a warning (Chart.js spelling, case matters).
    SUPPORTED_CHART_TYPES = {
        'line', 'bar', 'pie', 'doughnut', 'radar', 'polarArea', 'scatter',
        'bubble', 'horizontalBar'
    }
    # Chart types whose data must provide ``labels``.
    LABEL_REQUIRED_TYPES = {
        'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
    }
    # Chart types whose dataset values must be plain numbers.
    NUMERIC_DATA_TYPES = {
        'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
    }
    # Chart types whose data points are objects; maps type -> required keys.
    SPECIAL_DATA_TYPES = {
        'scatter': {'x', 'y'},
        'bubble': {'x', 'y', 'r'}
    }
    # Lower-cased name -> canonical spelling. Needed because the sets above
    # contain camel-case names ('polarArea', 'horizontalBar') that a plain
    # ``.lower()`` would never match.
    _CANONICAL_CHART_TYPES = {name.lower(): name for name in SUPPORTED_CHART_TYPES}

    def __init__(self):
        """Create a validator; it holds no per-instance state."""
        pass

    def validate(self, widget_block: Dict[str, Any]) -> "ValidationResult":
        """
        Validate the format of a chart widget block.

        Args:
            widget_block: widget block containing widgetType/props/data.

        Returns:
            ValidationResult: ``is_valid`` is True when no error was recorded.
            Blocks whose ``widgetType`` does not start with ``chart.js`` are
            not charts and are reported as valid without further checks.
        """
        errors: List[str] = []
        warnings: List[str] = []

        # 1. Basic structure
        if not isinstance(widget_block, dict):
            errors.append("widget_block必须是字典类型")
            return ValidationResult(False, errors, warnings)

        # 2. widgetType must be a non-empty string
        widget_type = widget_block.get('widgetType', '')
        if not widget_type or not isinstance(widget_type, str):
            errors.append("缺少widgetType字段或类型不正确")
            return ValidationResult(False, errors, warnings)

        # Not a chart.js widget: nothing to validate here
        if not widget_type.startswith('chart.js'):
            return ValidationResult(True, errors, warnings)

        # 3. Resolve the chart type
        chart_type = self._extract_chart_type(widget_block)
        if not chart_type:
            errors.append("无法确定图表类型")
            return ValidationResult(False, errors, warnings)

        # 4. Unknown chart types only produce a warning
        if chart_type not in self.SUPPORTED_CHART_TYPES:
            warnings.append(f"图表类型 '{chart_type}' 可能不被支持,将尝试降级渲染")

        # 5. data must be a dict
        data = widget_block.get('data')
        if not isinstance(data, dict):
            errors.append("data字段必须是字典类型")
            return ValidationResult(False, errors, warnings)

        # 6. Validate the data according to the chart type
        if chart_type in self.SPECIAL_DATA_TYPES:
            # Point-object format (scatter, bubble)
            self._validate_special_data(data, chart_type, errors, warnings)
        else:
            # Standard format (labels + datasets)
            self._validate_standard_data(data, chart_type, errors, warnings)

        # 7. props, when present, should be a dict (warning only)
        props = widget_block.get('props')
        if props is not None and not isinstance(props, dict):
            warnings.append("props字段应该是字典类型")

        is_valid = len(errors) == 0
        return ValidationResult(is_valid, errors, warnings)

    @classmethod
    def _normalize_chart_type(cls, raw_type: str) -> str:
        """
        Map a chart type name to its canonical spelling.

        Known types are matched case-insensitively and returned as spelled in
        ``SUPPORTED_CHART_TYPES`` (e.g. ``'PolarArea'`` -> ``'polarArea'``).
        Unknown types are returned lower-cased.
        """
        lowered = raw_type.lower()
        return cls._CANONICAL_CHART_TYPES.get(lowered, lowered)

    def _extract_chart_type(self, widget_block: Dict[str, Any]) -> Optional[str]:
        """
        Resolve the chart type of a widget block.

        Lookup order:
        1. ``props.type``
        2. the suffix of ``widgetType`` (``chart.js/bar`` -> ``bar``)
        3. ``data.type``

        Returns:
            The normalized chart type, or None when none of the sources
            provides a non-empty string.
        """
        # 1. props.type
        props = widget_block.get('props') or {}
        if isinstance(props, dict):
            chart_type = props.get('type')
            if chart_type and isinstance(chart_type, str):
                return self._normalize_chart_type(chart_type)

        # 2. widgetType suffix; guard against non-string values so that
        #    ``'/' in widget_type`` cannot raise
        widget_type = widget_block.get('widgetType', '')
        if isinstance(widget_type, str) and '/' in widget_type:
            chart_type = widget_type.split('/')[-1]
            if chart_type:
                return self._normalize_chart_type(chart_type)

        # 3. data.type
        data = widget_block.get('data') or {}
        if isinstance(data, dict):
            chart_type = data.get('type')
            if chart_type and isinstance(chart_type, str):
                return self._normalize_chart_type(chart_type)

        return None

    def _validate_standard_data(
        self,
        data: Dict[str, Any],
        chart_type: str,
        errors: List[str],
        warnings: List[str]
    ):
        """
        Validate the standard data format (labels + datasets).

        Problems are appended to ``errors`` / ``warnings`` in place.
        """
        labels = data.get('labels')
        datasets = data.get('datasets')

        # labels: a missing value and an empty list are both reported as
        # "labels required"; any other non-list value is a type error
        if chart_type in self.LABEL_REQUIRED_TYPES:
            if not labels:
                errors.append(f"{chart_type}类型图表必须包含labels字段")
            elif not isinstance(labels, list):
                errors.append("labels必须是数组类型")

        # datasets
        if datasets is None:
            errors.append("缺少datasets字段")
            return
        if not isinstance(datasets, list):
            errors.append("datasets必须是数组类型")
            return
        if len(datasets) == 0:
            errors.append("datasets数组为空")
            return

        for idx, dataset in enumerate(datasets):
            if not isinstance(dataset, dict):
                errors.append(f"datasets[{idx}]必须是对象类型")
                continue

            ds_data = dataset.get('data')
            if ds_data is None:
                errors.append(f"datasets[{idx}]缺少data字段")
                continue
            if not isinstance(ds_data, list):
                errors.append(f"datasets[{idx}].data必须是数组类型")
                continue
            if len(ds_data) == 0:
                warnings.append(f"datasets[{idx}].data数组为空")
                continue

            # Length mismatch against labels is only a warning
            if labels and isinstance(labels, list):
                if len(ds_data) != len(labels):
                    warnings.append(
                        f"datasets[{idx}].data长度({len(ds_data)})与labels长度({len(labels)})不匹配"
                    )

            # Values must be numbers (None is allowed)
            if chart_type in self.NUMERIC_DATA_TYPES:
                for data_idx, value in enumerate(ds_data):
                    if value is not None and not isinstance(value, (int, float)):
                        errors.append(
                            f"datasets[{idx}].data[{data_idx}]的值'{value}'不是有效的数值类型"
                        )
                        break  # report only the first offending value

    @staticmethod
    def _describe_point_error(point: Any, required_keys: set) -> Optional[str]:
        """
        Return the message suffix for the first problem of a data point.

        Returns None when the point is a dict that has every required key
        with a numeric (or None) value.
        """
        if not isinstance(point, dict):
            return f"必须是对象类型(包含{required_keys}字段)"
        missing_keys = required_keys - set(point.keys())
        if missing_keys:
            return f"缺少必需字段: {missing_keys}"
        # sorted() makes the reported key deterministic
        for key in sorted(required_keys):
            value = point.get(key)
            if value is not None and not isinstance(value, (int, float)):
                return f".{key}的值'{value}'不是有效的数值类型"
        return None

    def _validate_special_data(
        self,
        data: Dict[str, Any],
        chart_type: str,
        errors: List[str],
        warnings: List[str]
    ):
        """
        Validate the point-object data format (scatter, bubble).

        Problems are appended to ``errors`` / ``warnings`` in place. For each
        dataset only the first offending point is reported.
        """
        datasets = data.get('datasets')
        if datasets is None:
            errors.append("缺少datasets字段")
            return
        if not isinstance(datasets, list):
            errors.append("datasets必须是数组类型")
            return
        if len(datasets) == 0:
            errors.append("datasets数组为空")
            return

        required_keys = self.SPECIAL_DATA_TYPES.get(chart_type, set())

        for idx, dataset in enumerate(datasets):
            if not isinstance(dataset, dict):
                errors.append(f"datasets[{idx}]必须是对象类型")
                continue

            ds_data = dataset.get('data')
            if ds_data is None:
                errors.append(f"datasets[{idx}]缺少data字段")
                continue
            if not isinstance(ds_data, list):
                errors.append(f"datasets[{idx}].data必须是数组类型")
                continue
            if len(ds_data) == 0:
                warnings.append(f"datasets[{idx}].data数组为空")
                continue

            for data_idx, point in enumerate(ds_data):
                problem = self._describe_point_error(point, required_keys)
                if problem is not None:
                    errors.append(f"datasets[{idx}].data[{data_idx}]{problem}")
                    break  # report only the first offending point

    def can_render(self, widget_block: Dict[str, Any]) -> bool:
        """
        Quick check: tell whether the block passes validation.

        Args:
            widget_block: widget block to check.

        Returns:
            bool: ``is_valid`` of the validation result.
        """
        result = self.validate(widget_block)
        return result.is_valid
class ChartRepairer:
    """
    Try to repair the data of a chart widget block.

    Strategy:
    1. Local rules for common problems.
    2. LLM-backed repair functions for problems local rules cannot fix.
    3. Every candidate is re-validated; an invalid candidate is never returned.
    """

    def __init__(
        self,
        validator: ChartValidator,
        llm_repair_fns: Optional[List[Callable]] = None
    ):
        """
        Initialize the repairer.

        Args:
            validator: validator used to check blocks before and after repair.
            llm_repair_fns: repair callables, tried in order. Each is called
                as ``fn(widget_block, errors)`` and should return a repaired
                dict (or a falsy value on failure).
        """
        self.validator = validator
        self.llm_repair_fns = llm_repair_fns or []

    def repair(
        self,
        widget_block: Dict[str, Any],
        validation_result: Optional[ValidationResult] = None
    ) -> RepairResult:
        """
        Try to repair a chart widget block.

        Args:
            widget_block: widget block to repair; it is never modified.
            validation_result: result of a previous validation of
                ``widget_block``; computed here when omitted.

        Returns:
            RepairResult: a validated repaired block with method 'local' or
            'api', or the original block with method 'none' (``success`` is
            True only if the original block was already valid).
        """
        # Local rules need a dict to work on
        if not isinstance(widget_block, dict):
            return RepairResult(False, widget_block, 'none', [])

        # Widgets that are not chart.js charts are skipped by the validator;
        # leave them untouched instead of injecting chart fields into them
        widget_type = widget_block.get('widgetType')
        if isinstance(widget_type, str) and widget_type and not widget_type.startswith('chart.js'):
            return RepairResult(True, widget_block, 'none', [])

        # 1. Validate first when no result was supplied
        if validation_result is None:
            validation_result = self.validator.validate(widget_block)

        # 2. Local repair is attempted even for valid blocks (they may carry warnings)
        logger.info("尝试本地修复图表")
        local_result = self.repair_locally(widget_block, validation_result)

        # 3. Accept the local repair only if it validates
        if local_result.has_changes():
            repaired_validation = self.validator.validate(local_result.repaired_block)
            if repaired_validation.is_valid:
                logger.info(f"本地修复成功: {local_result.changes}")
                return RepairResult(True, local_result.repaired_block, 'local', local_result.changes)
            logger.warning(f"本地修复后仍然无效: {repaired_validation.errors}")

        # 4. Fall back to the LLM repair functions for invalid blocks
        if validation_result.has_critical_errors() and len(self.llm_repair_fns) > 0:
            logger.info("本地修复失败,尝试API修复")
            api_result = self.repair_with_api(widget_block, validation_result)
            if api_result.success:
                repaired_validation = self.validator.validate(api_result.repaired_block)
                if repaired_validation.is_valid:
                    logger.info(f"API修复成功: {api_result.changes}")
                    return api_result
                logger.warning(f"API修复后仍然无效: {repaired_validation.errors}")

        # 5. The original block was valid: return it unchanged. Any local
        #    candidate reaching this point already failed validation in step 3.
        if validation_result.is_valid:
            return RepairResult(True, widget_block, 'none', [])

        # 6. Nothing worked: keep the original data
        logger.warning("所有修复尝试失败,保持原始数据")
        return RepairResult(False, widget_block, 'none', [])

    def _resolve_chart_type(self, block: Dict[str, Any]) -> Optional[str]:
        """
        Resolve the chart type of ``block`` in Chart.js spelling.

        Delegates to the validator and then maps known types to the spelling
        used in ``ChartValidator.SUPPORTED_CHART_TYPES``, so that membership
        tests and the value written to ``props.type`` use e.g. 'polarArea'
        rather than 'polararea'. Returns None when no type can be resolved.
        """
        try:
            chart_type = self.validator._extract_chart_type(block)
        except (TypeError, AttributeError):
            # A malformed widgetType (not a string) gives no usable type
            return None
        if not chart_type or not isinstance(chart_type, str):
            return None
        canonical = {name.lower(): name for name in ChartValidator.SUPPORTED_CHART_TYPES}
        return canonical.get(chart_type.lower(), chart_type)

    @staticmethod
    def _parse_number(text: str) -> Optional[float]:
        """
        Convert a string to a finite float.

        Returns None when the string is not a number or parses to NaN or
        infinity.
        """
        import math  # local import: only needed by this helper

        try:
            number = float(text)
        except ValueError:
            return None
        return number if math.isfinite(number) else None

    def repair_locally(
        self,
        widget_block: Dict[str, Any],
        validation_result: ValidationResult
    ) -> RepairResult:
        """
        Repair a block with local rules, working on a deep copy.

        Rules:
        1. Add missing ``props`` / ``data`` containers.
        2. Make sure ``props.type`` is set.
        3. Build ``datasets`` / ``labels`` when they are missing.
        4. Align dataset lengths with ``labels`` and convert numeric strings.

        Args:
            widget_block: widget block to repair; it is never modified.
            validation_result: validation result of ``widget_block``
                (not used by the current rules).

        Returns:
            RepairResult: method 'local'; ``success`` is True when at least
            one change was made. The result is not validated here.
        """
        if not isinstance(widget_block, dict):
            return RepairResult(False, widget_block, 'local', [])

        repaired = copy.deepcopy(widget_block)
        changes: List[str] = []

        # 1. Basic containers
        if not isinstance(repaired.get('props'), dict):
            repaired['props'] = {}
            changes.append("添加缺失的props字段")
        if not isinstance(repaired.get('data'), dict):
            repaired['data'] = {}
            changes.append("添加缺失的data字段")

        # 2. Chart type. The resolver already looks at props, widgetType and
        #    data, so when it finds nothing the only option left is the default.
        props = repaired['props']
        chart_type = self._resolve_chart_type(repaired)
        if not chart_type:
            chart_type = 'bar'
            props['type'] = chart_type
            changes.append("设置默认图表类型: bar")
        elif not props.get('type'):
            props['type'] = chart_type
            changes.append(f"将推断的图表类型添加到props: {chart_type}")

        # 3. Data structure
        data = repaired['data']
        if not isinstance(data.get('datasets'), list):
            data['datasets'] = []
            changes.append("添加缺失的datasets字段")

        if len(data['datasets']) == 0:
            constructed = self._try_construct_datasets(data, chart_type)
            existing_labels = data.get('labels')
            if constructed:
                data['datasets'] = constructed
                changes.append("从data中构造datasets")
            elif isinstance(existing_labels, list) and len(existing_labels) > 0:
                # Labels without any data: create a zero-valued dataset
                data['datasets'] = [{
                    'label': '数据',
                    'data': [0] * len(existing_labels)
                }]
                changes.append("根据labels创建默认dataset(使用零值)")

        # Generate labels when they are required but missing or empty. The
        # longest dataset decides the count so that no data has to be dropped.
        if chart_type in ChartValidator.LABEL_REQUIRED_TYPES:
            current_labels = data.get('labels')
            if not isinstance(current_labels, list) or len(current_labels) == 0:
                data_len = max(
                    (
                        len(ds['data'])
                        for ds in data['datasets']
                        if isinstance(ds, dict) and isinstance(ds.get('data'), list)
                    ),
                    default=0
                )
                if data_len > 0:
                    data['labels'] = [f"项目 {i+1}" for i in range(data_len)]
                    changes.append(f"生成{data_len}个默认labels")

        # 4. Per-dataset fixes
        labels = data.get('labels')
        # Align lengths only when there are labels to align with, and never
        # for point-object charts (scatter/bubble), which do not pair values
        # with labels; otherwise their data would be truncated to nothing
        align_with_labels = (
            isinstance(labels, list)
            and len(labels) > 0
            and chart_type not in ChartValidator.SPECIAL_DATA_TYPES
        )
        for idx, dataset in enumerate(data['datasets']):
            if not isinstance(dataset, dict):
                continue

            if not isinstance(dataset.get('data'), list):
                dataset['data'] = []
                changes.append(f"为datasets[{idx}]添加空data数组")

            if 'label' not in dataset:
                dataset['label'] = f"系列 {idx + 1}"
                changes.append(f"为datasets[{idx}]添加默认label")

            if align_with_labels:
                ds_data = dataset['data']
                if len(ds_data) < len(labels):
                    # Too few values: pad with null
                    dataset['data'] = ds_data + [None] * (len(labels) - len(ds_data))
                    changes.append(f"datasets[{idx}]数据长度不足,补充null")
                elif len(ds_data) > len(labels):
                    # Too many values: truncate
                    dataset['data'] = ds_data[:len(labels)]
                    changes.append(f"datasets[{idx}]数据长度过长,截断")

            if chart_type in ChartValidator.NUMERIC_DATA_TYPES:
                values = dataset['data']
                converted = False
                for i, value in enumerate(values):
                    if value is None or isinstance(value, (int, float)):
                        continue
                    if isinstance(value, str):
                        # Numeric strings become floats, anything else null
                        values[i] = self._parse_number(value)
                        converted = True
                    # Other types are left untouched for the validator to report
                if converted:
                    changes.append(f"datasets[{idx}]包含非数值数据,已尝试转换")

        # 5. Report whether anything changed
        success = len(changes) > 0
        return RepairResult(success, repaired, 'local', changes)

    def _try_construct_datasets(
        self,
        data: Dict[str, Any],
        chart_type: str
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Try to build ``datasets`` from alternative fields of ``data``.

        Recognized shapes: a ``values`` list (one dataset), or a ``series``
        list whose items are dicts (``name`` / ``data``) or plain lists.

        Returns:
            The constructed datasets, or None when nothing usable was found.
        """
        # A bare list of values becomes a single dataset
        if 'values' in data and isinstance(data['values'], list):
            return [{
                'label': '数据',
                'data': data['values']
            }]

        # A series list becomes one dataset per usable entry
        if 'series' in data and isinstance(data['series'], list):
            datasets = []
            for idx, series in enumerate(data['series']):
                if isinstance(series, dict):
                    datasets.append({
                        'label': series.get('name', f'系列 {idx + 1}'),
                        'data': series.get('data', [])
                    })
                elif isinstance(series, list):
                    datasets.append({
                        'label': f'系列 {idx + 1}',
                        'data': series
                    })
            if datasets:
                return datasets

        return None

    def repair_with_api(
        self,
        widget_block: Dict[str, Any],
        validation_result: ValidationResult
    ) -> RepairResult:
        """
        Repair a block with the configured LLM repair functions.

        The functions are tried in order until one returns a dict that passes
        validation. Each function receives its own deep copy of the block, so
        the caller's data cannot be modified by a repair function.

        Returns:
            RepairResult: method 'api'; on failure ``success`` is False and
            ``repaired_block`` is None.
        """
        if not self.llm_repair_fns:
            return RepairResult(False, None, 'api', [])

        for idx, repair_fn in enumerate(self.llm_repair_fns):
            try:
                logger.info(f"尝试使用Engine {idx + 1}修复图表")
                repaired = repair_fn(copy.deepcopy(widget_block), validation_result.errors)
                if repaired and isinstance(repaired, dict):
                    # Only a candidate that validates counts as a success
                    repaired_validation = self.validator.validate(repaired)
                    if repaired_validation.is_valid:
                        return RepairResult(
                            True,
                            repaired,
                            'api',
                            [f"使用Engine {idx + 1}修复成功"]
                        )
            except Exception as e:
                # Best effort: a failing engine must not stop the next one
                logger.error(f"Engine {idx + 1}修复失败: {e}")
                continue

        return RepairResult(False, None, 'api', [])
def create_chart_validator() -> ChartValidator:
    """Build and return a new ChartValidator."""
    new_validator = ChartValidator()
    return new_validator
def create_chart_repairer(
    validator: Optional[ChartValidator] = None,
    llm_repair_fns: Optional[List[Callable]] = None
) -> ChartRepairer:
    """
    Build a ChartRepairer.

    Args:
        validator: validator to use; a new one is created when None.
        llm_repair_fns: repair callables handed to the repairer unchanged.

    Returns:
        ChartRepairer: the configured repairer.
    """
    active_validator = create_chart_validator() if validator is None else validator
    return ChartRepairer(active_validator, llm_repair_fns)