Repair and Optimize the Chart Rendering

This commit is contained in:
马一丁
2025-11-17 16:34:44 +08:00
parent 50b6ab403e
commit c20cc24c78
4 changed files with 1579 additions and 1 deletions
+209 -1
View File
@@ -11,6 +11,15 @@ import json
import os import os
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List from typing import Any, Dict, List
from loguru import logger
from ReportEngine.utils.chart_validator import (
ChartValidator,
ChartRepairer,
create_chart_validator,
create_chart_repairer
)
from ReportEngine.utils.chart_repair_api import create_llm_repair_functions
class HTMLRenderer: class HTMLRenderer:
@@ -65,6 +74,23 @@ class HTMLRenderer:
self.hero_kpi_signature: tuple | None = None self.hero_kpi_signature: tuple | None = None
self._lib_cache: Dict[str, str] = {} self._lib_cache: Dict[str, str] = {}
# 初始化图表验证和修复器
self.chart_validator = create_chart_validator()
llm_repair_fns = create_llm_repair_functions()
self.chart_repairer = create_chart_repairer(
validator=self.chart_validator,
llm_repair_fns=llm_repair_fns
)
# 统计信息
self.chart_validation_stats = {
'total': 0,
'valid': 0,
'repaired_locally': 0,
'repaired_api': 0,
'failed': 0
}
@staticmethod @staticmethod
def _get_lib_path() -> Path: def _get_lib_path() -> Path:
"""获取第三方库文件的目录路径""" """获取第三方库文件的目录路径"""
@@ -124,6 +150,15 @@ class HTMLRenderer:
self.heading_label_map = self._compute_heading_labels(self.chapters) self.heading_label_map = self._compute_heading_labels(self.chapters)
self.toc_entries = self._collect_toc_entries(self.chapters) self.toc_entries = self._collect_toc_entries(self.chapters)
# 重置图表验证统计
self.chart_validation_stats = {
'total': 0,
'valid': 0,
'repaired_locally': 0,
'repaired_api': 0,
'failed': 0
}
metadata = self.metadata metadata = self.metadata
theme_tokens = metadata.get("themeTokens") or self.document.get("themeTokens", {}) theme_tokens = metadata.get("themeTokens") or self.document.get("themeTokens", {})
title = metadata.get("title") or metadata.get("query") or "智能舆情报告" title = metadata.get("title") or metadata.get("query") or "智能舆情报告"
@@ -132,6 +167,10 @@ class HTMLRenderer:
head = self._render_head(title, theme_tokens) head = self._render_head(title, theme_tokens)
body = self._render_body() body = self._render_body()
# 输出图表验证统计
self._log_chart_validation_stats()
return f"<!DOCTYPE html>\n<html lang=\"zh-CN\" class=\"no-js\">\n{head}\n{body}\n</html>" return f"<!DOCTYPE html>\n<html lang=\"zh-CN\" class=\"no-js\">\n{head}\n{body}\n</html>"
# ====== 头部 / 正文 ====== # ====== 头部 / 正文 ======
@@ -1150,12 +1189,66 @@ class HTMLRenderer:
""" """
渲染Chart.js等交互组件的占位容器,并记录配置JSON。 渲染Chart.js等交互组件的占位容器,并记录配置JSON。
在渲染前进行图表验证和修复:
1. 验证图表数据格式
2. 如果无效,尝试本地修复
3. 如果本地修复失败,尝试API修复
4. 如果所有修复都失败,使用原始数据(前端会降级处理)
参数: 参数:
block: widget类型的block,包含widgetId/props/data。 block: widget类型的block,包含widgetId/props/data。
返回: 返回:
str: 含canvas与配置脚本的HTML。 str: 含canvas与配置脚本的HTML。
""" """
# 统计
widget_type = block.get('widgetType', '')
is_chart = isinstance(widget_type, str) and widget_type.startswith('chart.js')
if is_chart:
self.chart_validation_stats['total'] += 1
# 验证图表数据
validation_result = self.chart_validator.validate(block)
if not validation_result.is_valid:
logger.warning(
f"图表 {block.get('widgetId', 'unknown')} 验证失败: {validation_result.errors}"
)
# 尝试修复
repair_result = self.chart_repairer.repair(block, validation_result)
if repair_result.success and repair_result.repaired_block:
# 修复成功,使用修复后的数据
block = repair_result.repaired_block
logger.info(
f"图表 {block.get('widgetId', 'unknown')} 修复成功 "
f"(方法: {repair_result.method}): {repair_result.changes}"
)
# 更新统计
if repair_result.method == 'local':
self.chart_validation_stats['repaired_locally'] += 1
elif repair_result.method == 'api':
self.chart_validation_stats['repaired_api'] += 1
else:
# 修复失败,使用原始数据,前端会尝试降级渲染
logger.warning(
f"图表 {block.get('widgetId', 'unknown')} 修复失败,"
f"将使用原始数据(前端会尝试降级渲染或显示fallback)"
)
self.chart_validation_stats['failed'] += 1
else:
# 验证通过
self.chart_validation_stats['valid'] += 1
if validation_result.warnings:
logger.info(
f"图表 {block.get('widgetId', 'unknown')} 验证通过,"
f"但有警告: {validation_result.warnings}"
)
# 渲染图表HTML
self.chart_counter += 1 self.chart_counter += 1
canvas_id = f"chart-{self.chart_counter}" canvas_id = f"chart-{self.chart_counter}"
config_id = f"chart-config-{self.chart_counter}" config_id = f"chart-config-{self.chart_counter}"
@@ -1220,6 +1313,39 @@ class HTMLRenderer:
""" """
return table_html return table_html
def _log_chart_validation_stats(self):
"""输出图表验证统计信息"""
stats = self.chart_validation_stats
if stats['total'] == 0:
return
logger.info("=" * 60)
logger.info("图表验证统计")
logger.info("=" * 60)
logger.info(f"总图表数量: {stats['total']}")
logger.info(f" ✓ 验证通过: {stats['valid']} ({stats['valid']/stats['total']*100:.1f}%)")
if stats['repaired_locally'] > 0:
logger.info(
f" ⚠ 本地修复: {stats['repaired_locally']} "
f"({stats['repaired_locally']/stats['total']*100:.1f}%)"
)
if stats['repaired_api'] > 0:
logger.info(
f" ⚠ API修复: {stats['repaired_api']} "
f"({stats['repaired_api']/stats['total']*100:.1f}%)"
)
if stats['failed'] > 0:
logger.warning(
f" ✗ 修复失败: {stats['failed']} "
f"({stats['failed']/stats['total']*100:.1f}%) - "
f"这些图表将使用降级渲染或显示fallback表格"
)
logger.info("=" * 60)
# ====== 前置信息防护 ====== # ====== 前置信息防护 ======
def _kpi_signature_from_items(self, items: Any) -> tuple | None: def _kpi_signature_from_items(self, items: Any) -> tuple | None:
@@ -2317,6 +2443,80 @@ function buildChartOptions(payload) {
return mergeOptions(baseOptions, overrideOptions); return mergeOptions(baseOptions, overrideOptions);
} }
/**
 * Validate a chart payload in the browser before it is handed to Chart.js.
 *
 * Type membership is tested with Sets rather than plain-object lookups, so
 * inherited keys such as "constructor" or "toString" are never mistaken for
 * a known chart type.
 *
 * @param {object} payload - chart payload; `payload.data` holds labels/datasets.
 * @param {string} type - chart type that is about to be rendered.
 * @returns {{valid: boolean, errors: string[]}} validation outcome.
 */
function validateChartData(payload, type) {
  const errors = [];
  const fail = (message) => {
    errors.push(message);
    return { valid: false, errors };
  };

  if (!payload || typeof payload !== 'object') {
    return fail('无效的payload');
  }

  const data = payload.data;
  if (!data || typeof data !== 'object') {
    return fail('缺少data字段');
  }

  // scatter / bubble use {x, y} or {x, y, r} points, so the standard
  // labels + datasets checks are skipped for them.
  const specialTypes = new Set(['scatter', 'bubble']);
  if (specialTypes.has(type)) {
    return { valid: true, errors };
  }

  // Standard chart types: datasets must be a non-empty array.
  const datasets = data.datasets;
  if (!Array.isArray(datasets)) {
    return fail('datasets必须是数组');
  }
  if (datasets.length === 0) {
    return fail('datasets数组为空');
  }

  // Every dataset must be an object carrying a non-empty data array.
  for (let i = 0; i < datasets.length; i++) {
    const dataset = datasets[i];
    if (!dataset || typeof dataset !== 'object') {
      errors.push(`datasets[${i}]不是对象`);
      continue;
    }
    if (!Array.isArray(dataset.data)) {
      errors.push(`datasets[${i}].data不是数组`);
    } else if (dataset.data.length === 0) {
      errors.push(`datasets[${i}].data为空`);
    }
  }

  // Chart types that cannot be drawn without a labels array.
  const labelRequiredTypes = new Set([
    'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
  ]);
  if (labelRequiredTypes.has(type)) {
    const labels = data.labels;
    if (!Array.isArray(labels)) {
      errors.push('缺少labels数组');
    } else if (labels.length === 0) {
      errors.push('labels数组为空');
    }
  }

  return { valid: errors.length === 0, errors };
}
function instantiateChart(ctx, payload, optionsTemplate, type) { function instantiateChart(ctx, payload, optionsTemplate, type) {
if (!ctx) { if (!ctx) {
return null; return null;
@@ -2358,9 +2558,17 @@ function hydrateCharts() {
renderChartFallback(canvas, payload, 'Canvas 初始化失败'); renderChartFallback(canvas, payload, 'Canvas 初始化失败');
return; return;
} }
// 前端数据验证
const desiredType = chartTypes[0];
const validation = validateChartData(payload, desiredType);
if (!validation.valid) {
console.warn('图表数据验证失败:', validation.errors);
// 验证失败但仍然尝试渲染,因为可能会降级成功
}
const card = canvas.closest('.chart-card') || canvas.parentElement; const card = canvas.closest('.chart-card') || canvas.parentElement;
const optionsTemplate = buildChartOptions(payload); const optionsTemplate = buildChartOptions(payload);
const desiredType = chartTypes[0];
let chartInstance = null; let chartInstance = null;
let selectedType = null; let selectedType = null;
let lastError; let lastError;
+283
View File
@@ -0,0 +1,283 @@
"""
图表API修复模块。
提供调用4个Engine(ReportEngine, ForumEngine, InsightEngine, MediaEngine)的LLM API
来修复图表数据的功能。
"""
from __future__ import annotations
import json
from typing import Any, Dict, List, Optional
from loguru import logger
from ReportEngine.utils.config import settings
# System prompt sent to the LLM for chart repair. It describes the expected
# Chart.js widget layout, the repair principles, and the common fixes.
CHART_REPAIR_SYSTEM_PROMPT = """你是一个专业的图表数据修复助手。你的任务是修复Chart.js图表数据中的格式错误,确保图表能够正常渲染。
**Chart.js标准数据格式:**
1. 标准图表(line, bar, pie, doughnut, radar, polarArea):
```json
{
"type": "widget",
"widgetType": "chart.js/bar",
"widgetId": "chart-001",
"props": {
"type": "bar",
"title": "图表标题",
"options": {
"responsive": true,
"plugins": {
"legend": {
"display": true
}
}
}
},
"data": {
"labels": ["A", "B", "C"],
"datasets": [
{
"label": "系列1",
"data": [10, 20, 30]
}
]
}
}
```
2. 特殊图表(scatter, bubble):
```json
{
"data": {
"datasets": [
{
"label": "系列1",
"data": [
{"x": 10, "y": 20},
{"x": 15, "y": 25}
]
}
]
}
}
```
**修复原则:**
1. **宁愿不改,也不要改错** - 如果不确定如何修复,保持原始数据
2. **最小改动** - 只修复明确的错误,不要过度修改
3. **保持数据完整性** - 不要丢失原始数据
4. **验证修复结果** - 确保修复后符合Chart.js格式
**常见错误及修复方法:**
1. 缺少labels字段 → 根据数据生成默认labels
2. datasets不是数组 → 转换为数组格式
3. 数据长度不匹配 → 截断或补null
4. 非数值数据 → 尝试转换或设为null
5. 缺少必需字段 → 添加默认值
请根据错误信息修复图表数据,并返回修复后的完整widget block(JSON格式)。
"""
def build_chart_repair_prompt(
    widget_block: Dict[str, Any],
    validation_errors: List[str]
) -> str:
    """
    Build the user prompt asking the LLM to repair one widget block.

    Args:
        widget_block: the original widget block; serialized as indented JSON
            with non-ASCII characters kept readable.
        validation_errors: validation error messages, rendered one per line
            as a ``- `` bullet list.

    Returns:
        str: the prompt text.
    """
    block_json = json.dumps(widget_block, ensure_ascii=False, indent=2)
    errors_text = "\n".join(f"- {error}" for error in validation_errors)
    prompt = f"""请修复以下图表数据中的错误:
**原始数据:**
```json
{block_json}
```
**检测到的错误:**
{errors_text}
**要求:**
1. 返回修复后的完整widget block(JSON格式)
2. 只修复明确的错误,保持其他数据不变
3. 确保修复后的数据符合Chart.js格式要求
4. 如果无法确定如何修复,保持原始数据
**重要的输出格式要求:**
1. 只返回纯JSON对象,不要添加任何说明文字
2. 不要使用```json```标记包裹
3. 确保JSON语法完全正确
4. 所有字符串使用双引号
"""
    return prompt
def create_llm_repair_functions() -> List:
"""
创建LLM修复函数列表。
返回4个Engine的修复函数:
1. ReportEngine
2. ForumEngine (通过ForumHost)
3. InsightEngine
4. MediaEngine
Returns:
List[Callable]: 修复函数列表
"""
repair_functions = []
# 1. ReportEngine修复函数
if settings.REPORT_ENGINE_API_KEY and settings.REPORT_ENGINE_BASE_URL:
def repair_with_report_engine(widget_block: Dict[str, Any], errors: List[str]) -> Optional[Dict[str, Any]]:
"""使用ReportEngine的LLM修复图表"""
try:
from llm_client import LLMClient
client = LLMClient(
api_key=settings.REPORT_ENGINE_API_KEY,
base_url=settings.REPORT_ENGINE_BASE_URL,
model_name=settings.REPORT_ENGINE_MODEL_NAME or "gpt-4",
provider="openai"
)
prompt = build_chart_repair_prompt(widget_block, errors)
response = client.invoke(
CHART_REPAIR_SYSTEM_PROMPT,
prompt,
temperature=0.0,
top_p=0.05
)
if not response:
return None
# 解析响应
repaired = json.loads(response)
return repaired
except Exception as e:
logger.error(f"ReportEngine图表修复失败: {e}")
return None
repair_functions.append(repair_with_report_engine)
# 2. ForumEngine修复函数
if settings.FORUM_HOST_API_KEY and settings.FORUM_HOST_BASE_URL:
def repair_with_forum_engine(widget_block: Dict[str, Any], errors: List[str]) -> Optional[Dict[str, Any]]:
"""使用ForumEngine的LLM修复图表"""
try:
from llm_client import LLMClient
client = LLMClient(
api_key=settings.FORUM_HOST_API_KEY,
base_url=settings.FORUM_HOST_BASE_URL,
model_name=settings.FORUM_HOST_MODEL_NAME or "gpt-4",
provider="openai"
)
prompt = build_chart_repair_prompt(widget_block, errors)
response = client.invoke(
CHART_REPAIR_SYSTEM_PROMPT,
prompt,
temperature=0.0,
top_p=0.05
)
if not response:
return None
repaired = json.loads(response)
return repaired
except Exception as e:
logger.error(f"ForumEngine图表修复失败: {e}")
return None
repair_functions.append(repair_with_forum_engine)
# 3. InsightEngine修复函数
if settings.INSIGHT_ENGINE_API_KEY and settings.INSIGHT_ENGINE_BASE_URL:
def repair_with_insight_engine(widget_block: Dict[str, Any], errors: List[str]) -> Optional[Dict[str, Any]]:
"""使用InsightEngine的LLM修复图表"""
try:
from llm_client import LLMClient
client = LLMClient(
api_key=settings.INSIGHT_ENGINE_API_KEY,
base_url=settings.INSIGHT_ENGINE_BASE_URL,
model_name=settings.INSIGHT_ENGINE_MODEL_NAME or "gpt-4",
provider="openai"
)
prompt = build_chart_repair_prompt(widget_block, errors)
response = client.invoke(
CHART_REPAIR_SYSTEM_PROMPT,
prompt,
temperature=0.0,
top_p=0.05
)
if not response:
return None
repaired = json.loads(response)
return repaired
except Exception as e:
logger.error(f"InsightEngine图表修复失败: {e}")
return None
repair_functions.append(repair_with_insight_engine)
# 4. MediaEngine修复函数
if settings.MEDIA_ENGINE_API_KEY and settings.MEDIA_ENGINE_BASE_URL:
def repair_with_media_engine(widget_block: Dict[str, Any], errors: List[str]) -> Optional[Dict[str, Any]]:
"""使用MediaEngine的LLM修复图表"""
try:
from llm_client import LLMClient
client = LLMClient(
api_key=settings.MEDIA_ENGINE_API_KEY,
base_url=settings.MEDIA_ENGINE_BASE_URL,
model_name=settings.MEDIA_ENGINE_MODEL_NAME or "gpt-4",
provider="openai"
)
prompt = build_chart_repair_prompt(widget_block, errors)
response = client.invoke(
CHART_REPAIR_SYSTEM_PROMPT,
prompt,
temperature=0.0,
top_p=0.05
)
if not response:
return None
repaired = json.loads(response)
return repaired
except Exception as e:
logger.error(f"MediaEngine图表修复失败: {e}")
return None
repair_functions.append(repair_with_media_engine)
if not repair_functions:
logger.warning("未配置任何Engine API,图表API修复功能将不可用")
return repair_functions
+631
View File
@@ -0,0 +1,631 @@
"""
图表验证和修复工具。
提供对Chart.js图表数据的验证和修复能力:
1. 验证图表数据格式是否符合Chart.js要求
2. 本地规则修复常见问题
3. LLM API辅助修复复杂问题
4. 遵循"宁愿不改,也不要改错"的原则
支持的图表类型:
- line (折线图)
- bar (柱状图)
- pie (饼图)
- doughnut (圆环图)
- radar (雷达图)
- polarArea (极地区域图)
- scatter (散点图)
"""
from __future__ import annotations
import copy
import json
from typing import Any, Dict, List, Optional, Tuple, Callable
from dataclasses import dataclass
from loguru import logger
@dataclass
class ValidationResult:
    """Outcome of validating one widget block."""

    # True when no error was recorded.
    is_valid: bool
    # Problems that make the block invalid.
    errors: List[str]
    # Non-fatal observations.
    warnings: List[str]

    def has_critical_errors(self) -> bool:
        """Return True when the block is invalid and at least one error was recorded."""
        if self.is_valid:
            return False
        return len(self.errors) > 0
@dataclass
class RepairResult:
    """Outcome of one repair attempt."""

    # Whether the attempt produced a usable block.
    success: bool
    # The resulting block, or None when there is none.
    repaired_block: Optional[Dict[str, Any]]
    # How the block was obtained: 'none', 'local' or 'api'.
    method: str
    # Human-readable description of each modification.
    changes: List[str]

    def has_changes(self) -> bool:
        """Return True when at least one modification was recorded."""
        change_count = len(self.changes)
        return change_count > 0
class ChartValidator:
    """
    Validate that a widget block carries well-formed Chart.js data.

    Checks performed by ``validate``:
    1. basic structure: widgetType, props and data fields
    2. the chart type can be determined (unknown types only raise a warning)
    3. data layout: labels + datasets, or point objects for scatter/bubble
    4. labels and dataset lengths agree (warning only)
    5. values are numeric where the chart type needs numbers
    """

    # Chart types the renderer is expected to handle.
    SUPPORTED_CHART_TYPES = {
        'line', 'bar', 'pie', 'doughnut', 'radar', 'polarArea', 'scatter',
        'bubble', 'horizontalBar'
    }

    # Chart types that need a labels array.
    LABEL_REQUIRED_TYPES = {
        'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
    }

    # Chart types whose dataset values must be numbers (or null).
    NUMERIC_DATA_TYPES = {
        'line', 'bar', 'radar', 'polarArea', 'pie', 'doughnut'
    }

    # Chart types whose data points are objects, with the keys each point needs.
    SPECIAL_DATA_TYPES = {
        'scatter': {'x', 'y'},
        'bubble': {'x', 'y', 'r'}
    }

    # Lower-case spelling -> canonical spelling. Type names are matched
    # case-insensitively but reported in the spelling used by the sets above,
    # so camelCase names such as 'polarArea' still hit the membership tests.
    _CANONICAL_CHART_TYPES = {name.lower(): name for name in SUPPORTED_CHART_TYPES}

    def __init__(self):
        """The validator keeps no per-instance state."""

    def validate(self, widget_block: Dict[str, Any]) -> ValidationResult:
        """
        Validate one widget block.

        Args:
            widget_block: widget block containing widgetId/widgetType/props/data.

        Returns:
            ValidationResult: ``is_valid`` is True when no error was recorded.
            Blocks whose widgetType does not start with ``chart.js`` are not
            charts and are reported as valid without further checks.
        """
        errors: List[str] = []
        warnings: List[str] = []

        # 1. Basic structure.
        if not isinstance(widget_block, dict):
            errors.append("widget_block必须是字典类型")
            return ValidationResult(False, errors, warnings)

        # 2. widgetType must be a non-empty string.
        widget_type = widget_block.get('widgetType', '')
        if not widget_type or not isinstance(widget_type, str):
            errors.append("缺少widgetType字段或类型不正确")
            return ValidationResult(False, errors, warnings)

        if not widget_type.startswith('chart.js'):
            # Not a chart widget: nothing to validate.
            return ValidationResult(True, errors, warnings)

        # 3. Determine the chart type.
        chart_type = self._extract_chart_type(widget_block)
        if not chart_type:
            errors.append("无法确定图表类型")
            return ValidationResult(False, errors, warnings)

        # 4. Unknown types are allowed through with a warning.
        if chart_type not in self.SUPPORTED_CHART_TYPES:
            warnings.append(f"图表类型 '{chart_type}' 可能不被支持,将尝试降级渲染")

        # 5. data must be a dict.
        data = widget_block.get('data')
        if not isinstance(data, dict):
            errors.append("data字段必须是字典类型")
            return ValidationResult(False, errors, warnings)

        # 6. Type-specific data checks.
        if chart_type in self.SPECIAL_DATA_TYPES:
            self._validate_special_data(data, chart_type, errors, warnings)
        else:
            self._validate_standard_data(data, chart_type, errors, warnings)

        # 7. props, when present, should be a dict.
        props = widget_block.get('props')
        if props is not None and not isinstance(props, dict):
            warnings.append("props字段应该是字典类型")

        return ValidationResult(len(errors) == 0, errors, warnings)

    @classmethod
    def _normalize_chart_type(cls, raw_type: str) -> str:
        """Return the canonical spelling of a known type, else the lower-cased input."""
        lowered = raw_type.lower()
        return cls._CANONICAL_CHART_TYPES.get(lowered, lowered)

    def _extract_chart_type(self, widget_block: Dict[str, Any]) -> Optional[str]:
        """
        Determine the chart type of a widget block.

        Sources, in priority order:
        1. props.type
        2. the part after the last '/' in widgetType (chart.js/bar -> bar)
        3. data.type

        Returns:
            The type in canonical spelling for supported types ('polarArea',
            'horizontalBar', 'bar', ...), lower-cased for unknown types, or
            None when no source provides one.
        """
        props = widget_block.get('props') or {}
        if isinstance(props, dict):
            chart_type = props.get('type')
            if chart_type and isinstance(chart_type, str):
                return self._normalize_chart_type(chart_type)

        widget_type = widget_block.get('widgetType', '')
        # Guard against a non-string widgetType; '/' in None would raise.
        if isinstance(widget_type, str) and '/' in widget_type:
            chart_type = widget_type.split('/')[-1]
            if chart_type:
                return self._normalize_chart_type(chart_type)

        data = widget_block.get('data') or {}
        if isinstance(data, dict):
            chart_type = data.get('type')
            if chart_type and isinstance(chart_type, str):
                return self._normalize_chart_type(chart_type)

        return None

    def _validate_standard_data(
        self,
        data: Dict[str, Any],
        chart_type: str,
        errors: List[str],
        warnings: List[str]
    ):
        """Validate the standard layout (labels + datasets); findings are appended in place."""
        labels = data.get('labels')
        datasets = data.get('datasets')

        # Labels: missing, wrong type and empty are reported separately.
        # All three are errors, so an empty labels list stays invalid.
        if chart_type in self.LABEL_REQUIRED_TYPES:
            if labels is None:
                errors.append(f"{chart_type}类型图表必须包含labels字段")
            elif not isinstance(labels, list):
                errors.append("labels必须是数组类型")
            elif len(labels) == 0:
                errors.append("labels数组为空,图表可能无法正常显示")

        # Datasets container.
        if datasets is None:
            errors.append("缺少datasets字段")
            return
        if not isinstance(datasets, list):
            errors.append("datasets必须是数组类型")
            return
        if len(datasets) == 0:
            errors.append("datasets数组为空")
            return

        for idx, dataset in enumerate(datasets):
            if not isinstance(dataset, dict):
                errors.append(f"datasets[{idx}]必须是对象类型")
                continue

            ds_data = dataset.get('data')
            if ds_data is None:
                errors.append(f"datasets[{idx}]缺少data字段")
                continue
            if not isinstance(ds_data, list):
                errors.append(f"datasets[{idx}].data必须是数组类型")
                continue
            if len(ds_data) == 0:
                warnings.append(f"datasets[{idx}].data数组为空")
                continue

            # A length mismatch is only a warning.
            if labels and isinstance(labels, list):
                if len(ds_data) != len(labels):
                    warnings.append(
                        f"datasets[{idx}].data长度({len(ds_data)})与labels长度({len(labels)})不匹配"
                    )

            # Numeric check; only the first offending value per dataset is reported.
            if chart_type in self.NUMERIC_DATA_TYPES:
                for data_idx, value in enumerate(ds_data):
                    if value is not None and not isinstance(value, (int, float)):
                        errors.append(
                            f"datasets[{idx}].data[{data_idx}]的值'{value}'不是有效的数值类型"
                        )
                        break

    @staticmethod
    def _point_error(where: str, point: Any, required_keys: set) -> Optional[str]:
        """Return the first problem found in one scatter/bubble point, or None."""
        if not isinstance(point, dict):
            return f"{where}必须是对象类型(包含{required_keys}字段)"

        missing_keys = required_keys - set(point.keys())
        if missing_keys:
            return f"{where}缺少必需字段: {missing_keys}"

        # Sorted so the reported key does not depend on set ordering.
        for key in sorted(required_keys):
            value = point.get(key)
            if value is not None and not isinstance(value, (int, float)):
                return f"{where}.{key}的值'{value}'不是有效的数值类型"
        return None

    def _validate_special_data(
        self,
        data: Dict[str, Any],
        chart_type: str,
        errors: List[str],
        warnings: List[str]
    ):
        """Validate the point-object layout (scatter, bubble); findings are appended in place."""
        datasets = data.get('datasets')

        if datasets is None:
            errors.append("缺少datasets字段")
            return
        if not isinstance(datasets, list):
            errors.append("datasets必须是数组类型")
            return
        if len(datasets) == 0:
            errors.append("datasets数组为空")
            return

        required_keys = self.SPECIAL_DATA_TYPES.get(chart_type, set())

        for idx, dataset in enumerate(datasets):
            if not isinstance(dataset, dict):
                errors.append(f"datasets[{idx}]必须是对象类型")
                continue

            ds_data = dataset.get('data')
            if ds_data is None:
                errors.append(f"datasets[{idx}]缺少data字段")
                continue
            if not isinstance(ds_data, list):
                errors.append(f"datasets[{idx}].data必须是数组类型")
                continue
            if len(ds_data) == 0:
                warnings.append(f"datasets[{idx}].data数组为空")
                continue

            # Only the first offending point per dataset is reported.
            for data_idx, point in enumerate(ds_data):
                problem = self._point_error(
                    f"datasets[{idx}].data[{data_idx}]", point, required_keys
                )
                if problem:
                    errors.append(problem)
                    break

    def can_render(self, widget_block: Dict[str, Any]) -> bool:
        """
        Quick check: does the block pass validation?

        Args:
            widget_block: widget block to check.

        Returns:
            bool: True when ``validate`` reports no errors.
        """
        return self.validate(widget_block).is_valid
class ChartRepairer:
    """
    Try to repair the data of a chart widget block.

    Strategy:
    1. local rule-based repair of common problems
    2. LLM API repair for blocks that local rules cannot fix
    3. every candidate is re-validated before it is accepted
    """

    def __init__(
        self,
        validator: ChartValidator,
        llm_repair_fns: Optional[List[Callable]] = None
    ):
        """
        Args:
            validator: validator used to check blocks before and after repair.
            llm_repair_fns: callables ``fn(widget_block, errors)`` returning a
                repaired dict or None; tried in order by ``repair_with_api``.
        """
        self.validator = validator
        self.llm_repair_fns = llm_repair_fns or []

    def repair(
        self,
        widget_block: Dict[str, Any],
        validation_result: Optional[ValidationResult] = None
    ) -> RepairResult:
        """
        Try to repair a widget block.

        Args:
            widget_block: widget block to repair; it is never mutated.
            validation_result: result of validating ``widget_block``; computed
                here when omitted.

        Returns:
            RepairResult: a validated repaired block ('local' or 'api'), or the
            original block with method 'none'. ``success`` is False only when
            the original was invalid and no repair passed validation.
        """
        if validation_result is None:
            validation_result = self.validator.validate(widget_block)

        # Local repair runs even for valid blocks, because warnings (e.g. a
        # length mismatch) can still be fixed.
        logger.info("尝试本地修复图表")
        local_result = self.repair_locally(widget_block, validation_result)

        if local_result.has_changes():
            repaired_validation = self.validator.validate(local_result.repaired_block)
            if repaired_validation.is_valid:
                logger.info(f"本地修复成功: {local_result.changes}")
                return RepairResult(True, local_result.repaired_block, 'local', local_result.changes)
            logger.warning(f"本地修复后仍然无效: {repaired_validation.errors}")

        # The API is only consulted for blocks that are really broken.
        if validation_result.has_critical_errors() and len(self.llm_repair_fns) > 0:
            logger.info("本地修复失败,尝试API修复")
            api_result = self.repair_with_api(widget_block, validation_result)
            if api_result.success:
                repaired_validation = self.validator.validate(api_result.repaired_block)
                if repaired_validation.is_valid:
                    logger.info(f"API修复成功: {api_result.changes}")
                    return api_result
                logger.warning(f"API修复后仍然无效: {repaired_validation.errors}")

        if validation_result.is_valid:
            # The original already validates. Any local changes reaching this
            # point failed re-validation, so the untouched original is kept.
            return RepairResult(True, widget_block, 'none', [])

        logger.warning("所有修复尝试失败,保持原始数据")
        return RepairResult(False, widget_block, 'none', [])

    @staticmethod
    def _coerce_number(value: Any) -> Optional[float]:
        """Convert a non-numeric value to a finite float, or None when that is not possible."""
        import math  # local import: only needed on this repair path

        if not isinstance(value, str):
            return None
        try:
            number = float(value)
        except ValueError:
            return None
        # "nan"/"inf" parse as floats but are not valid JSON numbers.
        return number if math.isfinite(number) else None

    def repair_locally(
        self,
        widget_block: Dict[str, Any],
        validation_result: ValidationResult
    ) -> RepairResult:
        """
        Repair a block with local rules, working on a deep copy.

        Rules:
        1. add missing props/data containers
        2. fill in props.type (from widgetType, falling back to 'bar')
        3. build datasets from ``values``/``series`` or, failing that, a
           zero-filled dataset matching the labels
        4. generate default labels for label-based charts
        5. pad/truncate dataset data to the labels length
        6. coerce non-numeric values to numbers, or null them

        Returns:
            RepairResult: method 'local'; ``success`` is True when at least
            one change was made. The result is not validated here.
        """
        if not isinstance(widget_block, dict):
            # Nothing can be repaired locally on a non-dict block.
            return RepairResult(False, widget_block, 'local', [])

        repaired = copy.deepcopy(widget_block)
        changes: List[str] = []

        # 1. Basic containers.
        if not isinstance(repaired.get('props'), dict):
            repaired['props'] = {}
            changes.append("添加缺失的props字段")
        if not isinstance(repaired.get('data'), dict):
            repaired['data'] = {}
            changes.append("添加缺失的data字段")

        # 2. Chart type.
        chart_type = self.validator._extract_chart_type(repaired)
        props = repaired['props']
        if not chart_type:
            widget_type = repaired.get('widgetType', '')
            inferred = ''
            if isinstance(widget_type, str) and '/' in widget_type:
                inferred = widget_type.split('/')[-1].lower()
            if inferred:
                chart_type = inferred
                props['type'] = chart_type
                changes.append(f"从widgetType推断图表类型: {chart_type}")
            else:
                # Nothing to infer from (including a trailing '/'): use bar.
                chart_type = 'bar'
                props['type'] = chart_type
                changes.append("设置默认图表类型: bar")
        elif not props.get('type'):
            props['type'] = chart_type
            changes.append(f"将推断的图表类型添加到props: {chart_type}")

        # 3. Datasets container.
        data = repaired['data']
        if not isinstance(data.get('datasets'), list):
            data['datasets'] = []
            changes.append("添加缺失的datasets字段")

        if len(data['datasets']) == 0:
            constructed = self._try_construct_datasets(data, chart_type)
            if constructed:
                data['datasets'] = constructed
                changes.append("从data中构造datasets")
            elif isinstance(data.get('labels'), list) and len(data['labels']) > 0:
                data['datasets'] = [{
                    'label': '数据',
                    'data': [0] * len(data['labels'])
                }]
                changes.append("根据labels创建默认dataset(使用零值)")

        # 4. Default labels for charts that need them. An empty labels list is
        # treated like a missing one, otherwise step 5 would truncate the data.
        if chart_type in ChartValidator.LABEL_REQUIRED_TYPES:
            current_labels = data.get('labels')
            if not isinstance(current_labels, list) or len(current_labels) == 0:
                first_ds = data['datasets'][0] if data['datasets'] else None
                if isinstance(first_ds, dict) and isinstance(first_ds.get('data'), list):
                    data_len = len(first_ds['data'])
                    if data_len > 0:
                        data['labels'] = [f"项目 {i+1}" for i in range(data_len)]
                        changes.append(f"生成{data_len}个默认labels")

        # 5./6. Per-dataset fixes.
        labels = data.get('labels')
        # Align only when there are labels to align to. Scatter/bubble points
        # are not indexed by labels and must never be padded or truncated.
        align_to_labels = (
            chart_type not in ChartValidator.SPECIAL_DATA_TYPES
            and isinstance(labels, list)
            and len(labels) > 0
        )

        for idx, dataset in enumerate(data.get('datasets', [])):
            if not isinstance(dataset, dict):
                continue

            if not isinstance(dataset.get('data'), list):
                dataset['data'] = []
                changes.append(f"为datasets[{idx}]添加空data数组")

            if 'label' not in dataset:
                dataset['label'] = f"系列 {idx + 1}"
                changes.append(f"为datasets[{idx}]添加默认label")

            if align_to_labels:
                ds_data = dataset['data']
                if len(ds_data) < len(labels):
                    dataset['data'] = ds_data + [None] * (len(labels) - len(ds_data))
                    changes.append(f"datasets[{idx}]数据长度不足,补充null")
                elif len(ds_data) > len(labels):
                    dataset['data'] = ds_data[:len(labels)]
                    changes.append(f"datasets[{idx}]数据长度过长,截断")

            if chart_type in ChartValidator.NUMERIC_DATA_TYPES:
                ds_data = dataset['data']
                converted = False
                for i, value in enumerate(ds_data):
                    if value is None or isinstance(value, (int, float)):
                        continue
                    # Numeric strings become floats; everything else becomes null.
                    ds_data[i] = self._coerce_number(value)
                    converted = True
                if converted:
                    changes.append(f"datasets[{idx}]包含非数值数据,已尝试转换")

        return RepairResult(len(changes) > 0, repaired, 'local', changes)

    def _try_construct_datasets(
        self,
        data: Dict[str, Any],
        chart_type: str
    ) -> Optional[List[Dict[str, Any]]]:
        """Build datasets from a ``values`` list or a ``series`` list in data, or return None."""
        # A flat list of values becomes a single dataset.
        if 'values' in data and isinstance(data['values'], list):
            return [{
                'label': '数据',
                'data': data['values']
            }]

        # A series list: each entry is {name, data} or a bare list of values.
        if 'series' in data and isinstance(data['series'], list):
            datasets = []
            for idx, series in enumerate(data['series']):
                if isinstance(series, dict):
                    datasets.append({
                        'label': series.get('name', f'系列 {idx + 1}'),
                        'data': series.get('data', [])
                    })
                elif isinstance(series, list):
                    datasets.append({
                        'label': f'系列 {idx + 1}',
                        'data': series
                    })
            if datasets:
                return datasets

        return None

    def repair_with_api(
        self,
        widget_block: Dict[str, Any],
        validation_result: ValidationResult
    ) -> RepairResult:
        """
        Repair through the configured LLM functions.

        The functions are tried in order; the first dict that passes
        validation wins. A function that raises is logged and skipped.

        Returns:
            RepairResult: method 'api'; on failure ``repaired_block`` is None.
        """
        if not self.llm_repair_fns:
            return RepairResult(False, None, 'api', [])

        for idx, repair_fn in enumerate(self.llm_repair_fns):
            try:
                logger.info(f"尝试使用Engine {idx + 1}修复图表")
                repaired = repair_fn(widget_block, validation_result.errors)
                if repaired and isinstance(repaired, dict):
                    repaired_validation = self.validator.validate(repaired)
                    if repaired_validation.is_valid:
                        return RepairResult(
                            True,
                            repaired,
                            'api',
                            [f"使用Engine {idx + 1}修复成功"]
                        )
            except Exception as e:
                # Best-effort: one failing engine must not stop the others.
                logger.error(f"Engine {idx + 1}修复失败: {e}")
                continue

        return RepairResult(False, None, 'api', [])
def create_chart_validator() -> ChartValidator:
    """Return a new ChartValidator instance."""
    validator = ChartValidator()
    return validator
def create_chart_repairer(
    validator: Optional[ChartValidator] = None,
    llm_repair_fns: Optional[List[Callable]] = None
) -> ChartRepairer:
    """Return a new ChartRepairer, creating a validator when none is supplied."""
    effective_validator = create_chart_validator() if validator is None else validator
    return ChartRepairer(effective_validator, llm_repair_fns)
+456
View File
@@ -0,0 +1,456 @@
"""
图表验证器和修复器的测试用例。
运行测试:
python -m pytest ReportEngine/utils/test_chart_validator.py -v
"""
import pytest
from ReportEngine.utils.chart_validator import (
ChartValidator,
ChartRepairer,
ValidationResult,
RepairResult,
create_chart_validator,
create_chart_repairer
)
class TestChartValidator:
    """Tests for ChartValidator."""

    def setup_method(self):
        """Create a fresh validator before every test."""
        self.validator = create_chart_validator()

    @staticmethod
    def _chart(kind, data=None):
        """Build a chart.js widget block of the given kind; ``data`` is omitted when None."""
        block = {"widgetType": f"chart.js/{kind}", "props": {"type": kind}}
        if data is not None:
            block["data"] = data
        return block

    def test_valid_bar_chart(self):
        """A well-formed bar chart validates without errors."""
        widget_block = self._chart("bar", {
            "labels": ["一月", "二月", "三月"],
            "datasets": [{"label": "销售额", "data": [100, 200, 150]}]
        })
        widget_block.update({"type": "widget", "widgetId": "chart-001"})
        widget_block["props"]["title"] = "销售数据"

        result = self.validator.validate(widget_block)
        assert result.is_valid
        assert len(result.errors) == 0

    def test_valid_line_chart(self):
        """A well-formed line chart validates."""
        widget_block = self._chart("line", {
            "labels": ["周一", "周二", "周三"],
            "datasets": [{"label": "访问量", "data": [50, 75, 60]}]
        })
        widget_block.update({"type": "widget", "widgetId": "chart-002"})

        result = self.validator.validate(widget_block)
        assert result.is_valid

    def test_valid_pie_chart(self):
        """A well-formed pie chart validates, even without a dataset label."""
        widget_block = self._chart("pie", {
            "labels": ["A", "B", "C"],
            "datasets": [{"data": [30, 40, 30]}]
        })

        result = self.validator.validate(widget_block)
        assert result.is_valid

    def test_missing_widgetType(self):
        """A block without widgetType is rejected."""
        widget_block = {"props": {}, "data": {}}

        result = self.validator.validate(widget_block)
        assert not result.is_valid
        assert "widgetType" in result.errors[0]

    def test_missing_data_field(self):
        """A chart without a data field is rejected."""
        widget_block = self._chart("bar")

        result = self.validator.validate(widget_block)
        assert not result.is_valid
        assert "data" in result.errors[0]

    def test_missing_datasets(self):
        """A chart with labels but no datasets is rejected."""
        widget_block = self._chart("bar", {"labels": ["A", "B"]})

        result = self.validator.validate(widget_block)
        assert not result.is_valid
        assert "datasets" in result.errors[0]

    def test_empty_datasets(self):
        """An empty datasets array is rejected with an 'empty' error."""
        widget_block = self._chart("bar", {"labels": ["A", "B"], "datasets": []})

        result = self.validator.validate(widget_block)
        assert not result.is_valid
        # The previous empty-string needle matched any message.
        assert "datasets" in result.errors[0]
        assert "空" in result.errors[0]

    def test_missing_labels_for_bar_chart(self):
        """A bar chart without labels is rejected."""
        widget_block = self._chart("bar", {
            "datasets": [{"label": "系列1", "data": [10, 20, 30]}]
        })

        result = self.validator.validate(widget_block)
        assert not result.is_valid
        assert "labels" in result.errors[0]

    def test_invalid_data_type(self):
        """Non-numeric values in a numeric chart are rejected."""
        widget_block = self._chart("bar", {
            "labels": ["A", "B"],
            "datasets": [{"label": "系列1", "data": ["abc", "def"]}]  # should be numbers
        })

        result = self.validator.validate(widget_block)
        assert not result.is_valid
        assert "数值类型" in result.errors[0]

    def test_data_length_mismatch_warning(self):
        """A labels/data length mismatch is reported as a warning, not an error."""
        widget_block = self._chart("bar", {
            "labels": ["A", "B", "C"],
            "datasets": [{"label": "系列1", "data": [10, 20]}]  # one value short
        })

        result = self.validator.validate(widget_block)
        assert len(result.warnings) > 0
        assert "不匹配" in result.warnings[0]

    def test_scatter_chart(self):
        """A scatter chart with {x, y} points validates."""
        widget_block = self._chart("scatter", {
            "datasets": [{
                "label": "数据点",
                "data": [{"x": 10, "y": 20}, {"x": 15, "y": 25}]
            }]
        })

        result = self.validator.validate(widget_block)
        assert result.is_valid

    def test_non_chart_widget(self):
        """Widgets that are not chart.js charts are skipped and reported valid."""
        widget_block = {"widgetType": "custom/widget", "props": {}, "data": {}}

        result = self.validator.validate(widget_block)
        assert result.is_valid
class TestChartRepairer:
    """Tests for ChartRepairer (local repair only; no LLM functions configured)."""

    def setup_method(self):
        """Create a fresh validator and repairer before every test."""
        self.validator = create_chart_validator()
        self.repairer = create_chart_repairer(validator=self.validator)

    @staticmethod
    def _bar(data, props=None):
        """Build a chart.js/bar block; ``props`` is omitted entirely when None."""
        block = {"widgetType": "chart.js/bar"}
        if props is not None:
            block["props"] = props
        block["data"] = data
        return block

    def _repair_bar(self, data, props=None):
        """Repair a bar block built from the given data/props and return the result."""
        return self.repairer.repair(self._bar(data, props))

    def test_repair_missing_props(self):
        """A missing props field is added by local repair."""
        result = self._repair_bar({
            "labels": ["A", "B"],
            "datasets": [{"label": "系列1", "data": [10, 20]}]
        })

        assert result.success
        assert "props" in result.repaired_block
        assert result.method == "local"

    def test_repair_missing_chart_type(self):
        """An empty props gets its type filled in from widgetType."""
        result = self._repair_bar(
            {"labels": ["A", "B"], "datasets": [{"label": "系列1", "data": [10, 20]}]},
            props={}
        )

        assert result.success
        assert result.repaired_block["props"]["type"] == "bar"
        assert "图表类型" in str(result.changes)

    def test_repair_missing_datasets(self):
        """Missing datasets are created as a list."""
        result = self._repair_bar({"labels": ["A", "B"]}, props={"type": "bar"})

        assert result.success
        repaired_data = result.repaired_block["data"]
        assert "datasets" in repaired_data
        assert isinstance(repaired_data["datasets"], list)

    def test_repair_missing_labels(self):
        """Missing labels are generated to match the data length."""
        result = self._repair_bar(
            {"datasets": [{"label": "系列1", "data": [10, 20, 30]}]},
            props={"type": "bar"}
        )

        assert result.success
        repaired_data = result.repaired_block["data"]
        assert "labels" in repaired_data
        assert len(repaired_data["labels"]) == 3

    def test_repair_data_length_mismatch(self):
        """Data shorter than labels is padded up to the labels length."""
        result = self._repair_bar(
            {
                "labels": ["A", "B", "C", "D"],
                "datasets": [{"label": "系列1", "data": [10, 20]}]  # two values short
            },
            props={"type": "bar"}
        )

        assert result.success
        # Padded to four entries.
        assert len(result.repaired_block["data"]["datasets"][0]["data"]) == 4

    def test_repair_string_to_number(self):
        """Numeric strings are converted to floats."""
        result = self._repair_bar(
            {
                "labels": ["A", "B"],
                "datasets": [{"label": "系列1", "data": ["10", "20"]}]  # numeric strings
            },
            props={"type": "bar"}
        )

        assert result.success
        first_value = result.repaired_block["data"]["datasets"][0]["data"][0]
        assert isinstance(first_value, float)

    def test_repair_construct_datasets_from_values(self):
        """Datasets are built from a ``values`` field when datasets is absent."""
        result = self._repair_bar(
            {"labels": ["A", "B"], "values": [10, 20]},  # values instead of datasets
            props={"type": "bar"}
        )

        assert result.success
        repaired_data = result.repaired_block["data"]
        assert "datasets" in repaired_data
        assert len(repaired_data["datasets"]) > 0

    def test_no_repair_needed(self):
        """A fully valid block comes back unchanged with method 'none'."""
        result = self._repair_bar(
            {"labels": ["A", "B"], "datasets": [{"label": "系列1", "data": [10, 20]}]},
            props={"type": "bar"}
        )

        assert result.success
        assert result.method == "none"
        assert len(result.changes) == 0

    def test_repair_adds_default_label(self):
        """A dataset without a label receives a default one."""
        result = self._repair_bar(
            {"labels": ["A", "B"], "datasets": [{"data": [10, 20]}]},  # label missing
            props={"type": "bar"}
        )

        assert result.success
        assert "label" in result.repaired_block["data"]["datasets"][0]
class TestValidatorIntegration:
    """Integration tests covering validation and repair together."""

    def test_full_validation_and_repair_workflow(self):
        """An invalid block fails validation, is repaired, then validates."""
        checker = create_chart_validator()
        fixer = create_chart_repairer(validator=checker)

        # Several problems at once: no props, no labels, string values.
        broken_block = {
            "widgetType": "chart.js/bar",
            "data": {
                "datasets": [
                    {"data": ["10", "20", "30"]}
                ]
            }
        }

        # 1. Validation must fail.
        initial = checker.validate(broken_block)
        assert not initial.is_valid

        # 2. Repair must succeed.
        outcome = fixer.repair(broken_block, initial)
        assert outcome.success

        # 3. The repaired block must validate.
        assert checker.validate(outcome.repaired_block).is_valid
if __name__ == "__main__":
    # Run this module's tests verbosely with short tracebacks.
    pytest_args = [__file__, "-v", "--tb=short"]
    pytest.main(pytest_args)