Files
intelligence_system/system_management/scheduler/task_scheduler.py
T
2025-09-09 16:29:20 +08:00

211 lines
8.0 KiB
Python

import importlib
import time
from datetime import datetime
from typing import Dict, List, Optional, Any
import croniter
import pytz
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
from storage.mysql_agent import MySQLAgent
from utils.logger import CrossPlatformLog
# 初始化调度器日志
log = CrossPlatformLog.get_logger("TaskScheduler")
class TaskScheduler:
def __init__(self, db_config: Optional[Dict] = None, max_workers: int = 5):
"""初始化任务调度器(基于Cron表达式)"""
self.db = MySQLAgent(db_config or {})
self.executor = ThreadPoolExecutor(max_workers=max_workers)
log.info(f"任务调度器已初始化,最大工作线程数: {max_workers}")
def check_and_run_tasks(self) -> Dict[str, int]:
"""检查并执行所有到期的任务"""
result = {'总任务数': 0, '成功': 0, '失败': 0}
try:
# 获取当前时间(带时区)
now = datetime.now(pytz.timezone('Asia/Shanghai')).replace(tzinfo=None)
# 查询所有到期的活跃任务
tasks_df = self.db.query_to_df("""
SELECT * FROM main_task
WHERE is_active = 1
AND next_run_time <= %s
AND is_running = 0
ORDER BY next_run_time
""", params=(now,))
result['总任务数'] = len(tasks_df)
if tasks_df.empty:
log.debug("没有到期的任务需要执行")
return result
# 并发执行任务
futures = []
for _, task in tasks_df.iterrows():
futures.append(self.executor.submit(self._process_single_task, task))
# 收集执行结果
for future in as_completed(futures):
try:
if future.result():
result['成功'] += 1
else:
result['失败'] += 1
except Exception as e:
log.error(f"任务线程执行失败: {str(e)}")
result['失败'] += 1
log.info(
"任务调度周期完成",
总任务数=result['总任务数'],
成功=result['成功'],
失败=result['失败']
)
return result
except Exception as e:
log.critical("调度器主循环执行失败", exc_info=True)
raise
def _process_single_task(self, task: Dict[str, Any]) -> bool:
"""处理单个任务(线程安全)"""
task_id = task['task_id']
task_log = log.bind(task_id=task_id, task_name=task['task_name'])
task_log.info(f"开始执行任务: {task['task_name']}")
try:
# 标记任务为运行中
self._update_task_status(task_id, {
'is_running': 1,
'last_run_time': datetime.now()
})
# 执行任务逻辑
self._execute_task_logic(task)
# 计算下次运行时间(基于Cron表达式)
next_run_time = self._calculate_next_run_time(
cron_expr=task['cron_expression'],
time_zone=task.get('time_zone', 'Asia/Shanghai')
)
# 更新任务状态为成功
self._update_task_status(task_id, {
'last_run_status': 'success',
'is_running': 0,
'run_count': task['run_count'] + 1,
'next_run_time': next_run_time
})
task_log.info(f"任务执行成功: {task['task_name']}")
return True
except Exception as e:
task_log.error(f"任务执行失败: {str(e)}", exc_info=True)
# 失败时计算下次重试时间(15分钟后)
next_retry_time = datetime.now() + pd.Timedelta(minutes=15)
self._update_task_status(task_id, {
'last_run_status': 'failed',
'is_running': 0,
'next_run_time': next_retry_time
})
return False
def _execute_task_logic(self, task: Dict[str, Any]) -> None:
"""执行任务的具体逻辑(动态导入模块)"""
start_time = time.time()
task_log = log.bind(task_id=task['task_id'], module=task['module_path'])
try:
# 动态导入任务模块
module = importlib.import_module(task['module_path'])
if not hasattr(module, 'main'):
raise ImportError(f"模块 {task['module_path']} 中未找到 main() 函数")
task_log.debug("开始执行模块中的 main() 函数")
module.main() # 调用任务主函数
task_log.info(f"任务执行完成,耗时: {time.time() - start_time:.2f}")
except Exception as e:
task_log.error("任务逻辑执行失败", exc_info=True)
raise
def _calculate_next_run_time(self, cron_expr: str, time_zone: str = 'Asia/Shanghai') -> datetime:
"""基于Cron表达式计算下次运行时间"""
try:
tz = pytz.timezone(time_zone)
now = datetime.now(tz)
cron = croniter.croniter(cron_expr, now)
next_run = cron.get_next(datetime)
return next_run.replace(tzinfo=None) # 移除时区信息,适应数据库存储
except Exception as e:
log.error(f"Cron表达式解析失败: {cron_expr}, 错误: {str(e)}")
raise ValueError(f"无效的Cron表达式: {cron_expr}")
def _update_task_status(self, task_id: int, updates: Dict[str, Any]) -> None:
"""更新任务状态到数据库"""
set_clause = ", ".join([f"{k}=%s" for k in updates.keys()])
sql = f"UPDATE main_task SET {set_clause}, updated_at=NOW() WHERE task_id=%s"
params = list(updates.values()) + [task_id]
try:
affected_rows = self.db.execute_sql(sql, params=params)
if affected_rows != 1:
log.warning(
"任务状态更新异常",
task_id=task_id,
预期影响行数=1,
实际影响行数=affected_rows
)
except Exception as e:
log.error(f"任务状态更新失败,task_id: {task_id}", exc_info=True)
raise
def add_task(self,
task_name: str,
task_type: str,
module_path: str,
cron_expression: str,
time_zone: str = 'Asia/Shanghai') -> int:
"""添加新的Cron任务"""
if not cron_expression:
raise ValueError("Cron表达式不能为空")
# 计算首次运行时间
first_run_time = self._calculate_next_run_time(cron_expression, time_zone)
# 插入数据库
sql = """
INSERT INTO main_task
(task_name, task_type, module_path, cron_expression, time_zone,
next_run_time, is_active)
VALUES (%s, %s, %s, %s, %s, %s, 1)
"""
params = (task_name, task_type, module_path, cron_expression, time_zone, first_run_time)
try:
self.db.execute_sql(sql, params=params)
task_id = self.db.query_to_df("SELECT LAST_INSERT_ID() AS id").iloc[0]['id']
log.info(
f"新任务添加成功",
task_id=task_id,
task_name=task_name,
cron表达式=cron_expression,
首次运行时间=first_run_time
)
return task_id
except Exception as e:
log.error(f"添加任务失败: {task_name}", exc_info=True)
raise
def get_pending_tasks_count(self) -> int:
"""获取当前等待执行的任务数量"""
result = self.db.query_to_df("""
SELECT COUNT(*) AS count FROM main_task
WHERE is_active = 1 AND next_run_time <= %s
""", params=(datetime.now(),))
return result.iloc[0]['count'] if not result.empty else 0