import importlib import time from datetime import datetime from typing import Dict, List, Optional, Any import croniter import pytz from concurrent.futures import ThreadPoolExecutor, as_completed import pandas as pd from utils.mysql_agent import MySQLAgent from utils.logger import CrossPlatformLog # 初始化调度器日志 log = CrossPlatformLog.get_logger("TaskScheduler") class TaskScheduler: def __init__(self, db_config: Optional[Dict] = None, max_workers: int = 5): """初始化任务调度器(基于Cron表达式)""" self.db = MySQLAgent(db_config or {}) self.executor = ThreadPoolExecutor(max_workers=max_workers) log.info(f"任务调度器已初始化,最大工作线程数: {max_workers}") def check_and_run_tasks(self) -> Dict[str, int]: """检查并执行所有到期的任务""" result = {'总任务数': 0, '成功': 0, '失败': 0} try: # 获取当前时间(带时区) now = datetime.now(pytz.timezone('Asia/Shanghai')).replace(tzinfo=None) # 查询所有到期的活跃任务 tasks_df = self.db.query_to_df(""" SELECT * FROM main_task WHERE is_active = 1 AND next_run_time <= %s AND is_running = 0 ORDER BY next_run_time """, params=(now,)) result['总任务数'] = len(tasks_df) if tasks_df.empty: log.debug("没有到期的任务需要执行") return result # 并发执行任务 futures = [] for _, task in tasks_df.iterrows(): futures.append(self.executor.submit(self._process_single_task, task)) # 收集执行结果 for future in as_completed(futures): try: if future.result(): result['成功'] += 1 else: result['失败'] += 1 except Exception as e: log.error(f"任务线程执行失败: {str(e)}") result['失败'] += 1 log.info( "任务调度周期完成", 总任务数=result['总任务数'], 成功=result['成功'], 失败=result['失败'] ) return result except Exception as e: log.critical("调度器主循环执行失败", exc_info=True) raise def _process_single_task(self, task: Dict[str, Any]) -> bool: """处理单个任务(线程安全)""" task_id = task['task_id'] task_log = log.bind(task_id=task_id, task_name=task['task_name']) task_log.info(f"开始执行任务: {task['task_name']}") try: # 标记任务为运行中 self._update_task_status(task_id, { 'is_running': 1, 'last_run_time': datetime.now() }) # 执行任务逻辑 self._execute_task_logic(task) # 计算下次运行时间(基于Cron表达式) next_run_time = self._calculate_next_run_time( cron_expr=task['cron_expression'], time_zone=task.get('time_zone', 'Asia/Shanghai') ) # 更新任务状态为成功 self._update_task_status(task_id, { 'last_run_status': 'success', 'is_running': 0, 'run_count': task['run_count'] + 1, 'next_run_time': next_run_time }) task_log.info(f"任务执行成功: {task['task_name']}") return True except Exception as e: task_log.error(f"任务执行失败: {str(e)}", exc_info=True) # 失败时计算下次重试时间(15分钟后) next_retry_time = datetime.now() + pd.Timedelta(minutes=15) self._update_task_status(task_id, { 'last_run_status': 'failed', 'is_running': 0, 'next_run_time': next_retry_time }) return False def _execute_task_logic(self, task: Dict[str, Any]) -> None: """执行任务的具体逻辑(动态导入模块)""" start_time = time.time() task_log = log.bind(task_id=task['task_id'], module=task['module_path']) try: # 动态导入任务模块 module = importlib.import_module(task['module_path']) if not hasattr(module, 'main'): raise ImportError(f"模块 {task['module_path']} 中未找到 main() 函数") task_log.debug("开始执行模块中的 main() 函数") module.main() # 调用任务主函数 task_log.info(f"任务执行完成,耗时: {time.time() - start_time:.2f}秒") except Exception as e: task_log.error("任务逻辑执行失败", exc_info=True) raise def _calculate_next_run_time(self, cron_expr: str, time_zone: str = 'Asia/Shanghai') -> datetime: """基于Cron表达式计算下次运行时间""" try: tz = pytz.timezone(time_zone) now = datetime.now(tz) cron = croniter.croniter(cron_expr, now) next_run = cron.get_next(datetime) return next_run.replace(tzinfo=None) # 移除时区信息,适应数据库存储 except Exception as e: log.error(f"Cron表达式解析失败: {cron_expr}, 错误: {str(e)}") raise ValueError(f"无效的Cron表达式: {cron_expr}") def _update_task_status(self, task_id: int, updates: Dict[str, Any]) -> None: """更新任务状态到数据库""" set_clause = ", ".join([f"{k}=%s" for k in updates.keys()]) sql = f"UPDATE main_task SET {set_clause}, updated_at=NOW() WHERE task_id=%s" params = list(updates.values()) + [task_id] try: affected_rows = self.db.execute_sql(sql, params=params) if affected_rows != 1: log.warning( "任务状态更新异常", task_id=task_id, 预期影响行数=1, 实际影响行数=affected_rows ) except Exception as e: log.error(f"任务状态更新失败,task_id: {task_id}", exc_info=True) raise def add_task(self, task_name: str, task_type: str, module_path: str, cron_expression: str, time_zone: str = 'Asia/Shanghai') -> int: """添加新的Cron任务""" if not cron_expression: raise ValueError("Cron表达式不能为空") # 计算首次运行时间 first_run_time = self._calculate_next_run_time(cron_expression, time_zone) # 插入数据库 sql = """ INSERT INTO main_task (task_name, task_type, module_path, cron_expression, time_zone, next_run_time, is_active) VALUES (%s, %s, %s, %s, %s, %s, 1) """ params = (task_name, task_type, module_path, cron_expression, time_zone, first_run_time) try: self.db.execute_sql(sql, params=params) task_id = self.db.query_to_df("SELECT LAST_INSERT_ID() AS id").iloc[0]['id'] log.info( f"新任务添加成功", task_id=task_id, task_name=task_name, cron表达式=cron_expression, 首次运行时间=first_run_time ) return task_id except Exception as e: log.error(f"添加任务失败: {task_name}", exc_info=True) raise def get_pending_tasks_count(self) -> int: """获取当前等待执行的任务数量""" result = self.db.query_to_df(""" SELECT COUNT(*) AS count FROM main_task WHERE is_active = 1 AND next_run_time <= %s """, params=(datetime.now(),)) return result.iloc[0]['count'] if not result.empty else 0