minio对象存储数据库链接
This commit is contained in:
@@ -6,12 +6,14 @@ import croniter
|
||||
import pytz
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
import pandas as pd
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from utils.mysql_agent import MySQLAgent
|
||||
from utils.logger import CrossPlatformLog
|
||||
|
||||
# 初始化调度器日志
|
||||
log = CrossPlatformLog.get_logger("TaskScheduler")
|
||||
|
||||
|
||||
class TaskScheduler:
|
||||
def __init__(self, db_config: Optional[Dict] = None, max_workers: int = 5):
|
||||
"""初始化任务调度器(基于Cron表达式)"""
|
||||
@@ -20,31 +22,37 @@ class TaskScheduler:
|
||||
log.info(f"任务调度器已初始化,最大工作线程数: {max_workers}")
|
||||
|
||||
def check_and_run_tasks(self) -> Dict[str, int]:
|
||||
"""检查并执行所有到期的任务"""
|
||||
"""检查并执行所有到期的任务,优化空任务处理和异常容错"""
|
||||
result = {'总任务数': 0, '成功': 0, '失败': 0}
|
||||
|
||||
try:
|
||||
# 获取当前时间(带时区)
|
||||
now = datetime.now(pytz.timezone('Asia/Shanghai')).replace(tzinfo=None)
|
||||
# 获取当前时间(带时区转换为本地时间)
|
||||
tz = pytz.timezone('Asia/Shanghai')
|
||||
now = datetime.now(tz).replace(tzinfo=None) # 移除时区信息,与数据库存储一致
|
||||
log.debug(f"当前检查时间: {now.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
|
||||
# 查询所有到期的活跃任务
|
||||
# 查询所有到期的活跃任务(使用参数化查询防止注入)
|
||||
tasks_df = self.db.query_to_df("""
|
||||
SELECT * FROM main_task
|
||||
WHERE is_active = 1
|
||||
AND next_run_time <= %s
|
||||
AND is_running = 0
|
||||
ORDER BY next_run_time
|
||||
""", params=(now,))
|
||||
SELECT *
|
||||
FROM main_task
|
||||
WHERE is_active = 1
|
||||
AND next_run_time <= %s
|
||||
AND is_running = 0
|
||||
ORDER BY next_run_time
|
||||
""", params=(now,))
|
||||
|
||||
result['总任务数'] = len(tasks_df)
|
||||
if tasks_df.empty:
|
||||
log.debug("没有到期的任务需要执行")
|
||||
# 空任务时输出INFO级日志,明确提示状态
|
||||
log.info("当前没有到期的任务,等待新任务加入...")
|
||||
return result
|
||||
|
||||
# 并发执行任务
|
||||
futures = []
|
||||
for _, task in tasks_df.iterrows():
|
||||
futures.append(self.executor.submit(self._process_single_task, task))
|
||||
# 传递任务字典的副本避免线程安全问题
|
||||
task_copy = task.to_dict()
|
||||
futures.append(self.executor.submit(self._process_single_task, task_copy))
|
||||
|
||||
# 收集执行结果
|
||||
for future in as_completed(futures):
|
||||
@@ -54,7 +62,7 @@ class TaskScheduler:
|
||||
else:
|
||||
result['失败'] += 1
|
||||
except Exception as e:
|
||||
log.error(f"任务线程执行失败: {str(e)}")
|
||||
log.error(f"任务线程执行失败: {str(e)}", exc_info=True)
|
||||
result['失败'] += 1
|
||||
|
||||
log.info(
|
||||
@@ -65,21 +73,28 @@ class TaskScheduler:
|
||||
)
|
||||
return result
|
||||
|
||||
except SQLAlchemyError as e: # 数据库异常处理优化
|
||||
log.error(f"数据库操作失败,将在下次轮询重试: {str(e)}", exc_info=True)
|
||||
return result # 不中断,返回当前结果
|
||||
except Exception as e:
|
||||
log.critical("调度器主循环执行失败", exc_info=True)
|
||||
raise
|
||||
log.error("调度器周期执行异常,将在下次轮询重试", exc_info=True)
|
||||
return result # 不中断主循环,允许下次重试
|
||||
|
||||
def _process_single_task(self, task: Dict[str, Any]) -> bool:
|
||||
"""处理单个任务(线程安全)"""
|
||||
task_id = task['task_id']
|
||||
task_log = log.bind(task_id=task_id, task_name=task['task_name'])
|
||||
task_log.info(f"开始执行任务: {task['task_name']}")
|
||||
task_name = task['task_name']
|
||||
task_log = log.bind(task_id=task_id, task_name=task_name)
|
||||
task_log.info(f"开始执行任务: {task_name}")
|
||||
|
||||
try:
|
||||
# 标记任务为运行中
|
||||
# 标记任务为运行中(使用当前时间的时区感知对象)
|
||||
tz = pytz.timezone(task.get('time_zone', 'Asia/Shanghai'))
|
||||
current_time = datetime.now(tz).replace(tzinfo=None)
|
||||
|
||||
self._update_task_status(task_id, {
|
||||
'is_running': 1,
|
||||
'last_run_time': datetime.now()
|
||||
'last_run_time': current_time
|
||||
})
|
||||
|
||||
# 执行任务逻辑
|
||||
@@ -98,7 +113,7 @@ class TaskScheduler:
|
||||
'run_count': task['run_count'] + 1,
|
||||
'next_run_time': next_run_time
|
||||
})
|
||||
task_log.info(f"任务执行成功: {task['task_name']}")
|
||||
task_log.info(f"任务执行成功: {task_name}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
@@ -107,23 +122,35 @@ class TaskScheduler:
|
||||
# 失败时计算下次重试时间(15分钟后)
|
||||
next_retry_time = datetime.now() + pd.Timedelta(minutes=15)
|
||||
|
||||
self._update_task_status(task_id, {
|
||||
'last_run_status': 'failed',
|
||||
'is_running': 0,
|
||||
'next_run_time': next_retry_time
|
||||
})
|
||||
# 即使任务执行失败,也要确保状态更新
|
||||
try:
|
||||
self._update_task_status(task_id, {
|
||||
'last_run_status': 'failed',
|
||||
'is_running': 0,
|
||||
'next_run_time': next_retry_time
|
||||
})
|
||||
except Exception as update_err:
|
||||
task_log.error(f"任务失败后状态更新失败: {str(update_err)}", exc_info=True)
|
||||
|
||||
return False
|
||||
|
||||
def _execute_task_logic(self, task: Dict[str, Any]) -> None:
|
||||
"""执行任务的具体逻辑(动态导入模块)"""
|
||||
start_time = time.time()
|
||||
task_log = log.bind(task_id=task['task_id'], module=task['module_path'])
|
||||
task_id = task['task_id']
|
||||
module_path = task['module_path']
|
||||
task_log = log.bind(task_id=task_id, module=module_path)
|
||||
|
||||
try:
|
||||
# 动态导入任务模块
|
||||
module = importlib.import_module(task['module_path'])
|
||||
if not hasattr(module, 'main'):
|
||||
raise ImportError(f"模块 {task['module_path']} 中未找到 main() 函数")
|
||||
# 动态导入任务模块(增加模块存在性检查)
|
||||
try:
|
||||
module = importlib.import_module(module_path)
|
||||
except ImportError as e:
|
||||
raise ImportError(f"模块 {module_path} 导入失败: {str(e)}")
|
||||
|
||||
# 检查main函数是否存在
|
||||
if not hasattr(module, 'main') or not callable(module.main):
|
||||
raise AttributeError(f"模块 {module_path} 中未找到可调用的 main() 函数")
|
||||
|
||||
task_log.debug("开始执行模块中的 main() 函数")
|
||||
module.main() # 调用任务主函数
|
||||
@@ -137,7 +164,7 @@ class TaskScheduler:
|
||||
"""基于Cron表达式计算下次运行时间"""
|
||||
try:
|
||||
tz = pytz.timezone(time_zone)
|
||||
now = datetime.now(tz)
|
||||
now = datetime.now(tz) # 使用任务指定时区的当前时间
|
||||
cron = croniter.croniter(cron_expr, now)
|
||||
next_run = cron.get_next(datetime)
|
||||
return next_run.replace(tzinfo=None) # 移除时区信息,适应数据库存储
|
||||
@@ -146,12 +173,26 @@ class TaskScheduler:
|
||||
raise ValueError(f"无效的Cron表达式: {cron_expr}")
|
||||
|
||||
def _update_task_status(self, task_id: int, updates: Dict[str, Any]) -> None:
|
||||
"""更新任务状态到数据库"""
|
||||
set_clause = ", ".join([f"{k}=%s" for k in updates.keys()])
|
||||
"""更新任务状态到数据库(适配SQLAlchemy的参数传递方式)"""
|
||||
if not updates:
|
||||
log.warning(f"任务ID {task_id} 未提供任何更新字段")
|
||||
return
|
||||
|
||||
# 构建UPDATE语句(确保字段名安全)
|
||||
valid_fields = {'is_running', 'last_run_time', 'last_run_status',
|
||||
'run_count', 'next_run_time', 'updated_at'}
|
||||
filtered_updates = {k: v for k, v in updates.items() if k in valid_fields}
|
||||
|
||||
if not filtered_updates:
|
||||
log.warning(f"任务ID {task_id} 没有有效的更新字段")
|
||||
return
|
||||
|
||||
set_clause = ", ".join([f"{k}=%s" for k in filtered_updates.keys()])
|
||||
sql = f"UPDATE main_task SET {set_clause}, updated_at=NOW() WHERE task_id=%s"
|
||||
params = list(updates.values()) + [task_id]
|
||||
params = list(filtered_updates.values()) + [task_id]
|
||||
|
||||
try:
|
||||
# 执行更新并获取受影响的行数
|
||||
affected_rows = self.db.execute_sql(sql, params=params)
|
||||
if affected_rows != 1:
|
||||
log.warning(
|
||||
@@ -160,51 +201,104 @@ class TaskScheduler:
|
||||
预期影响行数=1,
|
||||
实际影响行数=affected_rows
|
||||
)
|
||||
except SQLAlchemyError as e:
|
||||
log.error(f"任务状态更新失败(数据库错误),task_id: {task_id}", exc_info=True)
|
||||
raise
|
||||
except Exception as e:
|
||||
log.error(f"任务状态更新失败,task_id: {task_id}", exc_info=True)
|
||||
raise
|
||||
|
||||
def add_task(self,
|
||||
task_name: str,
|
||||
task_type: str,
|
||||
module_path: str,
|
||||
cron_expression: str,
|
||||
time_zone: str = 'Asia/Shanghai') -> int:
|
||||
task_name: str,
|
||||
task_type: str,
|
||||
module_path: str,
|
||||
cron_expression: str,
|
||||
time_zone: str = 'Asia/Shanghai') -> int:
|
||||
"""添加新的Cron任务"""
|
||||
if not cron_expression:
|
||||
raise ValueError("Cron表达式不能为空")
|
||||
|
||||
# 验证模块是否存在(提前检查,避免添加无效任务)
|
||||
try:
|
||||
importlib.import_module(module_path)
|
||||
except ImportError as e:
|
||||
raise ValueError(f"模块 {module_path} 不存在: {str(e)}")
|
||||
|
||||
# 计算首次运行时间
|
||||
first_run_time = self._calculate_next_run_time(cron_expression, time_zone)
|
||||
|
||||
# 插入数据库
|
||||
sql = """
|
||||
INSERT INTO main_task
|
||||
(task_name, task_type, module_path, cron_expression, time_zone,
|
||||
next_run_time, is_active)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, 1)
|
||||
"""
|
||||
INSERT INTO main_task
|
||||
(task_name, task_type, module_path, cron_expression, time_zone,
|
||||
next_run_time, is_active, created_at, updated_at)
|
||||
VALUES (%s, %s, %s, %s, %s, %s, 1, NOW(), NOW()) \
|
||||
"""
|
||||
params = (task_name, task_type, module_path, cron_expression, time_zone, first_run_time)
|
||||
|
||||
try:
|
||||
self.db.execute_sql(sql, params=params)
|
||||
task_id = self.db.query_to_df("SELECT LAST_INSERT_ID() AS id").iloc[0]['id']
|
||||
# 获取插入的任务ID
|
||||
result_df = self.db.query_to_df("SELECT LAST_INSERT_ID() AS id")
|
||||
if result_df.empty or 'id' not in result_df.columns:
|
||||
raise ValueError("无法获取新添加任务的ID")
|
||||
|
||||
task_id = result_df.iloc[0]['id']
|
||||
log.info(
|
||||
f"新任务添加成功",
|
||||
"新任务添加成功",
|
||||
task_id=task_id,
|
||||
task_name=task_name,
|
||||
cron表达式=cron_expression,
|
||||
首次运行时间=first_run_time
|
||||
首次运行时间=first_run_time.strftime('%Y-%m-%d %H:%M:%S')
|
||||
)
|
||||
return task_id
|
||||
except SQLAlchemyError as e:
|
||||
log.error(f"添加任务失败(数据库错误): {task_name}", exc_info=True)
|
||||
raise
|
||||
except Exception as e:
|
||||
log.error(f"添加任务失败: {task_name}", exc_info=True)
|
||||
raise
|
||||
|
||||
def get_pending_tasks_count(self) -> int:
|
||||
"""获取当前等待执行的任务数量"""
|
||||
result = self.db.query_to_df("""
|
||||
SELECT COUNT(*) AS count FROM main_task
|
||||
WHERE is_active = 1 AND next_run_time <= %s
|
||||
""", params=(datetime.now(),))
|
||||
return result.iloc[0]['count'] if not result.empty else 0
|
||||
"""获取待执行任务数量(用于优雅关闭)"""
|
||||
try:
|
||||
tz = pytz.timezone('Asia/Shanghai')
|
||||
now = datetime.now(tz).replace(tzinfo=None)
|
||||
sql = """
|
||||
SELECT COUNT(*) as cnt
|
||||
FROM main_task
|
||||
WHERE is_active = 1
|
||||
AND next_run_time <= %s
|
||||
AND is_running = 0
|
||||
"""
|
||||
df = self.db.query_to_df(sql, params=(now,))
|
||||
return df['cnt'].iloc[0] if not df.empty else 0
|
||||
except Exception as e:
|
||||
log.error(f"查询待执行任务数量失败: {str(e)}", exc_info=True)
|
||||
return 0 # 出错时返回0,避免影响关闭流程
|
||||
|
||||
def get_pending_tasks(self) -> List[Dict[str, Any]]:
|
||||
"""查询所有待执行任务(兼容原有逻辑)"""
|
||||
try:
|
||||
tz = pytz.timezone('Asia/Shanghai')
|
||||
now = datetime.now(tz).replace(tzinfo=None)
|
||||
sql = """
|
||||
SELECT *
|
||||
FROM main_task
|
||||
WHERE is_active = 1
|
||||
AND next_run_time <= %s
|
||||
AND is_running = 0
|
||||
ORDER BY next_run_time
|
||||
"""
|
||||
tasks_df = self.db.query_to_df(sql, params=(now,))
|
||||
|
||||
if tasks_df.empty:
|
||||
log.info("当前任务列表为空,等待新任务加入...")
|
||||
return []
|
||||
|
||||
log.info(f"查询到{len(tasks_df)}个待执行任务")
|
||||
return tasks_df.to_dict('records')
|
||||
|
||||
except Exception as e:
|
||||
log.error(f"查询待执行任务失败,将重试: {str(e)}", exc_info=True)
|
||||
return []
|
||||
Reference in New Issue
Block a user