rss订阅数据爬取及数据处理
This commit is contained in:
@@ -22,6 +22,9 @@ class TaskScheduler:
|
||||
self.executor = ThreadPoolExecutor(max_workers=max_workers)
|
||||
# 并发容量控制:限制同时运行的后台任务不超过 max_workers
|
||||
self._running_semaphore = threading.Semaphore(max_workers)
|
||||
# 任务统计
|
||||
self.hourly_stats = {'成功': 0, '失败': 0, '总数': 0}
|
||||
self.hourly_stats_lock = threading.Lock()
|
||||
log.info(f"任务调度器已初始化,最大工作线程数: {max_workers}")
|
||||
|
||||
def _resolve_callable(self, module_path: str):
|
||||
@@ -84,8 +87,12 @@ class TaskScheduler:
|
||||
# 如果所有尝试均失败,则抛出最后的错误
|
||||
raise ImportError(f"模块 {module_path} 导入/解析失败: {str(last_import_error)}")
|
||||
|
||||
def check_and_run_tasks(self) -> Dict[str, int]:
|
||||
"""检查并执行所有到期的任务,优化空任务处理和异常容错"""
|
||||
def check_and_run_tasks(self, print_empty_status: bool = False) -> Dict[str, int]:
|
||||
"""检查并执行所有到期的任务,优化空任务处理和异常容错
|
||||
|
||||
Args:
|
||||
print_empty_status: 是否打印空任务状态(默认False,避免频繁输出)
|
||||
"""
|
||||
result = {'总任务数': 0, '成功': 0, '失败': 0}
|
||||
|
||||
try:
|
||||
@@ -106,8 +113,9 @@ class TaskScheduler:
|
||||
|
||||
result['总任务数'] = len(tasks_df)
|
||||
if tasks_df.empty:
|
||||
# 空任务时输出INFO级日志,明确提示状态
|
||||
print(f"当前没有到期的任务,等待新任务加入...{now.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
# 空任务时根据参数决定是否输出
|
||||
if print_empty_status:
|
||||
print(f"当前没有到期的任务,等待新任务加入...{now.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
return result
|
||||
|
||||
# 并发执行任务
|
||||
@@ -128,6 +136,12 @@ class TaskScheduler:
|
||||
log.error(f"任务线程执行失败: {str(e)}", exc_info=True)
|
||||
result['失败'] += 1
|
||||
|
||||
# 更新小时统计
|
||||
with self.hourly_stats_lock:
|
||||
self.hourly_stats['成功'] += result['成功']
|
||||
self.hourly_stats['失败'] += result['失败']
|
||||
self.hourly_stats['总数'] += result['总任务数']
|
||||
|
||||
log.info(
|
||||
"任务调度周期完成",
|
||||
总任务数=result['总任务数'],
|
||||
@@ -419,4 +433,12 @@ class TaskScheduler:
|
||||
|
||||
except Exception as e:
|
||||
log.error(f"查询待执行任务失败,将重试: {str(e)}", exc_info=True)
|
||||
return []
|
||||
return []
|
||||
|
||||
def get_and_reset_hourly_stats(self) -> Dict[str, int]:
    """Return the accumulated hourly statistics and zero the counters.

    The snapshot and the reset are performed while holding
    ``self.hourly_stats_lock``, so both steps happen as one unit with
    respect to other code that takes the same lock.

    Returns:
        A copy of the counters as they were before the reset.
    """
    zeroed_counters = {'成功': 0, '失败': 0, '总数': 0}
    with self.hourly_stats_lock:
        snapshot = dict(self.hourly_stats)
        # Rebind to a fresh dict instead of mutating in place, so the
        # snapshot handed back to the caller is never touched again.
        self.hourly_stats = zeroed_counters
    return snapshot
|
||||
Reference in New Issue
Block a user