添加RSS数据处理器和任务调度功能,更新配置和日志文件
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,11 +1,23 @@
|
||||
import os
|
||||
|
||||
|
||||
class Config:
|
||||
|
||||
MYSQL_CONFIG = {
|
||||
'host': '123.60.167.249',
|
||||
'port': 3306,
|
||||
'user': 'intelligence',
|
||||
'password': '123123',
|
||||
'database': "intelligence_system",
|
||||
'max_connections': 10
|
||||
}
|
||||
|
||||
OFFLINE_MYSQL_CONFIG = {
|
||||
'host': 'localhost',
|
||||
'port': 3306,
|
||||
'user': 'root',
|
||||
'password': '123123',
|
||||
'database':"intelligence_system",
|
||||
'database': "intelligence_system",
|
||||
'max_connections': 10
|
||||
}
|
||||
|
||||
@@ -15,3 +27,18 @@ class Config:
|
||||
'secret_key': 'abc88888888',
|
||||
'secure': False # 社区版默认不启用SSL
|
||||
}
|
||||
|
||||
# 百度AI API配置(千帆平台)
|
||||
# 优先从环境变量读取,如果没有则使用默认值(需要用户自行配置)
|
||||
BAIDU_AI_CONFIG = {
|
||||
'api_key': os.getenv('BAIDU_API_KEY', 'bce-v3/ALTAK-SFA4vEP3uBYLsyqCZcERg/1f43596d40d9a2c8318b13d5888a5e8e4e7a7f30'), # 百度千帆API Key
|
||||
'model': 'ernie-x1-turbo-32k', # 使用的模型
|
||||
}
|
||||
|
||||
# AI处理器配置
|
||||
AI_PROCESSOR_CONFIG = {
|
||||
'batch_size': 10, # 批量处理的默认大小
|
||||
'delay': 1.5, # 每条记录之间的延迟(秒),避免API限流
|
||||
'source_table': 'processed_rss_data', # 源数据表
|
||||
'result_table': 'ai_processor_rss_analysis', # AI分析结果表
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
@@ -133075,3 +133075,855 @@
|
||||
→ processing_time: '2025-10-28 13:41:03'
|
||||
→ save_success: True
|
||||
→ mark_success: True
|
||||
2025-10-29 09:54:06.511 | INFO | task_scheduler:28 - 任务调度器已初始化,最大工作线程数: 5
|
||||
→ module: 'TaskScheduler'
|
||||
2025-10-29 09:54:08.957 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: '\n SELECT *\n FROM main_task\n \n ORDER BY created_at DESC, task_id DESC\n '
|
||||
2025-10-29 09:54:09.655 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 2
|
||||
2025-10-29 09:56:52.413 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 09:56:52.413 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT LAST_INSERT_ID() AS id'
|
||||
2025-10-29 09:56:52.692 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 09:56:52.693 | INFO | task_scheduler:379 - 新任务添加成功
|
||||
→ module: 'TaskScheduler'
|
||||
→ task_id: np.int64(0)
|
||||
→ task_name: 'AI处理RSS新闻'
|
||||
→ cron表达式: '5 0 * * *'
|
||||
→ 首次运行时间: '2025-10-30 00:05:00'
|
||||
2025-10-29 09:57:05.272 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 09:57:05.541 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 09:57:05.544 | DEBUG | task_scheduler:287 - 开始执行任务入口函数
|
||||
→ module: 'processors.ai_processors.ai_processor_rss_data.RSSDataAIProcessor'
|
||||
→ task_id: 3
|
||||
2025-10-29 09:57:05.544 | ERROR | task_scheduler:292 - 任务逻辑执行失败
|
||||
→ module: 'processors.ai_processors.ai_processor_rss_data.RSSDataAIProcessor'
|
||||
→ task_id: 3
|
||||
→ exc_info: True
|
||||
2025-10-29 09:57:05.545 | ERROR | 1757831752:141 - 任务执行失败: AI处理RSS新闻
|
||||
→ module: 'TaskNotebook'
|
||||
2025-10-29 10:04:03.950 | INFO | task_scheduler:28 - 任务调度器已初始化,最大工作线程数: 5
|
||||
→ module: 'TaskScheduler'
|
||||
2025-10-29 10:04:09.593 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:04:09.869 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:04:10.878 | DEBUG | task_scheduler:287 - 开始执行任务入口函数
|
||||
→ module: 'processors.ai_processors.ai_processor_rss_data.RSSDataAIProcessor'
|
||||
→ task_id: 3
|
||||
2025-10-29 10:04:10.878 | ERROR | task_scheduler:292 - 任务逻辑执行失败
|
||||
→ module: 'processors.ai_processors.ai_processor_rss_data.RSSDataAIProcessor'
|
||||
→ task_id: 3
|
||||
→ exc_info: True
|
||||
2025-10-29 10:04:10.879 | ERROR | 2003508611:141 - 任务执行失败: AI处理RSS新闻
|
||||
→ module: 'TaskNotebook'
|
||||
2025-10-29 10:21:43.650 | INFO | task_scheduler:28 - 任务调度器已初始化,最大工作线程数: 5
|
||||
→ module: 'TaskScheduler'
|
||||
2025-10-29 10:21:43.691 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:21:43.981 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:21:45.731 | INFO | ai_processor_rss_data:51 - RSS数据AI处理器初始化完成
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:21:45.733 | INFO | ai_processor_rss_data:87 - 开始批量处理数据,批次大小: 200, 延迟: 1.5秒
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:21:45.892 | DEBUG | mysql_agent:609 - 查询执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:21:45.893 | DEBUG | ai_processor_rss_data:191 - 表 processed_rss_data 已存在 '是否ai处理' 字段
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:21:45.988 | DEBUG | mysql_agent:609 - 查询执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:21:45.989 | DEBUG | mysql_agent:640 - 检查表是否存在
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 表: 'ai_processor_rss_analysis'
|
||||
→ 存在: True
|
||||
2025-10-29 10:21:45.989 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: '\n SELECT id, 文章标题, 文章摘要, 发布时间, 来源URL, 文章链接\n FROM processed_rss_data\n WHERE 是否ai处理 = 0 OR 是否ai处理 IS NULL\n ORDER BY 创建时间 DESC\n LIMIT %s\n '
|
||||
2025-10-29 10:21:46.195 | INFO | ai_processor_rss_data:249 - 成功加载 2 条未处理的数据
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:21:46.210 | DEBUG | ai_processor_rss_data:112 - 处理记录 180 (1/2)
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:22:15.178 | DEBUG | ai_processor_rss_data:112 - 处理记录 179 (2/2)
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:22:31.052 | DEBUG | mysql_agent:176 - 已建立连接,准备插入数据到 ai_processor_rss_analysis
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:31.087 | DEBUG | mysql_agent:182 - 表 ai_processor_rss_analysis 包含以下列:['id', 'source_id', '文章标题', '文章摘要', '发布时间', '来源URL', '文章链接', '是否相关', '相关度评分', '标签', '分类', '分析说明', '处理时间', '创建时间', '更新时间']
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:31.096 | DEBUG | mysql_agent:208 - 表 ai_processor_rss_analysis 的过滤后DataFrame:共 2 行待插入
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:31.182 | INFO | mysql_agent:294 - 表 ai_processor_rss_analysis 插入结果汇总
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ total_to_insert: 2
|
||||
→ total_inserted: 2
|
||||
→ total_duplicates: 0
|
||||
→ total_failed: 0
|
||||
→ failed_records_count: 0
|
||||
2025-10-29 10:22:31.184 | INFO | ai_processor_rss_data:391 - 成功保存 2 条AI处理结果
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:22:31.302 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 2
|
||||
2025-10-29 10:22:31.302 | INFO | ai_processor_rss_data:419 - 成功标记 2 条记录为已处理
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:22:31.304 | INFO | ai_processor_rss_data:151 - 批量处理完成
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
→ success: True
|
||||
→ message: 'AI处理完成'
|
||||
→ total_count: 2
|
||||
→ processed_count: 2
|
||||
→ saved_count: 2
|
||||
→ failed_count: 0
|
||||
→ relevant_count: 0
|
||||
→ processing_time: '2025-10-29 10:22:31'
|
||||
2025-10-29 10:22:33.960 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:22:54.079 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:22:54.303 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:22:54.420 | INFO | rss_subscriptions:46 - 新闻API客户端初始化完成,已连接到数据库
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:54.520 | DEBUG | mysql_agent:609 - 查询执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:22:54.615 | DEBUG | mysql_agent:609 - 查询执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 9
|
||||
2025-10-29 10:22:54.615 | INFO | rss_subscriptions:84 - 数据库表结构验证通过,当前字段:['id', '文章标题', '文章链接', '文章摘要', '发布时间', '来源URL', '创建时间', '更新时间', '是否已处理']
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:54.616 | DEBUG | rss_subscriptions:102 - 未找到上次更新时间缓存,将获取全部数据
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:54.616 | INFO | rss_subscriptions:302 - 开始获取RSS源数据...
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:54.984 | DEBUG | rss_subscriptions:134 - 成功获取 https://www.chinanews.com.cn/rss/finance.xml 的RSS数据
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.009 | DEBUG | rss_subscriptions:134 - 成功获取 https://www.chinanews.com.cn/rss/world.xml 的RSS数据
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.019 | DEBUG | rss_subscriptions:134 - 成功获取 https://www.chinanews.com.cn/rss/china.xml 的RSS数据
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.279 | DEBUG | rss_subscriptions:134 - 成功获取 https://www.chinanews.com.cn/rss/scroll-news.xml 的RSS数据
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.281 | INFO | rss_subscriptions:161 - RSS源获取完成,成功获取 4/4 个源
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.282 | INFO | rss_subscriptions:305 - 获取完成,耗时: 0.66秒
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.283 | INFO | rss_subscriptions:218 - 开始处理 RSS 源: https://www.chinanews.com.cn/rss/finance.xml
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.284 | DEBUG | rss_subscriptions:235 - 处理条目 1: 海南产经新观察:紧抓封关机遇 央企纷赴自贸港布局
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.284 | DEBUG | rss_subscriptions:235 - 处理条目 2: 国家统计局社科文司统计师解读2024年中国创新指数
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.284 | DEBUG | rss_subscriptions:235 - 处理条目 3: 超500亿人次!前三季度我国人员流动量持续增加
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.284 | DEBUG | rss_subscriptions:235 - 处理条目 4: 面向东盟的规则对接与贸易促进合作交流会在南宁举办
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.284 | DEBUG | rss_subscriptions:235 - 处理条目 5: 263.2万亿元!前三季度物流需求总量稳步增长
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.285 | DEBUG | rss_subscriptions:235 - 处理条目 6: 前三季度我国社会物流总费用14.2万亿元 物流效能稳步提升
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.285 | DEBUG | rss_subscriptions:235 - 处理条目 7: 2024年中国创新指数为174.2 比上年增长5.3%
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.285 | DEBUG | rss_subscriptions:235 - 处理条目 8: 10月29日央行开展5577亿元7天期逆回购操作
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.286 | DEBUG | rss_subscriptions:235 - 处理条目 9: 2024年中国创新指数比上年增长5.3%
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.286 | DEBUG | rss_subscriptions:235 - 处理条目 10: 10月29日人民币对美元中间价报7.0843元 上调13个基点
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.286 | DEBUG | rss_subscriptions:235 - 处理条目 11: 第138届广交会首设智慧医疗专区
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.286 | DEBUG | rss_subscriptions:235 - 处理条目 12: 我国首个国家级陆相页岩油示范区年产量突破150万吨
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.287 | DEBUG | rss_subscriptions:235 - 处理条目 13: (经济观察)“十五五”规划建议清晰勾勒“确定的中国”
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.287 | DEBUG | rss_subscriptions:235 - 处理条目 14: (经济观察)锚定2035年,中国人均GDP如何实现跨越?
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.287 | DEBUG | rss_subscriptions:235 - 处理条目 15: “全运号”新能源游船在广州启航
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.288 | DEBUG | rss_subscriptions:235 - 处理条目 16: 云南石林国际花卉农特产品展示交易中心启动运营
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.288 | DEBUG | rss_subscriptions:235 - 处理条目 17: 新疆文化和旅游产业投融资对接会举办
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.288 | DEBUG | rss_subscriptions:235 - 处理条目 18: 云南地理标志保护产品达134个 较“十三五”末实现倍增
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.288 | DEBUG | rss_subscriptions:235 - 处理条目 19: 2025中国(大营)国际皮草博览会首日达成销售意向超3亿元
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.289 | DEBUG | rss_subscriptions:235 - 处理条目 20: 中国国家邮政局约谈中通快递
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.289 | DEBUG | rss_subscriptions:235 - 处理条目 21: 潘功胜:持续释放政策效能,研究储备新的政策举措
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.289 | DEBUG | rss_subscriptions:235 - 处理条目 22: 工信部:“二次号码焕新”服务已覆盖230余款主流应用
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.289 | DEBUG | rss_subscriptions:235 - 处理条目 23: 中国农业农村部:全年粮食有望再获丰收
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.289 | DEBUG | rss_subscriptions:235 - 处理条目 24: 2025碳达峰碳中和论坛暨深圳国家低碳城论坛启幕
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.290 | DEBUG | rss_subscriptions:235 - 处理条目 25: 阿尔及尔至广州直飞航线成功首航
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.290 | DEBUG | rss_subscriptions:235 - 处理条目 26: 中国移动董事长调整 杨杰卸任陈忠岳接任
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.290 | DEBUG | rss_subscriptions:235 - 处理条目 27: 金价跌破3900美元,未来会是什么走势?
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.290 | DEBUG | rss_subscriptions:235 - 处理条目 28: 广珠北段黄阁互通立交焕新通车 助力全运会出行
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.291 | DEBUG | rss_subscriptions:235 - 处理条目 29: 总投资超2600亿元 重庆“城市更新机会清单”发布
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.291 | DEBUG | rss_subscriptions:235 - 处理条目 30: “十五五”规划建议更重视投资于人
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.358 | DEBUG | mysql_agent:176 - 已建立连接,准备插入数据到 collector_rss_subscriptions
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:55.384 | DEBUG | mysql_agent:182 - 表 collector_rss_subscriptions 包含以下列:['id', '文章标题', '文章链接', '文章摘要', '发布时间', '来源URL', '创建时间', '更新时间', '是否已处理']
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:55.385 | DEBUG | mysql_agent:208 - 表 collector_rss_subscriptions 的过滤后DataFrame:共 30 行待插入
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:55.955 | INFO | mysql_agent:294 - 表 collector_rss_subscriptions 插入结果汇总
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ total_to_insert: 30
|
||||
→ total_inserted: 30
|
||||
→ total_duplicates: 0
|
||||
→ total_failed: 0
|
||||
→ failed_records_count: 0
|
||||
2025-10-29 10:22:55.957 | INFO | rss_subscriptions:259 - 成功写入 30/30 条记录
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.957 | INFO | rss_subscriptions:218 - 开始处理 RSS 源: https://www.chinanews.com.cn/rss/world.xml
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.958 | DEBUG | rss_subscriptions:235 - 处理条目 1: “蓉品出海——成都国际美食之都推介活动”在巴黎举办
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.959 | DEBUG | rss_subscriptions:235 - 处理条目 2: 巴西冲突致60余人死亡,中国总领馆发布紧急安全提醒
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.959 | DEBUG | rss_subscriptions:235 - 处理条目 3: 朝中社:朝鲜试射海对地战略巡航导弹(图)
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.959 | DEBUG | rss_subscriptions:235 - 处理条目 4: 荷兰启动加沙地带医疗后送接收首批患病儿童
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.959 | DEBUG | rss_subscriptions:235 - 处理条目 5: 美国10月消费者信心指数跌至94.6 略低于市场预期
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.959 | DEBUG | rss_subscriptions:235 - 处理条目 6: 哈马斯指责以色列违反停火协议,否认与以军遭袭事件有关
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.960 | DEBUG | rss_subscriptions:235 - 处理条目 7: 中国-东盟自贸区3.0版升级获东盟各界期待
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.960 | DEBUG | rss_subscriptions:235 - 处理条目 8: 德国11月消费者信心指数再度下滑
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.960 | DEBUG | rss_subscriptions:235 - 处理条目 9: 美防长称美军打击东太平洋四艘“贩毒船” ,已致14人死亡
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.961 | DEBUG | rss_subscriptions:235 - 处理条目 10: 参议院第13次否决临时拨款法案,美政府“停摆”继续
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.961 | DEBUG | rss_subscriptions:235 - 处理条目 11: 内塔尼亚胡下令:对加沙实施“有力打击”
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.961 | DEBUG | rss_subscriptions:235 - 处理条目 12: 达成停火18天后战火再起 以总理下令袭击加沙
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.961 | DEBUG | rss_subscriptions:235 - 处理条目 13: 庆祝北京堪培拉缔结友好城市25周年活动在堪培拉举行
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.962 | DEBUG | rss_subscriptions:235 - 处理条目 14: 专访丨期待中日韩以创新合作引领亚太新增长——访中日韩合作秘书处秘书长李熙燮
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.962 | DEBUG | rss_subscriptions:235 - 处理条目 15: 第47届东盟峰会及系列会议闭幕
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.962 | DEBUG | rss_subscriptions:235 - 处理条目 16: 巴基斯坦、阿富汗谈判未取得成果
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.963 | DEBUG | rss_subscriptions:235 - 处理条目 17: 韩德洙涉内乱案判决或将于年内出炉 最高可判死刑
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.963 | DEBUG | rss_subscriptions:235 - 处理条目 18: 外媒:巴基斯坦与阿富汗“谈崩了”
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.963 | DEBUG | rss_subscriptions:235 - 处理条目 19: 创纪录!美国男子接受猪肾脏移植近9个月
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.964 | DEBUG | rss_subscriptions:235 - 处理条目 20: 特朗普与高市同乘“海军一号” 抵达驻日美军基地
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.964 | DEBUG | rss_subscriptions:235 - 处理条目 21: AI赋能在线教学创新成果大赛线下决赛在北京印刷学院举办
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.964 | DEBUG | rss_subscriptions:235 - 处理条目 22: 聚焦AI、自由贸易等议题 庆州APEC助推区域团结协作
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.965 | DEBUG | rss_subscriptions:235 - 处理条目 23: 美政府停摆或致现役军人停薪 美财长:这太令人难堪了
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.965 | DEBUG | rss_subscriptions:235 - 处理条目 24: 气旋“蒙塔”持续增强 印度紧急疏散数万人
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.965 | DEBUG | rss_subscriptions:235 - 处理条目 25: 联合国调查报告:援助削减严重影响全球女性权益保护事业
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.966 | DEBUG | rss_subscriptions:235 - 处理条目 26: 高市早苗将推荐特朗普角逐诺奖,特朗普称高市将成“伟大的首相”
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.966 | DEBUG | rss_subscriptions:235 - 处理条目 27: 高市早苗将安倍用过的球杆,赠予特朗普
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.966 | DEBUG | rss_subscriptions:235 - 处理条目 28: 2025视听中国·澳新湖北传媒周在悉尼开幕
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.967 | DEBUG | rss_subscriptions:235 - 处理条目 29: 生物量迁移最新研究:人类超所有陆地动物总和 野生哺乳动物锐减
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:55.967 | DEBUG | rss_subscriptions:235 - 处理条目 30: 日媒:安倍晋三枪击案被告认罪
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.041 | DEBUG | mysql_agent:176 - 已建立连接,准备插入数据到 collector_rss_subscriptions
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:56.059 | DEBUG | mysql_agent:182 - 表 collector_rss_subscriptions 包含以下列:['id', '文章标题', '文章链接', '文章摘要', '发布时间', '来源URL', '创建时间', '更新时间', '是否已处理']
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:56.060 | DEBUG | mysql_agent:208 - 表 collector_rss_subscriptions 的过滤后DataFrame:共 30 行待插入
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:56.621 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 29
|
||||
→ error_message: "Duplicate entry '日媒:安倍晋三枪击案被告认罪-2025-10-28 05:31:25' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '日媒:安倍晋三枪击案被告认罪...', '文章链接': 'http://www.chinanews.com/gj/2025/10-28/10505670.shtml...', '文章摘要': '中新网10月28日电 据日本共同社报道,当地时间28日,日本奈良地方法院对前首相安倍晋三枪击案进行首次公开庭审。被告山上彻也承认检方所指控犯杀人罪、违反枪刀法等起诉内容。...',...
|
||||
2025-10-29 10:22:56.637 | INFO | mysql_agent:294 - 表 collector_rss_subscriptions 插入结果汇总
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ total_to_insert: 30
|
||||
→ total_inserted: 29
|
||||
→ total_duplicates: 1
|
||||
→ total_failed: 0
|
||||
→ failed_records_count: 1
|
||||
2025-10-29 10:22:56.638 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ failed_records_summary: [{'index': 29, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '日媒:安倍晋三枪击案被告认罪-2025-10-28 05:31:25' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||
→ detailed_failed_records: [{'index': 29, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '日媒:安倍晋三枪击案被告认罪-2025-10-28 05:31:25' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'...
|
||||
2025-10-29 10:22:56.639 | INFO | rss_subscriptions:259 - 成功写入 29/30 条记录
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.640 | INFO | rss_subscriptions:218 - 开始处理 RSS 源: https://www.chinanews.com.cn/rss/china.xml
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.640 | DEBUG | rss_subscriptions:235 - 处理条目 1: APEC会议“非正式”,却为何如此重要?
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.641 | DEBUG | rss_subscriptions:235 - 处理条目 2: 多国记者参访西藏:中国在少数民族文化保护方面作出巨大努力
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.641 | DEBUG | rss_subscriptions:235 - 处理条目 3: 前三季度全国城镇新增就业1057万人
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.641 | DEBUG | rss_subscriptions:235 - 处理条目 4: 国务院台办新任发言人亮相
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.641 | DEBUG | rss_subscriptions:235 - 处理条目 5: 韩正出席第九届未来投资倡议大会并发表演讲
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.641 | DEBUG | rss_subscriptions:235 - 处理条目 6: 湖南省农村信用社联合社原副主任舒立凡被开除党籍
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.642 | DEBUG | rss_subscriptions:235 - 处理条目 7: 11月新规来了!事关小客车上牌,海南离岛免税购物
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.642 | DEBUG | rss_subscriptions:235 - 处理条目 8: 为高质量发展提供法治保障 续写经济快速发展和社会长期稳定两大奇迹新篇章
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.642 | DEBUG | rss_subscriptions:235 - 处理条目 9: 凝心聚力担使命 继往开来谱新篇
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.642 | DEBUG | rss_subscriptions:235 - 处理条目 10: 深入学习贯彻全会精神 奋力续写“两大奇迹”新篇章
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.642 | DEBUG | rss_subscriptions:235 - 处理条目 11: 深入学习贯彻全会精神 推动“十五五”高质量发展
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.643 | DEBUG | rss_subscriptions:235 - 处理条目 12: 党的全面领导是实现“十五五”时期经济社会发展目标任务的根本保证
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.643 | DEBUG | rss_subscriptions:235 - 处理条目 13: 以史为鉴,走和平融合之道
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.643 | DEBUG | rss_subscriptions:235 - 处理条目 14: 中印两军举行边境西段第23轮将军级会谈
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.643 | DEBUG | rss_subscriptions:235 - 处理条目 15: 新华社权威快报|中国文明乡风大会聚焦“文明,让乡村更美好”
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.644 | DEBUG | rss_subscriptions:235 - 处理条目 16: 今年前九个月 全国纪检监察机关共接收信访举报314.4万件次
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.644 | DEBUG | rss_subscriptions:235 - 处理条目 17: 中华人民共和国海商法
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.644 | DEBUG | rss_subscriptions:235 - 处理条目 18: 全国人民代表大会常务委员会关于修改《中华人民共和国网络安全法》的决定
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.644 | DEBUG | rss_subscriptions:235 - 处理条目 19: 全国人民代表大会常务委员会关于修改《中华人民共和国环境保护税法》的决定
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.645 | DEBUG | rss_subscriptions:235 - 处理条目 20: 中华人民共和国城市居民委员会组织法
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.645 | DEBUG | rss_subscriptions:235 - 处理条目 21: 全国人民代表大会常务委员会关于修改《中华人民共和国村民委员会组织法》的决定
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.645 | DEBUG | rss_subscriptions:235 - 处理条目 22: 全国人民代表大会常务委员会关于批准《中华人民共和国和埃塞俄比亚联邦民主共和国关于刑事司法协助的条约》的决定
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.646 | DEBUG | rss_subscriptions:235 - 处理条目 23: 全国人民代表大会常务委员会关于批准《中华人民共和国和津巴布韦共和国关于刑事司法协助的条约》的决定
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.646 | DEBUG | rss_subscriptions:235 - 处理条目 24: 全国人民代表大会常务委员会关于批准《〈关于汞的水俣公约〉缔约方大会第五次会议第5/4号决定对〈关于汞的水俣公约〉附件A和附件B的修正》的决定
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.646 | DEBUG | rss_subscriptions:235 - 处理条目 25: 全国人民代表大会常务委员会关于批准《〈联合国海洋法公约〉下国家管辖范围以外区域海洋生物多样性的养护和可持续利用协定》的决定
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.648 | DEBUG | rss_subscriptions:235 - 处理条目 26: 全国人民代表大会常务委员会任免名单
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.648 | DEBUG | rss_subscriptions:235 - 处理条目 27: 全国人民代表大会常务委员会任免名单
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.648 | DEBUG | rss_subscriptions:235 - 处理条目 28: 全国人民代表大会常务委员会任免名单
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.648 | DEBUG | rss_subscriptions:235 - 处理条目 29: 全国人民代表大会常务委员会任免名单
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.648 | DEBUG | rss_subscriptions:235 - 处理条目 30: 全国人民代表大会常务委员会免职名单
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:56.725 | DEBUG | mysql_agent:176 - 已建立连接,准备插入数据到 collector_rss_subscriptions
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:56.743 | DEBUG | mysql_agent:182 - 表 collector_rss_subscriptions 包含以下列:['id', '文章标题', '文章链接', '文章摘要', '发布时间', '来源URL', '创建时间', '更新时间', '是否已处理']
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:56.744 | DEBUG | mysql_agent:208 - 表 collector_rss_subscriptions 的过滤后DataFrame:共 30 行待插入
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:57.335 | INFO | mysql_agent:294 - 表 collector_rss_subscriptions 插入结果汇总
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ total_to_insert: 30
|
||||
→ total_inserted: 30
|
||||
→ total_duplicates: 0
|
||||
→ total_failed: 0
|
||||
→ failed_records_count: 0
|
||||
2025-10-29 10:22:57.337 | INFO | rss_subscriptions:259 - 成功写入 30/30 条记录
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.338 | INFO | rss_subscriptions:218 - 开始处理 RSS 源: https://www.chinanews.com.cn/rss/scroll-news.xml
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.338 | DEBUG | rss_subscriptions:235 - 处理条目 1: 中国气象局发布人工智能气象服务系统“风和”
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.338 | DEBUG | rss_subscriptions:235 - 处理条目 2: “蓉品出海——成都国际美食之都推介活动”在巴黎举办
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.340 | DEBUG | rss_subscriptions:235 - 处理条目 3: APEC会议“非正式”,却为何如此重要?
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.340 | DEBUG | rss_subscriptions:235 - 处理条目 4: 多国记者参访西藏:中国在少数民族文化保护方面作出巨大努力
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.340 | DEBUG | rss_subscriptions:235 - 处理条目 5: 海南产经新观察:紧抓封关机遇 央企纷赴自贸港布局
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.341 | DEBUG | rss_subscriptions:235 - 处理条目 6: 国家统计局社科文司统计师解读2024年中国创新指数
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.341 | DEBUG | rss_subscriptions:235 - 处理条目 7: 超500亿人次!前三季度我国人员流动量持续增加
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.341 | DEBUG | rss_subscriptions:235 - 处理条目 8: 28国驻华大使和外交官参访广西 共探合作新机遇
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.341 | DEBUG | rss_subscriptions:235 - 处理条目 9: 广西南宁抓好“一老一小”民生实事 持续增进民生福祉
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.342 | DEBUG | rss_subscriptions:235 - 处理条目 10: 面向东盟的规则对接与贸易促进合作交流会在南宁举办
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.342 | DEBUG | rss_subscriptions:235 - 处理条目 11: 前三季度全国城镇新增就业1057万人
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.342 | DEBUG | rss_subscriptions:235 - 处理条目 12: 国务院台办新任发言人亮相
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.342 | DEBUG | rss_subscriptions:235 - 处理条目 13: 首届“全国十佳新闻工作者”宣传选树活动名单公示 中新社记者入选
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.343 | DEBUG | rss_subscriptions:235 - 处理条目 14: 巴西冲突致60余人死亡,中国总领馆发布紧急安全提醒
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.343 | DEBUG | rss_subscriptions:235 - 处理条目 15: 韩正出席第九届未来投资倡议大会并发表演讲
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.343 | DEBUG | rss_subscriptions:235 - 处理条目 16: 湖南省农村信用社联合社原副主任舒立凡被开除党籍
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.344 | DEBUG | rss_subscriptions:235 - 处理条目 17: 11月新规来了!事关小客车上牌,海南离岛免税购物
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.344 | DEBUG | rss_subscriptions:235 - 处理条目 18: 263.2万亿元!前三季度物流需求总量稳步增长
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.344 | DEBUG | rss_subscriptions:235 - 处理条目 19: 前三季度我国社会物流总费用14.2万亿元 物流效能稳步提升
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.345 | DEBUG | rss_subscriptions:235 - 处理条目 20: 我国中东部降水发展多地气温下滑 青藏高原大范围雨雪持续
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.345 | DEBUG | rss_subscriptions:235 - 处理条目 21: 2024年中国创新指数为174.2 比上年增长5.3%
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.345 | DEBUG | rss_subscriptions:235 - 处理条目 22: 菊香伴重阳!全国重阳节出游天气地图出炉 五大名城赏菊指南请收好
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.345 | DEBUG | rss_subscriptions:235 - 处理条目 23: 【这个城市有点潮】福州:山海为证 把这座城市的今天讲给您听
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.346 | DEBUG | rss_subscriptions:235 - 处理条目 24: 10月29日央行开展5577亿元7天期逆回购操作
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.346 | DEBUG | rss_subscriptions:235 - 处理条目 25: 第十二届中国—东盟(南宁)戏剧周首场演出在南宁上演
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.346 | DEBUG | rss_subscriptions:235 - 处理条目 26: 2024年中国创新指数比上年增长5.3%
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.346 | DEBUG | rss_subscriptions:235 - 处理条目 27: 广西三江:金桂飘香添富路
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.346 | DEBUG | rss_subscriptions:235 - 处理条目 28: 为高质量发展提供法治保障 续写经济快速发展和社会长期稳定两大奇迹新篇章
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.346 | DEBUG | rss_subscriptions:235 - 处理条目 29: “天下第一雄关”嘉峪关:光影讲述长城故事
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.348 | DEBUG | rss_subscriptions:235 - 处理条目 30: 凝心聚力担使命 继往开来谱新篇
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:57.422 | DEBUG | mysql_agent:176 - 已建立连接,准备插入数据到 collector_rss_subscriptions
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:57.440 | DEBUG | mysql_agent:182 - 表 collector_rss_subscriptions 包含以下列:['id', '文章标题', '文章链接', '文章摘要', '发布时间', '来源URL', '创建时间', '更新时间', '是否已处理']
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:57.441 | DEBUG | mysql_agent:208 - 表 collector_rss_subscriptions 的过滤后DataFrame:共 30 行待插入
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:22:57.479 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 1
|
||||
→ error_message: "Duplicate entry '“蓉品出海——成都国际美食之都推介活动”在å' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '“蓉品出海——成都国际美食之都推介活动”在巴黎举办...', '文章链接': 'http://www.chinanews.com/gj/2025/10-29/10506172.shtml...', '文章摘要': '中新网巴黎10月29日电 当地时间10月27日,“蓉品出海——成都国际美食之都推介活动”在巴黎中国文化中心举行。...', '发布时间': '2025-10-...
|
||||
2025-10-29 10:22:57.496 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 2
|
||||
→ error_message: "Duplicate entry 'APEC会议“非正式”,却为何如此重要?-2025-10-29 ' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': 'APEC会议“非正式”,却为何如此重要?...', '文章链接': 'http://www.chinanews.com/gn/2025/10-29/10506201.shtml...', '文章摘要': '中新网北京10月29日电 题:APEC会议“非正式”,却为何如此重要?...', '发布时间': '2025-10-29 02:13:08...', '来源URL': ...
|
||||
2025-10-29 10:22:57.515 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 3
|
||||
→ error_message: "Duplicate entry '多国记者参访西藏:中国在少数民族文化保护方é' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '多国记者参访西藏:中国在少数民族文化保护方面作出巨大努力...', '文章链接': 'http://www.chinanews.com/gn/2025/10-29/10506203.shtml...', '文章摘要': '中新社西藏林芝10月29日电 题:多国记者参访西藏:中国在少数民族文化保护方面作出巨大努力...', '发布时间': '2025-10-29 02:12...
|
||||
2025-10-29 10:22:57.532 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 4
|
||||
→ error_message: "Duplicate entry '海南产经新观察:紧抓封关机遇 央企纷赴自贸港' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '海南产经新观察:紧抓封关机遇 央企纷赴自贸港布局...', '文章链接': 'http://www.chinanews.com/cj/2025/10-29/10506183.shtml...', '文章摘要': '中新网海口10月29日电 (记者 王子谦)海南自贸港建设以来,央企加快在琼战略布局,助力海南经济社会高质量发展。全岛封关运作即将启动,央企紧抓封关机遇,加快入琼...
|
||||
2025-10-29 10:22:57.551 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 5
|
||||
→ error_message: "Duplicate entry '国家统计局社科文司统计师解读2024年中国创新指' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '国家统计局社科文司统计师解读2024年中国创新指数...', '文章链接': 'http://www.chinanews.com/cj/2025/10-29/10506207.shtml...', '文章摘要': '中新网10月29日电 据国家统计局网站消息,日前,国家统计局社科文司《中国创新指数研究》课题组发布了2024年中国创新指数测算结果。从测算结果看,2024年中...
|
||||
2025-10-29 10:22:57.568 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 6
|
||||
→ error_message: "Duplicate entry '超500亿人次!前三季度我国人员流动量持续增加-' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '超500亿人次!前三季度我国人员流动量持续增加...', '文章链接': 'http://www.chinanews.com/cj/2025/10-29/10506211.shtml...', '文章摘要': '交通运输部10月29日发布数据显示,2025年前三季度,我国完成跨区域人员流动量506亿人次,同比增长3.1%;完成营业性货运量432.5亿吨,同比增长3.89%...
|
||||
2025-10-29 10:22:57.623 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 9
|
||||
→ error_message: "Duplicate entry '面向东盟的规则对接与贸易促进合作交流会在南å' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '面向东盟的规则对接与贸易促进合作交流会在南宁举办...', '文章链接': 'http://www.chinanews.com/cj/2025/10-29/10506173.shtml...', '文章摘要': '中新网南宁10月29日电(林浩)10月28日,面向东盟的规则对接与贸易促进合作交流会在广西南宁市举办。...', '发布时间': '2025-10-29 02:...
|
||||
2025-10-29 10:22:57.644 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 10
|
||||
→ error_message: "Duplicate entry '前三季度全国城镇新增就业1057万人-2025-10-29 02:06:' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '前三季度全国城镇新增就业1057万人...', '文章链接': 'http://www.chinanews.com/gn/2025/10-29/10506209.shtml...', '文章摘要': '人力资源社会保障部今天发布,1—9月,全国城镇新增就业1057万人,完成全年目标任务的88%,就业形势保持总体稳定。前三季度,全国累计发放稳岗返还资金196亿元,支持培训等...
|
||||
2025-10-29 10:22:57.662 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 11
|
||||
→ error_message: "Duplicate entry '国务院台办新任发言人亮相-2025-10-29 02:02:32' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '国务院台办新任发言人亮相...', '文章链接': 'http://www.chinanews.com/gn/2025/10-29/10506206.shtml...', '文章摘要': '10月29日上午,国务院台办举行例行新闻发布会。新任发言人彭庆恩首次亮相并主持发布会。...', '发布时间': '2025-10-29 02:02:32...', '来源URL': ...
|
||||
2025-10-29 10:22:57.698 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 13
|
||||
→ error_message: "Duplicate entry '巴西冲突致60余人死亡,中国总领馆发布紧急安å\x85' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '巴西冲突致60余人死亡,中国总领馆发布紧急安全提醒...', '文章链接': 'http://www.chinanews.com/gj/2025/10-29/10506204.shtml...', '文章摘要': '中新网10月29日电 据中国驻巴西里约热内卢总领馆微信公众号消息,10月28日,里约警方在市区北部阿莱芒(Alem#227;o)和佩尼亚(Penha)等区域开...
|
||||
2025-10-29 10:22:57.716 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 14
|
||||
→ error_message: "Duplicate entry '韩正出席第九届未来投资倡议大会并发表演讲-202' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '韩正出席第九届未来投资倡议大会并发表演讲...', '文章链接': 'http://www.chinanews.com/gn/2025/10-29/10506159.shtml...', '文章摘要': '中新社北京10月29日电 当地时间10月28日,中国国家副主席韩正在沙特利雅得出席第九届未来投资倡议大会并发表演讲。...', '发布时间': '2025-10-29...
|
||||
2025-10-29 10:22:57.735 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 15
|
||||
→ error_message: "Duplicate entry '湖南省农村信用社联合社原副主任舒立凡被开除å' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '湖南省农村信用社联合社原副主任舒立凡被开除党籍...', '文章链接': 'http://www.chinanews.com/gn/2025/10-29/10506199.shtml...', '文章摘要': '中新网10月29日电 据湖南省纪委监委消息,日前,经湖南省委批准,湖南省纪委监委对湖南省农村信用社联合社原党委委员、副主任舒立凡严重违纪违法问题进行了立案审查调查...
|
||||
2025-10-29 10:22:57.753 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 16
|
||||
→ error_message: "Duplicate entry '11月新规来了!事关小客车上牌,海南离岛免税è´' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '11月新规来了!事关小客车上牌,海南离岛免税购物...', '文章链接': 'http://www.chinanews.com/gn/2025/10-29/10506196.shtml...', '文章摘要': '中新经纬10月29日电 (王永乐)国产小客车新车上牌不用再跑车管所、互联网法院管辖范围调整、海南离岛旅客免税购物政策调整……2025年11月,一大波新规将施行,...
|
||||
2025-10-29 10:22:57.771 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 17
|
||||
→ error_message: "Duplicate entry '263.2万亿元!前三季度物流需求总量稳步增长-2025' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '263.2万亿元!前三季度物流需求总量稳步增长...', '文章链接': 'http://www.chinanews.com/cj/2025/10-29/10506200.shtml...', '文章摘要': '中国物流与采购联合会今天(29日)公布今年前三季度物流运行数据。随着积极有为的宏观政策持续发力,国内生产相关物流需求稳步扩张,消费物流需求潜力有序释放。前三季度,...
|
||||
2025-10-29 10:22:57.791 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 18
|
||||
→ error_message: "Duplicate entry '前三季度我国社会物流总费用14.2万亿元 物流效è\x83' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '前三季度我国社会物流总费用14.2万亿元 物流效能稳步提升...', '文章链接': 'http://www.chinanews.com/cj/2025/10-29/10506198.shtml...', '文章摘要': '中国物流与采购联合会今天(29日)公布今年前三季度物流运行数据。今年前三季度,有效降低全社会物流成本专项行动稳步推进,成果持续巩固。...', '发布...
|
||||
2025-10-29 10:22:57.833 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 20
|
||||
→ error_message: "Duplicate entry '2024年中国创新指数为174.2 比上年增长5.3%-2025-10-29' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '2024年中国创新指数为174.2 比上年增长5.3%...', '文章链接': 'http://www.chinanews.com/cj/2025/10-29/10506194.shtml...', '文章摘要': '中新网10月29日电 据国家统计局网站消息,根据国家统计局社科文司《中国创新指数研究》课题组测算,2024年中国创新指数为174.2(以2015年为100...
|
||||
2025-10-29 10:22:57.888 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 23
|
||||
→ error_message: "Duplicate entry '10月29日央行开展5577亿元7天期逆回购操作-2025-10-2' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '10月29日央行开展5577亿元7天期逆回购操作...', '文章链接': 'http://www.chinanews.com/cj/2025/10-29/10506179.shtml...', '文章摘要': '中新网10月29日电 据央行网站消息,2025年10月29日中国人民银行以固定利率、数量招标方式开展了5577亿元7天期逆回购操作。具体情况如下:...', '...
|
||||
2025-10-29 10:22:57.926 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 25
|
||||
→ error_message: "Duplicate entry '2024年中国创新指数比上年增长5.3%-2025-10-29 01:36:56' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '2024年中国创新指数比上年增长5.3%...', '文章链接': 'http://www.chinanews.com/cj/2025/10-29/10506192.shtml...', '文章摘要': '根据国家统计局社科文司《中国创新指数研究》课题组测算,2024年中国创新指数为174.2(以2015年为100),比上年增长5.3%。分领域看,创新环境指数、创新投入指...
|
||||
2025-10-29 10:22:57.964 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 27
|
||||
→ error_message: "Duplicate entry '为高质量发展提供法治保障 续写经济快速发展和' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '为高质量发展提供法治保障 续写经济快速发展和社会长期稳定两大奇迹新篇章...', '文章链接': 'http://www.chinanews.com/gn/2025/10-29/10506189.shtml...', '文章摘要': '周强...', '发布时间': '2025-10-29 01:34:12...', '来源URL': 'https://www.china...
|
||||
2025-10-29 10:22:58.000 | WARNING | mysql_agent:253 - 表 collector_rss_subscriptions 中跳过重复记录
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ index: 29
|
||||
→ error_message: "Duplicate entry '凝心聚力担使命 继往开来谱新篇-2025-10-29 01:32:29' for key 'collector_rss_subscriptions.idx_title_pubtime'"
|
||||
→ record: {'文章标题': '凝心聚力担使命 继往开来谱新篇...', '文章链接': 'http://www.chinanews.com/gn/2025/10-29/10506187.shtml...', '文章摘要': '王勇...', '发布时间': '2025-10-29 01:32:29...', '来源URL': 'https://www.chinanews.com.cn/rss/scro...
|
||||
2025-10-29 10:22:58.017 | INFO | mysql_agent:294 - 表 collector_rss_subscriptions 插入结果汇总
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ total_to_insert: 30
|
||||
→ total_inserted: 10
|
||||
→ total_duplicates: 20
|
||||
→ total_failed: 0
|
||||
→ failed_records_count: 20
|
||||
2025-10-29 10:22:58.018 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ failed_records_summary: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“蓉品出海——成都国际美食之都推介活动”在å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 2, 'type': 'du...
|
||||
→ detailed_failed_records: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“蓉品出海——成都国际美食之都推介活动”在å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '“蓉品出海...
|
||||
2025-10-29 10:22:58.020 | INFO | rss_subscriptions:259 - 成功写入 10/30 条记录
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:58.022 | DEBUG | rss_subscriptions:114 - 已保存本次更新时间: 2025-10-29 02:13:52
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:58.022 | INFO | rss_subscriptions:317 - 本次最新更新时间: 2025-10-29 02:13:52
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:22:58.131 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:23:03.426 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:23:03.637 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:23:03.639 | ERROR | 2003508611:141 - 任务执行失败: RSS基于规则数据处理
|
||||
→ module: 'TaskNotebook'
|
||||
2025-10-29 10:24:49.090 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:24:58.045 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:25:05.593 | INFO | task_scheduler:28 - 任务调度器已初始化,最大工作线程数: 5
|
||||
→ module: 'TaskScheduler'
|
||||
2025-10-29 10:25:08.578 | INFO | task_scheduler:28 - 任务调度器已初始化,最大工作线程数: 5
|
||||
→ module: 'TaskScheduler'
|
||||
2025-10-29 10:25:17.496 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:26:12.653 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:26:12.866 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:26:20.817 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:26:20.818 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:26:21.018 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:27:05.469 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:27:05.470 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:27:05.710 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:27:09.948 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:27:10.216 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:27:10.686 | ERROR | 2003508611:141 - 任务执行失败: RSS基于规则数据处理
|
||||
→ module: 'TaskNotebook'
|
||||
2025-10-29 10:27:28.047 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:27:28.421 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:27:28.424 | ERROR | 2003508611:141 - 任务执行失败: RSS新闻订阅
|
||||
→ module: 'TaskNotebook'
|
||||
2025-10-29 10:27:51.936 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:27:51.938 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:27:52.149 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:28:19.005 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:28:19.234 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:28:19.236 | ERROR | 2003508611:141 - 任务执行失败: RSS新闻订阅
|
||||
→ module: 'TaskNotebook'
|
||||
2025-10-29 10:28:48.825 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:28:48.828 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:28:49.058 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:29:06.300 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:29:06.301 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:29:06.542 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:29:11.015 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:29:11.232 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:29:11.234 | INFO | rss_subscriptions:46 - 新闻API客户端初始化完成,已连接到数据库
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:11.332 | DEBUG | mysql_agent:609 - 查询执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:29:11.419 | DEBUG | mysql_agent:609 - 查询执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 9
|
||||
2025-10-29 10:29:11.420 | INFO | rss_subscriptions:84 - 数据库表结构验证通过,当前字段:['id', '文章标题', '文章链接', '文章摘要', '发布时间', '来源URL', '创建时间', '更新时间', '是否已处理']
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:11.421 | DEBUG | rss_subscriptions:98 - 加载上次更新时间: 2025-10-29 02:13:52
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:11.421 | INFO | rss_subscriptions:299 - 上次更新时间: 2025-10-29 02:13:52
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:11.422 | INFO | rss_subscriptions:302 - 开始获取RSS源数据...
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:11.783 | DEBUG | rss_subscriptions:134 - 成功获取 https://www.chinanews.com.cn/rss/finance.xml 的RSS数据
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:11.801 | DEBUG | rss_subscriptions:134 - 成功获取 https://www.chinanews.com.cn/rss/china.xml 的RSS数据
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:11.818 | DEBUG | rss_subscriptions:134 - 成功获取 https://www.chinanews.com.cn/rss/world.xml 的RSS数据
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.068 | DEBUG | rss_subscriptions:134 - 成功获取 https://www.chinanews.com.cn/rss/scroll-news.xml 的RSS数据
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.069 | INFO | rss_subscriptions:161 - RSS源获取完成,成功获取 4/4 个源
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.070 | INFO | rss_subscriptions:305 - 获取完成,耗时: 0.65秒
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.071 | INFO | rss_subscriptions:218 - 开始处理 RSS 源: https://www.chinanews.com.cn/rss/finance.xml
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.072 | INFO | rss_subscriptions:218 - 开始处理 RSS 源: https://www.chinanews.com.cn/rss/china.xml
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.074 | DEBUG | rss_subscriptions:235 - 处理条目 1: 年轻人关心的这些事,“十五五”规划建议都提到了
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.156 | DEBUG | mysql_agent:176 - 已建立连接,准备插入数据到 collector_rss_subscriptions
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:29:12.172 | DEBUG | mysql_agent:182 - 表 collector_rss_subscriptions 包含以下列:['id', '文章标题', '文章链接', '文章摘要', '发布时间', '来源URL', '创建时间', '更新时间', '是否已处理']
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:29:12.173 | DEBUG | mysql_agent:208 - 表 collector_rss_subscriptions 的过滤后DataFrame:共 1 行待插入
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:29:12.209 | INFO | mysql_agent:294 - 表 collector_rss_subscriptions 插入结果汇总
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ total_to_insert: 1
|
||||
→ total_inserted: 1
|
||||
→ total_duplicates: 0
|
||||
→ total_failed: 0
|
||||
→ failed_records_count: 0
|
||||
2025-10-29 10:29:12.211 | INFO | rss_subscriptions:259 - 成功写入 1/1 条记录
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.212 | INFO | rss_subscriptions:218 - 开始处理 RSS 源: https://www.chinanews.com.cn/rss/world.xml
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.213 | INFO | rss_subscriptions:218 - 开始处理 RSS 源: https://www.chinanews.com.cn/rss/scroll-news.xml
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.217 | DEBUG | rss_subscriptions:114 - 已保存本次更新时间: 2025-10-29 02:23:59
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.217 | INFO | rss_subscriptions:317 - 本次最新更新时间: 2025-10-29 02:23:59
|
||||
→ module: 'NewsAPIClient'
|
||||
2025-10-29 10:29:12.326 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:29:16.940 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:29:17.149 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:29:17.151 | ERROR | 2003508611:141 - 任务执行失败: RSS基于规则数据处理
|
||||
→ module: 'TaskNotebook'
|
||||
2025-10-29 10:29:55.856 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:29:55.857 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:29:56.081 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:29:58.296 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:29:58.534 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:29:58.535 | INFO | processor_rss_data:65 - RSS数据处理器初始化完成
|
||||
→ module: 'RSSDataProcessor'
|
||||
2025-10-29 10:29:58.537 | INFO | processor_rss_data:335 - 开始处理RSS数据...
|
||||
→ module: 'RSSDataProcessor'
|
||||
2025-10-29 10:29:58.538 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: '\n SELECT id, 文章标题, 文章摘要, 发布时间, 来源URL, 文章链接\n FROM collector_rss_subscriptions\n WHERE 是否已处理 = 0\n ORDER BY 发布时间 DESC\n LIMIT %s\n '
|
||||
2025-10-29 10:29:58.844 | INFO | processor_rss_data:107 - 成功加载 100 条未处理的RSS数据
|
||||
→ module: 'RSSDataProcessor'
|
||||
2025-10-29 10:29:58.846 | INFO | processor_rss_data:146 - 成功加载停用词表,共 98 个词
|
||||
→ module: 'RSSDataProcessor'
|
||||
2025-10-29 10:29:58.848 | INFO | processor_rss_data:82 - 成功加载汽车后市场关键词,共 37 个
|
||||
→ module: 'RSSDataProcessor'
|
||||
2025-10-29 10:29:59.913 | INFO | processor_rss_data:235 - 数据处理完成,共处理 100 条记录
|
||||
→ module: 'RSSDataProcessor'
|
||||
2025-10-29 10:29:59.930 | INFO | processor_rss_data:246 - 过滤出 3 条汽车后市场相关新闻
|
||||
→ module: 'RSSDataProcessor'
|
||||
2025-10-29 10:30:00.026 | DEBUG | mysql_agent:609 - 查询执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:30:00.027 | DEBUG | mysql_agent:640 - 检查表是否存在
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 表: 'processed_rss_data'
|
||||
→ 存在: True
|
||||
2025-10-29 10:30:00.096 | DEBUG | mysql_agent:176 - 已建立连接,准备插入数据到 processed_rss_data
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:30:00.112 | DEBUG | mysql_agent:182 - 表 processed_rss_data 包含以下列:['id', '文章标题', '文章摘要', '发布时间', '来源URL', '文章链接', '分词结果', '相关度分数', '是否汽车相关', '处理时间', '创建时间', '更新时间', '是否ai处理']
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:30:00.113 | DEBUG | mysql_agent:208 - 表 processed_rss_data 的过滤后DataFrame:共 3 行待插入
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
2025-10-29 10:30:00.191 | INFO | mysql_agent:294 - 表 processed_rss_data 插入结果汇总
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ total_to_insert: 3
|
||||
→ total_inserted: 3
|
||||
→ total_duplicates: 0
|
||||
→ total_failed: 0
|
||||
→ failed_records_count: 0
|
||||
2025-10-29 10:30:00.192 | INFO | processor_rss_data:283 - 成功保存 3 条处理结果到数据库
|
||||
→ module: 'RSSDataProcessor'
|
||||
2025-10-29 10:30:00.341 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 100
|
||||
2025-10-29 10:30:00.342 | INFO | processor_rss_data:129 - 成功标记 100 条数据为已处理
|
||||
→ module: 'RSSDataProcessor'
|
||||
2025-10-29 10:30:00.343 | INFO | processor_rss_data:372 - RSS数据处理完成
|
||||
→ module: 'RSSDataProcessor'
|
||||
→ total_articles: 100
|
||||
→ filtered_articles: 3
|
||||
→ filter_rate: 0.03
|
||||
→ processing_time: '2025-10-29 10:29:59'
|
||||
→ save_success: True
|
||||
→ mark_success: True
|
||||
2025-10-29 10:30:00.448 | DEBUG | mysql_agent:614 - 更新执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 受影响行数: 1
|
||||
2025-10-29 10:30:10.307 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
|
||||
2025-10-29 10:30:10.544 | INFO | mysql_agent:134 - 查询执行成功
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:30:11.221 | INFO | ai_processor_rss_data:51 - RSS数据AI处理器初始化完成
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:30:11.222 | INFO | ai_processor_rss_data:87 - 开始批量处理数据,批次大小: 200, 延迟: 1.5秒
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:30:11.323 | DEBUG | mysql_agent:609 - 查询执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:30:11.324 | DEBUG | ai_processor_rss_data:191 - 表 processed_rss_data 已存在 '是否ai处理' 字段
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:30:11.416 | DEBUG | mysql_agent:609 - 查询执行完成
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 行数: 1
|
||||
2025-10-29 10:30:11.416 | DEBUG | mysql_agent:640 - 检查表是否存在
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ 表: 'ai_processor_rss_analysis'
|
||||
→ 存在: True
|
||||
2025-10-29 10:30:11.417 | DEBUG | mysql_agent:116 - 执行SQL查询
|
||||
→ module: 'MySQLAgent(Windows)'
|
||||
→ sql: '\n SELECT id, 文章标题, 文章摘要, 发布时间, 来源URL, 文章链接\n FROM processed_rss_data\n WHERE 是否ai处理 = 0 OR 是否ai处理 IS NULL\n ORDER BY 创建时间 DESC\n LIMIT %s\n '
|
||||
2025-10-29 10:30:11.644 | INFO | ai_processor_rss_data:249 - 成功加载 3 条未处理的数据
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:30:11.645 | DEBUG | ai_processor_rss_data:112 - 处理记录 181 (1/3)
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
2025-10-29 10:30:40.130 | DEBUG | ai_processor_rss_data:112 - 处理记录 182 (2/3)
|
||||
→ module: 'RSSDataAIProcessor'
|
||||
|
||||
+1829
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,37 @@
|
||||
汽车配件
|
||||
汽车维修
|
||||
汽车保养
|
||||
汽车改装
|
||||
汽车美容
|
||||
汽车装饰
|
||||
轮胎
|
||||
机油
|
||||
刹车片
|
||||
火花塞
|
||||
滤清器
|
||||
蓄电池
|
||||
车灯
|
||||
保险杠
|
||||
车门
|
||||
座椅
|
||||
方向盘
|
||||
仪表盘
|
||||
音响
|
||||
导航
|
||||
汽车用品
|
||||
车载设备
|
||||
汽车电子
|
||||
汽车安全
|
||||
汽车保险
|
||||
二手车
|
||||
汽车交易
|
||||
汽车金融
|
||||
汽车租赁
|
||||
汽车服务
|
||||
4S店
|
||||
汽修店
|
||||
汽车后市场
|
||||
汽车产业链
|
||||
汽车供应链
|
||||
汽车
|
||||
车
|
||||
@@ -0,0 +1,409 @@
|
||||
# RSS数据处理模块 - 汽车后市场新闻分词和过滤
|
||||
import pandas as pd
|
||||
import jieba
|
||||
import jieba.posseg as pseg
|
||||
import os
|
||||
import sys
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
|
||||
# Put the directory that contains this module's package on sys.path
# (presumably the project root) so the absolute imports that follow resolve
# when the file is executed directly.
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
already_importable = parent_dir in sys.path
if not already_importable:
    # Insert at the front so project modules win over same-named packages.
    sys.path.insert(0, parent_dir)
|
||||
|
||||
from utils.mysql_agent import MySQLAgent
|
||||
from utils.logger import log
|
||||
from config import Config
|
||||
|
||||
class RSSDataProcessor:
|
||||
"""RSS数据处理器 - 专门处理汽车后市场相关新闻"""
|
||||
|
||||
def __init__(self):
    """Set up logging, the database agent, file paths and default word sets.

    The keyword and stop-word sets defined here are fallbacks; the loader
    methods prefer the text files under ``<project_root>/processors``.
    """
    self.log = log.bind(module="RSSDataProcessor")
    self.db_agent = MySQLAgent(Config.MYSQL_CONFIG)

    # Source table of raw RSS rows and destination table for processed rows.
    self.table_name = "collector_rss_subscriptions"
    self.processed_table_name = "processed_rss_data"

    # The project root is the parent of the directory holding this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    self.project_root = os.path.dirname(module_dir)

    # Word-list files live in the "processors" directory under the root.
    processors_dir = os.path.join(self.project_root, "processors")
    self.keywords_file = os.path.join(processors_dir, "keywords.txt")
    self.stopwords_file = os.path.join(processors_dir, "stopwords.txt")

    # Fallback auto-aftermarket keywords, used when the file cannot be read.
    self.auto_aftermarket_keywords = {
        '汽车配件', '汽车维修', '汽车保养', '汽车改装', '汽车美容', '汽车装饰',
        '轮胎', '机油', '刹车片', '火花塞', '滤清器', '蓄电池', '车灯',
        '保险杠', '车门', '座椅', '方向盘', '仪表盘', '音响', '导航',
        '汽车用品', '车载设备', '汽车电子', '汽车安全', '汽车保险',
        '二手车', '汽车交易', '汽车金融', '汽车租赁', '汽车服务',
        '4S店', '汽修店', '汽车后市场', '汽车产业链', '汽车供应链', '汽车', '车',
    }

    # Fallback stop words, used when the stop-word file cannot be read.
    self.stopwords = {
        '的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都', '一', '一个',
        '上', '也', '很', '到', '说', '要', '去', '你', '会', '着', '没有', '看', '好',
        '自己', '这', '那', '它', '他', '她', '我们', '你们', '他们', '什么', '怎么',
        '为什么', '因为', '所以', '但是', '然后', '如果', '虽然', '而且', '或者',
        '可以', '应该', '必须', '需要', '想要', '希望', '觉得', '认为', '知道',
        '了解', '明白', '清楚', '简单', '容易', '困难', '重要', '主要', '基本',
        '一般', '特别', '非常', '十分', '相当', '比较', '更加', '最', '更',
        '已经', '正在', '将要', '可能', '也许', '大概', '大约', '左右', '上下',
        '今天', '明天', '昨天', '现在', '以前', '以后', '时候', '时间', '地方',
        '这里', '那里', '这样', '那样', '如此', '这样', '那样', '如何', '怎样',
    }

    # Filled lazily by load_keywords() so the file is read at most once.
    self._cached_keywords = None

    self.log.info("RSS数据处理器初始化完成")
|
||||
|
||||
def load_keywords(self, keywords_file: Optional[str] = None) -> set:
    """Load the auto-aftermarket keyword set from a text file.

    The file is read as UTF-8 with one keyword per line; blank lines are
    ignored. If the file is missing or cannot be read, the built-in
    ``self.auto_aftermarket_keywords`` set is returned instead.

    Only the default file (``self.keywords_file``) is cached. An explicitly
    requested different file is always read fresh and never overwrites the
    cache, so a custom path can no longer be answered with stale keywords
    from another file.

    Args:
        keywords_file: Path of the keyword file; ``None`` selects
            ``self.keywords_file``.

    Returns:
        The set of keywords.
    """
    default_path = self.keywords_file
    use_default = keywords_file is None or keywords_file == default_path

    # The cache is only valid for the default keyword file.
    if use_default and self._cached_keywords is not None:
        return self._cached_keywords

    if keywords_file is None:
        keywords_file = default_path

    try:
        if os.path.exists(keywords_file):
            with open(keywords_file, 'r', encoding='utf-8') as f:
                keywords = {line.strip() for line in f if line.strip()}
            self.log.info(f"成功加载汽车后市场关键词,共 {len(keywords)} 个")
        else:
            self.log.warning(f"关键词文件不存在: {keywords_file}")
            # Fall back to the built-in keywords.
            keywords = self.auto_aftermarket_keywords
    except Exception as e:
        # Best effort: an unreadable file must not stop processing.
        self.log.error(f"加载关键词失败: {str(e)}")
        keywords = self.auto_aftermarket_keywords

    if use_default:
        self._cached_keywords = keywords
    return keywords
|
||||
|
||||
def load_rss_data(self, limit: int = 1000) -> pd.DataFrame:
    """Fetch RSS rows that have not been processed yet.

    Selects rows of ``self.table_name`` whose processed flag is 0, newest
    publication time first, via ``self.db_agent.query_to_df``.

    Args:
        limit: Maximum number of rows to fetch (bound as a SQL parameter).

    Returns:
        The query result, or an empty DataFrame if the query raised.
    """
    try:
        # The table name is an internal attribute; the limit is parameterized.
        query = f"""
            SELECT id, 文章标题, 文章摘要, 发布时间, 来源URL, 文章链接
            FROM {self.table_name}
            WHERE 是否已处理 = 0
            ORDER BY 发布时间 DESC
            LIMIT %s
        """
        pending = self.db_agent.query_to_df(query, params=(limit,), is_print=False)
        self.log.info(f"成功加载 {len(pending)} 条未处理的RSS数据")
        return pending
    except Exception as e:
        self.log.error(f"加载RSS数据失败: {str(e)}", exc_info=True)
        return pd.DataFrame()
|
||||
|
||||
def mark_as_processed(self, ids: List[int]) -> bool:
    """Flag the given rows of ``self.table_name`` as processed.

    Args:
        ids: Primary-key values to update. An empty list is a no-op.

    Returns:
        True when there was nothing to do or the update call returned
        without raising; False if it raised.
    """
    if not ids:
        return True

    try:
        # One "%s" placeholder per id keeps the IN clause fully parameterized.
        placeholders = ','.join('%s' for _ in ids)
        update_sql = f"""
            UPDATE {self.table_name}
            SET 是否已处理 = 1
            WHERE id IN ({placeholders})
        """
        self.db_agent.execute_sql(update_sql, params=ids)
        self.log.info(f"成功标记 {len(ids)} 条数据为已处理")
        return True
    except Exception as e:
        self.log.error(f"标记数据为已处理失败: {str(e)}", exc_info=True)
        return False
|
||||
|
||||
def load_stopwords(self, stopwords_file: Optional[str] = None) -> set:
    """Read the stop-word list from a UTF-8 text file, one word per line.

    Args:
        stopwords_file: Path of the stop-word file; ``None`` selects
            ``self.stopwords_file``.

    Returns:
        The words found in the file (blank lines skipped), or the built-in
        ``self.stopwords`` set when the file is missing or unreadable.
    """
    path = self.stopwords_file if stopwords_file is None else stopwords_file

    try:
        if not os.path.exists(path):
            self.log.warning(f"停用词文件不存在: {path},使用默认停用词")
            return self.stopwords

        with open(path, 'r', encoding='utf-8') as handle:
            loaded = {entry.strip() for entry in handle if entry.strip()}
        self.log.info(f"成功加载停用词表,共 {len(loaded)} 个词")
        return loaded
    except Exception as e:
        # Best effort: fall back to the defaults rather than fail the run.
        self.log.error(f"加载停用词表失败: {str(e)}")
        return self.stopwords
|
||||
|
||||
def add_custom_dict(self, custom_dict_file: Optional[str] = None):
    """Extend jieba's dictionary with a user file and the keyword set.

    Args:
        custom_dict_file: Optional path of a jieba user dictionary. It is
            loaded only if it is given and exists; a load failure is logged
            as a warning and otherwise ignored.
    """
    has_user_dict = custom_dict_file and os.path.exists(custom_dict_file)
    if has_user_dict:
        try:
            jieba.load_userdict(custom_dict_file)
            self.log.info("成功加载自定义词典")
        except Exception as e:
            self.log.warning(f"加载自定义词典失败: {str(e)}")

    # Register every aftermarket keyword as a noun with frequency 1000.
    for term in self.load_keywords():
        jieba.add_word(term, freq=1000, tag='n')
|
||||
|
||||
def segment_and_pos(self, text: str, stopwords: set) -> List[str]:
    """Tokenize ``text`` with part-of-speech tagging and keep selected words.

    A token is kept when, after stripping whitespace, it is non-empty, is
    not in ``stopwords``, carries one of the allowed POS flags and is not
    made up solely of digits.

    Args:
        text: Text to segment; falsy or NaN input yields an empty list.
        stopwords: Words to drop from the result.

    Returns:
        The kept tokens in their original order (duplicates preserved).
    """
    if not text or pd.isna(text):
        return []

    # Noun-like flags only: n, vn, np, ns, nr, nt. Plain verbs ('v') are
    # not part of this set.
    allowed_flags = {'n', 'vn', 'np', 'ns', 'nr', 'nt'}

    kept = []
    for token, flag in pseg.cut(str(text)):
        token = token.strip()
        if len(token) < 1:
            continue
        if token in stopwords:
            continue
        if flag not in allowed_flags:
            continue
        if token.isdigit():  # drop pure numbers
            continue
        kept.append(token)
    return kept
|
||||
|
||||
def is_auto_aftermarket_related(self, text: str) -> bool:
    """Return True if ``text`` contains any auto-aftermarket keyword.

    Matching is a case-insensitive substring test. Both the text and each
    keyword are lowercased; previously only the text was, so a keyword
    containing uppercase characters (such as '4S店') could never match.

    No separate check of segmented tokens is needed: every token produced
    by segmentation is itself a substring of the text, so a token equal to
    a keyword is already found by the substring test.

    Args:
        text: Text to inspect; falsy input yields False.

    Returns:
        True when at least one keyword occurs in the text.
    """
    if not text:
        return False

    haystack = str(text).lower()
    # Skip empty keywords: '' is a substring of everything.
    return any(
        keyword.lower() in haystack
        for keyword in self.load_keywords()
        if keyword
    )
|
||||
|
||||
def process_dataframe(self, df: pd.DataFrame, stopwords: set) -> pd.DataFrame:
    """Segment and classify every article in ``df`` (modified in place).

    Args:
        df: Articles with the columns ``文章标题`` and ``文章摘要``.
        stopwords: Stopword set forwarded to ``self.segment_and_pos``.

    Returns:
        The same DataFrame object with the columns ``combined_text``,
        ``segmented_words``, ``is_auto_related``, ``is_filtered`` and
        ``processed_time`` added. An empty frame is returned untouched
        after a warning is logged.
    """
    if df.empty:
        self.log.warning("输入的DataFrame为空")
        return df

    # Normalise both text columns: missing values become '' and every
    # value becomes a str.
    for text_column in ('文章标题', '文章摘要'):
        df[text_column] = df[text_column].fillna('').astype(str)

    # Title and summary are analysed together as one text.
    df['combined_text'] = df['文章标题'] + ' ' + df['文章摘要']

    def tokenize(value):
        return self.segment_and_pos(value, stopwords)

    df['segmented_words'] = df['combined_text'].apply(tokenize)

    # An article is kept as soon as a keyword shows up, so the filter flag
    # simply mirrors the relevance flag.
    relevance = df['combined_text'].apply(self.is_auto_aftermarket_related)
    df['is_auto_related'] = relevance
    df['is_filtered'] = df['is_auto_related']

    # One timestamp for the whole batch.
    batch_stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    df['processed_time'] = batch_stamp

    self.log.info(f"数据处理完成,共处理 {len(df)} 条记录")
    return df
|
||||
|
||||
def filter_auto_aftermarket_news(self, df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the rows whose ``is_filtered`` flag equals True.

    Args:
        df: Frame carrying an ``is_filtered`` column.

    Returns:
        A new DataFrame with the matching rows, or ``df`` itself when it
        is empty.
    """
    if df.empty:
        return df

    keep_mask = df['is_filtered'].eq(True)
    relevant = df.loc[keep_mask].copy()

    self.log.info(f"过滤出 {len(relevant)} 条汽车后市场相关新闻")
    return relevant
|
||||
|
||||
def save_to_database(self, df: pd.DataFrame) -> bool:
    """Persist processed articles into ``self.processed_table_name``.

    Args:
        df: Processed articles; must carry the article columns plus
            ``segmented_words``, ``is_auto_related`` and ``processed_time``.

    Returns:
        True when the insert call completed, False when ``df`` is empty
        or any step raised (the error is logged, not propagated).
    """
    if df.empty:
        self.log.warning("没有数据需要保存")
        return False

    wanted_columns = ['文章标题', '文章摘要', '发布时间', '来源URL', '文章链接',
                      'segmented_words', 'is_auto_related', 'processed_time']
    column_aliases = {
        'is_auto_related': '是否汽车相关',
        'processed_time': '处理时间',
    }

    try:
        payload = df[wanted_columns].copy()

        # Store the token list as one space-separated string, then drop
        # the list column and switch to the table's column names.
        payload['分词结果'] = payload['segmented_words'].apply(' '.join)
        payload = payload.drop(columns=['segmented_words']).rename(columns=column_aliases)

        # Create the target table on first use.
        if not self.db_agent.table_exists(self.processed_table_name):
            self.create_processed_table()

        inserted_rows = self.db_agent.insert_from_df(
            table_name=self.processed_table_name,
            df=payload,
            ignore_duplicates=True,
        )
    except Exception as exc:
        self.log.error(f"保存到数据库失败: {str(exc)}", exc_info=True)
        return False

    self.log.info(f"成功保存 {inserted_rows} 条处理结果到数据库")
    return True
|
||||
|
||||
def create_processed_table(self):
    """Create the processed-results table if it does not exist yet.

    The DDL is run through ``self.db_agent.execute_sql``.

    Raises:
        Exception: Whatever ``execute_sql`` raised, re-raised after the
            failure has been logged.
    """
    ddl = f"""
    CREATE TABLE IF NOT EXISTS {self.processed_table_name} (
        id INT AUTO_INCREMENT PRIMARY KEY,
        文章标题 TEXT,
        文章摘要 TEXT,
        发布时间 DATETIME,
        来源URL VARCHAR(1024),
        文章链接 VARCHAR(1024),
        分词结果 TEXT,
        是否汽车相关 BOOLEAN,
        处理时间 DATETIME,
        创建时间 TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        更新时间 TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
    """

    try:
        self.db_agent.execute_sql(ddl)
        self.log.info(f"成功创建处理结果表: {self.processed_table_name}")
    except Exception as exc:
        # Log for diagnosis, then let the caller decide how to react.
        self.log.error(f"创建表失败: {str(exc)}", exc_info=True)
        raise
|
||||
|
||||
def get_processing_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
    """Summarise how many articles passed the relevance filter.

    Args:
        df: Processed frame carrying an ``is_filtered`` column.

    Returns:
        An empty dict for an empty frame; otherwise a dict with
        ``total_articles``, ``filtered_articles``, ``filter_rate``
        (filtered / total) and ``processing_time`` (current local time
        formatted as ``YYYY-MM-DD HH:MM:SS``).
    """
    if df.empty:
        return {}

    total_count = len(df)
    filtered_count = int(df['is_filtered'].eq(True).sum())

    if total_count > 0:
        filter_rate = filtered_count / total_count
    else:
        filter_rate = 0

    return {
        'total_articles': total_count,
        'filtered_articles': filtered_count,
        'filter_rate': filter_rate,
        'processing_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    }
|
||||
|
||||
def process_rss_data(self, limit: int = 1000, save_to_db: bool = True) -> Dict[str, Any]:
    """Run the full RSS pipeline: load, segment, filter, save, mark.

    Args:
        limit: Forwarded to ``self.load_rss_data``.
        save_to_db: When True, relevant articles are written through
            ``self.save_to_database``.

    Returns:
        ``{'success': False, 'message': ...}`` when nothing was loaded or
        an exception occurred; otherwise a dict with ``success``,
        ``message``, ``statistics`` and ``filtered_data``. ``statistics``
        gains ``save_success`` when a save was attempted and
        ``mark_success`` when the rows were marked as processed.

    Source rows are marked as processed only if the save step did not
    fail. After a failed save they stay unmarked so the next run picks
    them up again instead of silently losing the relevant articles.
    """
    try:
        self.log.info("开始处理RSS数据...")

        # 1. Load the raw RSS rows.
        df = self.load_rss_data(limit)
        if df.empty:
            self.log.warning("没有加载到RSS数据")
            return {'success': False, 'message': '没有数据可处理'}

        # 2. Stopwords and 3. custom dictionary.
        stopwords = self.load_stopwords()
        self.add_custom_dict()

        # 4. Segment/classify, 5. keep relevant rows, 6. statistics.
        processed_df = self.process_dataframe(df, stopwords)
        filtered_df = self.filter_auto_aftermarket_news(processed_df)
        stats = self.get_processing_statistics(processed_df)

        # 7. Persist the relevant rows.
        save_failed = False
        if save_to_db and not filtered_df.empty:
            save_success = self.save_to_database(filtered_df)
            stats['save_success'] = save_success
            save_failed = not save_success

        # 8. Mark the loaded rows as processed, unless the save failed:
        # marking them then would drop the relevant articles for good.
        if save_failed:
            self.log.warning("保存失败,本批数据未标记为已处理,将在下次运行时重试")
        elif 'id' in df.columns:
            processed_ids = df['id'].tolist()
            mark_success = self.mark_as_processed(processed_ids)
            stats['mark_success'] = mark_success
            if not mark_success:
                self.log.warning("部分数据标记为已处理失败")

        # 9. Report.
        self.log.info("RSS数据处理完成", **stats)

        return {
            'success': True,
            'message': 'RSS数据处理完成',
            'statistics': stats,
            'filtered_data': filtered_df
        }

    except Exception as e:
        self.log.error(f"RSS数据处理失败: {str(e)}", exc_info=True)
        return {'success': False, 'message': f'处理失败: {str(e)}'}
|
||||
|
||||
|
||||
def main():
    """Script entry point: process up to 5000 RSS rows and print the result.

    Any exception raised while building or running the processor is
    caught and printed instead of propagating.
    """
    try:
        outcome = RSSDataProcessor().process_rss_data(
            limit=5000,       # size of the batch handed to the loader
            save_to_db=True,  # persist the relevant articles
        )

        if not outcome['success']:
            print(f"处理失败: {outcome['message']}")
        else:
            print("RSS数据处理完成!")
            print(f"处理统计: {outcome['statistics']}")

    except Exception as exc:
        print(f"程序运行出错: {str(exc)}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,100 @@
|
||||
的
|
||||
了
|
||||
在
|
||||
是
|
||||
我
|
||||
有
|
||||
和
|
||||
就
|
||||
不
|
||||
人
|
||||
都
|
||||
一
|
||||
一个
|
||||
上
|
||||
也
|
||||
很
|
||||
到
|
||||
说
|
||||
要
|
||||
去
|
||||
你
|
||||
会
|
||||
着
|
||||
没有
|
||||
看
|
||||
好
|
||||
自己
|
||||
这
|
||||
那
|
||||
它
|
||||
他
|
||||
她
|
||||
我们
|
||||
你们
|
||||
他们
|
||||
什么
|
||||
怎么
|
||||
为什么
|
||||
因为
|
||||
所以
|
||||
但是
|
||||
然后
|
||||
如果
|
||||
虽然
|
||||
而且
|
||||
或者
|
||||
可以
|
||||
应该
|
||||
必须
|
||||
需要
|
||||
想要
|
||||
希望
|
||||
觉得
|
||||
认为
|
||||
知道
|
||||
了解
|
||||
明白
|
||||
清楚
|
||||
简单
|
||||
容易
|
||||
困难
|
||||
重要
|
||||
主要
|
||||
基本
|
||||
一般
|
||||
特别
|
||||
非常
|
||||
十分
|
||||
相当
|
||||
比较
|
||||
更加
|
||||
最
|
||||
更
|
||||
已经
|
||||
正在
|
||||
将要
|
||||
可能
|
||||
也许
|
||||
大概
|
||||
大约
|
||||
左右
|
||||
上下
|
||||
今天
|
||||
明天
|
||||
昨天
|
||||
现在
|
||||
以前
|
||||
以后
|
||||
时候
|
||||
时间
|
||||
地方
|
||||
这里
|
||||
那里
|
||||
这样
|
||||
那样
|
||||
如此
|
||||
这样
|
||||
那样
|
||||
如何
|
||||
怎样
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -246,50 +246,90 @@ class TaskScheduler:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _execute_task_logic(self, task: Dict[str, Any]) -> None:
|
||||
"""执行任务的具体逻辑(动态导入模块)"""
|
||||
start_time = time.time()
|
||||
task_id = task['task_id']
|
||||
module_path = task['module_path']
|
||||
task_log = log.bind(task_id=task_id, module=module_path)
|
||||
def _execute_task_logic(self, task):
|
||||
"""
|
||||
执行任务逻辑的核心方法
|
||||
支持类方法、静态方法和实例方法的调用
|
||||
"""
|
||||
module_path = task.get('module_path')
|
||||
if not module_path:
|
||||
raise ValueError("任务缺少 module_path 配置")
|
||||
|
||||
# 解析模块路径和类名
|
||||
try:
|
||||
path_parts = module_path.split('.')
|
||||
if len(path_parts) < 2:
|
||||
raise ValueError(f"无效的模块路径: {module_path}")
|
||||
|
||||
module_name = '.'.join(path_parts[:-1])
|
||||
class_name = path_parts[-1]
|
||||
method_name = 'main' # 默认方法名
|
||||
except Exception as e:
|
||||
raise ValueError(f"解析模块路径失败: {str(e)}")
|
||||
|
||||
# 动态导入模块
|
||||
try:
|
||||
import importlib
|
||||
module = importlib.import_module(module_name)
|
||||
except ImportError as e:
|
||||
raise ImportError(f"无法导入模块 {module_name}: {str(e)}")
|
||||
|
||||
# 获取类和方法
|
||||
if not hasattr(module, class_name):
|
||||
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
|
||||
|
||||
cls = getattr(module, class_name)
|
||||
|
||||
# 检查是否存在指定方法
|
||||
if not hasattr(cls, method_name):
|
||||
raise AttributeError(f"类 {class_name} 中未找到方法 {method_name}")
|
||||
|
||||
method = getattr(cls, method_name)
|
||||
|
||||
# 根据方法类型决定如何调用
|
||||
import inspect
|
||||
callable_entry = None
|
||||
|
||||
# 判断是否为静态方法或类方法
|
||||
if isinstance(method, staticmethod):
|
||||
# 静态方法可以直接调用
|
||||
callable_entry = method
|
||||
elif isinstance(method, classmethod):
|
||||
# 类方法需要传入类作为第一个参数
|
||||
callable_entry = method
|
||||
else:
|
||||
# 实例方法或普通函数
|
||||
try:
|
||||
# 尝试检查方法签名
|
||||
sig = inspect.signature(method)
|
||||
params = list(sig.parameters.values())
|
||||
|
||||
# 如果第一个参数是self且没有默认值,则认为是实例方法
|
||||
if params and params[0].name == 'self' and params[0].default == inspect.Parameter.empty:
|
||||
# 创建实例并获取绑定方法
|
||||
instance = cls()
|
||||
callable_entry = getattr(instance, method_name)
|
||||
else:
|
||||
# 可能是普通函数或者是带有默认self参数的方法
|
||||
callable_entry = method
|
||||
except Exception:
|
||||
# 如果检查签名失败,默认尝试创建实例
|
||||
try:
|
||||
instance = cls()
|
||||
callable_entry = getattr(instance, method_name)
|
||||
except Exception:
|
||||
# 如果创建实例也失败,则直接调用方法(适用于不需要self的特殊情况)
|
||||
callable_entry = method
|
||||
|
||||
# 执行任务
|
||||
if not callable(callable_entry):
|
||||
raise TypeError(f"{module_path}.{method_name} 不是可调用对象")
|
||||
|
||||
try:
|
||||
# 解析可调用入口(支持模块/类/函数路径)
|
||||
# 若路径最终为类,先实例化再调 main;否则直接调用
|
||||
target_obj = None
|
||||
parts = module_path.split('.') if isinstance(module_path, str) else []
|
||||
resolved = None
|
||||
try:
|
||||
# 尝试导入尽可能深的模块
|
||||
for i in range(len(parts), 0, -1):
|
||||
mod = importlib.import_module('.'.join(parts[:i]))
|
||||
attr_chain = parts[i:]
|
||||
obj = mod
|
||||
for attr in attr_chain:
|
||||
obj = getattr(obj, attr)
|
||||
resolved = obj
|
||||
break
|
||||
except Exception:
|
||||
resolved = None
|
||||
|
||||
if isinstance(resolved, type):
|
||||
try:
|
||||
target_obj = resolved() # 触发 __init__ 日志
|
||||
if hasattr(target_obj, 'main') and callable(getattr(target_obj, 'main')):
|
||||
task_log.debug("开始执行实例的 main()")
|
||||
getattr(target_obj, 'main')()
|
||||
else:
|
||||
raise AttributeError(f"类 {resolved.__name__} 未提供可调用的 main()")
|
||||
except Exception as e:
|
||||
raise
|
||||
else:
|
||||
callable_entry = self._resolve_callable(module_path)
|
||||
task_log.debug("开始执行任务入口函数")
|
||||
callable_entry()
|
||||
task_log.info(f"任务执行完成,耗时: {time.time() - start_time:.2f}秒")
|
||||
|
||||
# 执行任务逻辑
|
||||
callable_entry()
|
||||
except Exception as e:
|
||||
task_log.error("任务逻辑执行失败", exc_info=True)
|
||||
self.logger.error(f"任务逻辑执行失败: {str(e)}")
|
||||
raise
|
||||
|
||||
def _calculate_next_run_time(self, cron_expr: str, time_zone: str = 'Asia/Shanghai') -> datetime:
|
||||
|
||||
Binary file not shown.
+223
-319
@@ -10,25 +10,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-17T05:43:18.381936Z",
|
||||
"start_time": "2025-10-17T05:43:15.265036Z"
|
||||
},
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"PROJECT_ROOT = d:\\Idea Project\\intelligence_system\n",
|
||||
"\u001b[32m2025-10-23 16:56:55\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m任务调度器已初始化,最大工作线程数: 5\u001b[0m\n"
|
||||
]
|
||||
"end_time": "2025-10-29T02:25:08.582541Z",
|
||||
"start_time": "2025-10-29T02:25:08.473381Z"
|
||||
}
|
||||
],
|
||||
},
|
||||
"source": [
|
||||
"# 使 Notebook 可从项目根导入\n",
|
||||
"import sys\n",
|
||||
@@ -217,7 +206,18 @@
|
||||
" except Exception:\n",
|
||||
" pass\n",
|
||||
" return str(dt)"
|
||||
]
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"PROJECT_ROOT = D:\\Idea Project\\intelligence_system\n",
|
||||
"\u001B[32m2025-10-29 10:25:08\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mtask_scheduler\u001B[0m - \u001B[1m任务调度器已初始化,最大工作线程数: 5\u001B[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 8
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -242,7 +242,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32m2025-10-17 13:43:18\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n"
|
||||
"\u001B[32m2025-10-29 09:54:09\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -281,20 +281,36 @@
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>RSS基于规则数据处理</td>\n",
|
||||
" <td>processor</td>\n",
|
||||
" <td>processors.processor_rss_data</td>\n",
|
||||
" <td>0 8,20 * * *</td>\n",
|
||||
" <td>Asia/Shanghai</td>\n",
|
||||
" <td>2025-10-28 20:00:00</td>\n",
|
||||
" <td>2025-10-28 13:34:49</td>\n",
|
||||
" <td>success</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2025-10-22 16:06:42</td>\n",
|
||||
" <td>2025-10-28 13:34:50</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>RSS新闻订阅</td>\n",
|
||||
" <td>collector</td>\n",
|
||||
" <td>collectors.rss_subscriptions.NewsAPIClient</td>\n",
|
||||
" <td>5 0 * * *</td>\n",
|
||||
" <td>*/5 * * * *</td>\n",
|
||||
" <td>Asia/Shanghai</td>\n",
|
||||
" <td>2025-10-18 00:05:00</td>\n",
|
||||
" <td>2025-10-17 00:05:07</td>\n",
|
||||
" <td>2025-10-28 13:40:00</td>\n",
|
||||
" <td>2025-10-28 13:35:09</td>\n",
|
||||
" <td>success</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>495</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2025-10-16 15:47:34</td>\n",
|
||||
" <td>2025-10-17 00:05:08</td>\n",
|
||||
" <td>2025-10-28 13:35:09</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>"
|
||||
@@ -346,34 +362,54 @@
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>RSS基于规则数据处理</td>\n",
|
||||
" <td>processor</td>\n",
|
||||
" <td>processors.processor_rss_data</td>\n",
|
||||
" <td>0 8,20 * * *</td>\n",
|
||||
" <td>Asia/Shanghai</td>\n",
|
||||
" <td>2025-10-28 20:00:00</td>\n",
|
||||
" <td>2025-10-28 13:34:49</td>\n",
|
||||
" <td>success</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2025-10-22 16:06:42</td>\n",
|
||||
" <td>2025-10-28 13:34:50</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>RSS新闻订阅</td>\n",
|
||||
" <td>collector</td>\n",
|
||||
" <td>collectors.rss_subscriptions.NewsAPIClient</td>\n",
|
||||
" <td>5 0 * * *</td>\n",
|
||||
" <td>*/5 * * * *</td>\n",
|
||||
" <td>Asia/Shanghai</td>\n",
|
||||
" <td>2025-10-18 00:05:00</td>\n",
|
||||
" <td>2025-10-17 00:05:07</td>\n",
|
||||
" <td>2025-10-28 13:40:00</td>\n",
|
||||
" <td>2025-10-28 13:35:09</td>\n",
|
||||
" <td>success</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>495</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2025-10-16 15:47:34</td>\n",
|
||||
" <td>2025-10-17 00:05:08</td>\n",
|
||||
" <td>2025-10-28 13:35:09</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" 任务ID 任务名称 任务类型 模块路径 \\\n",
|
||||
"0 1 RSS新闻订阅 collector collectors.rss_subscriptions.NewsAPIClient \n",
|
||||
" 任务ID 任务名称 任务类型 模块路径 \\\n",
|
||||
"0 2 RSS基于规则数据处理 processor processors.processor_rss_data \n",
|
||||
"1 1 RSS新闻订阅 collector collectors.rss_subscriptions.NewsAPIClient \n",
|
||||
"\n",
|
||||
" Cron表达式 时区 下次运行时间 最后运行时间 \\\n",
|
||||
"0 5 0 * * * Asia/Shanghai 2025-10-18 00:05:00 2025-10-17 00:05:07 \n",
|
||||
" Cron表达式 时区 下次运行时间 最后运行时间 \\\n",
|
||||
"0 0 8,20 * * * Asia/Shanghai 2025-10-28 20:00:00 2025-10-28 13:34:49 \n",
|
||||
"1 */5 * * * * Asia/Shanghai 2025-10-28 13:40:00 2025-10-28 13:35:09 \n",
|
||||
"\n",
|
||||
" 运行状态 运行次数 是否活跃 is_running created_at updated_at \n",
|
||||
"0 success 4 1 0 2025-10-16 15:47:34 2025-10-17 00:05:08 "
|
||||
"0 success 10 1 0 2025-10-22 16:06:42 2025-10-28 13:34:50 \n",
|
||||
"1 success 495 1 0 2025-10-16 15:47:34 2025-10-28 13:35:09 "
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
@@ -433,70 +469,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "eab90de72c35429e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-17T05:43:26.113877Z",
|
||||
"start_time": "2025-10-17T05:43:26.071398Z"
|
||||
"end_time": "2025-10-29T02:26:12.873536Z",
|
||||
"start_time": "2025-10-29T02:26:12.648420Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32m2025-10-17 13:43:26\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"### 任务详情\n",
|
||||
"**任务ID**: 1\n",
|
||||
"**任务名称**: RSS新闻订阅\n",
|
||||
"**任务类型**: collector\n",
|
||||
"**模块路径**: collectors.rss_subscriptions.NewsAPIClient\n",
|
||||
"**Cron表达式**: 5 0 * * *\n",
|
||||
"**时区**: Asia/Shanghai\n",
|
||||
"**最后运行时间**: 2025-10-17 00:05:07\n",
|
||||
"**下次运行时间**: 2025-10-18 00:05:00\n",
|
||||
"**运行状态**: success\n",
|
||||
"**是否活跃**: 是\n",
|
||||
"**运行次数**: 4\n",
|
||||
"**创建时间**: 2025-10-16 15:47:34"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'task_id': 1,\n",
|
||||
" 'task_name': 'RSS新闻订阅',\n",
|
||||
" 'task_type': 'collector',\n",
|
||||
" 'module_path': 'collectors.rss_subscriptions.NewsAPIClient',\n",
|
||||
" 'cron_expression': '5 0 * * *',\n",
|
||||
" 'time_zone': 'Asia/Shanghai',\n",
|
||||
" 'next_run_time': Timestamp('2025-10-18 00:05:00'),\n",
|
||||
" 'last_run_time': Timestamp('2025-10-17 00:05:07'),\n",
|
||||
" 'last_run_status': 'success',\n",
|
||||
" 'run_count': 4,\n",
|
||||
" 'is_active': 1,\n",
|
||||
" 'is_running': 0,\n",
|
||||
" 'created_at': Timestamp('2025-10-16 15:47:34'),\n",
|
||||
" 'updated_at': Timestamp('2025-10-17 00:05:08')}"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 查看指定任务的详情\n",
|
||||
"def show_task_details(task_id):\n",
|
||||
@@ -524,7 +503,53 @@
|
||||
"\n",
|
||||
"# 执行:查看任务ID为1的详情(替换为实际ID)\n",
|
||||
"show_task_details(1)"
|
||||
]
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001B[32m2025-10-29 10:26:12\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
],
|
||||
"text/markdown": "### 任务详情\n**任务ID**: 1\n**任务名称**: RSS新闻订阅\n**任务类型**: collector\n**模块路径**: processors.processor_rss_data.RSSDataProcessor\n**Cron表达式**: */5 * * * *\n**时区**: Asia/Shanghai\n**最后运行时间**: 2025-10-28 13:35:09\n**下次运行时间**: 2025-10-29 10:25:00\n**运行状态**: success\n**是否活跃**: 是\n**运行次数**: 496\n**创建时间**: 2025-10-16 15:47:34"
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data",
|
||||
"jetTransient": {
|
||||
"display_id": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'task_id': 1,\n",
|
||||
" 'task_name': 'RSS新闻订阅',\n",
|
||||
" 'task_type': 'collector',\n",
|
||||
" 'module_path': 'processors.processor_rss_data.RSSDataProcessor',\n",
|
||||
" 'cron_expression': '*/5 * * * *',\n",
|
||||
" 'time_zone': 'Asia/Shanghai',\n",
|
||||
" 'next_run_time': Timestamp('2025-10-29 10:25:00'),\n",
|
||||
" 'last_run_time': Timestamp('2025-10-28 13:35:09'),\n",
|
||||
" 'last_run_status': 'success',\n",
|
||||
" 'run_count': 496,\n",
|
||||
" 'is_active': 1,\n",
|
||||
" 'is_running': 0,\n",
|
||||
" 'created_at': Timestamp('2025-10-16 15:47:34'),\n",
|
||||
" 'updated_at': Timestamp('2025-10-29 10:24:49')}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 10
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -536,24 +561,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 4,
|
||||
"id": "2b2d723bb8e2784f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"d:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\requests\\__init__.py:86: RequestsDependencyWarning: Unable to find acceptable character detection dependency (chardet or charset_normalizer).\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32m2025-10-16 15:47:34\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n",
|
||||
"\u001b[32m2025-10-16 15:47:34\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m新任务添加成功\u001b[0m\n"
|
||||
"\u001B[32m2025-10-29 09:56:52\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n",
|
||||
"\u001B[32m2025-10-29 09:56:52\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mtask_scheduler\u001B[0m - \u001B[1m新任务添加成功\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -571,7 +588,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"新任务ID: 0,任务名称: RSS新闻订阅"
|
||||
"新任务ID: 0,任务名称: AI处理RSS新闻"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
@@ -586,7 +603,7 @@
|
||||
"np.int64(0)"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -611,9 +628,9 @@
|
||||
"\n",
|
||||
"# 执行:添加一个新闻采集任务\n",
|
||||
"add_new_task(\n",
|
||||
" name=\"RSS新闻订阅\",\n",
|
||||
" task_type=\"collector\",\n",
|
||||
" module_path=\"collectors.rss_subscriptions\",\n",
|
||||
" name=\"AI处理RSS新闻\",\n",
|
||||
" task_type=\"processor\",\n",
|
||||
" module_path=\"processors.ai_processors.ai_processor_rss_data.RSSDataAIProcessor\",\n",
|
||||
" cron_expression=\"5 0 * * *\", # 每5分钟执行1次\n",
|
||||
" timezone=\"Asia/Shanghai\"\n",
|
||||
")"
|
||||
@@ -629,69 +646,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c892fd8ad2f0dd9d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-17T05:44:19.046308Z",
|
||||
"start_time": "2025-10-17T05:44:18.980345Z"
|
||||
"end_time": "2025-10-29T02:29:56.088085Z",
|
||||
"start_time": "2025-10-29T02:29:55.754298Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"### 任务ID 1 更新成功"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32m2025-10-17 13:44:19\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"### 任务详情\n",
|
||||
"**任务ID**: 1\n",
|
||||
"**任务名称**: RSS新闻订阅\n",
|
||||
"**任务类型**: collector\n",
|
||||
"**模块路径**: collectors.rss_subscriptions.NewsAPIClient\n",
|
||||
"**Cron表达式**: 5 * * * *\n",
|
||||
"**时区**: Asia/Shanghai\n",
|
||||
"**最后运行时间**: 2025-10-17 00:05:07\n",
|
||||
"**下次运行时间**: 2025-10-18 00:05:00\n",
|
||||
"**运行状态**: success\n",
|
||||
"**是否活跃**: 是\n",
|
||||
"**运行次数**: 4\n",
|
||||
"**创建时间**: 2025-10-16 15:47:34"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 更新任务属性\n",
|
||||
"def update_task(task_id, **kwargs):\n",
|
||||
@@ -720,11 +681,57 @@
|
||||
" return success\n",
|
||||
"\n",
|
||||
"# 执行:更新任务(示例:修改任务1的Cron表达式为每天10点)\n",
|
||||
"update_task(1, cron = \"5 * * * *\")\n",
|
||||
"update_task(2, module = \"processors.processor_rss_data\")\n",
|
||||
"\n",
|
||||
"# 执行:同时更新多个属性(名称和Cron表达式)\n",
|
||||
"# update_task(1, name=\"每日早间新闻采集\", cron=\"0 8 * * *\")"
|
||||
]
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
],
|
||||
"text/markdown": "### 任务ID 2 更新成功"
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data",
|
||||
"jetTransient": {
|
||||
"display_id": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001B[32m2025-10-29 10:29:56\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
],
|
||||
"text/markdown": "### 任务详情\n**任务ID**: 2\n**任务名称**: RSS基于规则数据处理\n**任务类型**: processor\n**模块路径**: processors.processor_rss_data\n**Cron表达式**: 0 8,20 * * *\n**时区**: Asia/Shanghai\n**最后运行时间**: 2025-10-28 13:34:49\n**下次运行时间**: 2025-10-28 20:00:00\n**运行状态**: success\n**是否活跃**: 是\n**运行次数**: 10\n**创建时间**: 2025-10-22 16:06:42"
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data",
|
||||
"jetTransient": {
|
||||
"display_id": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"execution_count": 21
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -786,164 +793,22 @@
|
||||
"id": "c554c748169d5ac8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. 手动执行任务(对应命令行 run)"
|
||||
"## 7. 手动执行任务(对应命令行 run)\n",
|
||||
"\n",
|
||||
"自动识别main,即main的上一级"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "94892f4134316f8e",
|
||||
"metadata": {
|
||||
"jupyter": {
|
||||
"is_executing": true
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-10-17T05:44:37.714559Z",
|
||||
"start_time": "2025-10-17T05:44:35.084369Z"
|
||||
"start_time": "2025-10-29T02:30:10.298891Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"### 开始执行任务ID 2"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"---"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n",
|
||||
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1mRSS数据处理器初始化完成\u001b[0m\n",
|
||||
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m开始处理RSS数据...\u001b[0m\n",
|
||||
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载 0 条未处理的RSS数据\u001b[0m\n",
|
||||
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[33m\u001b[1m没有加载到RSS数据\u001b[0m\n",
|
||||
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m任务执行完成,耗时: 0.01秒\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"**任务名称**: RSS基于规则数据处理"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"**任务ID**: 2"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"**执行时长**: 0.02 秒"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"---"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"### 📋 执行输出:"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"处理失败: 没有数据可处理\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"---"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/markdown": [
|
||||
"### ✅ 任务执行成功"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'success': True,\n",
|
||||
" 'task_name': 'RSS基于规则数据处理',\n",
|
||||
" 'task_id': 2,\n",
|
||||
" 'execution_time': 0.023162126541137695,\n",
|
||||
" 'output': '处理失败: 没有数据可处理\\n',\n",
|
||||
" 'error': None}"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# 手动执行任务(异步方式,快速返回)\n",
|
||||
"def run_task_manually(task_id):\n",
|
||||
@@ -989,8 +854,47 @@
|
||||
" return result\n",
|
||||
"\n",
|
||||
"# 执行:手动运行任务ID为2的任务(显示详细执行过程)\n",
|
||||
"run_task_with_details(2)"
|
||||
]
|
||||
"run_task_with_details(3)"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
],
|
||||
"text/markdown": "### 开始执行任务ID 3"
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data",
|
||||
"jetTransient": {
|
||||
"display_id": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<IPython.core.display.Markdown object>"
|
||||
],
|
||||
"text/markdown": "---"
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data",
|
||||
"jetTransient": {
|
||||
"display_id": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001B[32m2025-10-29 10:30:10\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n",
|
||||
"\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1mRSS数据AI处理器初始化完成\u001B[0m\n",
|
||||
"\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1m开始批量处理数据,批次大小: 200, 延迟: 1.5秒\u001B[0m\n",
|
||||
"\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1m成功加载 3 条未处理的数据\u001B[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user