ai提取rss相关数据
This commit is contained in:
+52589
File diff suppressed because it is too large
Load Diff
+261
@@ -68981,3 +68981,264 @@
|
|||||||
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“维护和平,共创未来”纪念研讨会在日本福冈ä' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 1, 'type': 'du...
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“维护和平,共创未来”纪念研讨会在日本福冈ä' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 1, 'type': 'du...
|
||||||
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“维护和平,共创未来”纪念研讨会在日本福冈ä' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '“维护和平...
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“维护和平,共创未来”纪念研讨会在日本福冈ä' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '“维护和平...
|
||||||
|
|
||||||
|
2025-10-23 17:25:10.628 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '日媒:日本新任首相高市早苗定于24日发表施政æ¼' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '日媒:日本新任首相高市早苗定于24日发表施政æ¼' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '日媒:...
|
||||||
|
|
||||||
|
2025-10-23 17:50:03.236 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '换罐15分钟、续航4000公里 2025绿色能源发展大会å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '换罐15分钟、续航4000公里 2025绿色能源发展大会å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题'...
|
||||||
|
|
||||||
|
2025-10-23 17:55:03.685 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '国家金融监管总局肖远企:AI在金融领域作用仍æ\x98' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '国家金融监管总局肖远企:AI在金融领域作用仍æ\x98' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '...
|
||||||
|
|
||||||
|
2025-10-23 18:15:05.663 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '受权发布丨中国共产党中央军事委员会副主席张å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 1, 'type': 'du...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '受权发布丨中国共产党中央军事委员会副主席张å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '受权发布丨...
|
||||||
|
|
||||||
|
2025-10-23 18:20:05.986 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '十四届全国人大常委会第十八次会议相关法律案ç' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '十四届全国人大常委会第十八次会议相关法律案ç' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '十四届全国...
|
||||||
|
|
||||||
|
2025-10-23 18:35:07.263 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“十四五”山西晋中能源发展全景图:煤更优、ç' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“十四五”山西晋中能源发展全景图:煤更优、ç' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '“十四五”...
|
||||||
|
|
||||||
|
2025-10-23 19:05:10.109 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '2025豫台经贸洽谈会签约超293亿元-2025-10-23 10:58:28' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '2025豫台经贸洽谈会签约超293亿元-2025-10-23 10:58:28' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record'...
|
||||||
|
|
||||||
|
2025-10-23 19:10:10.664 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 2, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '上海首发“进博会溢出联动政策包”-2025-10-23 11:0' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 2, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '上海首发“进博会溢出联动政策包”-2025-10-23 11:0' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标...
|
||||||
|
|
||||||
|
2025-10-23 19:20:01.616 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 7, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '突发:以军空袭黎巴嫩东部 称打击真主党导弹设' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 8, 'type': 'du...
|
||||||
|
→ detailed_failed_records: [{'index': 7, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '突发:以军空袭黎巴嫩东部 称打击真主党导弹设' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '突发:以军...
|
||||||
|
|
||||||
|
2025-10-23 19:35:03.144 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 2, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '东西问丨意大利汉学家里卡多·波佐:如何通过æ\x96' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 3, 'type':...
|
||||||
|
→ detailed_failed_records: [{'index': 2, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '东西问丨意大利汉学家里卡多·波佐:如何通过æ\x96' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '东...
|
||||||
|
|
||||||
|
2025-10-23 19:45:03.931 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '第24届汕头·澄海国际玩具礼品博览会举行-2025-10-' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 3, 'typ...
|
||||||
|
→ detailed_failed_records: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '第24届汕头·澄海国际玩具礼品博览会举行-2025-10-' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题':...
|
||||||
|
|
||||||
|
2025-10-23 19:50:04.321 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '第八届进博会展品锡兰红茶抵沪-2025-10-23 11:43:57' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '第八届进博会展品锡兰红茶抵沪-2025-10-23 11:43:57' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文...
|
||||||
|
|
||||||
|
2025-10-23 20:00:05.324 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '国际大都市科技创新能力研究报告在沪发布-2025-1' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 4, 'type':...
|
||||||
|
→ detailed_failed_records: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '国际大都市科技创新能力研究报告在沪发布-2025-1' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '国...
|
||||||
|
|
||||||
|
2025-10-23 20:15:06.631 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 2, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '科技企业领军 中国企业加速“出海”中东地区-20' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 2, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '科技企业领军 中国企业加速“出海”中东地区-20' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '科技企...
|
||||||
|
|
||||||
|
2025-10-23 20:25:07.542 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '中新天津生态城发布国际市场准入方案 助力国内' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '中新天津生态城发布国际市场准入方案 助力国内' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '中新天津生...
|
||||||
|
|
||||||
|
2025-10-23 20:35:08.462 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“两岸关系新形势”学术研讨会暨清华两岸论坛å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“两岸关系新形势”学术研讨会暨清华两岸论坛å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '“两岸关系...
|
||||||
|
|
||||||
|
2025-10-23 20:50:09.992 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '一图速览四中全会公报-2025-10-23 12:43:45' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '一图速览四中全会公报-2025-10-23 12:43:45' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题'...
|
||||||
|
|
||||||
|
2025-10-23 21:05:01.330 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 3, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '扎哈罗娃:欧盟制裁拓展空间已近极限,俄保留å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 3, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '扎哈罗娃:欧盟制裁拓展空间已近极限,俄保留å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '扎哈罗娃:...
|
||||||
|
|
||||||
|
2025-10-23 21:10:01.739 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '风险谁来担?比利时与卢森堡首相就对乌贷款方æ' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '风险谁来担?比利时与卢森堡首相就对乌贷款方æ' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '风险谁来担...
|
||||||
|
|
||||||
|
2025-10-23 21:15:02.214 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '党的二十届四中全会公报,这些表述值得关注-202' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 2, 'type': '...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '党的二十届四中全会公报,这些表述值得关注-202' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '党的二...
|
||||||
|
|
||||||
|
2025-10-23 21:25:03.182 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '经纬线·绿色回响-2025-10-23 13:15:25' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '经纬线·绿色回响-2025-10-23 13:15:25' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': ...
|
||||||
|
|
||||||
|
2025-10-23 21:30:03.623 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 3, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '(经济观察)北京风能展明星产品反映“大风车â' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 5, 'type': 'du...
|
||||||
|
→ detailed_failed_records: [{'index': 3, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '(经济观察)北京风能展明星产品反映“大风车â' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '(经济观察...
|
||||||
|
|
||||||
|
2025-10-23 21:45:04.962 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '日本外相最新涉华表态:致力于推动两国间战略ä' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '日本外相最新涉华表态:致力于推动两国间战略ä' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '日本外相最...
|
||||||
|
|
||||||
|
2025-10-23 21:50:05.510 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 4, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '《2025中国数字文创城市指数》发布 京沪深蓉杭å±' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 5, 'type':...
|
||||||
|
→ detailed_failed_records: [{'index': 4, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '《2025中国数字文创城市指数》发布 京沪深蓉杭å±' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '《...
|
||||||
|
|
||||||
|
2025-10-23 21:55:05.949 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 3, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '中国矿产资源报告:铜、铁、磷等矿产资源量大å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 3, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '中国矿产资源报告:铜、铁、磷等矿产资源量大å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '中国矿产资...
|
||||||
|
|
||||||
|
2025-10-23 22:00:06.446 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 3, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '新石器无人车获6亿美元融资 无人配送车需求有æ\x9c' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 3, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '新石器无人车获6亿美元融资 无人配送车需求有æ\x9c' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '...
|
||||||
|
|
||||||
|
2025-10-23 22:10:07.479 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '夯实基础全面发力 四中全会为中国未来五年明调' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '夯实基础全面发力 四中全会为中国未来五年明调' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '夯实基础全...
|
||||||
|
|
||||||
|
2025-10-23 22:25:09.055 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '中国又一个五年目标-2025-10-23 14:19:35' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '中国又一个五年目标-2025-10-23 14:19:35' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题':...
|
||||||
|
|
||||||
|
2025-10-23 23:30:05.142 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '两岸学人对谈台湾光复:以史为鉴,共推民族复å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '两岸学人对谈台湾光复:以史为鉴,共推民族复å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '两岸学人对...
|
||||||
|
|
||||||
|
2025-10-24 00:30:10.762 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '中宣部组织召开学习宣传贯彻党的二十届四中全ä' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '中宣部组织召开学习宣传贯彻党的二十届四中全ä' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '中宣部组织...
|
||||||
|
|
||||||
|
2025-10-24 05:45:10.408 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '晚播小麦如何保产量?霜降抢秋该怎么抢?专家å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '晚播小麦如何保产量?霜降抢秋该怎么抢?专家å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '晚播小麦如...
|
||||||
|
|
||||||
|
2025-10-24 05:55:01.583 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '欧盟领导人会议闭幕 聚焦乌克兰局势与欧洲防务' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '欧盟领导人会议闭幕 聚焦乌克兰局势与欧洲防务' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '欧盟领导人...
|
||||||
|
|
||||||
|
2025-10-24 06:05:02.443 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '美国务卿与以总理会面 强调重视巩固加沙停火协' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '美国务卿与以总理会面 强调重视巩固加沙停火协' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '美国务卿与...
|
||||||
|
|
||||||
|
2025-10-24 07:35:10.910 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '中方:当前的加沙停火应当成为全面持久停火的å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 2, 'type': 'du...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '中方:当前的加沙停火应当成为全面持久停火的å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '中方:当前...
|
||||||
|
|
||||||
|
2025-10-24 08:15:04.360 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '美国战略轰炸机抵近委内瑞拉-2025-10-24 00:07:02' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '美国战略轰炸机抵近委内瑞拉-2025-10-24 00:07:02' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章...
|
||||||
|
|
||||||
|
2025-10-24 08:30:05.718 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '解放军报社论:坚定捍卫人民军队政治本色-2025-1' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '解放军报社论:坚定捍卫人民军队政治本色-2025-1' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '解...
|
||||||
|
|
||||||
|
2025-10-24 09:15:09.624 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '柬埔寨重申打击网赌电诈等跨国犯罪决心-2025-10-2' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 2, 'type...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '柬埔寨重申打击网赌电诈等跨国犯罪决心-2025-10-2' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': ...
|
||||||
|
|
||||||
|
2025-10-24 09:35:01.368 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '首艘、首颗、首飞!中国制造硬核实力再出圈 连' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 1, 'type': 'du...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '首艘、首颗、首飞!中国制造硬核实力再出圈 连' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '首艘、首颗...
|
||||||
|
|
||||||
|
2025-10-24 09:45:02.278 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '杨振宁遗体告别仪式在京举行,八宝山革命公墓å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '杨振宁遗体告别仪式在京举行,八宝山革命公墓å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '杨振宁遗体...
|
||||||
|
|
||||||
|
2025-10-24 09:55:03.350 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '长城 Hi4 技术体系斩获“科学技术奖特等奖” 喜' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 1, 'type':...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '长城 Hi4 技术体系斩获“科学技术奖特等奖” 喜' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '长...
|
||||||
|
|
||||||
|
2025-10-24 10:15:05.069 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '时政新闻眼丨未来五年怎么干?党的二十届四中å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 1, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '时政新闻眼丨未来五年怎么干?党的二十届四中å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '时政新闻眼...
|
||||||
|
|
||||||
|
2025-10-24 10:25:06.264 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '新华图讯|中共中央举行新闻发布会 介绍和解读' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 1, 'type': 'du...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '新华图讯|中共中央举行新闻发布会 介绍和解读' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '新华图讯|...
|
||||||
|
|
||||||
|
2025-10-24 10:40:07.596 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '山城直播盛典启幕 “直播+”绘就重庆消费新图æ\x99' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 3, 'type'...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '山城直播盛典启幕 “直播+”绘就重庆消费新图æ\x99' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '...
|
||||||
|
|
||||||
|
2025-10-24 10:45:08.108 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“2025生态保护与绿色发展论坛·广州”在暨南大å' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 1, 'type': ...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '“2025生态保护与绿色发展论坛·广州”在暨南大å' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '“2...
|
||||||
|
|
||||||
|
2025-10-24 10:50:08.775 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 3, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '一滴水 如何汇入幸福河湖?——农工党中央主办' for key 'collector_rss_subscriptions.idx_title_pubtime'"}]
|
||||||
|
→ detailed_failed_records: [{'index': 3, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '一滴水 如何汇入幸福河湖?——农工党中央主办' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '一滴水 如...
|
||||||
|
|
||||||
|
2025-10-27 10:49:23.458 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 6, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '前三季度企业创新力度加大 新质生产力加快培育' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 7, 'type': 'du...
|
||||||
|
→ detailed_failed_records: [{'index': 6, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '前三季度企业创新力度加大 新质生产力加快培育' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标题': '前三季度企...
|
||||||
|
|
||||||
|
2025-10-27 10:55:06.148 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '韩国3500亿美元对美投资承诺陷入僵局-2025-10-27 02:' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 2,...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '韩国3500亿美元对美投资承诺陷入僵局-2025-10-27 02:' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文...
|
||||||
|
|
||||||
|
2025-10-28 13:34:51.417 | ERROR | mysql_agent:305 - 表 collector_rss_subscriptions 插入失败记录详情
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ failed_records_summary: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '安倍晋三枪击案嫌疑人认罪-2025-10-28 05:23:53' for key 'collector_rss_subscriptions.idx_title_pubtime'"}, {'index': 2, '...
|
||||||
|
→ detailed_failed_records: [{'index': 0, 'type': 'duplicate', 'error_code': 1062, 'error_message': "Duplicate entry '安倍晋三枪击案嫌疑人认罪-2025-10-28 05:23:53' for key 'collector_rss_subscriptions.idx_title_pubtime'", 'record': {'文章标...
|
||||||
|
|
||||||
|
2025-10-28 13:39:14.477 | ERROR | mysql_agent:90 - 连接失败
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ error: '(1049, "Unknown database \'intelligence_system\'")'
|
||||||
|
→ error_type: 'OperationalError'
|
||||||
|
→ host: '123.60.167.249'
|
||||||
|
→ port: 3306
|
||||||
|
→ database: 'intelligence_system'
|
||||||
|
→ exc_info: True
|
||||||
|
|
||||||
|
2025-10-28 13:39:14.477 | ERROR | mysql_agent:139 - SQL查询失败
|
||||||
|
→ module: 'MySQLAgent(Windows)'
|
||||||
|
→ sql: '\n SELECT id, 文章标题, 文章摘要, 发布时间, 来源URL, 文章链接\n FROM collector_rss_subscriptions\n WHERE 是否已处理 = 0\n ORDER BY 发布时间 DESC\n LIMIT %s\n '
|
||||||
|
→ params: (5000,)
|
||||||
|
→ error: '(1049, "Unknown database \'intelligence_system\'")'
|
||||||
|
→ error_type: 'OperationalError'
|
||||||
|
→ exc_info: True
|
||||||
|
|
||||||
|
2025-10-28 13:39:14.477 | ERROR | processor_rss_data:111 - 加载RSS数据失败: (1049, "Unknown database 'intelligence_system'")
|
||||||
|
→ module: 'RSSDataProcessor'
|
||||||
|
→ exc_info: True
|
||||||
|
|
||||||
|
|||||||
@@ -11,17 +11,32 @@ log = CrossPlatformLog.get_logger("Main")
|
|||||||
|
|
||||||
|
|
||||||
class IntelligenceSystem:
|
class IntelligenceSystem:
|
||||||
def __init__(self, db_config=None):
|
def __init__(self, db_config=None, run_all_on_startup=False):
|
||||||
"""初始化系统(仅作为容器,不包含业务逻辑)"""
|
"""初始化系统(仅作为容器,不包含业务逻辑)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_config: 数据库配置
|
||||||
|
run_all_on_startup: 启动时是否立即执行所有到期任务(默认False)
|
||||||
|
"""
|
||||||
self.scheduler = TaskScheduler(Config.MYSQL_CONFIG, max_workers=5)
|
self.scheduler = TaskScheduler(Config.MYSQL_CONFIG, max_workers=5)
|
||||||
self._running = False
|
self._running = False
|
||||||
log.info("情报系统已初始化(Cron模式)")
|
self.run_all_on_startup = run_all_on_startup
|
||||||
|
log.info(f"情报系统已初始化(Cron模式),启动时执行任务: {run_all_on_startup}")
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
"""启动系统主入口"""
|
"""启动系统主入口"""
|
||||||
self._running = True
|
self._running = True
|
||||||
self._setup_signal_handlers()
|
self._setup_signal_handlers()
|
||||||
log.info("系统启动 - 运行在Cron调度模式")
|
log.info("系统启动 - 运行在Cron调度模式")
|
||||||
|
|
||||||
|
# 启动时执行所有到期任务(如果开关开启)
|
||||||
|
if self.run_all_on_startup:
|
||||||
|
print(f"\n{'='*60}")
|
||||||
|
print("🚀 启动时执行所有到期任务...")
|
||||||
|
print(f"{'='*60}\n")
|
||||||
|
log.info("启动时执行所有到期任务")
|
||||||
|
result = self.scheduler.check_and_run_tasks(print_empty_status=True)
|
||||||
|
print(f"\n启动任务执行完成: 总数={result['总任务数']}, 成功={result['成功']}, 失败={result['失败']}\n")
|
||||||
|
|
||||||
# 时间追踪变量
|
# 时间追踪变量
|
||||||
last_status_print_time = time.time() # 上次打印状态的时间
|
last_status_print_time = time.time() # 上次打印状态的时间
|
||||||
@@ -110,7 +125,9 @@ class IntelligenceSystem:
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
try:
|
try:
|
||||||
# 启动系统 - 仅作为入口,不包含调度逻辑
|
# 启动系统 - 仅作为入口,不包含调度逻辑
|
||||||
system = IntelligenceSystem()
|
# run_all_on_startup=True: 启动时立即执行所有到期任务
|
||||||
|
# run_all_on_startup=False: 启动时不执行任务,等待下次调度周期
|
||||||
|
system = IntelligenceSystem(run_all_on_startup=False)
|
||||||
system.start()
|
system.start()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.critical("情报系统启动失败", exc_info=True)
|
log.critical("情报系统启动失败", exc_info=True)
|
||||||
|
|||||||
Binary file not shown.
@@ -0,0 +1,453 @@
|
|||||||
|
# RSS数据AI处理模块
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
from datetime import datetime
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# 添加项目根目录到路径
|
||||||
|
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
parent_dir = os.path.dirname(os.path.dirname(current_dir))
|
||||||
|
if parent_dir not in sys.path:
|
||||||
|
sys.path.insert(0, parent_dir)
|
||||||
|
|
||||||
|
from utils.mysql_agent import MySQLAgent
|
||||||
|
from utils.logger import log
|
||||||
|
from config import Config
|
||||||
|
|
||||||
|
|
||||||
|
class RSSDataAIProcessor:
    """Batch AI analysis of RSS records.

    Responsibilities:
    - load rows from the source table that have not been AI-processed yet
    - ask the configured chat model whether each article is relevant
    - store the analysis in the result table
    - flag the source rows as processed

    Table names, batch size and delay come from ``Config.AI_PROCESSOR_CONFIG``;
    the API key and model come from ``Config.BAIDU_AI_CONFIG``.
    """

    def __init__(self):
        """Create the DB agent and, when an API key is configured, the AI client."""
        self.log = log.bind(module="RSSDataAIProcessor")
        self.db_agent = MySQLAgent(Config.MYSQL_CONFIG)

        # Settings read from Config
        self.source_table = Config.AI_PROCESSOR_CONFIG['source_table']
        self.ai_table = Config.AI_PROCESSOR_CONFIG['result_table']
        self.default_batch_size = Config.AI_PROCESSOR_CONFIG['batch_size']
        self.default_delay = Config.AI_PROCESSOR_CONFIG['delay']

        # Baidu Qianfan is reached through the OpenAI-compatible client
        self.api_key = Config.BAIDU_AI_CONFIG.get('api_key')
        if self.api_key:
            self.ai_client = OpenAI(
                base_url='https://qianfan.baidubce.com/v2',
                api_key=self.api_key
            )
            self.model = Config.BAIDU_AI_CONFIG.get('model', 'ernie-x1-turbo-32k')
            self.log.info("RSS数据AI处理器初始化完成")
        else:
            self.ai_client = None
            self.log.warning("百度AI未配置,AI处理功能将不可用")
            self.log.warning("请在config.py中配置 BAIDU_AI_CONFIG['api_key']")

    def is_configured(self) -> bool:
        """Return True when an AI client was created (i.e. an API key is set)."""
        return self.ai_client is not None

    def main(self, batch_size: Optional[int] = 200, delay: Optional[float] = None) -> Dict[str, Any]:
        """Run one complete batch: load, analyse, save, mark as processed.

        Args:
            batch_size: Maximum number of rows to load. A falsy value (None/0)
                falls back to the configured default. Note that the signature
                default is 200, so the configured value is only used when the
                caller passes None explicitly.
            delay: Seconds to wait between records. None falls back to the
                configured default; an explicit 0 disables the wait.

        Returns:
            dict: Statistics with at least ``success``, ``message``,
            ``processed_count`` and ``failed_count``. A completed run also
            carries ``total_count``, ``saved_count``, ``relevant_count`` and
            ``processing_time``.
        """
        batch_size = batch_size or self.default_batch_size
        # `delay or default` would silently replace an explicit 0 with the default.
        if delay is None:
            delay = self.default_delay

        try:
            # 1. Configuration check
            if not self.is_configured():
                error_msg = "百度AI未配置,请在config.py中配置 BAIDU_AI_CONFIG['api_key']"
                self.log.error(error_msg)
                return {
                    'success': False,
                    'message': error_msg,
                    'processed_count': 0,
                    'failed_count': 0
                }

            self.log.info(f"开始批量处理数据,批次大小: {batch_size}, 延迟: {delay}秒")

            # 2. Make sure the schema is in place
            self.ensure_ai_processed_column()
            if not self.db_agent.table_exists(self.ai_table):
                self.create_ai_result_table()

            # 3. Load pending rows
            df = self.load_unprocessed_data(batch_size)
            if df.empty:
                self.log.info("没有需要处理的数据")
                return {
                    'success': True,
                    'message': '没有需要处理的数据',
                    'processed_count': 0,
                    'failed_count': 0
                }

            # 4. Analyse each row. A positional counter is used instead of the
            # DataFrame index label, which is not guaranteed to be 0..n-1.
            results = []
            processed_ids = []
            failed_count = 0
            total = len(df)

            for position, (_, record) in enumerate(df.iterrows(), start=1):
                try:
                    self.log.debug(f"处理记录 {record['id']} ({position}/{total})")
                    result = self.process_single_record(record.to_dict())
                    if result:
                        results.append(result)
                        processed_ids.append(record['id'])
                    else:
                        failed_count += 1
                except Exception as e:
                    self.log.error(f"处理记录 {record['id']} 异常: {str(e)}", exc_info=True)
                    failed_count += 1

                # Throttle between records (not after the last one) to avoid
                # API rate limiting; applied after failures as well.
                if delay > 0 and position < total:
                    time.sleep(delay)

            # 5. Save results. When the insert raises, the rows must NOT be
            # flagged as processed, otherwise their analysis is lost for good.
            saved_count = 0
            save_ok = True
            if results:
                try:
                    saved_count = self._insert_results(results)
                except Exception as e:
                    self.log.error(f"保存AI处理结果失败: {str(e)}", exc_info=True)
                    save_ok = False
                    failed_count += len(processed_ids)
                    processed_ids = []

            # 6. Flag source rows
            if processed_ids:
                self.mark_as_processed(processed_ids)

            # 7. Statistics
            stats = {
                'success': save_ok,
                'message': 'AI处理完成' if save_ok else '保存AI处理结果失败',
                'total_count': total,
                'processed_count': len(processed_ids),
                'saved_count': saved_count,
                'failed_count': failed_count,
                'relevant_count': sum(1 for r in results if r.get('是否相关')),
                'processing_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }

            self.log.info("批量处理完成", **stats)
            return stats

        except Exception as e:
            error_msg = f"批量处理失败: {str(e)}"
            self.log.error(error_msg, exc_info=True)
            return {
                'success': False,
                'message': error_msg,
                'processed_count': 0,
                'failed_count': 0
            }

    def ensure_ai_processed_column(self):
        """Add the '是否ai处理' flag column to the source table if it is missing.

        Raises:
            Exception: Any error from the check or the ALTER is logged and re-raised.
        """
        try:
            check_sql = """
                SELECT COUNT(*) as count
                FROM information_schema.COLUMNS
                WHERE TABLE_SCHEMA = %s
                AND TABLE_NAME = %s
                AND COLUMN_NAME = '是否ai处理'
            """

            result = self.db_agent.execute_sql(
                check_sql,
                params=(Config.MYSQL_CONFIG['database'], self.source_table),
                fetch=True
            )

            # NOTE(review): the row type returned by execute_sql is not visible
            # here; accept both tuple rows and dict rows.
            first_row = result[0]
            if isinstance(first_row, dict):
                column_count = next(iter(first_row.values()))
            else:
                column_count = first_row[0]

            if column_count == 0:
                alter_sql = f"""
                    ALTER TABLE {self.source_table}
                    ADD COLUMN 是否ai处理 TINYINT(1) DEFAULT 0 COMMENT 'AI处理标记:0-未处理,1-已处理'
                """
                self.db_agent.execute_sql(alter_sql)
                self.log.info(f"成功为表 {self.source_table} 添加 '是否ai处理' 字段")
            else:
                self.log.debug(f"表 {self.source_table} 已存在 '是否ai处理' 字段")

        except Exception as e:
            self.log.error(f"检查/添加字段失败: {str(e)}", exc_info=True)
            raise

    def create_ai_result_table(self):
        """Create the AI result table (no-op if it already exists).

        Raises:
            Exception: Any error from the CREATE statement is logged and re-raised.
        """
        create_sql = f"""
            CREATE TABLE IF NOT EXISTS {self.ai_table} (
                id INT AUTO_INCREMENT PRIMARY KEY COMMENT '主键ID',
                source_id INT NOT NULL COMMENT '来源数据ID(processed_rss_data.id)',
                文章标题 TEXT COMMENT '文章标题',
                文章摘要 TEXT COMMENT '文章摘要',
                发布时间 DATETIME COMMENT '发布时间',
                来源URL VARCHAR(1024) COMMENT '来源URL',
                文章链接 VARCHAR(1024) COMMENT '文章链接',
                是否相关 BOOLEAN COMMENT 'AI判断是否与汽车后市场相关',
                相关度评分 INT COMMENT '相关度评分(0-100)',
                标签 TEXT COMMENT 'AI生成的标签(JSON数组)',
                分类 VARCHAR(100) COMMENT 'AI判断的主要分类',
                分析说明 TEXT COMMENT 'AI分析说明',
                处理时间 DATETIME COMMENT 'AI处理时间',
                创建时间 TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
                更新时间 TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录更新时间',
                INDEX idx_source_id (source_id),
                INDEX idx_是否相关 (是否相关),
                INDEX idx_分类 (分类),
                INDEX idx_处理时间 (处理时间)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='RSS数据AI分析结果表'
        """

        try:
            self.db_agent.execute_sql(create_sql)
            self.log.info(f"成功创建AI结果表: {self.ai_table}")
        except Exception as e:
            self.log.error(f"创建AI结果表失败: {str(e)}", exc_info=True)
            raise

    def load_unprocessed_data(self, limit: int = 100) -> pd.DataFrame:
        """Load rows whose '是否ai处理' flag is 0 or NULL, newest first.

        Args:
            limit: Maximum number of rows to return.

        Returns:
            DataFrame of pending rows. On any error the failure is logged and
            an empty DataFrame is returned, so the caller sees "nothing to do".
        """
        try:
            sql = f"""
                SELECT id, 文章标题, 文章摘要, 发布时间, 来源URL, 文章链接
                FROM {self.source_table}
                WHERE 是否ai处理 = 0 OR 是否ai处理 IS NULL
                ORDER BY 创建时间 DESC
                LIMIT %s
            """

            df = self.db_agent.query_to_df(sql, params=(limit,), is_print=False)
            self.log.info(f"成功加载 {len(df)} 条未处理的数据")
            return df

        except Exception as e:
            self.log.error(f"加载未处理数据失败: {str(e)}", exc_info=True)
            return pd.DataFrame()

    @staticmethod
    def _extract_json_text(raw_content: str) -> str:
        """Strip an optional markdown code fence from the model reply."""
        if '```json' in raw_content:
            return raw_content.split('```json')[1].split('```')[0].strip()
        if '```' in raw_content:
            return raw_content.split('```')[1].split('```')[0].strip()
        return raw_content.strip()

    def analyze_news(self, title: str, summary: str) -> Dict[str, Any]:
        """Ask the model whether an article is relevant and parse its JSON reply.

        Args:
            title: Article title.
            summary: Article summary.

        Returns:
            dict with ``is_relevant``, ``relevance_score``, ``tags``,
            ``category`` and ``analysis``. This method never raises. When the
            API call itself fails, the fallback dict additionally carries
            ``'failed': True`` so callers can retry later instead of storing a
            bogus "not relevant" verdict. An unparseable reply yields the
            fallback dict without that flag.
        """
        # Prompt text is kept exactly as before
        prompt = f"""分析以下新闻是否与汽车后市场相关,返回JSON格式:

标题:{title}
摘要:{summary}

返回格式:
{{
"is_relevant": true/false,
"relevance_score": 0-100,
"tags": ["标签1", "标签2"],
"category": "分类(配件/维修/保养/改装/美容/装饰/二手车/金融/保险/其他)",
"analysis": "简要说明"
}}

注意:只返回JSON格式的结果,不要包含其他说明文字。"""

        fallback = {
            'is_relevant': False,
            'relevance_score': 0,
            'tags': [],
            'category': '其他',
        }

        # Step 1: the remote call. Failures here are typically transient
        # (network, rate limit), so they are flagged for the caller.
        try:
            response = self.ai_client.chat.completions.create(
                model=self.model,
                messages=[{
                    "role": "user",
                    "content": prompt
                }]
            )
            raw_content = response.choices[0].message.content or ''
        except Exception as e:
            self.log.error(f"AI调用异常: {str(e)}", exc_info=True)
            return {**fallback, 'analysis': f"处理异常: {str(e)}", 'failed': True}

        # Step 2: parse the reply (possibly wrapped in a markdown fence)
        try:
            result = json.loads(self._extract_json_text(raw_content))
        except json.JSONDecodeError as e:
            self.log.warning(f"JSON解析失败: {str(e)}, 原始响应: {raw_content[:200]}")
            return {**fallback, 'analysis': f"解析失败: {raw_content[:100]}"}

        if not isinstance(result, dict):
            # Valid JSON but not an object (e.g. a bare list): treat as unparseable
            self.log.warning(f"JSON解析失败: 响应不是JSON对象, 原始响应: {raw_content[:200]}")
            return {**fallback, 'analysis': f"解析失败: {raw_content[:100]}"}

        # Fill in any field the model left out
        return {
            'is_relevant': result.get('is_relevant', False),
            'relevance_score': result.get('relevance_score', 0),
            'tags': result.get('tags', []),
            'category': result.get('category', '其他'),
            'analysis': result.get('analysis', '')
        }

    @staticmethod
    def _clean_text(value: Any) -> str:
        """Return a stripped string, mapping None/NaN to '' instead of 'None'/'nan'."""
        if value is None:
            return ''
        try:
            if pd.isna(value):
                return ''
        except (TypeError, ValueError):
            # Non-scalar values: fall through to plain string conversion
            pass
        return str(value).strip()

    def process_single_record(self, record: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Analyse one source row and build the row for the result table.

        Args:
            record: Source row as a dict; must contain ``id``.

        Returns:
            The result row, or None when the client is not configured, the row
            has neither title nor summary, the AI call failed, or an
            unexpected error occurred. None means "do not mark as processed".
        """
        if not self.is_configured():
            self.log.error("AI客户端未配置,无法处理数据")
            return None

        try:
            # NULL / NaN columns must count as empty, not as the text 'None'
            title = self._clean_text(record.get('文章标题', ''))
            summary = self._clean_text(record.get('文章摘要', ''))

            if not title and not summary:
                self.log.warning(f"记录 {record.get('id')} 标题和摘要均为空,跳过处理")
                return None

            analysis_result = self.analyze_news(title, summary)
            if analysis_result.get('failed'):
                # The API call failed: leave the row unprocessed so that a
                # later run retries it.
                self.log.warning(f"记录 {record.get('id')} AI调用失败,保留为未处理以便重试")
                return None

            return {
                'source_id': record['id'],
                '文章标题': title,
                '文章摘要': summary,
                '发布时间': record.get('发布时间'),
                '来源URL': record.get('来源URL'),
                '文章链接': record.get('文章链接'),
                '是否相关': analysis_result.get('is_relevant', False),
                '相关度评分': analysis_result.get('relevance_score', 0),
                '标签': json.dumps(analysis_result.get('tags', []), ensure_ascii=False),
                '分类': analysis_result.get('category', '其他'),
                '分析说明': analysis_result.get('analysis', ''),
                '处理时间': datetime.now()
            }

        except Exception as e:
            self.log.error(f"处理记录 {record.get('id')} 失败: {str(e)}", exc_info=True)
            return None

    def _insert_results(self, results: List[Dict[str, Any]]) -> int:
        """Insert result rows and return the agent's insert result; raises on failure."""
        df = pd.DataFrame(results)
        inserted = self.db_agent.insert_from_df(
            table_name=self.ai_table,
            df=df,
            ignore_duplicates=True
        )
        self.log.info(f"成功保存 {inserted} 条AI处理结果")
        return inserted

    def save_ai_results(self, results: List[Dict[str, Any]]) -> int:
        """Save result rows to the result table.

        Args:
            results: Rows produced by ``process_single_record``.

        Returns:
            Number of rows saved as reported by the DB agent; 0 for an empty
            list or when the insert raised (the error is logged).
        """
        if not results:
            return 0

        try:
            return self._insert_results(results)
        except Exception as e:
            self.log.error(f"保存AI处理结果失败: {str(e)}", exc_info=True)
            return 0

    def mark_as_processed(self, ids: List[int]) -> bool:
        """Set '是否ai处理' = 1 for the given source row IDs.

        Args:
            ids: Source row IDs.

        Returns:
            True on success (or when ``ids`` is empty), False if the update
            raised (the error is logged).
        """
        if not ids:
            return True

        try:
            id_placeholders = ','.join(['%s'] * len(ids))
            sql = f"""
                UPDATE {self.source_table}
                SET 是否ai处理 = 1
                WHERE id IN ({id_placeholders})
            """

            # IDs taken from a DataFrame may be numpy integers; hand the
            # driver plain ints.
            self.db_agent.execute_sql(sql, params=[int(i) for i in ids])
            self.log.info(f"成功标记 {len(ids)} 条记录为已处理")
            return True

        except Exception as e:
            self.log.error(f"标记记录为已处理失败: {str(e)}", exc_info=True)
            return False
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: run one batch and print a summary.
    banner = "=" * 60
    result = RSSDataAIProcessor().main()

    if not result['success']:
        # Failure report plus a hint on how to provide the API key
        print("\n" + banner)
        print("✗ 处理失败")
        print(banner)
        print(f"错误信息: {result['message']}")
        print("\n提示: 请设置环境变量")
        print("  Windows: $env:BAIDU_API_KEY = 'your_key'")
        print("  Linux/Mac: export BAIDU_API_KEY='your_key'")
        print(banner + "\n")
    else:
        # Success report: one line per statistic, in a fixed order
        print("\n" + banner)
        print("✓ AI处理完成")
        print(banner)
        summary_fields = (
            ("总记录数", 'total_count', 0),
            ("成功处理", 'processed_count', 0),
            ("保存记录", 'saved_count', 0),
            ("失败记录", 'failed_count', 0),
            ("相关记录", 'relevant_count', 0),
            ("处理时间", 'processing_time', ''),
        )
        for label, key, default in summary_fields:
            print(f"{label}: {result.get(key, default)}")
        print(banner + "\n")
|
||||||
+12
-36
@@ -791,7 +791,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 3,
|
||||||
"id": "94892f4134316f8e",
|
"id": "94892f4134316f8e",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
@@ -828,35 +828,12 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"\u001b[32m2025-10-23 16:57:20\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n",
|
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n",
|
||||||
"\u001b[32m2025-10-23 16:57:20\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1mRSS数据处理器初始化完成\u001b[0m\n",
|
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1mRSS数据处理器初始化完成\u001b[0m\n",
|
||||||
"\u001b[32m2025-10-23 16:57:20\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m开始处理RSS数据...\u001b[0m\n",
|
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m开始处理RSS数据...\u001b[0m\n",
|
||||||
"\u001b[32m2025-10-23 16:57:21\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载 8 条未处理的RSS数据\u001b[0m\n",
|
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载 0 条未处理的RSS数据\u001b[0m\n",
|
||||||
"\u001b[32m2025-10-23 16:57:21\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[33m\u001b[1m停用词文件不存在: processors/stopwords.txt,使用默认停用词\u001b[0m\n",
|
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[33m\u001b[1m没有加载到RSS数据\u001b[0m\n",
|
||||||
"\u001b[32m2025-10-23 16:57:21\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[33m\u001b[1m关键词文件不存在: processors/keywords.txt\u001b[0m\n"
|
"\u001b[32m2025-10-23 16:59:03\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m任务执行完成,耗时: 0.01秒\u001b[0m\n"
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Building prefix dict from the default dictionary ...\n",
|
|
||||||
"Loading model from cache C:\\Users\\zy187\\AppData\\Local\\Temp\\jieba.cache\n",
|
|
||||||
"Loading model cost 0.609 seconds.\n",
|
|
||||||
"Prefix dict has been built successfully.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\u001b[32m2025-10-23 16:57:21\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m数据处理完成,共处理 8 条记录\u001b[0m\n",
|
|
||||||
"\u001b[32m2025-10-23 16:57:21\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m过滤出 1 条汽车后市场相关新闻\u001b[0m\n",
|
|
||||||
"\u001b[32m2025-10-23 16:57:21\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m表 processed_rss_data 插入结果汇总\u001b[0m\n",
|
|
||||||
"\u001b[32m2025-10-23 16:57:21\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功保存 1 条处理结果到数据库\u001b[0m\n",
|
|
||||||
"\u001b[32m2025-10-23 16:57:21\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功标记 8 条数据为已处理\u001b[0m\n",
|
|
||||||
"\u001b[32m2025-10-23 16:57:21\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1mRSS数据处理完成\u001b[0m\n",
|
|
||||||
"\u001b[32m2025-10-23 16:57:21\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m任务执行完成,耗时: 1.19秒\u001b[0m\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -886,7 +863,7 @@
|
|||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/markdown": [
|
"text/markdown": [
|
||||||
"**执行时长**: 1.26 秒"
|
"**执行时长**: 0.02 秒"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<IPython.core.display.Markdown object>"
|
"<IPython.core.display.Markdown object>"
|
||||||
@@ -923,8 +900,7 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"RSS数据处理完成!\n",
|
"处理失败: 没有数据可处理\n",
|
||||||
"处理统计: {'total_articles': 8, 'filtered_articles': 1, 'filter_rate': 0.125, 'processing_time': '2025-10-23 16:57:21', 'save_success': True, 'mark_success': True}\n",
|
|
||||||
"\n"
|
"\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -958,12 +934,12 @@
|
|||||||
"{'success': True,\n",
|
"{'success': True,\n",
|
||||||
" 'task_name': 'RSS基于规则数据处理',\n",
|
" 'task_name': 'RSS基于规则数据处理',\n",
|
||||||
" 'task_id': 2,\n",
|
" 'task_id': 2,\n",
|
||||||
" 'execution_time': 1.2610254287719727,\n",
|
" 'execution_time': 0.023162126541137695,\n",
|
||||||
" 'output': \"RSS数据处理完成!\\n处理统计: {'total_articles': 8, 'filtered_articles': 1, 'filter_rate': 0.125, 'processing_time': '2025-10-23 16:57:21', 'save_success': True, 'mark_success': True}\\n\",\n",
|
" 'output': '处理失败: 没有数据可处理\\n',\n",
|
||||||
" 'error': None}"
|
" 'error': None}"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 2,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user