Compare commits

..

3 Commits

Author SHA1 Message Date
panda b0bf0fa9bc 优化连接不上时创建表 2025-11-05 09:50:55 +08:00
panda 4154eb452f 钉钉api 2025-10-30 17:24:28 +08:00
panda c5a5a0a99c 生成日报、周报 2025-10-30 09:54:47 +08:00
18 changed files with 3356 additions and 191 deletions
+459
View File
@@ -0,0 +1,459 @@
"""
报告生成器基类
提供数据源接口、AI处理接口等扩展能力
"""
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
import os
import sys
from loguru import logger
# 添加父目录到路径
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(os.path.dirname(current_dir))
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
from utils.mysql_agent import MySQLAgent
from config import Config
class DataSource(ABC):
    """Abstract data-source interface, kept small so further sources can be plugged in."""

    @abstractmethod
    def fetch_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]:
        """Fetch the records that fall inside a time range.

        Args:
            start_time: Start of the range.
            end_time: End of the range.

        Returns:
            A list of records; each record is expected to carry fields such
            as title, link, summary and publish time.
        """
        ...

    @abstractmethod
    def get_source_name(self) -> str:
        """Return the display name of this data source."""
        ...
class RSSDataSource(DataSource):
    """Data source that reads collected RSS articles from a MySQL table."""

    def __init__(self, db_agent: MySQLAgent, table_name: str = "collector_rss_subscriptions"):
        """Store the database agent and the table to read from.

        Args:
            db_agent: Database agent; ``fetch_data`` calls its ``query_to_df``.
            table_name: Name of the table holding the RSS rows.
        """
        self.db_agent = db_agent
        self.table_name = table_name
        self.logger = logger.bind(module="RSSDataSource")

    def fetch_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]:
        """Return RSS rows published in ``[start_time, end_time)``, newest first.

        Args:
            start_time: Inclusive lower bound on the publish time.
            end_time: Exclusive upper bound on the publish time.

        Returns:
            One dict per row with the keys ``title``, ``link``, ``summary``,
            ``publish_time``, ``source_url`` and ``create_time``; an empty
            list when nothing matches or when the query fails.
        """
        try:
            # A table name cannot be bound as a query parameter, so it is
            # interpolated; doubling back-ticks keeps it one quoted identifier.
            table = self.table_name.replace('`', '``')
            sql = f"""
                SELECT
                    `文章标题` as title,
                    `文章链接` as link,
                    `文章摘要` as summary,
                    `发布时间` as publish_time,
                    `来源URL` as source_url,
                    `创建时间` as create_time
                FROM `{table}`
                WHERE `发布时间` >= %s AND `发布时间` < %s
                ORDER BY `发布时间` DESC
            """
            params = (
                start_time.strftime('%Y-%m-%d %H:%M:%S'),
                end_time.strftime('%Y-%m-%d %H:%M:%S'),
            )
            df = self.db_agent.query_to_df(sql, params=params, is_print=False)
            if df.empty:
                self.logger.info(f"时间范围 {start_time} 到 {end_time} 内没有RSS数据")
                return []
            # Convert the frame into a list of row dicts.
            data_list = df.to_dict('records')
            self.logger.info(f"获取到 {len(data_list)} 条RSS数据")
            return data_list
        except Exception as e:
            # Best effort: a failing source must not abort the whole report.
            # loguru does not understand the stdlib ``exc_info`` flag (it is
            # stored as an extra field and triggers str.format on the
            # message); ``exception`` attaches the traceback instead.
            self.logger.exception(f"获取RSS数据失败: {e}")
            return []

    def get_source_name(self) -> str:
        """Return the display name of this data source."""
        return "RSS订阅"
class AIAnalysisDataSource(DataSource):
    """Data source over the AI analysis result table (rows flagged as relevant)."""

    def __init__(self, db_agent: MySQLAgent, table_name: str = "ai_processor_rss_analysis"):
        """Store the database agent and the table to read from.

        Args:
            db_agent: Database agent; ``fetch_data`` calls its ``query_to_df``.
            table_name: Name of the table holding the analysis results.
        """
        self.db_agent = db_agent
        self.table_name = table_name
        self.logger = logger.bind(module="AIAnalysisDataSource")

    def fetch_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]:
        """Return relevant rows published in ``[start_time, end_time)``.

        Only rows whose ``是否相关`` column equals 1 are selected, ordered by
        publish time and then relevance score, both descending.

        Args:
            start_time: Inclusive lower bound on the publish time.
            end_time: Exclusive upper bound on the publish time.

        Returns:
            One dict per row with the keys ``title``, ``link``, ``summary``,
            ``publish_time``, ``source_url``, ``category``, ``tags``,
            ``relevance_score``, ``analysis_note`` and ``process_time``; an
            empty list when nothing matches or when the query fails.
        """
        try:
            # A table name cannot be bound as a query parameter, so it is
            # interpolated; doubling back-ticks keeps it one quoted identifier.
            table = self.table_name.replace('`', '``')
            sql = f"""
                SELECT
                    `文章标题` as title,
                    `文章链接` as link,
                    `文章摘要` as summary,
                    `发布时间` as publish_time,
                    `来源URL` as source_url,
                    `分类` as category,
                    `标签` as tags,
                    `相关度评分` as relevance_score,
                    `分析说明` as analysis_note,
                    `处理时间` as process_time
                FROM `{table}`
                WHERE `发布时间` >= %s AND `发布时间` < %s
                AND `是否相关` = 1
                ORDER BY `发布时间` DESC, `相关度评分` DESC
            """
            params = (
                start_time.strftime('%Y-%m-%d %H:%M:%S'),
                end_time.strftime('%Y-%m-%d %H:%M:%S'),
            )
            df = self.db_agent.query_to_df(sql, params=params, is_print=False)
            if df.empty:
                self.logger.info(f"时间范围 {start_time} 到 {end_time} 内没有相关数据(是否相关=1)")
                return []
            # Convert the frame into a list of row dicts.
            data_list = df.to_dict('records')
            self.logger.info(f"获取到 {len(data_list)} 条相关数据(是否相关=1)")
            return data_list
        except Exception as e:
            # Best effort: a failing source must not abort the whole report.
            # loguru does not understand the stdlib ``exc_info`` flag (it is
            # stored as an extra field and triggers str.format on the
            # message); ``exception`` attaches the traceback instead.
            self.logger.exception(f"获取AI分析数据失败: {e}")
            return []

    def get_source_name(self) -> str:
        """Return the display name of this data source."""
        return "AI分析结果"
class AIProcessor:
    """AI helper that screens articles for automotive-aftermarket relevance.

    Uses an OpenAI-compatible chat-completions client pointed at the
    Baidu Qianfan endpoint.
    """

    def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None):
        """Create the API client.

        Args:
            api_key: API key; falls back to ``Config.BAIDU_AI_CONFIG['api_key']``.
            model: Model name; falls back to ``Config.BAIDU_AI_CONFIG['model']``
                and then to ``'ernie-x1-turbo-32k'``.
        """
        from openai import OpenAI

        self.base_url = 'https://qianfan.baidubce.com/v2'
        self.api_key = api_key or Config.BAIDU_AI_CONFIG.get('api_key')
        self.model = model or Config.BAIDU_AI_CONFIG.get('model', 'ernie-x1-turbo-32k')
        self.client = OpenAI(
            base_url=self.base_url,
            api_key=self.api_key,
        )
        self.logger = logger.bind(module="AIProcessor")

    @staticmethod
    def _build_filter_prompt(batch: List[Dict[str, Any]]) -> str:
        """Build the screening prompt for one batch.

        Articles are labelled with their position inside ``batch`` (0-based).
        The answer is resolved against the same batch, so the label has to be
        batch-local; labelling with the global position made every batch after
        the first fall outside the accepted index range.
        """
        articles_text = "".join(
            f"\n[{idx}] 标题: {article.get('title', '')}\n"
            f"摘要: {article.get('summary', '')}\n"
            for idx, article in enumerate(batch)
        )
        return f"""请分析以下新闻文章,判断哪些与汽车后市场相关。
汽车后市场的定义:汽车销售以后,围绕汽车使用过程中的各种服务,包括:
- 汽车维修保养
- 汽车配件
- 汽车改装
- 汽车美容
- 汽车用品
- 汽车金融
- 汽车保险
- 二手车交易
- 汽车租赁
- 汽车检测
- 汽车报废回收
- 汽车相关法律法规和政策
文章列表:
{articles_text}
请按以下JSON格式返回结果:
{{
"related_articles": [
{{
"index": 文章的序号(从0开始),
"is_related": true/false,
"reason": "判断理由",
"category": "所属类别(如:维修保养、配件、政策等)"
}}
]
}}
只返回JSON,不要其他文字说明。"""

    @staticmethod
    def _extract_json(result_text: str) -> Optional[Dict[str, Any]]:
        """Pull a JSON object out of a model answer.

        Tries, in order: a fenced markdown code block, the outermost
        ``{...}`` span, and the raw text.

        Returns:
            The first candidate that parses to a dict, or ``None``.
        """
        import json
        import re

        candidates = []
        fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', result_text, re.DOTALL)
        if fenced:
            candidates.append(fenced.group(1))
        braced = re.search(r'\{.*\}', result_text, re.DOTALL)
        if braced:
            candidates.append(braced.group())
        candidates.append(result_text)

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:  # json.JSONDecodeError is a ValueError
                continue
            if isinstance(parsed, dict):
                return parsed
        return None

    @staticmethod
    def _select_related(batch: List[Dict[str, Any]], items: Any) -> List[Dict[str, Any]]:
        """Mark and return the batch articles the model flagged as related.

        Args:
            batch: Articles that were sent in one prompt.
            items: The ``related_articles`` value from the model answer.

        Returns:
            The flagged articles (the same dict objects, mutated in place with
            ``ai_marked``, ``ai_category`` and ``ai_reason``). Entries that are
            malformed or whose index is outside the batch are ignored.
        """
        related = []
        if not isinstance(items, list):
            return related
        for item in items:
            if not isinstance(item, dict):
                continue
            try:
                # Models sometimes return the index as a string.
                idx = int(item.get('index', -1))
            except (TypeError, ValueError):
                continue
            if not 0 <= idx < len(batch):
                continue
            if item.get('is_related', False):
                article = batch[idx]
                article['ai_marked'] = True
                article['ai_category'] = item.get('category', '其他')
                article['ai_reason'] = item.get('reason', '')
                related.append(article)
        return related

    def filter_automotive_content(self, articles: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Keep only the articles related to the automotive aftermarket.

        Articles are sent to the model in batches of 10, with a 1.5 s pause
        between batches to stay under the API rate limit.

        Args:
            articles: Article dicts; ``title`` and ``summary`` are sent to the
                model.

        Returns:
            The related articles, annotated with ``ai_marked``,
            ``ai_category`` and ``ai_reason``. When a batch fails, its
            articles are annotated with ``ai_marked = False`` and
            ``ai_error`` but are not part of the returned list.
        """
        import time

        if not articles:
            return []
        self.logger.info(f"开始AI筛选 {len(articles)} 篇文章")

        batch_size = 10
        filtered_articles = []
        for start in range(0, len(articles), batch_size):
            batch = articles[start:start + batch_size]
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[{
                        "role": "user",
                        "content": self._build_filter_prompt(batch),
                    }],
                )
                result_text = response.choices[0].message.content.strip()

                result_json = self._extract_json(result_text)
                if result_json is None:
                    self.logger.warning(f"无法解析AI返回的JSON: {result_text[:200]}")
                    result_json = {'related_articles': []}

                filtered_articles.extend(
                    self._select_related(batch, result_json.get('related_articles', []))
                )

                # Throttle only when another batch follows.
                if start + batch_size < len(articles):
                    time.sleep(1.5)
            except Exception as e:
                # loguru ignores the stdlib ``exc_info`` flag; ``exception``
                # attaches the traceback.
                self.logger.exception(f"AI筛选批处理失败: {e}")
                for article in batch:
                    article['ai_marked'] = False
                    article['ai_error'] = str(e)

        self.logger.info(f"AI筛选完成,找到 {len(filtered_articles)} 篇相关文章")
        return filtered_articles

    def generate_news_summary(self, articles: List[Dict[str, Any]]) -> str:
        """Render the screened articles as a Markdown section.

        Args:
            articles: Articles as returned by ``filter_automotive_content``.

        Returns:
            Markdown text; a short "no news" section when ``articles`` is empty.
        """
        if not articles:
            return "## 相关新闻\n\n暂无相关新闻。\n"

        parts = []
        for idx, article in enumerate(articles, 1):
            category = article.get('ai_category', '其他')
            reason = article.get('ai_reason', '')
            link = article.get('link', '')
            parts.append(f"\n### {idx}. {article.get('title', '无标题')}\n")
            parts.append(f"- **类别**: {category}\n")
            parts.append(f"- **摘要**: {article.get('summary', '无摘要')}\n")
            parts.append(f"- **链接**: [{link}]({link})\n")
            parts.append(f"- **发布时间**: {article.get('publish_time', '')}\n")
            if reason:
                parts.append(f"- **相关性说明**: {reason}\n")
            parts.append("\n")
        articles_text = "".join(parts)
        return f"## 汽车后市场相关新闻\n\n共找到 {len(articles)} 篇相关新闻:\n\n{articles_text}"
class BaseReporter:
    """Base class for report generators.

    Collects articles from the registered data sources, renders them as
    Markdown / HTML and writes the result to disk.
    """

    def __init__(self, data_sources: Optional[List["DataSource"]] = None):
        """Set up the reporter.

        Args:
            data_sources: Initial data sources; copied so that later
                ``add_data_source`` calls do not mutate the caller's list.
        """
        self.data_sources = list(data_sources) if data_sources else []
        self.ai_processor = AIProcessor()
        self.logger = logger.bind(module="BaseReporter")

    @staticmethod
    def _has_value(value: Any) -> bool:
        """Return True unless ``value`` is None, an empty string or NaN-like.

        NaN-like values (which compare unequal to themselves) can appear when
        rows come from a DataFrame with NULL cells.
        """
        if value is None:
            return False
        if value != value:  # NaN / NaT are the only values unequal to themselves
            return False
        return value != ''

    @classmethod
    def _publish_sort_key(cls, item: Dict[str, Any]) -> tuple:
        """Sort key on ``publish_time`` that tolerates missing or mixed types.

        Items without a usable publish time sort last when the list is sorted
        in reverse (newest first) order.
        """
        value = item.get('publish_time')
        if cls._has_value(value):
            return (1, str(value))
        return (0, '')

    def add_data_source(self, data_source: "DataSource"):
        """Register an additional data source."""
        self.data_sources.append(data_source)
        self.logger.info(f"添加数据源: {data_source.get_source_name()}")

    def collect_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]:
        """Fetch and merge the records of every data source.

        Each record is tagged with ``data_source`` (the source's name). A
        source that raises is logged and skipped.

        Args:
            start_time: Start of the time range passed to each source.
            end_time: End of the time range passed to each source.

        Returns:
            All records, newest publish time first.
        """
        all_data = []
        for source in self.data_sources:
            try:
                data = source.fetch_data(start_time, end_time)
                source_name = source.get_source_name()
                for item in data:
                    item['data_source'] = source_name
                all_data.extend(data)
            except Exception as e:
                self.logger.exception(f"{source.get_source_name()} 收集数据失败: {e}")
        all_data.sort(key=self._publish_sort_key, reverse=True)
        return all_data

    def generate_report_content(self, articles: List[Dict[str, Any]], report_type: str = "日报") -> str:
        """Build the Markdown body of a report.

        Args:
            articles: Articles to list; they are used as given (no further
                filtering happens here).
            report_type: ``"日报"`` selects the daily "no news" wording; any
                other value selects the weekly wording.

        Returns:
            Markdown with a statistics section followed by the news section.
        """
        related_count = len(articles)

        # Blank lines separate the Markdown blocks (heading / list / paragraph).
        if related_count == 0:
            if report_type == "日报":
                message = "昨日无汽车后市场相关的新闻"
            else:
                message = "上周无汽车后市场相关的新闻"
            return (
                "\n## 数据统计\n\n"
                "- **相关文章数**: 0\n\n"
                "## 相关新闻\n\n"
                f"{message}\n"
            )

        news_summary = self._generate_news_summary_from_analysis(articles)
        stats = (
            "\n## 数据统计\n\n"
            f"- **相关文章数**: {related_count}\n\n"
        )
        return stats + news_summary

    def _generate_news_summary_from_analysis(self, articles: List[Dict[str, Any]]) -> str:
        """Render analysed articles as a Markdown news section.

        Optional fields (``tags``, ``relevance_score``, ``analysis_note``) are
        printed only when they hold a value; a score of 0 counts as a value,
        while None / NaN / empty string do not.

        Args:
            articles: Dicts that may carry ``title``, ``summary``, ``link``,
                ``publish_time``, ``category``, ``tags``, ``relevance_score``
                and ``analysis_note``.

        Returns:
            Markdown text; a short "no news" section when ``articles`` is empty.
        """
        if not articles:
            return "## 相关新闻\n\n暂无相关新闻。\n"

        has_value = self._has_value
        parts = []
        for idx, article in enumerate(articles, 1):
            category = article.get('category', '其他')
            if not has_value(category):
                category = '其他'
            tags = article.get('tags', '')
            analysis_note = article.get('analysis_note', '')
            relevance_score = article.get('relevance_score', '')
            link = article.get('link', '')

            parts.append(f"\n### {idx}. {article.get('title', '无标题')}\n")
            parts.append(f"- **分类**: {category}\n")
            if has_value(tags):
                parts.append(f"- **标签**: {tags}\n")
            parts.append(f"- **摘要**: {article.get('summary', '无摘要')}\n")
            parts.append(f"- **链接**: [{link}]({link})\n")
            parts.append(f"- **发布时间**: {article.get('publish_time', '')}\n")
            if has_value(relevance_score):
                parts.append(f"- **相关度评分**: {relevance_score}\n")
            if has_value(analysis_note):
                parts.append(f"- **分析说明**: {analysis_note}\n")
            parts.append("\n")
        articles_text = "".join(parts)
        return f"## 汽车后市场相关新闻\n\n共找到 {len(articles)} 篇相关新闻:\n\n{articles_text}"

    def generate_html_report(self, markdown_content: str, template_path: str = None) -> str:
        """Render Markdown as an HTML report.

        Args:
            markdown_content: Report body in Markdown.
            template_path: Optional external HTML template; used only when the
                file exists, otherwise the built-in template is used.

        Returns:
            The HTML document as a string.
        """
        # Imported here rather than at module level to avoid a circular import.
        from .html_template import HTMLTemplateManager

        template_manager = HTMLTemplateManager()
        if template_path and os.path.exists(template_path):
            return template_manager.render_external_template(template_path, markdown_content)
        return template_manager.render_builtin_template(markdown_content)

    @staticmethod
    def _write_text(output_path: str, content: str) -> None:
        """Write ``content`` to ``output_path`` as UTF-8, creating parent directories."""
        os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(content)

    def save_report(self, html_content: str, output_path: str):
        """Save the HTML report to ``output_path``."""
        self._write_text(output_path, html_content)
        self.logger.info(f"HTML报告已保存到: {output_path}")

    def save_markdown_report(self, markdown_content: str, output_path: str):
        """Save the Markdown report to ``output_path``."""
        self._write_text(output_path, markdown_content)
        self.logger.info(f"Markdown报告已保存到: {output_path}")
+139
View File
@@ -0,0 +1,139 @@
"""
日报生成器 - 生成24小时内的汽车后市场情报报告
"""
import os
import sys
from datetime import datetime, timedelta
from loguru import logger
# 添加父目录到路径
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(os.path.dirname(current_dir))
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
from applications.reporter.base_reporter import BaseReporter, AIAnalysisDataSource
from applications.reporter.dingtalk_webhook import DingTalkWebhook
from utils.mysql_agent import MySQLAgent
from config import Config
class DailyReporter(BaseReporter):
    """Daily report generator covering the last 24 hours."""

    def __init__(self, dingtalk_webhook: str = None):
        """Initialise the daily reporter.

        Args:
            dingtalk_webhook: Optional DingTalk webhook URL; falls back to
                ``Config.DINGTALK_WEBHOOK`` when not given. Pushing is enabled
                only when a non-empty URL is available.
        """
        super().__init__()
        # The analysis-result source only yields rows already flagged relevant.
        db_agent = MySQLAgent(Config.MYSQL_CONFIG)
        self.add_data_source(AIAnalysisDataSource(db_agent))
        self.logger = logger.bind(module="DailyReporter")

        self.dingtalk_webhook = dingtalk_webhook or getattr(Config, 'DINGTALK_WEBHOOK', None)
        self.dingtalk_client = None
        if self.dingtalk_webhook:
            self.dingtalk_client = DingTalkWebhook(self.dingtalk_webhook)
            self.logger.info("已启用钉钉推送功能")

    def generate(self, output_dir: str = "output/reports/daily",
                 template_path: str = None,
                 save_markdown: bool = True,
                 send_dingtalk: bool = True) -> dict:
        """Generate the daily report.

        Args:
            output_dir: Directory the report files are written to.
            template_path: Optional external HTML template path.
            save_markdown: Whether to also save the Markdown file.
            send_dingtalk: Whether to push the report to DingTalk (only done
                when a webhook client was configured).

        Returns:
            A dict with ``html_path`` and, when applicable, ``markdown_path``
            and ``dingtalk_sent``.
        """
        self.logger.info("开始生成日报")

        # Time range: the last 24 hours up to now.
        end_time = datetime.now()
        start_time = end_time - timedelta(hours=24)
        start_text = start_time.strftime('%Y-%m-%d %H:%M:%S')
        end_text = end_time.strftime('%Y-%m-%d %H:%M:%S')
        self.logger.info(f"时间范围: {start_text} 至 {end_text}")

        articles = self.collect_data(start_time, end_time)

        # generate_report_content also covers the "no articles" case.
        report_body = self.generate_report_content(articles, report_type="日报")
        # Blank lines keep each header field in its own Markdown paragraph.
        markdown_content = (
            "# 汽车后市场情报日报\n\n"
            "## 报告时间\n\n"
            f"**生成时间**: {end_text}\n\n"
            f"**时间范围**: {start_text} 至 {end_text}\n\n"
            f"{report_body}\n"
        )

        html_content = self.generate_html_report(markdown_content, template_path=template_path)

        os.makedirs(output_dir, exist_ok=True)
        timestamp = end_time.strftime('%Y%m%d_%H%M%S')
        result = {}

        html_path = os.path.join(output_dir, f"daily_report_{timestamp}.html")
        self.save_report(html_content, html_path)
        result['html_path'] = html_path
        self.logger.info(f"HTML报告已保存: {html_path}")

        # Stays None when the Markdown file is not written; send_report is
        # still given the value in that case.
        markdown_path = None
        if save_markdown:
            markdown_path = os.path.join(output_dir, f"daily_report_{timestamp}.md")
            self.save_markdown_report(markdown_content, markdown_path)
            result['markdown_path'] = markdown_path
            self.logger.info(f"Markdown报告已保存: {markdown_path}")

        if send_dingtalk and self.dingtalk_client:
            title = f"汽车后市场情报日报 - {end_time.strftime('%Y-%m-%d')}"
            success = self.dingtalk_client.send_report(title, markdown_content, markdown_path)
            result['dingtalk_sent'] = success
            if success:
                self.logger.info("报告已推送到钉钉群")
            else:
                self.logger.warning("报告推送到钉钉群失败")

        self.logger.info("日报生成完成")
        return result
def main():
    """Command-line entry point: generate the daily report and print the result.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        reporter = DailyReporter()
        result = reporter.generate()
        print("日报已生成:")
        print(f" HTML: {result.get('html_path')}")
        if 'markdown_path' in result:
            print(f" Markdown: {result.get('markdown_path')}")
        if 'dingtalk_sent' in result:
            print(f" 钉钉推送: {'成功' if result.get('dingtalk_sent') else '失败'}")
    except Exception as e:
        # loguru ignores the stdlib ``exc_info`` flag (it is recorded as an
        # extra field and no traceback is emitted); ``exception`` logs at
        # ERROR level with the traceback attached.
        logger.exception(f"生成日报失败: {e}")
        raise


if __name__ == "__main__":
    main()
+399
View File
@@ -0,0 +1,399 @@
"""
HTML模板管理器
支持内置模板和外部HTML模板
"""
import os
import markdown
from bs4 import BeautifulSoup
import re
from typing import Optional
from loguru import logger
class HTMLTemplateManager:
    """Renders Markdown reports to HTML with a built-in or an external template."""

    # Placeholder forms recognised in external templates, tried in this order.
    _PLACEHOLDER_PATTERNS = (
        r'\{\{content\}\}',
        r'\{content\}',
        r'<!--\s*content\s*-->',
    )

    def __init__(self):
        self.logger = logger.bind(module="HTMLTemplateManager")

    def markdown_to_html(self, markdown_content: str) -> str:
        """Convert Markdown to an HTML fragment (tables, fenced code, codehilite enabled)."""
        html = markdown.markdown(
            markdown_content,
            extensions=['tables', 'fenced_code', 'codehilite']
        )
        return html

    def render_builtin_template(self, markdown_content: str) -> str:
        """Render the Markdown inside the built-in, self-contained HTML page.

        Args:
            markdown_content: Report body in Markdown.

        Returns:
            A complete HTML document as a string.
        """
        html_body = self.markdown_to_html(markdown_content)
        # Post-process the fragment (table/list classes, link targets).
        soup = BeautifulSoup(html_body, 'html.parser')
        self._enhance_html_structure(soup)
        # Literal CSS braces are doubled because this is an f-string.
        html_template = f"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>汽车后市场情报报告</title>
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@300;400;500;700&display=swap" rel="stylesheet">
<style>
:root {{
--primary: #3498db;
--secondary: #2ecc71;
--accent: #e74c3c;
--dark: #2c3e50;
--light: #f8f9fa;
--border: #e0e0e0;
}}
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: 'Noto Sans SC', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.8;
color: #333;
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
padding: 20px;
}}
.report-container {{
max-width: 1200px;
margin: 0 auto;
padding: 40px;
background: white;
box-shadow: 0 10px 40px rgba(0,0,0,0.1);
border-radius: 12px;
}}
.report-header {{
text-align: center;
padding-bottom: 30px;
border-bottom: 3px solid var(--primary);
margin-bottom: 40px;
}}
.report-header h1 {{
color: var(--dark);
font-size: 2.5em;
margin-bottom: 10px;
}}
.report-header .report-date {{
color: #666;
font-size: 1.1em;
}}
h1 {{
color: var(--dark);
font-size: 2em;
margin: 30px 0 20px 0;
padding-bottom: 10px;
border-bottom: 2px solid var(--primary);
}}
h2 {{
color: var(--dark);
font-size: 1.6em;
margin: 25px 0 15px 0;
padding-left: 10px;
border-left: 4px solid var(--primary);
}}
h3 {{
color: var(--dark);
font-size: 1.3em;
margin: 20px 0 10px 0;
}}
h4 {{
color: #555;
font-size: 1.1em;
margin: 15px 0 8px 0;
}}
p {{
margin: 12px 0;
text-align: justify;
}}
ul, ol {{
margin: 15px 0;
padding-left: 30px;
}}
li {{
margin: 8px 0;
}}
/* 表格样式 */
table {{
width: 100%;
border-collapse: collapse;
margin: 25px 0;
box-shadow: 0 2px 15px rgba(0,0,0,0.1);
border-radius: 8px;
overflow: hidden;
}}
table thead {{
background: linear-gradient(135deg, var(--primary) 0%, #2980b9 100%);
color: white;
}}
table th {{
padding: 15px;
text-align: left;
font-weight: 600;
}}
table td {{
padding: 12px 15px;
border-bottom: 1px solid var(--border);
}}
table tbody tr:hover {{
background-color: #f5f5f5;
}}
table tbody tr:last-child td {{
border-bottom: none;
}}
/* 代码块样式 */
pre {{
background: #f4f4f4;
border: 1px solid var(--border);
border-radius: 6px;
padding: 15px;
overflow-x: auto;
margin: 20px 0;
}}
code {{
background: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}}
pre code {{
background: none;
padding: 0;
}}
/* 链接样式 */
a {{
color: var(--primary);
text-decoration: none;
border-bottom: 1px dotted var(--primary);
transition: all 0.3s;
}}
a:hover {{
color: var(--accent);
border-bottom-color: var(--accent);
}}
/* 新闻列表样式 */
.news-item {{
background: #f9f9f9;
border-left: 4px solid var(--secondary);
padding: 15px 20px;
margin: 15px 0;
border-radius: 6px;
transition: all 0.3s;
}}
.news-item:hover {{
background: #f0f0f0;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}}
.news-item h3 {{
margin-top: 0;
color: var(--dark);
}}
.news-item .news-meta {{
color: #666;
font-size: 0.9em;
margin-top: 10px;
}}
.news-item .news-category {{
display: inline-block;
background: var(--secondary);
color: white;
padding: 3px 10px;
border-radius: 12px;
font-size: 0.85em;
margin-right: 10px;
}}
/* 统计信息样式 */
.stats-box {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 25px;
border-radius: 10px;
margin: 25px 0;
}}
.stats-box h2 {{
color: white;
border: none;
padding: 0;
margin: 0 0 15px 0;
}}
.stats-grid {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-top: 20px;
}}
.stat-item {{
text-align: center;
}}
.stat-number {{
font-size: 2.5em;
font-weight: bold;
margin-bottom: 5px;
}}
.stat-label {{
font-size: 0.9em;
opacity: 0.9;
}}
/* 响应式设计 */
@media (max-width: 768px) {{
.report-container {{
padding: 20px;
}}
.report-header h1 {{
font-size: 1.8em;
}}
h1 {{
font-size: 1.6em;
}}
h2 {{
font-size: 1.3em;
}}
table {{
font-size: 0.9em;
}}
table th,
table td {{
padding: 8px;
}}
}}
/* 打印样式 */
@media print {{
body {{
background: white;
padding: 0;
}}
.report-container {{
box-shadow: none;
padding: 0;
}}
}}
</style>
</head>
<body>
<div class="report-container">
{str(soup)}
</div>
</body>
</html>"""
        return html_template

    @classmethod
    def _fill_placeholder(cls, template: str, html_body: str) -> Optional[str]:
        """Substitute the first recognised placeholder form with ``html_body``.

        The replacement is passed as a function so that backslashes and
        group references inside the rendered HTML are inserted literally
        instead of being interpreted by ``re.sub``.

        Returns:
            The filled template, or ``None`` when no placeholder is present.
        """
        for pattern in cls._PLACEHOLDER_PATTERNS:
            if re.search(pattern, template, re.IGNORECASE):
                return re.sub(pattern, lambda _match: html_body, template, flags=re.IGNORECASE)
        return None

    def render_external_template(self, template_path: str, markdown_content: str) -> str:
        """Render the Markdown inside an external HTML template.

        The body replaces a ``{{content}}``, ``{content}`` or
        ``<!-- content -->`` placeholder (case-insensitive). Without a
        placeholder it is appended inside ``<body>``, or to the end of the
        template text when there is no ``<body>``.

        Args:
            template_path: Path of the external template file.
            markdown_content: Report body in Markdown.

        Returns:
            The rendered HTML; on any error the built-in template is used.
        """
        try:
            with open(template_path, 'r', encoding='utf-8') as f:
                template = f.read()
            html_body = self.markdown_to_html(markdown_content)

            rendered = self._fill_placeholder(template, html_body)
            if rendered is None:
                soup = BeautifulSoup(template, 'html.parser')
                body = soup.find('body')
                if body is not None:
                    body.append(BeautifulSoup(html_body, 'html.parser'))
                    rendered = str(soup)
                else:
                    # Return the concatenated text as-is: re-serialising the
                    # parsed template here would drop the appended content.
                    rendered = template + html_body

            self.logger.info(f"使用外部模板渲染: {template_path}")
            return rendered
        except Exception as e:
            # loguru ignores the stdlib ``exc_info`` flag; ``exception``
            # attaches the traceback.
            self.logger.exception(f"使用外部模板失败: {e},回退到内置模板")
            return self.render_builtin_template(markdown_content)

    def _enhance_html_structure(self, soup: "BeautifulSoup"):
        """Decorate the parsed fragment in place.

        Adds a ``data-table`` class to tables without a class, a
        ``news-list`` class to lists with an item containing "新闻", and
        ``target``/``rel`` attributes to links that have no target.
        """
        for table in soup.find_all('table'):
            if not table.get('class'):
                table['class'] = 'data-table'
        for ul in soup.find_all('ul'):
            if any('新闻' in str(item) for item in ul.find_all('li')):
                ul['class'] = 'news-list'
        for a in soup.find_all('a'):
            if not a.get('target'):
                a['target'] = '_blank'
                a['rel'] = 'noopener noreferrer'
@@ -0,0 +1,50 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>自定义报告模板示例</title>
<style>
/* 自定义样式示例 */
body {
font-family: 'Microsoft YaHei', Arial, sans-serif;
background: #f0f2f5;
padding: 20px;
margin: 0;
}
.container {
max-width: 1200px;
margin: 0 auto;
background: white;
padding: 30px;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
/* 内容区域样式 */
#content {
line-height: 1.8;
}
h1 {
color: #1890ff;
border-bottom: 2px solid #1890ff;
padding-bottom: 10px;
}
h2 {
color: #333;
margin-top: 30px;
}
</style>
</head>
<body>
<div class="container">
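<!-- Placeholder: the rendered report body is inserted at the marker below. -->
<!-- Supported marker forms: the word "content" in double braces, in single braces, or alone inside an HTML comment. They are not written out literally in this note because the renderer replaces every occurrence it finds (including one inside a comment), and a nested comment would end this comment early. -->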
{{content}}
</div>
</body>
</html>
+139
View File
@@ -0,0 +1,139 @@
"""
周报生成器 - 生成7天内的汽车后市场情报报告
"""
import os
import sys
from datetime import datetime, timedelta
from loguru import logger
# 添加父目录到路径
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(os.path.dirname(current_dir))
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
from applications.reporter.base_reporter import BaseReporter, AIAnalysisDataSource
from applications.reporter.dingtalk_webhook import DingTalkWebhook
from utils.mysql_agent import MySQLAgent
from config import Config
class WeeklyReporter(BaseReporter):
    """Weekly report generator covering the last 7 days."""

    def __init__(self, dingtalk_webhook: str = None):
        """Initialise the weekly reporter.

        Args:
            dingtalk_webhook: Optional DingTalk webhook URL; falls back to
                ``Config.DINGTALK_WEBHOOK`` when not given. Pushing is enabled
                only when a non-empty URL is available.
        """
        super().__init__()
        # The analysis-result source only yields rows already flagged relevant.
        db_agent = MySQLAgent(Config.MYSQL_CONFIG)
        self.add_data_source(AIAnalysisDataSource(db_agent))
        self.logger = logger.bind(module="WeeklyReporter")

        self.dingtalk_webhook = dingtalk_webhook or getattr(Config, 'DINGTALK_WEBHOOK', None)
        self.dingtalk_client = None
        if self.dingtalk_webhook:
            self.dingtalk_client = DingTalkWebhook(self.dingtalk_webhook)
            self.logger.info("已启用钉钉推送功能")

    def generate(self, output_dir: str = "output/reports/weekly",
                 template_path: str = None,
                 save_markdown: bool = True,
                 send_dingtalk: bool = True) -> dict:
        """Generate the weekly report.

        Args:
            output_dir: Directory the report files are written to.
            template_path: Optional external HTML template path.
            save_markdown: Whether to also save the Markdown file.
            send_dingtalk: Whether to push the report to DingTalk (only done
                when a webhook client was configured).

        Returns:
            A dict with ``html_path`` and, when applicable, ``markdown_path``
            and ``dingtalk_sent``.
        """
        self.logger.info("开始生成周报")

        # Time range: the last 7 days up to now.
        end_time = datetime.now()
        start_time = end_time - timedelta(days=7)
        start_text = start_time.strftime('%Y-%m-%d %H:%M:%S')
        end_text = end_time.strftime('%Y-%m-%d %H:%M:%S')
        self.logger.info(f"时间范围: {start_text} 至 {end_text}")

        articles = self.collect_data(start_time, end_time)

        # generate_report_content also covers the "no articles" case.
        report_body = self.generate_report_content(articles, report_type="周报")
        # Blank lines keep each header field in its own Markdown paragraph.
        markdown_content = (
            "# 汽车后市场情报周报\n\n"
            "## 报告时间\n\n"
            f"**生成时间**: {end_text}\n\n"
            f"**时间范围**: {start_text} 至 {end_text}\n\n"
            f"{report_body}\n"
        )

        html_content = self.generate_html_report(markdown_content, template_path=template_path)

        os.makedirs(output_dir, exist_ok=True)
        timestamp = end_time.strftime('%Y%m%d_%H%M%S')
        result = {}

        html_path = os.path.join(output_dir, f"weekly_report_{timestamp}.html")
        self.save_report(html_content, html_path)
        result['html_path'] = html_path
        self.logger.info(f"HTML报告已保存: {html_path}")

        # Stays None when the Markdown file is not written; send_report is
        # still given the value in that case.
        markdown_path = None
        if save_markdown:
            markdown_path = os.path.join(output_dir, f"weekly_report_{timestamp}.md")
            self.save_markdown_report(markdown_content, markdown_path)
            result['markdown_path'] = markdown_path
            self.logger.info(f"Markdown报告已保存: {markdown_path}")

        if send_dingtalk and self.dingtalk_client:
            title = (
                "汽车后市场情报周报 - "
                f"{start_time.strftime('%Y-%m-%d')} 至 {end_time.strftime('%Y-%m-%d')}"
            )
            success = self.dingtalk_client.send_report(title, markdown_content, markdown_path)
            result['dingtalk_sent'] = success
            if success:
                self.logger.info("报告已推送到钉钉群")
            else:
                self.logger.warning("报告推送到钉钉群失败")

        self.logger.info("周报生成完成")
        return result
def main():
    """Command-line entry point: generate the weekly report and print the result.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    try:
        reporter = WeeklyReporter()
        result = reporter.generate()
        print("周报已生成:")
        print(f" HTML: {result.get('html_path')}")
        if 'markdown_path' in result:
            print(f" Markdown: {result.get('markdown_path')}")
        if 'dingtalk_sent' in result:
            print(f" 钉钉推送: {'成功' if result.get('dingtalk_sent') else '失败'}")
    except Exception as e:
        # loguru ignores the stdlib ``exc_info`` flag (it is recorded as an
        # extra field and no traceback is emitted); ``exception`` logs at
        # ERROR level with the traceback attached.
        logger.exception(f"生成周报失败: {e}")
        raise


if __name__ == "__main__":
    main()
+7 -1
View File
@@ -41,4 +41,10 @@ class Config:
'delay': 1.5, # 每条记录之间的延迟(秒),避免API限流 'delay': 1.5, # 每条记录之间的延迟(秒),避免API限流
'source_table': 'processed_rss_data', # 源数据表 'source_table': 'processed_rss_data', # 源数据表
'result_table': 'ai_processor_rss_analysis', # AI分析结果表 'result_table': 'ai_processor_rss_analysis', # AI分析结果表
} }
# 钉钉Webhook配置
# 优先从环境变量读取,如果没有则使用下面的默认值(需要用户自行配置)
# 请将下面的空字符串替换为你的钉钉Webhook地址,格式:https://oapi.dingtalk.com/robot/send?access_token=xxx
DINGTALK_WEBHOOK = os.getenv('DINGTALK_WEBHOOK', '') # 钉钉机器人Webhook地址
# 例如:DINGTALK_WEBHOOK = os.getenv('DINGTALK_WEBHOOK', 'https://oapi.dingtalk.com/robot/send?access_token=your_token_here')
+135
View File
@@ -0,0 +1,135 @@
@echo off
REM Intelligence data processing system - simplified launcher
REM Purpose: detect the Python / Conda environment, then start the system
REM Author: AI Assistant
REM Version: 1.0
REM Date: 2025-10-29
REM
REM NOTE: inside a parenthesised block %errorlevel% is expanded when the whole
REM block is parsed, so checks that follow a command in the same block use
REM !errorlevel! (delayed expansion is enabled below).
REM NOTE: conda is invoked through CALL because it may resolve to a batch
REM wrapper; running a batch file without CALL would not return to this script.
chcp 65001 >nul
setlocal enabledelayedexpansion
REM Set up ANSI colours
for /f %%a in ('echo prompt $E ^| cmd') do set "ESC=%%a"
set "GREEN=%ESC%[32m"
set "RED=%ESC%[31m"
set "YELLOW=%ESC%[33m"
set "CYAN=%ESC%[36m"
set "RESET=%ESC%[0m"
REM Configuration
set "CONDA_ENV_NAME=intelligence_env"
set "PROJECT_PATH=%~dp0.."
set "PYTHON_VERSION=3.13"
echo %CYAN%===============================================%RESET%
echo %CYAN% 情报数据处理系统启动器%RESET%
echo %CYAN%===============================================%RESET%
echo.
REM Check the project path
if not exist "%PROJECT_PATH%\main.py" (
    echo %RED%错误: 项目路径不存在或main.py文件未找到%RESET%
    echo %YELLOW%当前路径: %PROJECT_PATH%%RESET%
    pause
    exit /b 1
)
echo %GREEN%✓ 项目路径检查通过%RESET%
REM Check whether Python is installed
echo %CYAN%检查Python环境...%RESET%
python --version >nul 2>&1
if %errorLevel% neq 0 (
    echo %RED%Python未安装或未添加到PATH%RESET%
    echo %YELLOW%正在尝试检测Anaconda...%RESET%
    REM Check for Anaconda
    where conda >nul 2>&1
    if !errorlevel! neq 0 (
        echo %RED%Anaconda未安装%RESET%
        echo %YELLOW%请安装Python或Anaconda后重试%RESET%
        echo %CYAN%下载地址: https://www.python.org/downloads/%RESET%
        echo %CYAN%或: https://www.anaconda.com/products/distribution%RESET%
        pause
        exit /b 1
    ) else (
        echo %GREEN%✓ 检测到Anaconda%RESET%
        call conda --version
    )
) else (
    echo %GREEN%✓ Python已安装%RESET%
    python --version
)
REM Check the Conda environment
echo %CYAN%检查Conda环境: %CONDA_ENV_NAME%%RESET%
conda env list | findstr /i "%CONDA_ENV_NAME%" >nul 2>&1
if %errorLevel% neq 0 (
    echo %YELLOW%环境不存在,正在创建...%RESET%
    call conda create -n %CONDA_ENV_NAME% python=%PYTHON_VERSION% -y
    if !errorlevel! neq 0 (
        echo %RED%环境创建失败%RESET%
        pause
        exit /b 1
    )
    echo %GREEN%✓ 环境创建成功%RESET%
) else (
    echo %GREEN%✓ 环境已存在%RESET%
)
REM Activate the environment
echo %CYAN%激活Conda环境...%RESET%
call conda activate %CONDA_ENV_NAME%
if %errorLevel% neq 0 (
    echo %RED%环境激活失败%RESET%
    pause
    exit /b 1
)
echo %GREEN%✓ 环境激活成功%RESET%
REM Install / update dependencies
echo %CYAN%检查Python依赖...%RESET%
if exist "%PROJECT_PATH%\requirements.txt" (
    echo %YELLOW%安装/更新依赖包...%RESET%
    pip install -r "%PROJECT_PATH%\requirements.txt" --quiet
    if !errorlevel! neq 0 (
        echo %YELLOW%依赖安装失败,尝试继续运行...%RESET%
    ) else (
        echo %GREEN%✓ 依赖安装完成%RESET%
    )
) else (
    echo %YELLOW%未找到requirements.txt,跳过依赖安装%RESET%
)
REM Switch to the project directory
echo %CYAN%切换到项目目录: %PROJECT_PATH%%RESET%
cd /d "%PROJECT_PATH%"
REM Check the configuration file
if not exist "config.py" (
    echo %YELLOW%警告: 未找到config.py配置文件%RESET%
    echo %CYAN%将使用默认配置运行%RESET%
)
REM Show start-up information
echo.
echo %GREEN%===============================================%RESET%
echo %GREEN% 启动情报数据处理系统%RESET%
echo %GREEN%===============================================%RESET%
echo.
echo %CYAN%环境信息:%RESET%
echo Conda环境: %CONDA_ENV_NAME%
echo 项目路径: %PROJECT_PATH%
echo Python版本:
python --version
echo.
echo %YELLOW%按 Ctrl+C 停止系统%RESET%
echo.
REM Start the system
echo %CYAN%启动情报数据处理系统主程序...%RESET%
python main.py
echo.
echo %CYAN%情报数据处理系统已停止%RESET%
pause
+109
View File
@@ -133927,3 +133927,112 @@
→ module: 'RSSDataAIProcessor' → module: 'RSSDataAIProcessor'
2025-10-29 10:30:40.130 | DEBUG | ai_processor_rss_data:112 - 处理记录 182 (2/3) 2025-10-29 10:30:40.130 | DEBUG | ai_processor_rss_data:112 - 处理记录 182 (2/3)
→ module: 'RSSDataAIProcessor' → module: 'RSSDataAIProcessor'
2025-10-29 17:34:56.620 | INFO | base_reporter:329 - 添加数据源: AI分析结果
→ module: 'BaseReporter'
2025-10-29 17:34:56.621 | INFO | daily:42 - 开始生成日报
→ module: 'DailyReporter'
2025-10-29 17:34:56.621 | INFO | daily:48 - 时间范围: 2025-10-28 17:34:56 至 2025-10-29 17:34:56
→ module: 'DailyReporter'
2025-10-29 17:34:56.621 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: '\n SELECT \n `文章标题` as title,\n `文章链接` as link,\n `文章摘要` as summary,\n `发布时间` as publish_time,\n `来源URL` as source_url,\n `分类` as category,\n `标签` as tags,\n `相关度评分` as relevance_score,\n `分析说明` as analysis_note,\n `处理时间` as process_time\n FROM `ai_processor_rss_analysis`\n ...
2025-10-29 17:34:57.326 | INFO | base_reporter:130 - 时间范围 2025-10-28 17:34:56.621364 到 2025-10-29 17:34:56.621364 内没有相关数据(是否相关=1)
→ module: 'AIAnalysisDataSource'
2025-10-29 17:34:57.329 | ERROR | daily:83 - 生成日报失败: No module named 'markdown'
→ exc_info: True
2025-10-29 17:35:46.546 | INFO | base_reporter:329 - 添加数据源: AI分析结果
→ module: 'BaseReporter'
2025-10-29 17:35:46.547 | INFO | daily:42 - 开始生成日报
→ module: 'DailyReporter'
2025-10-29 17:35:46.547 | INFO | daily:48 - 时间范围: 2025-10-28 17:35:46 至 2025-10-29 17:35:46
→ module: 'DailyReporter'
2025-10-29 17:35:46.547 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: '\n SELECT \n `文章标题` as title,\n `文章链接` as link,\n `文章摘要` as summary,\n `发布时间` as publish_time,\n `来源URL` as source_url,\n `分类` as category,\n `标签` as tags,\n `相关度评分` as relevance_score,\n `分析说明` as analysis_note,\n `处理时间` as process_time\n FROM `ai_processor_rss_analysis`\n ...
2025-10-29 17:35:47.054 | INFO | base_reporter:130 - 时间范围 2025-10-28 17:35:46.547444 到 2025-10-29 17:35:46.547444 内没有相关数据(是否相关=1)
→ module: 'AIAnalysisDataSource'
2025-10-29 17:35:47.358 | INFO | base_reporter:449 - 报告已保存到: output/reports/daily\daily_report_20251029_173546.html
→ module: 'DailyReporter'
2025-10-29 17:35:47.360 | INFO | daily:72 - 日报生成完成: output/reports/daily\daily_report_20251029_173546.html
→ module: 'DailyReporter'
2025-10-29 17:36:37.665 | INFO | base_reporter:329 - 添加数据源: AI分析结果
→ module: 'BaseReporter'
2025-10-29 17:36:37.666 | INFO | weekly:42 - 开始生成周报
→ module: 'WeeklyReporter'
2025-10-29 17:36:37.666 | INFO | weekly:48 - 时间范围: 2025-10-22 17:36:37 至 2025-10-29 17:36:37
→ module: 'WeeklyReporter'
2025-10-29 17:36:37.667 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: '\n SELECT \n `文章标题` as title,\n `文章链接` as link,\n `文章摘要` as summary,\n `发布时间` as publish_time,\n `来源URL` as source_url,\n `分类` as category,\n `标签` as tags,\n `相关度评分` as relevance_score,\n `分析说明` as analysis_note,\n `处理时间` as process_time\n FROM `ai_processor_rss_analysis`\n ...
2025-10-29 17:36:38.112 | INFO | base_reporter:135 - 获取到 1 条相关数据(是否相关=1)
→ module: 'AIAnalysisDataSource'
2025-10-29 17:36:38.234 | INFO | base_reporter:449 - 报告已保存到: output/reports/weekly\weekly_report_20251029_173637.html
→ module: 'WeeklyReporter'
2025-10-29 17:36:38.235 | INFO | weekly:72 - 周报生成完成: output/reports/weekly\weekly_report_20251029_173637.html
→ module: 'WeeklyReporter'
2025-10-30 13:47:56.057 | INFO | task_scheduler:27 - 任务调度器已初始化,最大工作线程数: 5
→ module: 'TaskScheduler'
2025-10-30 13:48:20.079 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
2025-10-30 13:48:20.346 | INFO | mysql_agent:134 - 查询执行成功
→ module: 'MySQLAgent(Windows)'
→ 行数: 1
2025-10-30 13:48:20.349 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
2025-10-30 13:56:55.700 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
2025-10-30 13:56:55.939 | INFO | mysql_agent:134 - 查询执行成功
→ module: 'MySQLAgent(Windows)'
→ 行数: 1
2025-10-30 13:56:55.941 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
2025-10-30 13:57:07.496 | INFO | task_scheduler:27 - 任务调度器已初始化,最大工作线程数: 5
→ module: 'TaskScheduler'
2025-10-30 13:57:10.827 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
2025-10-30 13:57:11.121 | INFO | mysql_agent:134 - 查询执行成功
→ module: 'MySQLAgent(Windows)'
→ 行数: 1
2025-10-30 13:57:11.125 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
2025-10-30 13:57:49.005 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: 'SELECT * FROM main_task WHERE task_id = %s'
2025-10-30 13:57:49.286 | INFO | mysql_agent:134 - 查询执行成功
→ module: 'MySQLAgent(Windows)'
→ 行数: 1
2025-10-30 13:57:49.737 | INFO | processor_rss_data:65 - RSS数据处理器初始化完成
→ module: 'RSSDataProcessor'
2025-10-30 13:57:49.738 | INFO | processor_rss_data:335 - 开始处理RSS数据...
→ module: 'RSSDataProcessor'
2025-10-30 13:57:49.740 | DEBUG | mysql_agent:116 - 执行SQL查询
→ module: 'MySQLAgent(Windows)'
→ sql: '\n SELECT id, 文章标题, 文章摘要, 发布时间, 来源URL, 文章链接\n FROM collector_rss_subscriptions\n WHERE 是否已处理 = 0\n ORDER BY 发布时间 DESC\n LIMIT %s\n '
2025-10-30 13:57:50.013 | INFO | processor_rss_data:107 - 成功加载 6 条未处理的RSS数据
→ module: 'RSSDataProcessor'
2025-10-30 13:57:50.014 | INFO | processor_rss_data:146 - 成功加载停用词表,共 98 个词
→ module: 'RSSDataProcessor'
2025-10-30 13:57:50.015 | INFO | processor_rss_data:82 - 成功加载汽车后市场关键词,共 37 个
→ module: 'RSSDataProcessor'
2025-10-30 13:57:50.899 | INFO | processor_rss_data:235 - 数据处理完成,共处理 6 条记录
→ module: 'RSSDataProcessor'
2025-10-30 13:57:50.905 | INFO | processor_rss_data:246 - 过滤出 0 条汽车后市场相关新闻
→ module: 'RSSDataProcessor'
2025-10-30 13:57:51.015 | DEBUG | mysql_agent:614 - 更新执行完成
→ module: 'MySQLAgent(Windows)'
→ 受影响行数: 6
2025-10-30 13:57:51.015 | INFO | processor_rss_data:129 - 成功标记 6 条数据为已处理
→ module: 'RSSDataProcessor'
2025-10-30 13:57:51.016 | INFO | processor_rss_data:372 - RSS数据处理完成
→ module: 'RSSDataProcessor'
→ total_articles: 6
→ filtered_articles: 0
→ filter_rate: 0.0
→ processing_time: '2025-10-30 13:57:50'
→ mark_success: True
2025-10-30 13:57:53.702 | DEBUG | mysql_agent:614 - 更新执行完成
→ module: 'MySQLAgent(Windows)'
→ 受影响行数: 1
+744
View File
@@ -71071,3 +71071,747 @@ Traceback (most recent call last):
└ 'RSSDataProcessor' └ 'RSSDataProcessor'
AttributeError: 类 RSSDataProcessor 中未找到方法 main AttributeError: 类 RSSDataProcessor 中未找到方法 main
2025-10-29 17:34:57.329 | ERROR | daily:83 - 生成日报失败: No module named 'markdown'
→ exc_info: True
2025-10-30 13:48:20.349 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x00000241EF7C0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x00000241EF7C1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x00000241EDB7B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x00000241EDB7D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x00000241ED6E9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x00000241EF781D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'be29372e-391'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'ddbb9c4d44b6'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x00000241F0E71E40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 48, 20, 70000, tzinfo=tzutc()), 'msg_id': '7653bb92-3d92-4584-b5fd-f30...
│ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x00000241EF7AB140>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x00000241EF6F5B40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x00000241EF7ADDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x00000241EEDFA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x00000241F0E1B5E0>
└ <function _pseudo_sync_runner at 0x00000241EEDD5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x00000241F0E1B5E0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x00000241F101DA90>, <ast.FunctionDef object at 0x00000241F101D890>, <ast.Expr object at 0x000002...
│ │ └ <ast.Module object at 0x00000241F101DA50>
│ └ <function InteractiveShell.run_ast_nodes at 0x00000241EEDFA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 24191c224e0, execution_count=2 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x00000241EEDFA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x00000241EEDF4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
│ │ └ <property object at 0x00000241EEDF4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
└ <code object <module> at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x0000024191C38C20>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x0000024191C38EA0>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x0000024191C3F920>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x00000241FF4E9700>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x00000241EF7C0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x00000241EF7C1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x00000241EDB7B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x00000241EDB7D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x00000241ED6E9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x00000241EF781D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'be29372e-391'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'ddbb9c4d44b6'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x00000241F0E71E40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 48, 20, 70000, tzinfo=tzutc()), 'msg_id': '7653bb92-3d92-4584-b5fd-f30...
│ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x00000241EF7AB140>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x00000241EF6F5B40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x00000241EF7ADDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x00000241EEDFA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x00000241F0E1B5E0>
└ <function _pseudo_sync_runner at 0x00000241EEDD5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x00000241F0E1B5E0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x00000241F101DA90>, <ast.FunctionDef object at 0x00000241F101D890>, <ast.Expr object at 0x000002...
│ │ └ <ast.Module object at 0x00000241F101DA50>
│ └ <function InteractiveShell.run_ast_nodes at 0x00000241EEDFA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 24191c224e0, execution_count=2 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x00000241EEDFA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x00000241EEDF4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
│ │ └ <property object at 0x00000241EEDF4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
└ <code object <module> at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x0000024191C38C20>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x0000024191C38EA0>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x0000024191C3F920>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x00000241FF4E9700>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
2025-10-30 13:56:55.941 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x00000241EF7C0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x00000241EF7C1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x00000241EDB7B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x00000241EDB7D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x00000241ED6E9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x00000241EF781D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'be29372e-391'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'36f03038e4ef'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x0000024191C6BC40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 56, 55, 692000, tzinfo=tzutc()), 'msg_id': '788ee0fd-a13f-4e53-98b0-e9...
│ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x00000241EF7AB140>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x00000241F0DB22C0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x00000241EF7ADDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x00000241EEDFA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x0000024191A23AC0>
└ <function _pseudo_sync_runner at 0x00000241EEDD5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x0000024191A23AC0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x0000024191E00090>, <ast.FunctionDef object at 0x00000241F1027390>, <ast.Expr object at 0x000002...
│ │ └ <ast.Module object at 0x0000024191E00050>
│ └ <function InteractiveShell.run_ast_nodes at 0x00000241EEDFA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 24191c22900, execution_count=3 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x00000241EEDFA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x00000241EEDF4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
│ │ └ <property object at 0x00000241EEDF4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
└ <code object <module> at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x0000024191C38CC0>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x0000024191C38EA0>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x0000024191C3F920>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x00000241FF4E9700>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x00000241EF7C0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x00000241EF7C1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
└ <ipykernel.kernelapp.IPKernelApp object at 0x00000241EB5B7D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x00000241EDB7B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x00000241EF7AAD20>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x00000241EDB7D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x00000241ED6E9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x00000241EF781D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'be29372e-391'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'36f03038e4ef'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x00000241EF7AAD80>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x0000024191C6BC40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 56, 55, 692000, tzinfo=tzutc()), 'msg_id': '788ee0fd-a13f-4e53-98b0-e9...
│ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x00000241EF7AB140>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x00000241F0DB22C0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x00000241EF7ADDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x00000241EEDFA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x0000024191A23AC0>
└ <function _pseudo_sync_runner at 0x00000241EEDD5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x0000024191A23AC0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x0000024191E00090>, <ast.FunctionDef object at 0x00000241F1027390>, <ast.Expr object at 0x000002...
│ │ └ <ast.Module object at 0x0000024191E00050>
│ └ <function InteractiveShell.run_ast_nodes at 0x00000241EEDFA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 24191c22900, execution_count=3 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x00000241EEDFA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x00000241EEDF4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
│ │ └ <property object at 0x00000241EEDF4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000241F0C50860>
└ <code object <module> at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x0000024191C38CC0>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x0000024191C38EA0>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x0000024191C3F920>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x00000241FF4E9700>
└ <__main__.TaskManager object at 0x00000241919D6AB0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
2025-10-30 13:57:11.125 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理
→ module: 'TaskNotebook'
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x000001E4F28A0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x000001E4EE687D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x000001E4F28A1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x000001E4F288ADE0>
└ <ipykernel.kernelapp.IPKernelApp object at 0x000001E4EE687D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x000001E4F0D2B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x000001E4F288ADE0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x000001E4F0D2D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x000001E4F08B9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x000001E4F2861D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x000001E4F288A6C0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'4c713768-9d1'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'1b55c49a4a65'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x000001E4F288A6C0>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x000001E4F3F51E40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 57, 10, 815000, tzinfo=tzutc()), 'msg_id': '132b59a7-4a02-4a8d-a25c-e2...
│ └ [b'4c713768-9d1a-49ca-83ed-6814787009a5']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x000001E4F288B200>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x000001E4F27D5B40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x000001E4F288DDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x000001E4F1EEA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x000001E4F3EFADC0>
└ <function _pseudo_sync_runner at 0x000001E4F1ED5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x000001E4F3EFADC0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_19964\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x000001E4F412F450>, <ast.FunctionDef object at 0x000001E4F410FED0>, <ast.Expr object at 0x000001...
│ │ └ <ast.Module object at 0x000001E4F412C190>
│ └ <function InteractiveShell.run_ast_nodes at 0x000001E4F1EEA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 1e4ffcf6240, execution_count=2 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x000001E4F1EEA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x000001E4F1EE4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
│ │ └ <property object at 0x000001E4F1EE4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
└ <code object <module> at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x000001E4FFD10CC0>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x000001E4FFD10EA0>
└ <__main__.TaskManager object at 0x000001E4FEBA31D0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x000001E4FFD0F9C0>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x000001E4FE552A20>
└ <__main__.TaskManager object at 0x000001E4FEBA31D0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
app.launch_new_instance()
│ └ <bound method Application.launch_instance of <class 'ipykernel.kernelapp.IPKernelApp'>>
└ <module 'ipykernel.kernelapp' from 'd:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\ipykernel\\ke...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
app.start()
│ └ <function IPKernelApp.start at 0x000001E4F28A0D60>
└ <ipykernel.kernelapp.IPKernelApp object at 0x000001E4EE687D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
self.io_loop.start()
│ │ └ <function BaseAsyncIOLoop.start at 0x000001E4F28A1C60>
│ └ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x000001E4F288ADE0>
└ <ipykernel.kernelapp.IPKernelApp object at 0x000001E4EE687D10>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start
self.asyncio_loop.run_forever()
│ │ └ <function BaseEventLoop.run_forever at 0x000001E4F0D2B920>
│ └ <_WindowsSelectorEventLoop running=True closed=False debug=False>
└ <tornado.platform.asyncio.AsyncIOMainLoop object at 0x000001E4F288ADE0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x000001E4F0D2D760>
└ <_WindowsSelectorEventLoop running=True closed=False debug=False>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once
handle._run()
│ └ <function Handle._run at 0x000001E4F08B9D00>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run
self._context.run(self._callback, *self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup(<Future finis...7B)>, ...],))>)>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue
await self.process_one()
│ └ <function Kernel.process_one at 0x000001E4F2861D00>
└ <ipykernel.ipkernel.IPythonKernel object at 0x000001E4F288A6C0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one
await dispatch(*args)
│ └ ([<zmq.Frame(b'4c713768-9d1'...36B)>, <zmq.Frame(b'<IDS|MSG>')>, <zmq.Frame(b'1b55c49a4a65'...64B)>, <zmq.Frame(b'{"date":"20...
└ <bound method Kernel.dispatch_shell of <ipykernel.ipkernel.IPythonKernel object at 0x000001E4F288A6C0>>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell
await result
└ <coroutine object IPythonKernel.execute_request at 0x000001E4F3F51E40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request
await super().execute_request(stream, ident, parent)
│ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 57, 10, 815000, tzinfo=tzutc()), 'msg_id': '132b59a7-4a02-4a8d-a25c-e2...
│ └ [b'4c713768-9d1a-49ca-83ed-6814787009a5']
└ <zmq.eventloop.zmqstream.ZMQStream object at 0x000001E4F288B200>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request
reply_content = await reply_content
└ <coroutine object IPythonKernel.do_execute at 0x000001E4F27D5B40>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute
res = shell.run_cell(
│ └ <function ZMQInteractiveShell.run_cell at 0x000001E4F288DDA0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell
return super().run_cell(*args, **kwargs)
│ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task...
└ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ...
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell
result = self._run_cell(
│ └ <function InteractiveShell._run_cell at 0x000001E4F1EEA200>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell
result = runner(coro)
│ └ <coroutine object InteractiveShell.run_cell_async at 0x000001E4F3EFADC0>
└ <function _pseudo_sync_runner at 0x000001E4F1ED5260>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner
coro.send(None)
│ └ <method 'send' of 'coroutine' objects>
└ <coroutine object InteractiveShell.run_cell_async at 0x000001E4F3EFADC0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
│ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_19964\\1819114417.py'
│ │ │ └ [<ast.FunctionDef object at 0x000001E4F412F450>, <ast.FunctionDef object at 0x000001E4F410FED0>, <ast.Expr object at 0x000001...
│ │ └ <ast.Module object at 0x000001E4F412C190>
│ └ <function InteractiveShell.run_ast_nodes at 0x000001E4F1EEA5C0>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
│ │ │ │ └ False
│ │ │ └ <ExecutionResult object at 1e4ffcf6240, execution_count=2 error_before_exec=None error_in_exec=None info=<ExecutionInfo objec...
│ │ └ <code object <module> at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1>
│ └ <function InteractiveShell.run_code at 0x000001E4F1EEA660>
└ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
│ │ │ │ └ <property object at 0x000001E4F1EE4B30>
│ │ │ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
│ │ └ <property object at 0x000001E4F1EE4BD0>
│ └ <ipykernel.zmqshell.ZMQInteractiveShell object at 0x000001E4F3DED8B0>
└ <code object <module> at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 45, in <module>
run_task_with_details(2)
└ <function run_task_with_details at 0x000001E4FFD10CC0>
File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 16, in run_task_with_details
result = manager.run_task_synchronously(task_id)
│ │ └ 2
│ └ <function TaskManager.run_task_synchronously at 0x000001E4FFD10EA0>
└ <__main__.TaskManager object at 0x000001E4FEBA31D0>
> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1757831752.py", line 122, in run_task_synchronously
self.scheduler._execute_task_logic(task)
│ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex...
│ │ └ <function TaskScheduler._execute_task_logic at 0x000001E4FFD0F9C0>
│ └ <system_management.scheduler.task_scheduler.TaskScheduler object at 0x000001E4FE552A20>
└ <__main__.TaskManager object at 0x000001E4FEBA31D0>
File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic
raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}")
│ └ 'processor_rss_data'
└ 'processors'
AttributeError: 模块 processors 中未找到类 processor_rss_data
@@ -0,0 +1,301 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>汽车后市场情报报告</title>
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@300;400;500;700&display=swap" rel="stylesheet">
<style>
/* Stylesheet for the generated report page: theme variables, base typography,
   tables, code blocks, links, news cards, statistics box, plus responsive and
   print overrides. */
/* Theme colour palette; referenced below through var(--name).
   NOTE(review): --light is declared but not referenced anywhere in this stylesheet. */
:root {
--primary: #3498db;
--secondary: #2ecc71;
--accent: #e74c3c;
--dark: #2c3e50;
--light: #f8f9fa;
--border: #e0e0e0;
}
/* Global reset: clear default margins/padding and size every box by its border box. */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
/* Page background and default text settings; 'Noto Sans SC' is listed first,
   followed by system font fallbacks. */
body {
font-family: 'Noto Sans SC', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.8;
color: #333;
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
padding: 20px;
}
/* Centered white card (max 1200px wide) that wraps the whole report. */
.report-container {
max-width: 1200px;
margin: 0 auto;
padding: 40px;
background: white;
box-shadow: 0 10px 40px rgba(0,0,0,0.1);
border-radius: 12px;
}
/* Optional centered header area with a bottom rule in the primary colour. */
.report-header {
text-align: center;
padding-bottom: 30px;
border-bottom: 3px solid var(--primary);
margin-bottom: 40px;
}
/* More specific than the generic h1 rule further down, so font-size and
   margin-bottom set here take precedence inside the header. */
.report-header h1 {
color: var(--dark);
font-size: 2.5em;
margin-bottom: 10px;
}
.report-header .report-date {
color: #666;
font-size: 1.1em;
}
/* Heading hierarchy: h1 is underlined, h2 gets a left accent bar, h3/h4 are plain. */
h1 {
color: var(--dark);
font-size: 2em;
margin: 30px 0 20px 0;
padding-bottom: 10px;
border-bottom: 2px solid var(--primary);
}
h2 {
color: var(--dark);
font-size: 1.6em;
margin: 25px 0 15px 0;
padding-left: 10px;
border-left: 4px solid var(--primary);
}
h3 {
color: var(--dark);
font-size: 1.3em;
margin: 20px 0 10px 0;
}
h4 {
color: #555;
font-size: 1.1em;
margin: 15px 0 8px 0;
}
/* Body text and lists. The global reset above removed default spacing, so
   margins and list indentation are restored explicitly here. */
p {
margin: 12px 0;
text-align: justify;
}
ul, ol {
margin: 15px 0;
padding-left: 30px;
}
li {
margin: 8px 0;
}
/* Table styles */
table {
width: 100%;
border-collapse: collapse;
margin: 25px 0;
box-shadow: 0 2px 15px rgba(0,0,0,0.1);
border-radius: 8px;
overflow: hidden;
}
/* Header row: gradient starting from the primary colour, with white text. */
table thead {
background: linear-gradient(135deg, var(--primary) 0%, #2980b9 100%);
color: white;
}
table th {
padding: 15px;
text-align: left;
font-weight: 600;
}
table td {
padding: 12px 15px;
border-bottom: 1px solid var(--border);
}
/* Highlight the body row under the pointer. */
table tbody tr:hover {
background-color: #f5f5f5;
}
/* The last body row has no separator line. */
table tbody tr:last-child td {
border-bottom: none;
}
/* Code block styles */
pre {
background: #f4f4f4;
border: 1px solid var(--border);
border-radius: 6px;
padding: 15px;
overflow-x: auto;
margin: 20px 0;
}
code {
background: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}
/* Inside <pre> the inline-code background and padding are removed, since the
   <pre> box already supplies them. */
pre code {
background: none;
padding: 0;
}
/* Link styles */
a {
color: var(--primary);
text-decoration: none;
border-bottom: 1px dotted var(--primary);
transition: all 0.3s;
}
/* On hover, both the text and the dotted underline switch to the accent colour. */
a:hover {
color: var(--accent);
border-bottom-color: var(--accent);
}
/* News list styles */
.news-item {
background: #f9f9f9;
border-left: 4px solid var(--secondary);
padding: 15px 20px;
margin: 15px 0;
border-radius: 6px;
transition: all 0.3s;
}
.news-item:hover {
background: #f0f0f0;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
/* Drop the generic h3 top margin so the title sits at the top of the card. */
.news-item h3 {
margin-top: 0;
color: var(--dark);
}
.news-item .news-meta {
color: #666;
font-size: 0.9em;
margin-top: 10px;
}
/* Pill-shaped category label. */
.news-item .news-category {
display: inline-block;
background: var(--secondary);
color: white;
padding: 3px 10px;
border-radius: 12px;
font-size: 0.85em;
margin-right: 10px;
}
/* Statistics box styles */
.stats-box {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 25px;
border-radius: 10px;
margin: 25px 0;
}
/* Override the generic h2 look (left accent bar, padding) inside the box. */
.stats-box h2 {
color: white;
border: none;
padding: 0;
margin: 0 0 15px 0;
}
/* Grid of statistics: as many columns as fit, each at least 200px wide. */
.stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-top: 20px;
}
.stat-item {
text-align: center;
}
.stat-number {
font-size: 2.5em;
font-weight: bold;
margin-bottom: 5px;
}
.stat-label {
font-size: 0.9em;
opacity: 0.9;
}
/* Responsive design: viewports up to 768px wide get tighter padding and
   smaller headings and table text. */
@media (max-width: 768px) {
.report-container {
padding: 20px;
}
.report-header h1 {
font-size: 1.8em;
}
h1 {
font-size: 1.6em;
}
h2 {
font-size: 1.3em;
}
table {
font-size: 0.9em;
}
table th,
table td {
padding: 8px;
}
}
/* Print styles: remove the page background and padding, and the container's
   shadow and padding. */
@media print {
body {
background: white;
padding: 0;
}
.report-container {
box-shadow: none;
padding: 0;
}
}
</style>
</head>
<body>
<!-- Report body: title, report time (generation time and covered range),
     statistics, and the related-news section. -->
<div class="report-container">
<h1>汽车后市场情报日报</h1>
<h2>报告时间</h2>
<!-- Explicit <br>: a bare newline collapses to a single space in HTML, which
     made the two fields below run together on one line. -->
<p><strong>生成时间</strong>: 2025-10-29 17:35:46<br>
<strong>时间范围</strong>: 2025-10-28 17:35:46 至 2025-10-29 17:35:46</p>
<h2>数据统计</h2>
<ul>
<li><strong>相关文章数</strong>: 0</li>
</ul>
<h2>相关新闻</h2>
<p>昨日无汽车后市场相关的新闻</p>
</div>
</body>
</html>
@@ -0,0 +1,311 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>汽车后市场情报报告</title>
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@300;400;500;700&display=swap" rel="stylesheet">
<style>
:root {
--primary: #3498db;
--secondary: #2ecc71;
--accent: #e74c3c;
--dark: #2c3e50;
--light: #f8f9fa;
--border: #e0e0e0;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Noto Sans SC', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.8;
color: #333;
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
padding: 20px;
}
.report-container {
max-width: 1200px;
margin: 0 auto;
padding: 40px;
background: white;
box-shadow: 0 10px 40px rgba(0,0,0,0.1);
border-radius: 12px;
}
.report-header {
text-align: center;
padding-bottom: 30px;
border-bottom: 3px solid var(--primary);
margin-bottom: 40px;
}
.report-header h1 {
color: var(--dark);
font-size: 2.5em;
margin-bottom: 10px;
}
.report-header .report-date {
color: #666;
font-size: 1.1em;
}
h1 {
color: var(--dark);
font-size: 2em;
margin: 30px 0 20px 0;
padding-bottom: 10px;
border-bottom: 2px solid var(--primary);
}
h2 {
color: var(--dark);
font-size: 1.6em;
margin: 25px 0 15px 0;
padding-left: 10px;
border-left: 4px solid var(--primary);
}
h3 {
color: var(--dark);
font-size: 1.3em;
margin: 20px 0 10px 0;
}
h4 {
color: #555;
font-size: 1.1em;
margin: 15px 0 8px 0;
}
p {
margin: 12px 0;
text-align: justify;
}
ul, ol {
margin: 15px 0;
padding-left: 30px;
}
li {
margin: 8px 0;
}
/* 表格样式 */
table {
width: 100%;
border-collapse: collapse;
margin: 25px 0;
box-shadow: 0 2px 15px rgba(0,0,0,0.1);
border-radius: 8px;
overflow: hidden;
}
table thead {
background: linear-gradient(135deg, var(--primary) 0%, #2980b9 100%);
color: white;
}
table th {
padding: 15px;
text-align: left;
font-weight: 600;
}
table td {
padding: 12px 15px;
border-bottom: 1px solid var(--border);
}
table tbody tr:hover {
background-color: #f5f5f5;
}
table tbody tr:last-child td {
border-bottom: none;
}
/* 代码块样式 */
pre {
background: #f4f4f4;
border: 1px solid var(--border);
border-radius: 6px;
padding: 15px;
overflow-x: auto;
margin: 20px 0;
}
code {
background: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', monospace;
font-size: 0.9em;
}
pre code {
background: none;
padding: 0;
}
/* 链接样式 */
a {
color: var(--primary);
text-decoration: none;
border-bottom: 1px dotted var(--primary);
transition: all 0.3s;
}
a:hover {
color: var(--accent);
border-bottom-color: var(--accent);
}
/* 新闻列表样式 */
.news-item {
background: #f9f9f9;
border-left: 4px solid var(--secondary);
padding: 15px 20px;
margin: 15px 0;
border-radius: 6px;
transition: all 0.3s;
}
.news-item:hover {
background: #f0f0f0;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
.news-item h3 {
margin-top: 0;
color: var(--dark);
}
.news-item .news-meta {
color: #666;
font-size: 0.9em;
margin-top: 10px;
}
.news-item .news-category {
display: inline-block;
background: var(--secondary);
color: white;
padding: 3px 10px;
border-radius: 12px;
font-size: 0.85em;
margin-right: 10px;
}
/* 统计信息样式 */
.stats-box {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 25px;
border-radius: 10px;
margin: 25px 0;
}
.stats-box h2 {
color: white;
border: none;
padding: 0;
margin: 0 0 15px 0;
}
.stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-top: 20px;
}
.stat-item {
text-align: center;
}
.stat-number {
font-size: 2.5em;
font-weight: bold;
margin-bottom: 5px;
}
.stat-label {
font-size: 0.9em;
opacity: 0.9;
}
/* 响应式设计 */
@media (max-width: 768px) {
.report-container {
padding: 20px;
}
.report-header h1 {
font-size: 1.8em;
}
h1 {
font-size: 1.6em;
}
h2 {
font-size: 1.3em;
}
table {
font-size: 0.9em;
}
table th,
table td {
padding: 8px;
}
}
/* 打印样式 */
@media print {
body {
background: white;
padding: 0;
}
.report-container {
box-shadow: none;
padding: 0;
}
}
</style>
</head>
<body>
<div class="report-container">
<h1>汽车后市场情报周报</h1>
<h2>报告时间</h2>
<p><strong>生成时间</strong>: 2025-10-29 17:36:37
<strong>时间范围</strong>: 2025-10-22 17:36:37 至 2025-10-29 17:36:37</p>
<h2>数据统计</h2>
<ul>
<li><strong>相关文章数</strong>: 1</li>
</ul>
<h2>汽车后市场相关新闻</h2>
<p>共找到 1 篇相关新闻:</p>
<h3>1. 2025年全国汽车以旧换新补贴申请量突破1000万份</h3>
<ul class="news-list">
<li><strong>分类</strong>: 二手车</li>
<li><strong>标签</strong>: ["二手车", "政策补贴"]</li>
<li><strong>摘要</strong>: 记者从商务部了解到,截至10月22日,2025年汽车以旧换新补贴申请量突破1000万份,其中汽车报废更新超340万份,置换更新超660万份。</li>
<li><strong>链接</strong>: <a href="http://www.chinanews.com/cj/2025/10-23/10503300.shtml" rel="noopener noreferrer" target="_blank">http://www.chinanews.com/cj/2025/10-23/10503300.shtml</a></li>
<li><strong>发布时间</strong>: 2025-10-23 08:35:31</li>
<li><strong>相关度评分</strong>: 70</li>
<li><strong>分析说明</strong>: 新闻涉及汽车以旧换新补贴申请量,其中包含置换更新超660万份,直接关联二手车流通环节,属于汽车后市场中二手车领域的政策动态。</li>
</ul>
</div>
</body>
</html>
@@ -195,7 +195,7 @@ class RSSDataAIProcessor:
raise raise
def create_ai_result_table(self): def create_ai_result_table(self):
"""创建AI处理结果表""" """创建AI处理结果表(使用安全方法,确保不会删除现有数据)"""
create_sql = f""" create_sql = f"""
CREATE TABLE IF NOT EXISTS {self.ai_table} ( CREATE TABLE IF NOT EXISTS {self.ai_table} (
id INT AUTO_INCREMENT PRIMARY KEY COMMENT '主键ID', id INT AUTO_INCREMENT PRIMARY KEY COMMENT '主键ID',
@@ -221,10 +221,13 @@ class RSSDataAIProcessor:
""" """
try: try:
self.db_agent.execute_sql(create_sql) # 使用安全方法创建表(如果不存在),确保不会删除现有数据
self.log.info(f"成功创建AI结果表: {self.ai_table}") self.db_agent.create_table_if_not_exists(
table_name=self.ai_table,
create_sql=create_sql
)
except Exception as e: except Exception as e:
self.log.error(f"创建AI结果表失败: {str(e)}", exc_info=True) self.log.error(f"创建AI结果表失败(可能是数据库连接问题): {str(e)}", exc_info=True)
raise raise
def load_unprocessed_data(self, limit: int = 100) -> pd.DataFrame: def load_unprocessed_data(self, limit: int = 100) -> pd.DataFrame:
+52 -30
View File
@@ -270,14 +270,34 @@ class RSSDataProcessor:
save_df = save_df.drop('segmented_words', axis=1) save_df = save_df.drop('segmented_words', axis=1)
# 检查目标表是否存在,不存在则创建 # 检查目标表是否存在,不存在则创建
if not self.db_agent.table_exists(self.processed_table_name): # 注意:如果连接失败,table_exists可能返回False,需要捕获异常
self.create_processed_table() try:
table_exists = self.db_agent.table_exists(self.processed_table_name)
if not table_exists:
self.log.warning(f"{self.processed_table_name} 不存在,正在创建...")
self.create_processed_table()
else:
# 表存在时,也确保有唯一索引(安全操作,不会删除数据)
self.create_processed_table() # 这个方法会检查并添加索引,不会删除数据
except Exception as table_check_error:
# 如果检查表存在性时连接失败,记录错误但不中断
# 因为后续的插入操作会再次尝试连接
self.log.warning(f"检查表存在性时出错(可能是连接问题): {str(table_check_error)}")
# 尝试创建表(如果表已存在,CREATE TABLE IF NOT EXISTS不会报错)
try:
self.create_processed_table()
except Exception as create_error:
# 如果创建表也失败(可能是连接问题),记录错误
self.log.error(f"创建表时出错(可能是连接问题): {str(create_error)}")
# 继续尝试插入,如果表存在,插入会成功;如果表不存在,插入会失败并抛出异常
# 插入数据 # 插入数据ignore_duplicates=True 会跳过重复的文章链接)
# 注意:INSERT INTO + ignore_duplicates 只会跳过重复记录,不会覆盖或删除现有数据
# 如果数据库连接失败,此操作会抛出异常,不会部分成功
inserted_rows = self.db_agent.insert_from_df( inserted_rows = self.db_agent.insert_from_df(
table_name=self.processed_table_name, table_name=self.processed_table_name,
df=save_df, df=save_df,
ignore_duplicates=True ignore_duplicates=True # 跳过重复的文章链接,不会删除或覆盖现有数据
) )
self.log.info(f"成功保存 {inserted_rows} 条处理结果到数据库") self.log.info(f"成功保存 {inserted_rows} 条处理结果到数据库")
@@ -288,7 +308,10 @@ class RSSDataProcessor:
return False return False
def create_processed_table(self): def create_processed_table(self):
"""创建处理结果表""" """
创建处理结果表(带唯一索引保护,防止重复插入)
使用 MySQLAgent 的安全方法,确保不会删除现有数据
"""
create_sql = f""" create_sql = f"""
CREATE TABLE IF NOT EXISTS {self.processed_table_name} ( CREATE TABLE IF NOT EXISTS {self.processed_table_name} (
id INT AUTO_INCREMENT PRIMARY KEY, id INT AUTO_INCREMENT PRIMARY KEY,
@@ -306,10 +329,27 @@ class RSSDataProcessor:
""" """
try: try:
self.db_agent.execute_sql(create_sql) # 使用安全方法创建表(如果不存在)
self.log.info(f"成功创建处理结果表: {self.processed_table_name}") self.db_agent.create_table_if_not_exists(
table_name=self.processed_table_name,
create_sql=create_sql
)
# 使用安全方法添加唯一索引(如果不存在)
# 注意:唯一索引在创建表时不能直接包含,因为如果表已存在会报错
# 所以先创建表,再单独添加索引
self.db_agent.add_unique_index_if_not_exists(
table_name=self.processed_table_name,
index_name='uk_article_link',
column_name='文章链接',
column_length=500,
check_duplicates=True
)
except Exception as e: except Exception as e:
self.log.error(f"创建表失败: {str(e)}", exc_info=True) # 如果创建表或添加索引失败(可能是连接问题),抛出异常
# 这样上层调用可以知道操作失败,不会误以为成功
self.log.error(f"创建/检查表失败(可能是数据库连接问题): {str(e)}", exc_info=True)
raise raise
def get_processing_statistics(self, df: pd.DataFrame) -> Dict[str, Any]: def get_processing_statistics(self, df: pd.DataFrame) -> Dict[str, Any]:
@@ -383,27 +423,9 @@ class RSSDataProcessor:
return {'success': False, 'message': f'处理失败: {str(e)}'} return {'success': False, 'message': f'处理失败: {str(e)}'}
def main(): def main(self, limit: int = 1000, save_to_db: bool = True) -> Dict[str, Any]:
"""主函数入口""" """主函数入口(实例方法),对外统一调用"""
try: return self.process_rss_data(limit=limit, save_to_db=save_to_db)
# 创建处理器实例
processor = RSSDataProcessor()
# 处理RSS数据
result = processor.process_rss_data(
limit=5000, # 处理最近5000条数据
save_to_db=True # 保存到数据库
)
if result['success']:
print("RSS数据处理完成!")
print(f"处理统计: {result['statistics']}")
else:
print(f"处理失败: {result['message']}")
except Exception as e:
print(f"程序运行出错: {str(e)}")
if __name__ == "__main__": if __name__ == "__main__":
main() RSSDataProcessor().main(limit=5000, save_to_db=True)
@@ -1,6 +1,5 @@
import importlib import importlib
import threading import threading
import time
from datetime import datetime from datetime import datetime
from typing import Dict, List, Optional, Any from typing import Dict, List, Optional, Any
import croniter import croniter
+67
View File
@@ -0,0 +1,67 @@
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "## 获取钉钉token",
"id": "4a7d18176711daad"
},
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-10-30T02:59:09.458462Z",
"start_time": "2025-10-30T02:59:09.015765Z"
}
},
"source": [
"from utils.Ding_api import DingAPI\n",
"\n",
"api_instance = DingAPI()\n",
"token = api_instance.get_token()\n",
"print(token)"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\requests\\__init__.py:86: RequestsDependencyWarning: Unable to find acceptable character detection dependency (chardet or charset_normalizer).\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2b166a1c8e683ee38f8d2112a7de5e05\n"
]
}
],
"execution_count": 1
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+271 -155
View File
@@ -10,14 +10,25 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1,
"id": "initial_id", "id": "initial_id",
"metadata": { "metadata": {
"collapsed": true,
"ExecuteTime": { "ExecuteTime": {
"end_time": "2025-10-29T02:25:08.582541Z", "end_time": "2025-10-29T02:25:08.582541Z",
"start_time": "2025-10-29T02:25:08.473381Z" "start_time": "2025-10-29T02:25:08.473381Z"
} },
"collapsed": true
}, },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PROJECT_ROOT = d:\\Idea Project\\intelligence_system\n",
"\u001b[32m2025-10-30 13:57:07\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m任务调度器已初始化,最大工作线程数: 5\u001b[0m\n"
]
}
],
"source": [ "source": [
"# 使 Notebook 可从项目根导入\n", "# 使 Notebook 可从项目根导入\n",
"import sys\n", "import sys\n",
@@ -206,18 +217,7 @@
" except Exception:\n", " except Exception:\n",
" pass\n", " pass\n",
" return str(dt)" " return str(dt)"
], ]
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PROJECT_ROOT = D:\\Idea Project\\intelligence_system\n",
"\u001B[32m2025-10-29 10:25:08\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mtask_scheduler\u001B[0m - \u001B[1m任务调度器已初始化,最大工作线程数: 5\u001B[0m\n"
]
}
],
"execution_count": 8
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@@ -242,7 +242,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\u001B[32m2025-10-29 09:54:09\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n" "\u001b[32m2025-10-29 09:54:09\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n"
] ]
}, },
{ {
@@ -469,6 +469,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10,
"id": "eab90de72c35429e", "id": "eab90de72c35429e",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -476,6 +477,62 @@
"start_time": "2025-10-29T02:26:12.648420Z" "start_time": "2025-10-29T02:26:12.648420Z"
} }
}, },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2025-10-29 10:26:12\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n"
]
},
{
"data": {
"text/markdown": [
"### 任务详情\n",
"**任务ID**: 1\n",
"**任务名称**: RSS新闻订阅\n",
"**任务类型**: collector\n",
"**模块路径**: processors.processor_rss_data.RSSDataProcessor\n",
"**Cron表达式**: */5 * * * *\n",
"**时区**: Asia/Shanghai\n",
"**最后运行时间**: 2025-10-28 13:35:09\n",
"**下次运行时间**: 2025-10-29 10:25:00\n",
"**运行状态**: success\n",
"**是否活跃**: 是\n",
"**运行次数**: 496\n",
"**创建时间**: 2025-10-16 15:47:34"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'task_id': 1,\n",
" 'task_name': 'RSS新闻订阅',\n",
" 'task_type': 'collector',\n",
" 'module_path': 'processors.processor_rss_data.RSSDataProcessor',\n",
" 'cron_expression': '*/5 * * * *',\n",
" 'time_zone': 'Asia/Shanghai',\n",
" 'next_run_time': Timestamp('2025-10-29 10:25:00'),\n",
" 'last_run_time': Timestamp('2025-10-28 13:35:09'),\n",
" 'last_run_status': 'success',\n",
" 'run_count': 496,\n",
" 'is_active': 1,\n",
" 'is_running': 0,\n",
" 'created_at': Timestamp('2025-10-16 15:47:34'),\n",
" 'updated_at': Timestamp('2025-10-29 10:24:49')}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# 查看指定任务的详情\n", "# 查看指定任务的详情\n",
"def show_task_details(task_id):\n", "def show_task_details(task_id):\n",
@@ -503,53 +560,7 @@
"\n", "\n",
"# 执行:查看任务ID为1的详情(替换为实际ID)\n", "# 执行:查看任务ID为1的详情(替换为实际ID)\n",
"show_task_details(1)" "show_task_details(1)"
], ]
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001B[32m2025-10-29 10:26:12\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n"
]
},
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "### 任务详情\n**任务ID**: 1\n**任务名称**: RSS新闻订阅\n**任务类型**: collector\n**模块路径**: processors.processor_rss_data.RSSDataProcessor\n**Cron表达式**: */5 * * * *\n**时区**: Asia/Shanghai\n**最后运行时间**: 2025-10-28 13:35:09\n**下次运行时间**: 2025-10-29 10:25:00\n**运行状态**: success\n**是否活跃**: 是\n**运行次数**: 496\n**创建时间**: 2025-10-16 15:47:34"
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"data": {
"text/plain": [
"{'task_id': 1,\n",
" 'task_name': 'RSS新闻订阅',\n",
" 'task_type': 'collector',\n",
" 'module_path': 'processors.processor_rss_data.RSSDataProcessor',\n",
" 'cron_expression': '*/5 * * * *',\n",
" 'time_zone': 'Asia/Shanghai',\n",
" 'next_run_time': Timestamp('2025-10-29 10:25:00'),\n",
" 'last_run_time': Timestamp('2025-10-28 13:35:09'),\n",
" 'last_run_status': 'success',\n",
" 'run_count': 496,\n",
" 'is_active': 1,\n",
" 'is_running': 0,\n",
" 'created_at': Timestamp('2025-10-16 15:47:34'),\n",
" 'updated_at': Timestamp('2025-10-29 10:24:49')}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 10
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@@ -569,8 +580,8 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\u001B[32m2025-10-29 09:56:52\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n", "\u001b[32m2025-10-29 09:56:52\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n",
"\u001B[32m2025-10-29 09:56:52\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mtask_scheduler\u001B[0m - \u001B[1m新任务添加成功\u001B[0m\n" "\u001b[32m2025-10-29 09:56:52\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m新任务添加成功\u001b[0m\n"
] ]
}, },
{ {
@@ -646,6 +657,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21,
"id": "c892fd8ad2f0dd9d", "id": "c892fd8ad2f0dd9d",
"metadata": { "metadata": {
"ExecuteTime": { "ExecuteTime": {
@@ -653,6 +665,61 @@
"start_time": "2025-10-29T02:29:55.754298Z" "start_time": "2025-10-29T02:29:55.754298Z"
} }
}, },
"outputs": [
{
"data": {
"text/markdown": [
"### 任务ID 2 更新成功"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2025-10-29 10:29:56\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n"
]
},
{
"data": {
"text/markdown": [
"### 任务详情\n",
"**任务ID**: 2\n",
"**任务名称**: RSS基于规则数据处理\n",
"**任务类型**: processor\n",
"**模块路径**: processors.processor_rss_data\n",
"**Cron表达式**: 0 8,20 * * *\n",
"**时区**: Asia/Shanghai\n",
"**最后运行时间**: 2025-10-28 13:34:49\n",
"**下次运行时间**: 2025-10-28 20:00:00\n",
"**运行状态**: success\n",
"**是否活跃**: 是\n",
"**运行次数**: 10\n",
"**创建时间**: 2025-10-22 16:06:42"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# 更新任务属性\n", "# 更新任务属性\n",
"def update_task(task_id, **kwargs):\n", "def update_task(task_id, **kwargs):\n",
@@ -685,53 +752,7 @@
"\n", "\n",
"# 执行:同时更新多个属性(名称和Cron表达式)\n", "# 执行:同时更新多个属性(名称和Cron表达式)\n",
"# update_task(1, name=\"每日早间新闻采集\", cron=\"0 8 * * *\")" "# update_task(1, name=\"每日早间新闻采集\", cron=\"0 8 * * *\")"
], ]
"outputs": [
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "### 任务ID 2 更新成功"
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001B[32m2025-10-29 10:29:56\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n"
]
},
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "### 任务详情\n**任务ID**: 2\n**任务名称**: RSS基于规则数据处理\n**任务类型**: processor\n**模块路径**: processors.processor_rss_data\n**Cron表达式**: 0 8,20 * * *\n**时区**: Asia/Shanghai\n**最后运行时间**: 2025-10-28 13:34:49\n**下次运行时间**: 2025-10-28 20:00:00\n**运行状态**: success\n**是否活跃**: 是\n**运行次数**: 10\n**创建时间**: 2025-10-22 16:06:42"
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 21
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@@ -800,15 +821,149 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3,
"id": "94892f4134316f8e", "id": "94892f4134316f8e",
"metadata": { "metadata": {
"jupyter": {
"is_executing": true
},
"ExecuteTime": { "ExecuteTime": {
"start_time": "2025-10-29T02:30:10.298891Z" "start_time": "2025-10-29T02:30:10.298891Z"
},
"jupyter": {
"is_executing": true
} }
}, },
"outputs": [
{
"data": {
"text/markdown": [
"### 开始执行任务ID 2"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"---"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2025-10-30 13:57:49\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:49\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1mRSS数据处理器初始化完成\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:49\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m开始处理RSS数据...\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载 6 条未处理的RSS数据\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载停用词表,共 98 个词\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载汽车后市场关键词,共 37 个\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Building prefix dict from the default dictionary ...\n",
"Loading model from cache C:\\Users\\zy187\\AppData\\Local\\Temp\\jieba.cache\n",
"Loading model cost 0.839 seconds.\n",
"Prefix dict has been built successfully.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m数据处理完成,共处理 6 条记录\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m过滤出 0 条汽车后市场相关新闻\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:51\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功标记 6 条数据为已处理\u001b[0m\n",
"\u001b[32m2025-10-30 13:57:51\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1mRSS数据处理完成\u001b[0m\n"
]
},
{
"data": {
"text/markdown": [
"**任务名称**: RSS基于规则数据处理"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"**任务ID**: 2"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"**执行时长**: 4.41 秒"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"---"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/markdown": [
"### ✅ 任务执行成功"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'success': True,\n",
" 'task_name': 'RSS基于规则数据处理',\n",
" 'task_id': 2,\n",
" 'execution_time': 4.414557695388794,\n",
" 'output': '',\n",
" 'error': None}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"# 手动执行任务(异步方式,快速返回)\n", "# 手动执行任务(异步方式,快速返回)\n",
"def run_task_manually(task_id):\n", "def run_task_manually(task_id):\n",
@@ -854,47 +1009,8 @@
" return result\n", " return result\n",
"\n", "\n",
"# 执行:手动运行任务ID为2的任务(显示详细执行过程)\n", "# 执行:手动运行任务ID为2的任务(显示详细执行过程)\n",
"run_task_with_details(3)" "run_task_with_details(2)"
], ]
"outputs": [
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "### 开始执行任务ID 3"
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"data": {
"text/plain": [
"<IPython.core.display.Markdown object>"
],
"text/markdown": "---"
},
"metadata": {},
"output_type": "display_data",
"jetTransient": {
"display_id": null
}
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001B[32m2025-10-29 10:30:10\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n",
"\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1mRSS数据AI处理器初始化完成\u001B[0m\n",
"\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1m开始批量处理数据,批次大小: 200, 延迟: 1.5秒\u001B[0m\n",
"\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1m成功加载 3 条未处理的数据\u001B[0m\n"
]
}
],
"execution_count": null
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
+52
View File
@@ -0,0 +1,52 @@
import requests
from typing import Optional
class DingAPI():
def __init__(self):
self.token = None
self.url = ''
def get_token(self) -> Optional[str]:
    """
    Request an access token from the DingTalk OAuth2 token endpoint.

    The client id and secret are read from the ``DINGTALK_CLIENT_ID`` and
    ``DINGTALK_CLIENT_SECRET`` environment variables when they are set.
    When they are not set, the values that used to be hard-coded here are
    used, so existing deployments keep working.

    Returns:
        The ``access_token`` field of the JSON response, or ``None`` when
        the response carries no such field. The same value is stored on
        ``self.token``.

    Raises:
        Whatever ``requests.post`` or ``response.json()`` raise (connection
        errors, timeouts, non-JSON bodies) propagates unchanged.
    """
    import os  # local import so this method does not depend on file-level imports

    url = 'https://api.dingtalk.com/v1.0/oauth2/dinga88e3d35525b86ca/token'
    # SECURITY: credentials should not live in source control. The literal
    # fallbacks below are kept only for backward compatibility; rotate them
    # and supply the values through the environment instead.
    payload = {
        "client_id": os.environ.get("DINGTALK_CLIENT_ID", "dingn3de1pyuwkymohhe"),
        "client_secret": os.environ.get(
            "DINGTALK_CLIENT_SECRET",
            "qv__egWJnLVXh14_R1rfD_vBi7M8Gzhnk94EJN6puMzsqqpBCP8U7Ow-zA7SV8Rx",
        ),
        "grant_type": "client_credentials"
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.post(url, json=payload, timeout=10)
    token = response.json().get('access_token')
    # Remember the token on the instance (the attribute is prepared in __init__).
    self.token = token
    return token
def card_create(self, data):
    """
    Create a card instance and deliver it through the DingTalk card API.

    Args:
        data: Mapping that must contain a ``"token"`` key holding the
            access token. Only that key is read from it.

    Returns:
        The decoded JSON body of the HTTP response (``response.json()``).

    Raises:
        KeyError: if ``data`` has no ``"token"`` key.
        Whatever ``requests.post`` or ``response.json()`` raise propagates
        unchanged.
    """
    url = 'https://api.dingtalk.com/v1.0/card/instances/createAndDeliver'
    headers = {
        'x-acs-dingtalk-access-token': data["token"],
        'Content-Type': 'application/json'
    }
    # The request body gets its own name; it used to overwrite the `data`
    # parameter, which hid the caller's input from the rest of the method.
    # NOTE(review): outTrackId and cardData are sent empty -- presumably
    # placeholders still to be filled in; confirm against the API contract.
    payload = {
        "cardTemplateId": "cee2715f-001d-41cb-8fcd-3be18be9fbf5.schema",
        "outTrackId": "",
        "cardData": "",
        "openSpaceId": "dtv1.card//IM_GROUP.4210192048793363",  # open space id
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.post(url, json=payload, headers=headers, timeout=10)
    return response.json()
def get_
+114
View File
@@ -153,6 +153,12 @@ class MySQLAgent:
""" """
兼容旧接口的通用插入方法:保留replace参数,同时支持新的ignore_duplicates 兼容旧接口的通用插入方法:保留replace参数,同时支持新的ignore_duplicates
自动处理重复数据,对所有数据源通用,插入失败的数据会通过日志记录 自动处理重复数据,对所有数据源通用,插入失败的数据会通过日志记录
安全性说明:
- 使用 INSERT INTO(不是 REPLACE INTO 或 INSERT ... ON DUPLICATE KEY UPDATE
- 当 ignore_duplicates=True 时,重复记录会被跳过,不会覆盖或删除现有数据
- 如果数据库连接失败,操作会抛出异常,不会部分成功
- 所有操作都是安全的,不会导致数据丢失或覆盖
""" """
# 【兼容性处理】如果未指定ignore_duplicates,用replace参数推导 # 【兼容性处理】如果未指定ignore_duplicates,用replace参数推导
if ignore_duplicates is None: if ignore_duplicates is None:
@@ -592,6 +598,114 @@ class MySQLAgent:
exc_info=True) exc_info=True)
return False return False
def create_table_if_not_exists(self, table_name: str, create_sql: str) -> bool:
    """
    Execute a CREATE TABLE statement that is expected to be idempotent.

    Args:
        table_name: Name of the table; used only in log messages.
        create_sql: The complete CREATE TABLE statement. A warning is
            logged when it does not contain ``IF NOT EXISTS``
            (case-insensitive), but the statement is executed either way.

    Returns:
        ``True`` once ``execute_sql`` has returned without raising.

    Raises:
        Exception: any error from ``execute_sql`` is logged and re-raised.
    """
    has_guard = "IF NOT EXISTS" in create_sql.upper()
    if not has_guard:
        self.log.warning("CREATE TABLE 语句建议使用 IF NOT EXISTS 以保证安全性")

    try:
        self.execute_sql(create_sql)
        self.log.info(f"成功创建/检查表(表已存在时不会删除数据): {table_name}")
        return True
    except Exception as exc:
        # Log with the table name attached, then let the caller see the failure.
        self.log.error(
            f"创建/检查表失败(可能是数据库连接问题): {exc!s}",
            table=table_name,
            exc_info=True,
        )
        raise
def add_unique_index_if_not_exists(self, table_name: str, index_name: str,
                                   column_name: str, column_length: int = 500,
                                   check_duplicates: bool = True) -> bool:
    """
    Add a unique index on one column unless an index of that name exists.

    No rows are modified or deleted; the only statement that changes the
    table is ``ALTER TABLE ... ADD UNIQUE KEY``.

    Args:
        table_name: Table to alter.
        index_name: Name of the index to look up and, if missing, create.
        column_name: Column to index. It may be passed bare or already
            wrapped in backticks.
        column_length: Prefix length for the index key. A value of 0 (or
            any non-positive value) indexes the whole column.
        check_duplicates: When true, look for values that would violate
            the index before trying to add it.

    Returns:
        ``True`` if the index already exists or was added. ``False`` if
        the duplicate check found conflicting rows and the index was
        therefore not added.

    Raises:
        Exception: any error from the queries or the ALTER statement is
            re-raised, except "index already exists" style errors, which
            are treated as success.
    """
    def quote_identifier(identifier: str) -> str:
        # Backtick-quote an identifier, doubling embedded backticks.
        # Names that arrive already quoted are unwrapped first so they are
        # not quoted twice.
        name = str(identifier).strip()
        if len(name) >= 2 and name[0] == "`" and name[-1] == "`":
            name = name[1:-1].replace("``", "`")
        return "`" + name.replace("`", "``") + "`"

    table_sql = quote_identifier(table_name)
    index_sql = quote_identifier(index_name)
    column_sql = quote_identifier(column_name)

    # Coerce to int so nothing but a number can reach the SQL text.
    prefix_length = int(column_length) if column_length else 0
    if prefix_length > 0:
        key_part = f"{column_sql}({prefix_length})"
        # The index only sees the first N characters, so duplicates must be
        # detected on the same prefix.
        indexed_value = f"LEFT({column_sql}, {prefix_length})"
    else:
        key_part = column_sql
        indexed_value = column_sql

    try:
        # 1. Does an index with this name already exist on the table?
        check_index_sql = """
        SELECT COUNT(*) as cnt
        FROM INFORMATION_SCHEMA.STATISTICS
        WHERE TABLE_SCHEMA = %s
        AND TABLE_NAME = %s
        AND INDEX_NAME = %s
        """
        result = self.query_to_df(
            check_index_sql,
            params=(self.config['database'], table_name, index_name),
            is_print=False
        )
        if not result.empty and result['cnt'].iloc[0] > 0:
            self.log.debug(f"唯一索引 {index_name} 已存在,跳过添加")
            return True

        # 2. Optionally look for rows that would violate the index.
        #    Only NULLs are excluded: a unique index allows many NULLs but
        #    rejects repeated empty strings just like any other value.
        if check_duplicates:
            check_duplicates_sql = f"""
            SELECT {indexed_value} AS dup_value, COUNT(*) AS cnt
            FROM {table_sql}
            WHERE {column_sql} IS NOT NULL
            GROUP BY {indexed_value}
            HAVING COUNT(*) > 1
            LIMIT 1
            """
            duplicates = self.query_to_df(check_duplicates_sql, is_print=False)
            if not duplicates.empty:
                self.log.warning(
                    f"{table_name} 中存在重复的 {column_name} 数据,无法添加唯一索引。"
                    "现有数据不会被删除。",
                    duplicate_count=len(duplicates)
                )
                return False

        # 3. Add the unique index.
        add_index_sql = f"""
        ALTER TABLE {table_sql}
        ADD UNIQUE KEY {index_sql} ({key_part})
        """
        self.execute_sql(add_index_sql)
        self.log.info(f"成功添加唯一索引 {index_name}(现有数据不受影响)")
        return True
    except Exception as e:
        error_msg = str(e)
        # An index created concurrently (or missed by step 1) is not an error.
        if "Duplicate key name" in error_msg or "already exists" in error_msg.lower():
            self.log.debug(f"唯一索引 {index_name} 已存在,跳过添加")
            return True
        else:
            self.log.warning(f"添加唯一索引时出现问题(不影响现有数据): {error_msg}")
            raise
def execute_sql(self, sql: str, params: Union[tuple, dict, None] = None, def execute_sql(self, sql: str, params: Union[tuple, dict, None] = None,
fetch: bool = False) -> Union[int, List[Dict[str, Any]]]: fetch: bool = False) -> Union[int, List[Dict[str, Any]]]:
"""执行SQL语句(原有逻辑完全保留)""" """执行SQL语句(原有逻辑完全保留)"""