diff --git a/applications/reporter/README.md b/applications/reporter/README.md deleted file mode 100644 index 962bcdf..0000000 --- a/applications/reporter/README.md +++ /dev/null @@ -1,254 +0,0 @@ -# 报告生成器使用说明 - -## 功能概述 - -本模块提供了日报和周报生成功能,主要特点: - -1. **AI智能筛选**:从AI分析结果表获取已筛选的相关内容(是否相关=1) -2. **多格式输出**:同时生成HTML和Markdown格式的报告 -3. **钉钉推送**:支持自动推送到钉钉群 -4. **可扩展数据源**:支持添加多个数据源(RSS、投诉、API等) -5. **灵活模板系统**:支持内置HTML模板和外部HTML模板 - -## 快速开始 - -### 生成日报(24小时内数据) - -```python -from applications.reporter.daily import DailyReporter - -reporter = DailyReporter() -result = reporter.generate() -print(f"日报已生成:") -print(f" HTML: {result.get('html_path')}") -print(f" Markdown: {result.get('markdown_path')}") -``` - -### 生成周报(7天内数据) - -```python -from applications.reporter.weekly import WeeklyReporter - -reporter = WeeklyReporter() -result = reporter.generate() -print(f"周报已生成:") -print(f" HTML: {result.get('html_path')}") -print(f" Markdown: {result.get('markdown_path')}") -``` - -## 钉钉推送配置 - -### 1. 获取钉钉Webhook地址 - -1. 在钉钉群中,点击"群设置" -> "智能群助手" -> "添加机器人" -2. 选择"自定义"机器人 -3. 设置机器人名称和头像 -4. 复制Webhook地址(格式:`https://oapi.dingtalk.com/robot/send?access_token=xxx`) - -### 2. 配置Webhook地址 - -**方式1:通过环境变量(推荐)** - -```bash -export DINGTALK_WEBHOOK="https://oapi.dingtalk.com/robot/send?access_token=xxx" -``` - -**方式2:在config.py中配置** - -```python -DINGTALK_WEBHOOK = "https://oapi.dingtalk.com/robot/send?access_token=xxx" -``` - -**方式3:在代码中指定** - -```python -from applications.reporter.daily import DailyReporter - -reporter = DailyReporter(dingtalk_webhook="https://oapi.dingtalk.com/robot/send?access_token=xxx") -result = reporter.generate() -``` - -### 3. 控制推送行为 - -```python -# 生成报告但不推送到钉钉 -reporter = DailyReporter() -result = reporter.generate(send_dingtalk=False) - -# 不保存Markdown文件 -result = reporter.generate(save_markdown=False) - -# 同时控制 -result = reporter.generate(save_markdown=True, send_dingtalk=True) -``` - -### 4. 钉钉消息格式 - -- 自动使用Markdown格式发送 -- 如果内容过长(超过5000字符),会自动截断并显示摘要 -- 包含报告文件路径提示 - -## 添加自定义数据源 - -### 1. 创建数据源类 - -数据源类需要继承 `DataSource` 基类并实现以下方法: - -```python -from applications.reporter.base_reporter import DataSource -from typing import List, Dict, Any -from datetime import datetime - -class MyCustomDataSource(DataSource): - def fetch_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]: - """获取指定时间范围内的数据""" - # 返回格式: - return [ - { - 'title': '标题', - 'link': '链接', - 'summary': '摘要', - 'publish_time': '发布时间', - 'source_url': '来源URL' - } - ] - - def get_source_name(self) -> str: - return "数据源名称" -``` - -### 2. 添加到报告生成器 - -```python -from applications.reporter.daily import DailyReporter -from my_module import MyCustomDataSource - -reporter = DailyReporter() -custom_source = MyCustomDataSource(...) -reporter.add_data_source(custom_source) - -# 生成报告(会自动包含新数据源的数据) -report_path = reporter.generate() -``` - -## 使用外部HTML模板 - -### 1. 创建HTML模板文件 - -创建外部HTML模板文件(如 `custom_template.html`): - -```html - - - - - 自定义报告模板 - - - - - --> - {{content}} - - -``` - -### 2. 使用外部模板生成报告 - -```python -from applications.reporter.daily import DailyReporter - -reporter = DailyReporter() -report_path = reporter.generate(template_path="path/to/custom_template.html") -``` - -## 配置说明 - -### AI配置 - -在 `config.py` 中配置百度AI API: - -```python -BAIDU_AI_CONFIG = { - 'api_key': 'your_api_key', - 'model': 'ernie-x1-turbo-32k', -} -``` - -### 数据库配置 - -确保 `config.py` 中的数据库配置正确: - -```python -MYSQL_CONFIG = { - 'host': 'your_host', - 'port': 3306, - 'user': 'your_user', - 'password': 'your_password', - 'database': 'intelligence_system', -} -``` - -## 输出目录 - -- 日报:`output/reports/daily/` -- 周报:`output/reports/weekly/` - -报告文件名格式: -- HTML:`daily_report_YYYYMMDD_HHMMSS.html` / `weekly_report_YYYYMMDD_HHMMSS.html` -- Markdown:`daily_report_YYYYMMDD_HHMMSS.md` / `weekly_report_YYYYMMDD_HHMMSS.md` - -## 报告内容 - -生成的报告包含: - -1. **报告时间信息**:生成时间和时间范围 -2. **数据统计**:相关文章数 -3. **相关新闻列表**(从AI分析结果表筛选,是否相关=1): - - 标题 - - 分类 - - 标签 - - 摘要 - - 链接 - - 发布时间 - - 相关度评分 - - 分析说明 - -如果没有相关数据,会显示: -- 日报:`昨日无汽车后市场相关的新闻` -- 周报:`上周无汽车后市场相关的新闻` - -## AI筛选说明 - -AI会根据以下定义筛选汽车后市场相关内容: - -- 汽车维修保养 -- 汽车配件 -- 汽车改装 -- 汽车美容 -- 汽车用品 -- 汽车金融 -- 汽车保险 -- 二手车交易 -- 汽车租赁 -- 汽车检测 -- 汽车报废回收 -- 汽车相关法律法规和政策 - -## 扩展示例 - -参考 `data_source_example.py` 查看如何: -- 添加数据库数据源 -- 添加外部API数据源 -- 实现自定义数据源 - -## 注意事项 - -1. 确保数据库连接正常 -2. 确保AI API配置正确且有足够配额 -3. 外部模板文件需要包含内容占位符 -4. 数据源返回的数据格式需要符合规范 - diff --git a/applications/reporter/data_source_example.py b/applications/reporter/data_source_example.py deleted file mode 100644 index 1b093ce..0000000 --- a/applications/reporter/data_source_example.py +++ /dev/null @@ -1,135 +0,0 @@ -""" -数据源扩展示例 -演示如何添加新的数据源 -""" -from applications.reporter.base_reporter import DataSource -from typing import List, Dict, Any -from datetime import datetime -from loguru import logger - - -class ComplaintDataSource(DataSource): - """投诉数据源示例(可根据实际情况实现)""" - - def __init__(self, db_agent, table_name: str = "complaint_data"): - """ - Args: - db_agent: MySQLAgent实例 - table_name: 数据表名 - """ - self.db_agent = db_agent - self.table_name = table_name - self.logger = logger.bind(module="ComplaintDataSource") - - def fetch_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]: - """从投诉数据表获取数据""" - try: - sql = f""" - SELECT - `标题` as title, - `链接` as link, - `内容` as summary, - `发布时间` as publish_time, - `来源` as source_url - FROM `{self.table_name}` - WHERE `发布时间` >= %s AND `发布时间` < %s - ORDER BY `发布时间` DESC - """ - - params = ( - start_time.strftime('%Y-%m-%d %H:%M:%S'), - end_time.strftime('%Y-%m-%d %H:%M:%S') - ) - - df = self.db_agent.query_to_df(sql, params=params, is_print=False) - - if df.empty: - self.logger.info(f"时间范围 {start_time} 到 {end_time} 内没有投诉数据") - return [] - - data_list = df.to_dict('records') - self.logger.info(f"获取到 {len(data_list)} 条投诉数据") - return data_list - - except Exception as e: - self.logger.error(f"获取投诉数据失败: {str(e)}", exc_info=True) - return [] - - def get_source_name(self) -> str: - return "投诉数据" - - -class CustomAPIDataSource(DataSource): - """外部API数据源示例""" - - def __init__(self, api_url: str, api_key: str = None): - """ - Args: - api_url: API地址 - api_key: API密钥(如果需要) - """ - self.api_url = api_url - self.api_key = api_key - self.logger = logger.bind(module="CustomAPIDataSource") - - def fetch_data(self, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]: - """从外部API获取数据""" - import requests - - try: - headers = {} - if self.api_key: - headers['Authorization'] = f'Bearer {self.api_key}' - - params = { - 'start_time': start_time.isoformat(), - 'end_time': end_time.isoformat() - } - - response = requests.get(self.api_url, headers=headers, params=params, timeout=30) - response.raise_for_status() - - data = response.json() - - # 将API返回的数据转换为标准格式 - articles = [] - for item in data.get('articles', []): - articles.append({ - 'title': item.get('title', ''), - 'link': item.get('url', ''), - 'summary': item.get('description', ''), - 'publish_time': item.get('published_at', ''), - 'source_url': self.api_url - }) - - self.logger.info(f"从API获取到 {len(articles)} 条数据") - return articles - - except Exception as e: - self.logger.error(f"从API获取数据失败: {str(e)}", exc_info=True) - return [] - - def get_source_name(self) -> str: - return "外部API" - - -# 使用示例: -""" -from applications.reporter.daily import DailyReporter -from applications.reporter.data_source_example import ComplaintDataSource -from utils.mysql_agent import MySQLAgent -from config import Config - -# 创建日报生成器 -reporter = DailyReporter() - -# 添加投诉数据源 -db_agent = MySQLAgent(Config.MYSQL_CONFIG) -complaint_source = ComplaintDataSource(db_agent, table_name="complaint_data") -reporter.add_data_source(complaint_source) - -# 生成报告 -report_path = reporter.generate() -print(f"报告已生成: {report_path}") -""" - diff --git a/applications/reporter/dingtalk_example.py b/applications/reporter/dingtalk_example.py deleted file mode 100644 index 57f3fd5..0000000 --- a/applications/reporter/dingtalk_example.py +++ /dev/null @@ -1,149 +0,0 @@ -""" -钉钉推送使用示例 -演示如何配置和使用钉钉推送功能 -""" -import os -import sys - -# 添加父目录到路径 -current_dir = os.path.dirname(os.path.abspath(__file__)) -parent_dir = os.path.dirname(os.path.dirname(current_dir)) -if parent_dir not in sys.path: - sys.path.insert(0, parent_dir) - -from applications.reporter.daily import DailyReporter -from applications.reporter.weekly import WeeklyReporter -from applications.reporter.dingtalk_webhook import DingTalkWebhook - - -def example_with_config(): - """示例1:通过config.py配置""" - print("=" * 50) - print("示例1:使用config.py中的配置") - print("=" * 50) - - # 需要在config.py中设置 DINGTALK_WEBHOOK - reporter = DailyReporter() - result = reporter.generate() - print(f"✅ 报告已生成并推送\n") - - -def example_with_env_var(): - """示例2:通过环境变量配置""" - print("=" * 50) - print("示例2:使用环境变量配置") - print("=" * 50) - - # 设置环境变量 - webhook_url = os.getenv('DINGTALK_WEBHOOK', '') - if webhook_url: - reporter = DailyReporter(dingtalk_webhook=webhook_url) - result = reporter.generate() - print(f"✅ 报告已生成并推送\n") - else: - print("⚠️ 未设置环境变量 DINGTALK_WEBHOOK\n") - - -def example_with_direct_url(): - """示例3:直接指定Webhook地址""" - print("=" * 50) - print("示例3:直接指定Webhook地址") - print("=" * 50) - - # 直接指定webhook地址(请替换为实际的webhook地址) - webhook_url = "https://oapi.dingtalk.com/robot/send?access_token=YOUR_ACCESS_TOKEN" - - if webhook_url != "https://oapi.dingtalk.com/robot/send?access_token=YOUR_ACCESS_TOKEN": - reporter = DailyReporter(dingtalk_webhook=webhook_url) - result = reporter.generate() - print(f"✅ 报告已生成并推送\n") - else: - print("⚠️ 请先设置实际的webhook地址\n") - - -def example_without_push(): - """示例4:生成报告但不推送""" - print("=" * 50) - print("示例4:生成报告但不推送到钉钉") - print("=" * 50) - - reporter = DailyReporter() - result = reporter.generate(send_dingtalk=False) - print(f"✅ 报告已生成(未推送)\n") - - -def example_weekly_report(): - """示例5:生成周报并推送""" - print("=" * 50) - print("示例5:生成周报并推送") - print("=" * 50) - - reporter = WeeklyReporter() - result = reporter.generate() - print(f"✅ 周报已生成并推送\n") - - -def example_test_webhook(): - """示例6:测试钉钉Webhook连接""" - print("=" * 50) - print("示例6:测试钉钉Webhook连接") - print("=" * 50) - - webhook_url = input("请输入钉钉Webhook地址(直接回车跳过): ").strip() - if not webhook_url: - print("⚠️ 未输入Webhook地址,跳过测试\n") - return - - client = DingTalkWebhook(webhook_url) - - # 发送测试消息 - success = client.send_text("这是一条测试消息", at_all=False) - - if success: - print("✅ 测试消息发送成功,Webhook配置正确\n") - else: - print("❌ 测试消息发送失败,请检查Webhook地址是否正确\n") - - -def main(): - """主函数""" - print("\n" + "=" * 50) - print("钉钉推送功能使用示例") - print("=" * 50 + "\n") - - print("请选择要运行的示例:") - print("1. 通过config.py配置(需要先在config.py中设置DINGTALK_WEBHOOK)") - print("2. 通过环境变量配置") - print("3. 直接指定Webhook地址") - print("4. 生成报告但不推送") - print("5. 生成周报并推送") - print("6. 测试钉钉Webhook连接") - print("0. 退出") - - choice = input("\n请输入选项(0-6): ").strip() - - if choice == "1": - example_with_config() - elif choice == "2": - example_with_env_var() - elif choice == "3": - example_with_direct_url() - elif choice == "4": - example_without_push() - elif choice == "5": - example_weekly_report() - elif choice == "6": - example_test_webhook() - elif choice == "0": - print("退出") - else: - print("无效选项") - - print("=" * 50) - print("示例运行完成!") - print("=" * 50 + "\n") - - -if __name__ == "__main__": - main() - diff --git a/applications/reporter/dingtalk_webhook.py b/applications/reporter/dingtalk_webhook.py deleted file mode 100644 index 80fb9db..0000000 --- a/applications/reporter/dingtalk_webhook.py +++ /dev/null @@ -1,236 +0,0 @@ -""" -钉钉Webhook推送工具 -支持推送Markdown格式消息到钉钉群 -""" -import requests -import json -from typing import Optional, Dict, Any -from loguru import logger - - -class DingTalkWebhook: - """钉钉Webhook推送工具""" - - def __init__(self, webhook_url: str): - """ - 初始化钉钉Webhook - - Args: - webhook_url: 钉钉机器人Webhook地址 - """ - self.webhook_url = webhook_url - self.logger = logger.bind(module="DingTalkWebhook") - - def send_text(self, content: str, at_mobiles: list = None, at_all: bool = False) -> bool: - """ - 发送文本消息 - - Args: - content: 消息内容 - at_mobiles: 要@的手机号列表 - at_all: 是否@所有人 - - Returns: - 是否发送成功 - """ - data = { - "msgtype": "text", - "text": { - "content": content - } - } - - if at_mobiles or at_all: - data["at"] = {} - if at_mobiles: - data["at"]["atMobiles"] = at_mobiles - if at_all: - data["at"]["isAtAll"] = True - - return self._send(data) - - def send_markdown(self, title: str, text: str, at_mobiles: list = None, at_all: bool = False) -> bool: - """ - 发送Markdown消息 - - Args: - title: 消息标题 - text: Markdown内容(钉钉支持的格式) - at_mobiles: 要@的手机号列表 - at_all: 是否@所有人 - - Returns: - 是否发送成功 - """ - # 钉钉markdown消息有长度限制,需要截断 - max_length = 5000 - if len(text) > max_length: - text = text[:max_length - 100] + "\n\n...(内容已截断,完整内容请查看附件)" - self.logger.warning(f"Markdown内容过长,已截断至{max_length}字符") - - data = { - "msgtype": "markdown", - "markdown": { - "title": title, - "text": text - } - } - - if at_mobiles or at_all: - data["at"] = {} - if at_mobiles: - data["at"]["atMobiles"] = at_mobiles - if at_all: - data["at"]["isAtAll"] = True - - return self._send(data) - - def send_markdown_from_file(self, title: str, markdown_file: str, - max_length: int = 5000, at_mobiles: list = None, - at_all: bool = False) -> bool: - """ - 从Markdown文件发送消息 - - Args: - title: 消息标题 - markdown_file: Markdown文件路径 - max_length: 最大长度限制(默认5000字符) - at_mobiles: 要@的手机号列表 - at_all: 是否@所有人 - - Returns: - 是否发送成功 - """ - try: - with open(markdown_file, 'r', encoding='utf-8') as f: - content = f.read() - - # 转换为钉钉markdown格式(简化一些不支持的语法) - text = self._convert_to_dingtalk_markdown(content, max_length) - - return self.send_markdown(title, text, at_mobiles, at_all) - - except Exception as e: - self.logger.error(f"读取Markdown文件失败: {str(e)}", exc_info=True) - return False - - def _convert_to_dingtalk_markdown(self, content: str, max_length: int = 5000) -> str: - """ - 将标准Markdown转换为钉钉支持的格式 - - 钉钉Markdown支持的语法: - - 标题:# ## ### - - 加粗:**text** - - 链接:[text](url) - - 列表:- 或 1. - - 引用:> - - 代码:`code` - - 换行:两个换行符 - - 不支持: - - 表格(需要转换为文本) - - HTML标签 - - 复杂嵌套 - """ - # 如果内容太长,截断并添加提示 - if len(content) > max_length: - content = content[:max_length - 200] + "\n\n---\n\n**提示**: 内容已截断,完整内容请查看报告文件。" - - # 钉钉markdown基本兼容标准markdown,但需要清理一些不支持的语法 - # 保留基本格式即可 - text = content - - return text - - def _send(self, data: Dict[str, Any]) -> bool: - """ - 发送消息到钉钉 - - Args: - data: 消息数据 - - Returns: - 是否发送成功 - """ - try: - headers = { - 'Content-Type': 'application/json' - } - - response = requests.post( - self.webhook_url, - headers=headers, - data=json.dumps(data), - timeout=10 - ) - - response.raise_for_status() - result = response.json() - - if result.get('errcode') == 0: - self.logger.info("消息发送成功") - return True - else: - self.logger.error(f"消息发送失败: {result.get('errmsg', '未知错误')}") - return False - - except requests.exceptions.RequestException as e: - self.logger.error(f"发送消息请求失败: {str(e)}", exc_info=True) - return False - except Exception as e: - self.logger.error(f"发送消息失败: {str(e)}", exc_info=True) - return False - - def send_report(self, title: str, markdown_content: str, markdown_file: str = None) -> bool: - """ - 发送报告消息(优化版本,自动处理长内容) - - Args: - title: 消息标题 - markdown_content: Markdown内容 - markdown_file: Markdown文件路径(可选,用于提示) - - Returns: - 是否发送成功 - """ - # 钉钉markdown有长度限制,需要截断或分段 - max_length = 4500 # 留一些余量 - - if len(markdown_content) <= max_length: - # 内容不长,直接发送 - text = markdown_content - if markdown_file: - text += f"\n\n---\n\n**完整报告**: 已保存到 `{markdown_file}`" - return self.send_markdown(title, text) - else: - # 内容太长,发送摘要 - # 提取关键部分(标题、统计、前几条新闻) - lines = markdown_content.split('\n') - summary_lines = [] - news_count = 0 - max_news_items = 5 - - for line in lines: - summary_lines.append(line) - # 计算已添加的新闻条目数 - if line.startswith('### ') and news_count < max_news_items: - news_count += 1 - # 添加接下来的几行(摘要、链接等) - continue - elif news_count >= max_news_items and line.startswith('### '): - # 达到最大条目数,停止添加 - break - - summary = '\n'.join(summary_lines) - - # 如果还有更多内容,添加提示 - if len(markdown_content) > len(summary): - remaining_count = markdown_content.count('### ') - news_count - summary += f"\n\n---\n\n**提示**: 报告内容较长,已显示前{news_count}条新闻。" - if remaining_count > 0: - summary += f" 还有{remaining_count}条新闻未显示。" - if markdown_file: - summary += f"\n\n**完整报告**: 已保存到 `{markdown_file}`" - - return self.send_markdown(title, summary) - diff --git a/applications/reporter/example_usage.py b/applications/reporter/example_usage.py deleted file mode 100644 index c2f51c2..0000000 --- a/applications/reporter/example_usage.py +++ /dev/null @@ -1,136 +0,0 @@ -""" -报告生成器使用示例 -展示各种使用场景 -""" -import os -import sys - -# 添加父目录到路径 -current_dir = os.path.dirname(os.path.abspath(__file__)) -parent_dir = os.path.dirname(os.path.dirname(current_dir)) -if parent_dir not in sys.path: - sys.path.insert(0, parent_dir) - -from applications.reporter.daily import DailyReporter -from applications.reporter.weekly import WeeklyReporter - - -def example_daily_report(): - """示例1:生成简单日报""" - print("=" * 50) - print("示例1:生成日报(使用内置模板)") - print("=" * 50) - - reporter = DailyReporter() - report_path = reporter.generate() - print(f"✅ 日报已生成: {report_path}\n") - - -def example_weekly_report(): - """示例2:生成简单周报""" - print("=" * 50) - print("示例2:生成周报(使用内置模板)") - print("=" * 50) - - reporter = WeeklyReporter() - report_path = reporter.generate() - print(f"✅ 周报已生成: {report_path}\n") - - -def example_custom_template(): - """示例3:使用外部模板""" - print("=" * 50) - print("示例3:使用外部HTML模板生成日报") - print("=" * 50) - - # 获取模板路径(相对于当前文件) - template_path = os.path.join( - os.path.dirname(__file__), - 'templates', - 'custom_template_example.html' - ) - - if os.path.exists(template_path): - reporter = DailyReporter() - report_path = reporter.generate(template_path=template_path) - print(f"✅ 使用外部模板生成的日报: {report_path}\n") - else: - print(f"⚠️ 模板文件不存在: {template_path}\n") - - -def example_custom_output_dir(): - """示例4:指定输出目录""" - print("=" * 50) - print("示例4:指定自定义输出目录") - print("=" * 50) - - reporter = DailyReporter() - custom_dir = "output/reports/custom" - report_path = reporter.generate(output_dir=custom_dir) - print(f"✅ 报告已保存到自定义目录: {report_path}\n") - - -def example_add_data_source(): - """示例5:添加自定义数据源""" - print("=" * 50) - print("示例5:添加自定义数据源") - print("=" * 50) - - try: - from applications.reporter.data_source_example import ComplaintDataSource - from utils.mysql_agent import MySQLAgent - from config import Config - - reporter = DailyReporter() - - # 添加投诉数据源(如果数据库中有该表) - db_agent = MySQLAgent(Config.MYSQL_CONFIG) - complaint_source = ComplaintDataSource(db_agent, table_name="complaint_data") - reporter.add_data_source(complaint_source) - - report_path = reporter.generate() - print(f"✅ 包含自定义数据源的报告已生成: {report_path}\n") - except Exception as e: - print(f"⚠️ 添加自定义数据源失败(可能是表不存在): {str(e)}\n") - - -def main(): - """主函数""" - print("\n" + "=" * 50) - print("汽车后市场情报报告生成器 - 使用示例") - print("=" * 50 + "\n") - - # 运行各种示例 - try: - example_daily_report() - except Exception as e: - print(f"❌ 示例1失败: {str(e)}\n") - - try: - example_weekly_report() - except Exception as e: - print(f"❌ 示例2失败: {str(e)}\n") - - try: - example_custom_template() - except Exception as e: - print(f"❌ 示例3失败: {str(e)}\n") - - try: - example_custom_output_dir() - except Exception as e: - print(f"❌ 示例4失败: {str(e)}\n") - - try: - example_add_data_source() - except Exception as e: - print(f"❌ 示例5失败: {str(e)}\n") - - print("=" * 50) - print("所有示例运行完成!") - print("=" * 50 + "\n") - - -if __name__ == "__main__": - main() - diff --git a/applications/reporter/调用api.py b/applications/reporter/调用api.py deleted file mode 100644 index e6c0dc5..0000000 --- a/applications/reporter/调用api.py +++ /dev/null @@ -1,234 +0,0 @@ -from openai import OpenAI -import markdown -from bs4 import BeautifulSoup -import re -import json - - -def process_markdown_content(raw_content): - """智能处理各种可能的Markdown格式输入,包括图表语法""" - # 处理图表代码块(支持mermaid、vega-lite等) - chart_patterns = [ - r'```mermaid(.*?)```', - r'```vega-lite(.*?)```', - r'```chart(.*?)```' - ] - - # 保留原始图表代码块 - for pattern in chart_patterns: - raw_content = re.sub(pattern, lambda m: f'
{m.group(0)}
', raw_content, - flags=re.DOTALL) - - # 处理普通代码块 - code_block_patterns = [ - r'```markdown(.*?)```', - r'```(.*?)```', - r'~~~(.*?)~~~' - ] - - for pattern in code_block_patterns: - matches = re.findall(pattern, raw_content, re.DOTALL) - if matches: - return matches[0].strip() - - return raw_content.strip() - - -def enhance_html_structure(soup): - """增强HTML结构,特别处理图表""" - # 图表块特殊处理 - for pre in soup.find_all('pre', class_='chart-block'): - chart_type = 'mermaid' if 'mermaid' in pre.get_text() else 'vega-lite' if 'vega-lite' in pre.get_text() else 'chart' - pre['class'] = f'chart-container {chart_type}-container' - pre['data-chart-type'] = chart_type - - # 添加图表渲染占位符 - div = soup.new_tag('div', **{ - 'class': 'rendered-chart', - 'data-chart-spec': pre.get_text() - }) - pre.insert_after(div) - - # 表格增强 - for table in soup.find_all('table'): - table['class'] = 'data-table' - if not table.find('thead'): - first_row = table.find('tr') - if first_row: - first_row.name = 'thead' - for cell in first_row.find_all('td'): - cell.name = 'th' - - # 代码块增强 - for pre in soup.find_all('pre'): - if not any( - cls in pre.get('class', []) for cls in ['chart-container', 'mermaid-container', 'vega-lite-container']): - if not pre.find('code'): - code = soup.new_tag('code') - code.string = pre.get_text() - pre.clear() - pre.append(code) - pre['class'] = 'code-block' - - return soup - - -def generate_analysis_report(markdown_file): - """生成自适应分析报告的主函数""" - # 1. 读取Markdown文件 - with open(markdown_file, 'r', encoding='utf-8') as file: - input_content = file.read() - - # 2. 调用API - client = OpenAI( - base_url='https://qianfan.baidubce.com/v2', - api_key='bce-v3/ALTAK-X8C1AorvpdAI3ILPiRerh/4022de183e6b0a38e6b3baeb8af19e937f4a73d4' - ) - - response = client.chat.completions.create( - model="ernie-x1-turbo-32k", - messages=[{ - "role": "user", - "content": f"{input_content}\n\n请生成专业的数据分析报告,要求:\n" - "1. 使用规范的Markdown格式\n" - "2. 包含业务表现分析、产品结构洞察、优化实施方案\n" - "3. 增加门店维度的数据分析" - "4.每个分析下需要展示对应的明细数据" - }] - ) - - # 3. 处理API返回内容 - raw_content = response.choices[0].message.content - processed_md = process_markdown_content(raw_content) - - # 4. 转换为HTML并增强结构 - html_content = markdown.markdown(processed_md, extensions=['tables', 'fenced_code', 'codehilite']) - soup = BeautifulSoup(html_content, 'html.parser') - enhanced_soup = enhance_html_structure(soup) - - # 5. 生成完整HTML报告(包含图表渲染支持) - html_template = f""" - - - - - AI数据分析报告(含图表) - - - - - - - - - -
- {enhanced_soup} -
- - - -""" - - # 6. 保存报告 - output_file = 'chart_analysis_report.html' - with open(output_file, 'w', encoding='utf-8') as f: - f.write(html_template) - - return output_file - - -# 使用示例 -if __name__ == "__main__": - report_file = generate_analysis_report('analysis_report.md') - print(f"带图表的分析报告已生成: {report_file}") \ No newline at end of file diff --git a/deploy/start.bat b/deploy/start.bat index fa7d2af..14d288a 100644 --- a/deploy/start.bat +++ b/deploy/start.bat @@ -4,6 +4,7 @@ REM 功能: Python环境检测 + 系统启动 REM 作者: AI Assistant REM 版本: 1.0 REM 日期: 2025-10-29 +chcp 65001 >nul setlocal enabledelayedexpansion @@ -18,7 +19,7 @@ set "RESET=%ESC%[0m" REM 配置变量 set "CONDA_ENV_NAME=intelligence_env" set "PROJECT_PATH=%~dp0.." -set "PYTHON_VERSION=3.11" +set "PYTHON_VERSION=3.13" echo %CYAN%===============================================%RESET% echo %CYAN% 情报数据处理系统启动器%RESET% diff --git a/logs/application.log b/logs/application.log index 6cb59df..7da5ee9 100644 --- a/logs/application.log +++ b/logs/application.log @@ -133970,3 +133970,69 @@ → module: 'WeeklyReporter' 2025-10-29 17:36:38.235 | INFO | weekly:72 - 周报生成完成: output/reports/weekly\weekly_report_20251029_173637.html → module: 'WeeklyReporter' +2025-10-30 13:47:56.057 | INFO | task_scheduler:27 - 任务调度器已初始化,最大工作线程数: 5 + → module: 'TaskScheduler' +2025-10-30 13:48:20.079 | DEBUG | mysql_agent:116 - 执行SQL查询 + → module: 'MySQLAgent(Windows)' + → sql: 'SELECT * FROM main_task WHERE task_id = %s' +2025-10-30 13:48:20.346 | INFO | mysql_agent:134 - 查询执行成功 + → module: 'MySQLAgent(Windows)' + → 行数: 1 +2025-10-30 13:48:20.349 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理 + → module: 'TaskNotebook' +2025-10-30 13:56:55.700 | DEBUG | mysql_agent:116 - 执行SQL查询 + → module: 'MySQLAgent(Windows)' + → sql: 'SELECT * FROM main_task WHERE task_id = %s' +2025-10-30 13:56:55.939 | INFO | mysql_agent:134 - 查询执行成功 + → module: 'MySQLAgent(Windows)' + → 行数: 1 +2025-10-30 13:56:55.941 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理 + → module: 'TaskNotebook' +2025-10-30 13:57:07.496 | INFO | task_scheduler:27 - 任务调度器已初始化,最大工作线程数: 5 + → module: 'TaskScheduler' +2025-10-30 13:57:10.827 | DEBUG | mysql_agent:116 - 执行SQL查询 + → module: 'MySQLAgent(Windows)' + → sql: 'SELECT * FROM main_task WHERE task_id = %s' +2025-10-30 13:57:11.121 | INFO | mysql_agent:134 - 查询执行成功 + → module: 'MySQLAgent(Windows)' + → 行数: 1 +2025-10-30 13:57:11.125 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理 + → module: 'TaskNotebook' +2025-10-30 13:57:49.005 | DEBUG | mysql_agent:116 - 执行SQL查询 + → module: 'MySQLAgent(Windows)' + → sql: 'SELECT * FROM main_task WHERE task_id = %s' +2025-10-30 13:57:49.286 | INFO | mysql_agent:134 - 查询执行成功 + → module: 'MySQLAgent(Windows)' + → 行数: 1 +2025-10-30 13:57:49.737 | INFO | processor_rss_data:65 - RSS数据处理器初始化完成 + → module: 'RSSDataProcessor' +2025-10-30 13:57:49.738 | INFO | processor_rss_data:335 - 开始处理RSS数据... + → module: 'RSSDataProcessor' +2025-10-30 13:57:49.740 | DEBUG | mysql_agent:116 - 执行SQL查询 + → module: 'MySQLAgent(Windows)' + → sql: '\n SELECT id, 文章标题, 文章摘要, 发布时间, 来源URL, 文章链接\n FROM collector_rss_subscriptions\n WHERE 是否已处理 = 0\n ORDER BY 发布时间 DESC\n LIMIT %s\n ' +2025-10-30 13:57:50.013 | INFO | processor_rss_data:107 - 成功加载 6 条未处理的RSS数据 + → module: 'RSSDataProcessor' +2025-10-30 13:57:50.014 | INFO | processor_rss_data:146 - 成功加载停用词表,共 98 个词 + → module: 'RSSDataProcessor' +2025-10-30 13:57:50.015 | INFO | processor_rss_data:82 - 成功加载汽车后市场关键词,共 37 个 + → module: 'RSSDataProcessor' +2025-10-30 13:57:50.899 | INFO | processor_rss_data:235 - 数据处理完成,共处理 6 条记录 + → module: 'RSSDataProcessor' +2025-10-30 13:57:50.905 | INFO | processor_rss_data:246 - 过滤出 0 条汽车后市场相关新闻 + → module: 'RSSDataProcessor' +2025-10-30 13:57:51.015 | DEBUG | mysql_agent:614 - 更新执行完成 + → module: 'MySQLAgent(Windows)' + → 受影响行数: 6 +2025-10-30 13:57:51.015 | INFO | processor_rss_data:129 - 成功标记 6 条数据为已处理 + → module: 'RSSDataProcessor' +2025-10-30 13:57:51.016 | INFO | processor_rss_data:372 - RSS数据处理完成 + → module: 'RSSDataProcessor' + → total_articles: 6 + → filtered_articles: 0 + → filter_rate: 0.0 + → processing_time: '2025-10-30 13:57:50' + → mark_success: True +2025-10-30 13:57:53.702 | DEBUG | mysql_agent:614 - 更新执行完成 + → module: 'MySQLAgent(Windows)' + → 受影响行数: 1 diff --git a/logs/errors.log b/logs/errors.log index 555bea5..c802960 100644 --- a/logs/errors.log +++ b/logs/errors.log @@ -71074,3 +71074,744 @@ AttributeError: 类 RSSDataProcessor 中未找到方法 main 2025-10-29 17:34:57.329 | ERROR | daily:83 - 生成日报失败: No module named 'markdown' → exc_info: True +2025-10-30 13:48:20.349 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理 + → module: 'TaskNotebook' +Traceback (most recent call last): + + File "", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in + app.launch_new_instance() + │ └ > + └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start + self.io_loop.start() + │ │ └ + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start + self.asyncio_loop.run_forever() + │ │ └ + │ └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever + self._run_once() + │ └ + └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once + handle._run() + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run + self._context.run(self._callback, *self._args) + │ │ │ │ │ └ + │ │ │ │ └ , ...],))>)> + │ │ │ └ + │ │ └ , ...],))>)> + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue + await self.process_one() + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one + await dispatch(*args) + │ └ ([, ')>, , > + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell + await result + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request + await super().execute_request(stream, ident, parent) + │ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 48, 20, 70000, tzinfo=tzutc()), 'msg_id': '7653bb92-3d92-4584-b5fd-f30... + │ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe'] + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request + reply_content = await reply_content + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute + res = shell.run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell + return super().run_cell(*args, **kwargs) + │ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task... + └ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ... + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell + result = self._run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell + result = runner(coro) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner + coro.send(None) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async + has_raised = await self.run_ast_nodes(code_ast.body, cell_name, + │ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py' + │ │ │ └ [, , + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes + if await self.run_code(code, result, async_=asy): + │ │ │ │ └ False + │ │ │ └ at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1> + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code + exec(code_obj, self.user_global_ns, self.user_ns) + │ │ │ │ └ + │ │ │ └ + │ │ └ + │ └ + └ at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1> + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in + run_task_with_details(2) + └ + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details + result = manager.run_task_synchronously(task_id) + │ │ └ 2 + │ └ + └ <__main__.TaskManager object at 0x00000241919D6AB0> + +> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously + self.scheduler._execute_task_logic(task) + │ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex... + │ │ └ + │ └ + └ <__main__.TaskManager object at 0x00000241919D6AB0> + + File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic + raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}") + │ └ 'processor_rss_data' + └ 'processors' + +AttributeError: 模块 processors 中未找到类 processor_rss_data + +Traceback (most recent call last): + + File "", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in + app.launch_new_instance() + │ └ > + └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start + self.io_loop.start() + │ │ └ + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start + self.asyncio_loop.run_forever() + │ │ └ + │ └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever + self._run_once() + │ └ + └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once + handle._run() + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run + self._context.run(self._callback, *self._args) + │ │ │ │ │ └ + │ │ │ │ └ , ...],))>)> + │ │ │ └ + │ │ └ , ...],))>)> + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue + await self.process_one() + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one + await dispatch(*args) + │ └ ([, ')>, , > + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell + await result + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request + await super().execute_request(stream, ident, parent) + │ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 48, 20, 70000, tzinfo=tzutc()), 'msg_id': '7653bb92-3d92-4584-b5fd-f30... + │ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe'] + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request + reply_content = await reply_content + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute + res = shell.run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell + return super().run_cell(*args, **kwargs) + │ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task... + └ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ... + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell + result = self._run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell + result = runner(coro) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner + coro.send(None) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async + has_raised = await self.run_ast_nodes(code_ast.body, cell_name, + │ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py' + │ │ │ └ [, , + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes + if await self.run_code(code, result, async_=asy): + │ │ │ │ └ False + │ │ │ └ at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1> + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code + exec(code_obj, self.user_global_ns, self.user_ns) + │ │ │ │ └ + │ │ │ └ + │ │ └ + │ └ + └ at 0x0000024191C1D610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1> + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in + run_task_with_details(2) + └ + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details + result = manager.run_task_synchronously(task_id) + │ │ └ 2 + │ └ + └ <__main__.TaskManager object at 0x00000241919D6AB0> + +> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously + self.scheduler._execute_task_logic(task) + │ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex... + │ │ └ + │ └ + └ <__main__.TaskManager object at 0x00000241919D6AB0> + + File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic + raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}") + │ └ 'processor_rss_data' + └ 'processors' + +AttributeError: 模块 processors 中未找到类 processor_rss_data +2025-10-30 13:56:55.941 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理 + → module: 'TaskNotebook' +Traceback (most recent call last): + + File "", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in + app.launch_new_instance() + │ └ > + └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start + self.io_loop.start() + │ │ └ + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start + self.asyncio_loop.run_forever() + │ │ └ + │ └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever + self._run_once() + │ └ + └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once + handle._run() + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run + self._context.run(self._callback, *self._args) + │ │ │ │ │ └ + │ │ │ │ └ , ...],))>)> + │ │ │ └ + │ │ └ , ...],))>)> + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue + await self.process_one() + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one + await dispatch(*args) + │ └ ([, ')>, , > + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell + await result + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request + await super().execute_request(stream, ident, parent) + │ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 56, 55, 692000, tzinfo=tzutc()), 'msg_id': '788ee0fd-a13f-4e53-98b0-e9... + │ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe'] + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request + reply_content = await reply_content + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute + res = shell.run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell + return super().run_cell(*args, **kwargs) + │ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task... + └ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ... + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell + result = self._run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell + result = runner(coro) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner + coro.send(None) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async + has_raised = await self.run_ast_nodes(code_ast.body, cell_name, + │ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py' + │ │ │ └ [, , + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes + if await self.run_code(code, result, async_=asy): + │ │ │ │ └ False + │ │ │ └ at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1> + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code + exec(code_obj, self.user_global_ns, self.user_ns) + │ │ │ │ └ + │ │ │ └ + │ │ └ + │ └ + └ at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1> + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in + run_task_with_details(2) + └ + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details + result = manager.run_task_synchronously(task_id) + │ │ └ 2 + │ └ + └ <__main__.TaskManager object at 0x00000241919D6AB0> + +> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously + self.scheduler._execute_task_logic(task) + │ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex... + │ │ └ + │ └ + └ <__main__.TaskManager object at 0x00000241919D6AB0> + + File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic + raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}") + │ └ 'processor_rss_data' + └ 'processors' + +AttributeError: 模块 processors 中未找到类 processor_rss_data + +Traceback (most recent call last): + + File "", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in + app.launch_new_instance() + │ └ > + └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start + self.io_loop.start() + │ │ └ + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start + self.asyncio_loop.run_forever() + │ │ └ + │ └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever + self._run_once() + │ └ + └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once + handle._run() + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run + self._context.run(self._callback, *self._args) + │ │ │ │ │ └ + │ │ │ │ └ , ...],))>)> + │ │ │ └ + │ │ └ , ...],))>)> + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue + await self.process_one() + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one + await dispatch(*args) + │ └ ([, ')>, , > + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell + await result + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request + await super().execute_request(stream, ident, parent) + │ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 56, 55, 692000, tzinfo=tzutc()), 'msg_id': '788ee0fd-a13f-4e53-98b0-e9... + │ └ [b'be29372e-3914-4138-ba04-8dcacb3dcfbe'] + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request + reply_content = await reply_content + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute + res = shell.run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell + return super().run_cell(*args, **kwargs) + │ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task... + └ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ... + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell + result = self._run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell + result = runner(coro) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner + coro.send(None) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async + has_raised = await self.run_ast_nodes(code_ast.body, cell_name, + │ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_15044\\1819114417.py' + │ │ │ └ [, , + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes + if await self.run_code(code, result, async_=asy): + │ │ │ │ └ False + │ │ │ └ at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1> + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code + exec(code_obj, self.user_global_ns, self.user_ns) + │ │ │ │ └ + │ │ │ └ + │ │ └ + │ └ + └ at 0x0000024191CC1DF0, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 1> + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 45, in + run_task_with_details(2) + └ + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1819114417.py", line 16, in run_task_with_details + result = manager.run_task_synchronously(task_id) + │ │ └ 2 + │ └ + └ <__main__.TaskManager object at 0x00000241919D6AB0> + +> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_15044\1757831752.py", line 122, in run_task_synchronously + self.scheduler._execute_task_logic(task) + │ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex... + │ │ └ + │ └ + └ <__main__.TaskManager object at 0x00000241919D6AB0> + + File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic + raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}") + │ └ 'processor_rss_data' + └ 'processors' + +AttributeError: 模块 processors 中未找到类 processor_rss_data +2025-10-30 13:57:11.125 | ERROR | 1757831752:141 - 任务执行失败: RSS基于规则数据处理 + → module: 'TaskNotebook' +Traceback (most recent call last): + + File "", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in + app.launch_new_instance() + │ └ > + └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start + self.io_loop.start() + │ │ └ + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start + self.asyncio_loop.run_forever() + │ │ └ + │ └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever + self._run_once() + │ └ + └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once + handle._run() + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run + self._context.run(self._callback, *self._args) + │ │ │ │ │ └ + │ │ │ │ └ , ...],))>)> + │ │ │ └ + │ │ └ , ...],))>)> + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue + await self.process_one() + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one + await dispatch(*args) + │ └ ([, ')>, , > + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell + await result + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request + await super().execute_request(stream, ident, parent) + │ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 57, 10, 815000, tzinfo=tzutc()), 'msg_id': '132b59a7-4a02-4a8d-a25c-e2... + │ └ [b'4c713768-9d1a-49ca-83ed-6814787009a5'] + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request + reply_content = await reply_content + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute + res = shell.run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell + return super().run_cell(*args, **kwargs) + │ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task... + └ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ... + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell + result = self._run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell + result = runner(coro) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner + coro.send(None) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async + has_raised = await self.run_ast_nodes(code_ast.body, cell_name, + │ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_19964\\1819114417.py' + │ │ │ └ [, , + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes + if await self.run_code(code, result, async_=asy): + │ │ │ │ └ False + │ │ │ └ at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1> + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code + exec(code_obj, self.user_global_ns, self.user_ns) + │ │ │ │ └ + │ │ │ └ + │ │ └ + │ └ + └ at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1> + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 45, in + run_task_with_details(2) + └ + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 16, in run_task_with_details + result = manager.run_task_synchronously(task_id) + │ │ └ 2 + │ └ + └ <__main__.TaskManager object at 0x000001E4FEBA31D0> + +> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1757831752.py", line 122, in run_task_synchronously + self.scheduler._execute_task_logic(task) + │ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex... + │ │ └ + │ └ + └ <__main__.TaskManager object at 0x000001E4FEBA31D0> + + File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic + raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}") + │ └ 'processor_rss_data' + └ 'processors' + +AttributeError: 模块 processors 中未找到类 processor_rss_data + +Traceback (most recent call last): + + File "", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel_launcher.py", line 18, in + app.launch_new_instance() + │ └ > + └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start + self.io_loop.start() + │ │ └ + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\tornado\platform\asyncio.py", line 211, in start + self.asyncio_loop.run_forever() + │ │ └ + │ └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 645, in run_forever + self._run_once() + │ └ + └ <_WindowsSelectorEventLoop running=True closed=False debug=False> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\base_events.py", line 1999, in _run_once + handle._run() + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\asyncio\events.py", line 88, in _run + self._context.run(self._callback, *self._args) + │ │ │ │ │ └ + │ │ │ │ └ , ...],))>)> + │ │ │ └ + │ │ └ , ...],))>)> + │ └ + └ , ...],))>)> + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 519, in dispatch_queue + await self.process_one() + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 508, in process_one + await dispatch(*args) + │ └ ([, ')>, , > + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 400, in dispatch_shell + await result + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 368, in execute_request + await super().execute_request(stream, ident, parent) + │ │ └ {'header': {'date': datetime.datetime(2025, 10, 30, 5, 57, 10, 815000, tzinfo=tzutc()), 'msg_id': '132b59a7-4a02-4a8d-a25c-e2... + │ └ [b'4c713768-9d1a-49ca-83ed-6814787009a5'] + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\kernelbase.py", line 767, in execute_request + reply_content = await reply_content + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\ipkernel.py", line 455, in do_execute + res = shell.run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\ipykernel\zmqshell.py", line 577, in run_cell + return super().run_cell(*args, **kwargs) + │ └ {'store_history': True, 'silent': False, 'cell_id': 'vscode-notebook-cell:/d%3A/Idea%20Project/intelligence_system/tools/task... + └ ('# 手动执行任务(异步方式,快速返回)\ndef run_task_manually(task_id):\n display(Markdown(f"### 正在手动执行任务ID {task_id}..."))\n success = ... + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell + result = self._run_cell( + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3153, in _run_cell + result = runner(coro) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner + coro.send(None) + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3362, in run_cell_async + has_raised = await self.run_ast_nodes(code_ast.body, cell_name, + │ │ │ │ └ 'C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_19964\\1819114417.py' + │ │ │ └ [, , + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3607, in run_ast_nodes + if await self.run_code(code, result, async_=asy): + │ │ │ │ └ False + │ │ │ └ at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1> + │ └ + └ + File "d:\ProgramTools\anaconda3\envs\intelligence_system\Lib\site-packages\IPython\core\interactiveshell.py", line 3667, in run_code + exec(code_obj, self.user_global_ns, self.user_ns) + │ │ │ │ └ + │ │ │ └ + │ │ └ + │ └ + └ at 0x000001E4FFCE9610, file "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 1> + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 45, in + run_task_with_details(2) + └ + + File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1819114417.py", line 16, in run_task_with_details + result = manager.run_task_synchronously(task_id) + │ │ └ 2 + │ └ + └ <__main__.TaskManager object at 0x000001E4FEBA31D0> + +> File "C:\Users\zy187\AppData\Local\Temp\ipykernel_19964\1757831752.py", line 122, in run_task_synchronously + self.scheduler._execute_task_logic(task) + │ │ │ └ {'task_id': 2, 'task_name': 'RSS基于规则数据处理', 'task_type': 'processor', 'module_path': 'processors.processor_rss_data', 'cron_ex... + │ │ └ + │ └ + └ <__main__.TaskManager object at 0x000001E4FEBA31D0> + + File "d:\Idea Project\intelligence_system\system_management\scheduler\task_scheduler.py", line 278, in _execute_task_logic + raise AttributeError(f"模块 {module_name} 中未找到类 {class_name}") + │ └ 'processor_rss_data' + └ 'processors' + +AttributeError: 模块 processors 中未找到类 processor_rss_data diff --git a/processors/processor_rss_data.py b/processors/processor_rss_data.py index 3f43968..86cfdf1 100644 --- a/processors/processor_rss_data.py +++ b/processors/processor_rss_data.py @@ -383,27 +383,9 @@ class RSSDataProcessor: return {'success': False, 'message': f'处理失败: {str(e)}'} -def main(): - """主函数入口""" - try: - # 创建处理器实例 - processor = RSSDataProcessor() - - # 处理RSS数据 - result = processor.process_rss_data( - limit=5000, # 处理最近5000条数据 - save_to_db=True # 保存到数据库 - ) - - if result['success']: - print("RSS数据处理完成!") - print(f"处理统计: {result['statistics']}") - else: - print(f"处理失败: {result['message']}") - - except Exception as e: - print(f"程序运行出错: {str(e)}") - + def main(self, limit: int = 1000, save_to_db: bool = True) -> Dict[str, Any]: + """主函数入口(实例方法),对外统一调用""" + return self.process_rss_data(limit=limit, save_to_db=save_to_db) if __name__ == "__main__": - main() + RSSDataProcessor().main(limit=5000, save_to_db=True) diff --git a/system_management/scheduler/task_scheduler.py b/system_management/scheduler/task_scheduler.py index c012edb..d1ea334 100644 --- a/system_management/scheduler/task_scheduler.py +++ b/system_management/scheduler/task_scheduler.py @@ -1,6 +1,5 @@ import importlib import threading -import time from datetime import datetime from typing import Dict, List, Optional, Any import croniter diff --git a/test/钉钉api测试.ipynb b/test/钉钉api测试.ipynb new file mode 100644 index 0000000..a09cae5 --- /dev/null +++ b/test/钉钉api测试.ipynb @@ -0,0 +1,67 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": "## 获取钉钉token", + "id": "4a7d18176711daad" + }, + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2025-10-30T02:59:09.458462Z", + "start_time": "2025-10-30T02:59:09.015765Z" + } + }, + "source": [ + "from utils.Ding_api import DingAPI\n", + "\n", + "api_instance = DingAPI()\n", + "token = api_instance.get_token()\n", + "print(token)" + ], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\ProgramTools\\anaconda3\\envs\\intelligence_system\\Lib\\site-packages\\requests\\__init__.py:86: RequestsDependencyWarning: Unable to find acceptable character detection dependency (chardet or charset_normalizer).\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2b166a1c8e683ee38f8d2112a7de5e05\n" + ] + } + ], + "execution_count": 1 + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tools/task_manager.ipynb b/tools/task_manager.ipynb index be5ca0d..c3c37d3 100644 --- a/tools/task_manager.ipynb +++ b/tools/task_manager.ipynb @@ -10,14 +10,25 @@ }, { "cell_type": "code", + "execution_count": 1, "id": "initial_id", "metadata": { - "collapsed": true, "ExecuteTime": { "end_time": "2025-10-29T02:25:08.582541Z", "start_time": "2025-10-29T02:25:08.473381Z" - } + }, + "collapsed": true }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PROJECT_ROOT = d:\\Idea Project\\intelligence_system\n", + "\u001b[32m2025-10-30 13:57:07\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m任务调度器已初始化,最大工作线程数: 5\u001b[0m\n" + ] + } + ], "source": [ "# 使 Notebook 可从项目根导入\n", "import sys\n", @@ -206,18 +217,7 @@ " except Exception:\n", " pass\n", " return str(dt)" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PROJECT_ROOT = D:\\Idea Project\\intelligence_system\n", - "\u001B[32m2025-10-29 10:25:08\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mtask_scheduler\u001B[0m - \u001B[1m任务调度器已初始化,最大工作线程数: 5\u001B[0m\n" - ] - } - ], - "execution_count": 8 + ] }, { "cell_type": "markdown", @@ -242,7 +242,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[32m2025-10-29 09:54:09\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n" + "\u001b[32m2025-10-29 09:54:09\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n" ] }, { @@ -469,6 +469,7 @@ }, { "cell_type": "code", + "execution_count": 10, "id": "eab90de72c35429e", "metadata": { "ExecuteTime": { @@ -476,6 +477,62 @@ "start_time": "2025-10-29T02:26:12.648420Z" } }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2025-10-29 10:26:12\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n" + ] + }, + { + "data": { + "text/markdown": [ + "### 任务详情\n", + "**任务ID**: 1\n", + "**任务名称**: RSS新闻订阅\n", + "**任务类型**: collector\n", + "**模块路径**: processors.processor_rss_data.RSSDataProcessor\n", + "**Cron表达式**: */5 * * * *\n", + "**时区**: Asia/Shanghai\n", + "**最后运行时间**: 2025-10-28 13:35:09\n", + "**下次运行时间**: 2025-10-29 10:25:00\n", + "**运行状态**: success\n", + "**是否活跃**: 是\n", + "**运行次数**: 496\n", + "**创建时间**: 2025-10-16 15:47:34" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'task_id': 1,\n", + " 'task_name': 'RSS新闻订阅',\n", + " 'task_type': 'collector',\n", + " 'module_path': 'processors.processor_rss_data.RSSDataProcessor',\n", + " 'cron_expression': '*/5 * * * *',\n", + " 'time_zone': 'Asia/Shanghai',\n", + " 'next_run_time': Timestamp('2025-10-29 10:25:00'),\n", + " 'last_run_time': Timestamp('2025-10-28 13:35:09'),\n", + " 'last_run_status': 'success',\n", + " 'run_count': 496,\n", + " 'is_active': 1,\n", + " 'is_running': 0,\n", + " 'created_at': Timestamp('2025-10-16 15:47:34'),\n", + " 'updated_at': Timestamp('2025-10-29 10:24:49')}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 查看指定任务的详情\n", "def show_task_details(task_id):\n", @@ -503,53 +560,7 @@ "\n", "# 执行:查看任务ID为1的详情(替换为实际ID)\n", "show_task_details(1)" - ], - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001B[32m2025-10-29 10:26:12\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ], - "text/markdown": "### 任务详情\n**任务ID**: 1\n**任务名称**: RSS新闻订阅\n**任务类型**: collector\n**模块路径**: processors.processor_rss_data.RSSDataProcessor\n**Cron表达式**: */5 * * * *\n**时区**: Asia/Shanghai\n**最后运行时间**: 2025-10-28 13:35:09\n**下次运行时间**: 2025-10-29 10:25:00\n**运行状态**: success\n**是否活跃**: 是\n**运行次数**: 496\n**创建时间**: 2025-10-16 15:47:34" - }, - "metadata": {}, - "output_type": "display_data", - "jetTransient": { - "display_id": null - } - }, - { - "data": { - "text/plain": [ - "{'task_id': 1,\n", - " 'task_name': 'RSS新闻订阅',\n", - " 'task_type': 'collector',\n", - " 'module_path': 'processors.processor_rss_data.RSSDataProcessor',\n", - " 'cron_expression': '*/5 * * * *',\n", - " 'time_zone': 'Asia/Shanghai',\n", - " 'next_run_time': Timestamp('2025-10-29 10:25:00'),\n", - " 'last_run_time': Timestamp('2025-10-28 13:35:09'),\n", - " 'last_run_status': 'success',\n", - " 'run_count': 496,\n", - " 'is_active': 1,\n", - " 'is_running': 0,\n", - " 'created_at': Timestamp('2025-10-16 15:47:34'),\n", - " 'updated_at': Timestamp('2025-10-29 10:24:49')}" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 10 + ] }, { "cell_type": "markdown", @@ -569,8 +580,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[32m2025-10-29 09:56:52\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n", - "\u001B[32m2025-10-29 09:56:52\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mtask_scheduler\u001B[0m - \u001B[1m新任务添加成功\u001B[0m\n" + "\u001b[32m2025-10-29 09:56:52\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n", + "\u001b[32m2025-10-29 09:56:52\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mtask_scheduler\u001b[0m - \u001b[1m新任务添加成功\u001b[0m\n" ] }, { @@ -646,6 +657,7 @@ }, { "cell_type": "code", + "execution_count": 21, "id": "c892fd8ad2f0dd9d", "metadata": { "ExecuteTime": { @@ -653,6 +665,61 @@ "start_time": "2025-10-29T02:29:55.754298Z" } }, + "outputs": [ + { + "data": { + "text/markdown": [ + "### 任务ID 2 更新成功" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2025-10-29 10:29:56\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n" + ] + }, + { + "data": { + "text/markdown": [ + "### 任务详情\n", + "**任务ID**: 2\n", + "**任务名称**: RSS基于规则数据处理\n", + "**任务类型**: processor\n", + "**模块路径**: processors.processor_rss_data\n", + "**Cron表达式**: 0 8,20 * * *\n", + "**时区**: Asia/Shanghai\n", + "**最后运行时间**: 2025-10-28 13:34:49\n", + "**下次运行时间**: 2025-10-28 20:00:00\n", + "**运行状态**: success\n", + "**是否活跃**: 是\n", + "**运行次数**: 10\n", + "**创建时间**: 2025-10-22 16:06:42" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 更新任务属性\n", "def update_task(task_id, **kwargs):\n", @@ -685,53 +752,7 @@ "\n", "# 执行:同时更新多个属性(名称和Cron表达式)\n", "# update_task(1, name=\"每日早间新闻采集\", cron=\"0 8 * * *\")" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "" - ], - "text/markdown": "### 任务ID 2 更新成功" - }, - "metadata": {}, - "output_type": "display_data", - "jetTransient": { - "display_id": null - } - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001B[32m2025-10-29 10:29:56\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ], - "text/markdown": "### 任务详情\n**任务ID**: 2\n**任务名称**: RSS基于规则数据处理\n**任务类型**: processor\n**模块路径**: processors.processor_rss_data\n**Cron表达式**: 0 8,20 * * *\n**时区**: Asia/Shanghai\n**最后运行时间**: 2025-10-28 13:34:49\n**下次运行时间**: 2025-10-28 20:00:00\n**运行状态**: success\n**是否活跃**: 是\n**运行次数**: 10\n**创建时间**: 2025-10-22 16:06:42" - }, - "metadata": {}, - "output_type": "display_data", - "jetTransient": { - "display_id": null - } - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 21 + ] }, { "cell_type": "markdown", @@ -800,15 +821,149 @@ }, { "cell_type": "code", + "execution_count": 3, "id": "94892f4134316f8e", "metadata": { - "jupyter": { - "is_executing": true - }, "ExecuteTime": { "start_time": "2025-10-29T02:30:10.298891Z" + }, + "jupyter": { + "is_executing": true } }, + "outputs": [ + { + "data": { + "text/markdown": [ + "### 开始执行任务ID 2" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2025-10-30 13:57:49\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mmysql_agent\u001b[0m - \u001b[1m查询执行成功\u001b[0m\n", + "\u001b[32m2025-10-30 13:57:49\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1mRSS数据处理器初始化完成\u001b[0m\n", + "\u001b[32m2025-10-30 13:57:49\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m开始处理RSS数据...\u001b[0m\n", + "\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载 6 条未处理的RSS数据\u001b[0m\n", + "\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载停用词表,共 98 个词\u001b[0m\n", + "\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功加载汽车后市场关键词,共 37 个\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Building prefix dict from the default dictionary ...\n", + "Loading model from cache C:\\Users\\zy187\\AppData\\Local\\Temp\\jieba.cache\n", + "Loading model cost 0.839 seconds.\n", + "Prefix dict has been built successfully.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m数据处理完成,共处理 6 条记录\u001b[0m\n", + "\u001b[32m2025-10-30 13:57:50\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m过滤出 0 条汽车后市场相关新闻\u001b[0m\n", + "\u001b[32m2025-10-30 13:57:51\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1m成功标记 6 条数据为已处理\u001b[0m\n", + "\u001b[32m2025-10-30 13:57:51\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprocessor_rss_data\u001b[0m - \u001b[1mRSS数据处理完成\u001b[0m\n" + ] + }, + { + "data": { + "text/markdown": [ + "**任务名称**: RSS基于规则数据处理" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "**任务ID**: 2" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "**执行时长**: 4.41 秒" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "---" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "### ✅ 任务执行成功" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'success': True,\n", + " 'task_name': 'RSS基于规则数据处理',\n", + " 'task_id': 2,\n", + " 'execution_time': 4.414557695388794,\n", + " 'output': '',\n", + " 'error': None}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 手动执行任务(异步方式,快速返回)\n", "def run_task_manually(task_id):\n", @@ -854,47 +1009,8 @@ " return result\n", "\n", "# 执行:手动运行任务ID为2的任务(显示详细执行过程)\n", - "run_task_with_details(3)" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "" - ], - "text/markdown": "### 开始执行任务ID 3" - }, - "metadata": {}, - "output_type": "display_data", - "jetTransient": { - "display_id": null - } - }, - { - "data": { - "text/plain": [ - "" - ], - "text/markdown": "---" - }, - "metadata": {}, - "output_type": "display_data", - "jetTransient": { - "display_id": null - } - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001B[32m2025-10-29 10:30:10\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mmysql_agent\u001B[0m - \u001B[1m查询执行成功\u001B[0m\n", - "\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1mRSS数据AI处理器初始化完成\u001B[0m\n", - "\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1m开始批量处理数据,批次大小: 200, 延迟: 1.5秒\u001B[0m\n", - "\u001B[32m2025-10-29 10:30:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mai_processor_rss_data\u001B[0m - \u001B[1m成功加载 3 条未处理的数据\u001B[0m\n" - ] - } - ], - "execution_count": null + "run_task_with_details(2)" + ] }, { "cell_type": "markdown", diff --git a/utils/Ding_api.py b/utils/Ding_api.py new file mode 100644 index 0000000..aef4c7b --- /dev/null +++ b/utils/Ding_api.py @@ -0,0 +1,41 @@ +import requests +from typing import Optional + + +class DingAPI(): + def __init__(self): + self.token = None + self.url = '' + + def get_token(self) -> Optional: + """ + 获取Access Token + return: token(str) + """ + + url = 'https://api.dingtalk.com/v1.0/oauth2/dinga88e3d35525b86ca/token' + + payload = { + "client_id": "dingn3de1pyuwkymohhe", + "client_secret": "qv__egWJnLVXh14_R1rfD_vBi7M8Gzhnk94EJN6puMzsqqpBCP8U7Ow-zA7SV8Rx", + "grant_type": "client_credentials" + } + + response = requests.post(url, json=payload) + token = response.json().get('access_token') + + return token + + +def send_message(self, message): + data = { + "msgtype": "text", + "text": { + "content": message + } + } + headers = { + 'Content-Type': 'application/json' + } + response = requests.post(self.url, json=data, headers=headers) + return response.status_code