Files
intelligence_system/main.py
T
2025-08-05 15:00:46 +08:00

185 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
情报收集系统主程序
功能:
1. 调度数据采集、处理、存储流程
2. 生成日报/月报
3. 异常监控和报警
"""
import logging
import os
import sys
import time
from datetime import datetime, timedelta
from typing import List, Dict, Any
# 自定义模块
from config.settings import API_KEYS, DATA_SOURCES
from collectors.news_api import NewsAPICollector
from collectors.complaint_spider import ComplaintSpider
from processors.data_processor import DataProcessor
from storage.database import IntelligenceDB
from applications.reporter import ReportGenerator
from applications.alert import AlertService
from utils.logger import setup_logging
from utils.mail import send_email
class IntelligenceSystem:
    """Orchestrate the daily intelligence pipeline.

    One pipeline run executes five stages in order: collect raw data from
    every registered collector, process it, store it, generate reports and
    finally check for alerts.
    """

    def __init__(self):
        """Initialise logging, storage, processing, alerting and collectors."""
        setup_logging()
        self.logger = logging.getLogger(__name__)
        self.db = IntelligenceDB()
        self.processor = DataProcessor()
        self.alert = AlertService()
        # Collector registry, keyed by the data type each collector produces.
        self.collectors = {
            "news": NewsAPICollector(API_KEYS['newsapi']),
            "complaint": ComplaintSpider(
                base_url=DATA_SOURCES['blackcat'],
                rate_limit=30,  # 30-second crawl interval
            ),
        }

    def run_daily_pipeline(self):
        """Run one collect -> process -> store -> report -> alert cycle.

        Any exception escaping a stage is logged with its traceback and
        reported through ``self.alert.send_critical``. Nothing is re-raised,
        so a scheduler loop calling this method keeps running.
        """
        try:
            # Stage 1: collection
            raw_data = self._collect_data()
            # Stage 2: processing
            processed_data = self._process_data(raw_data)
            # Stage 3: storage
            self._store_data(processed_data)
            # Stage 4: reports
            self._generate_reports()
            # Stage 5: alert checks
            self._check_alerts()
        except Exception as e:
            self.logger.error(f"主流程执行失败: {str(e)}", exc_info=True)
            # The alert channel can fail too (e.g. network down); that must
            # not propagate and take the caller's scheduling loop with it.
            try:
                self.alert.send_critical(f"系统异常: {str(e)}")
            except Exception as alert_error:
                self.logger.error(
                    f"严重报警发送失败: {str(alert_error)}", exc_info=True
                )

    def _collect_data(self) -> Dict[str, List[Dict]]:
        """Run every registered collector and gather the results.

        Returns:
            A mapping from collector name to whatever its ``fetch_data``
            returned. A collector that raises is logged and skipped; the
            remaining collectors still run.
        """
        collected = {}
        for name, collector in self.collectors.items():
            try:
                self.logger.info(f"开始采集 {name} 数据...")
                data = collector.fetch_data({
                    'keywords': '汽车后市场',
                    'max_results': 100
                })
                collected[name] = data
                self.logger.info(f"{name} 采集完成,共 {len(data)} 条数据")
            except Exception as e:
                self.logger.error(f"{name} 采集器异常: {str(e)}")
                continue
        return collected

    def _process_data(self, raw_data: Dict) -> Dict:
        """Enrich raw items with the processor's output.

        Args:
            raw_data: Mapping from data type to an iterable of raw items.
                A ``None`` payload is treated as empty.

        Returns:
            Mapping from data type to the list of successfully processed
            items. ``news`` and ``complaint`` items are copied and extended
            with ``keywords`` and ``category``; ``images`` items are replaced
            by the result of ``processor.image_to_text``. Items that fail are
            logged and dropped. Other data types yield an empty list.
        """
        processed = {}
        for data_type, items in raw_data.items():
            processed[data_type] = []
            # A collector may have handed back None; iterate nothing then.
            for item in items or ():
                try:
                    # Standard handling for text data.
                    if data_type in ['news', 'complaint']:
                        result = self.processor.process_text(item['content'])
                        processed_item = {
                            **item,
                            'keywords': result['keywords'],
                            'category': result['category']
                        }
                        processed[data_type].append(processed_item)
                    # Image handling (reserved interface).
                    elif data_type == 'images':
                        processed[data_type].append(
                            self.processor.image_to_text(item)
                        )
                except Exception as e:
                    # Items are not guaranteed to be dicts (a malformed text
                    # record, an image payload); calling .get() on them here
                    # would raise from inside the handler and abort the run.
                    item_id = item.get('id', '') if isinstance(item, dict) else ''
                    self.logger.warning(f"数据处理失败: {item_id} - {str(e)}")
                    continue
        return processed

    def _store_data(self, processed_data: Dict):
        """Insert processed items into the database, one by one.

        An item counts as stored when ``db.insert_data`` returns a truthy
        value. Insert exceptions are logged and do not stop the remaining
        items. A success/total summary is logged per data type.
        """
        for data_type, items in processed_data.items():
            success_count = 0
            for item in items:
                try:
                    if self.db.insert_data(data_type, item):
                        success_count += 1
                except Exception as e:
                    self.logger.error(f"数据存储失败: {str(e)}")
            self.logger.info(
                f"{data_type} 数据存储完成,成功 {success_count}/{len(items)}"
            )

    def _generate_reports(self):
        """Write the daily HTML report and, on the 1st, mail the monthly one.

        The daily report is written to ``reports/daily_<YYYY-MM-DD>.html``.
        Failures are logged and swallowed so that later pipeline stages
        still run.
        """
        try:
            # Take the clock once so the file name, the day-of-month check
            # and the mail subject agree even if the run crosses midnight.
            now = datetime.now()

            # Daily report.
            report_html = ReportGenerator(self.db).generate_daily()
            # The output directory may not exist on a fresh deployment.
            os.makedirs("reports", exist_ok=True)
            # Explicit UTF-8: the platform default encoding may be unable to
            # represent the report's non-ASCII text.
            with open(
                f"reports/daily_{now.date()}.html", 'w', encoding="utf-8"
            ) as f:
                f.write(report_html)

            # Monthly report on the first day of each month.
            if now.day == 1:
                monthly_report = ReportGenerator(self.db).generate_monthly()
                send_email(
                    to="team@example.com",
                    subject=f"{now.strftime('%Y-%m')} 情报月报",
                    content=monthly_report
                )
        except Exception as e:
            self.logger.error(f"报告生成失败: {str(e)}", exc_info=True)

    def _check_alerts(self):
        """Check for negative-sentiment hits and send an urgent alert if any.

        Each hit returned by ``alert.check_negative`` is rendered as
        ``[source] content``, one per line.
        """
        # Negative public-opinion monitoring.
        negative_keywords = ['投诉', '造假', '违规']
        alerts = self.alert.check_negative(negative_keywords)
        if alerts:
            self.alert.send_urgent(
                "负面舆情警报",
                "\n".join([f"[{a['source']}] {a['content']}" for a in alerts])
            )

    def cleanup(self):
        """Release resources: close the database handle and log it."""
        self.db.close()
        self.logger.info("系统资源已释放")
if __name__ == "__main__":
    # Script entry point.
    #   --manual : run the daily pipeline once and exit.
    #   (none)   : stay resident and run the pipeline every day at 09:00.
    system = IntelligenceSystem()
    try:
        if len(sys.argv) > 1 and sys.argv[1] == "--manual":
            system.logger.info("手动执行模式启动")
            system.run_daily_pipeline()
        else:
            # Scheduler mode (for real deployments, use crontab or
            # APScheduler instead of this polling loop).
            system.logger.info("定时任务模式启动")
            last_run_date = None
            while True:
                now = datetime.now()
                # Fire during the 09:00 minute, at most once per calendar
                # day. Remembering the date replaces the "sleep past the
                # minute" trick for avoiding a repeated run.
                if (
                    now.hour == 9
                    and now.minute == 0
                    and last_run_date != now.date()
                ):
                    last_run_date = now.date()
                    system.run_daily_pipeline()
                # Polling every 30 s guarantees at least one check falls
                # inside the one-minute trigger window.
                time.sleep(30)
    except KeyboardInterrupt:
        system.logger.info("用户中断执行")
    finally:
        system.cleanup()