#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ 情报收集系统主程序 功能: 1. 调度数据采集、处理、存储流程 2. 生成日报/月报 3. 异常监控和报警 """ import sys import logging from datetime import datetime, timedelta from typing import List, Dict, Any # 自定义模块 from config.settings import API_KEYS, DATA_SOURCES from collectors.news_api import NewsAPICollector from collectors.complaint_spider import ComplaintSpider from processors.data_processor import DataProcessor from storage.database import IntelligenceDB from applications.reporter import ReportGenerator from applications.alert import AlertService from utils.logger import setup_logging from utils.mail import send_email class IntelligenceSystem: def __init__(self): # 初始化核心组件 setup_logging() self.logger = logging.getLogger(__name__) self.db = IntelligenceDB() self.processor = DataProcessor() self.alert = AlertService() # 数据采集器注册 self.collectors = { "news": NewsAPICollector(API_KEYS['newsapi']), "complaint": ComplaintSpider( base_url=DATA_SOURCES['blackcat'], rate_limit=30 # 30秒爬取间隔 ) } def run_daily_pipeline(self): """每日数据采集处理流程""" try: # 阶段1:数据采集 raw_data = self._collect_data() # 阶段2:数据处理 processed_data = self._process_data(raw_data) # 阶段3:数据存储 self._store_data(processed_data) # 阶段4:生成日报 self._generate_reports() # 阶段5:异常检测 self._check_alerts() except Exception as e: self.logger.error(f"主流程执行失败: {str(e)}", exc_info=True) self.alert.send_critical(f"系统异常: {str(e)}") def _collect_data(self) -> Dict[str, List[Dict]]: """执行所有数据采集任务""" collected = {} for name, collector in self.collectors.items(): try: self.logger.info(f"开始采集 {name} 数据...") data = collector.fetch_data({ 'keywords': '汽车后市场', 'max_results': 100 }) collected[name] = data self.logger.info(f"{name} 采集完成,共 {len(data)} 条数据") except Exception as e: self.logger.error(f"{name} 采集器异常: {str(e)}") continue return collected def _process_data(self, raw_data: Dict) -> Dict: """处理原始数据""" processed = {} for data_type, items in raw_data.items(): processed[data_type] = [] for item in items: try: # 文本数据标准处理 if data_type in ['news', 'complaint']: result = self.processor.process_text(item['content']) processed_item = { **item, 'keywords': result['keywords'], 'category': result['category'] } processed[data_type].append(processed_item) # 图像处理(预留接口) elif data_type == 'images': processed[data_type].append( self.processor.image_to_text(item) ) except Exception as e: self.logger.warning(f"数据处理失败: {item.get('id', '')} - {str(e)}") continue return processed def _store_data(self, processed_data: Dict): """存储到数据库""" for data_type, items in processed_data.items(): success_count = 0 for item in items: try: if self.db.insert_data(data_type, item): success_count += 1 except Exception as e: self.logger.error(f"数据存储失败: {str(e)}") self.logger.info( f"{data_type} 数据存储完成,成功 {success_count}/{len(items)} 条" ) def _generate_reports(self): """生成报告并发送""" try: # 日报生成 report_html = ReportGenerator(self.db).generate_daily() with open(f"reports/daily_{datetime.now().date()}.html", 'w') as f: f.write(report_html) # 每月1号生成月报 if datetime.now().day == 1: monthly_report = ReportGenerator(self.db).generate_monthly() send_email( to="team@example.com", subject=f"{datetime.now().strftime('%Y-%m')} 情报月报", content=monthly_report ) except Exception as e: self.logger.error(f"报告生成失败: {str(e)}") def _check_alerts(self): """检查预警信息""" # 负面舆情监测 negative_keywords = ['投诉', '造假', '违规'] alerts = self.alert.check_negative(negative_keywords) if alerts: self.alert.send_urgent( "负面舆情警报", "\n".join([f"[{a['source']}] {a['content']}" for a in alerts]) ) def cleanup(self): """资源清理""" self.db.close() self.logger.info("系统资源已释放") if __name__ == "__main__": system = IntelligenceSystem() try: # 执行每日任务 if len(sys.argv) > 1 and sys.argv[1] == "--manual": system.logger.info("手动执行模式启动") system.run_daily_pipeline() else: # 定时任务模式(实际部署时改用crontab或APScheduler) system.logger.info("定时任务模式启动") while True: now = datetime.now() if now.hour == 9 and now.minute == 0: # 每天9点执行 system.run_daily_pipeline() time.sleep(60) # 避免重复执行 time.sleep(30) except KeyboardInterrupt: system.logger.info("用户中断执行") finally: system.cleanup()