From 7726be4526157acc012a990b04d05bd11c126a4b Mon Sep 17 00:00:00 2001
From: BaiFu <670939375@qq.com>
Date: Sun, 2 Nov 2025 00:52:08 +0800
Subject: [PATCH 1/7] Update README with important deployment announcement.

Added important announcement about online deployment experience on Monday (11.3) to the README.
---
 README.md | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/README.md b/README.md
index 9307f9e..598ea3c 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,9 @@
 
 </div>
 
+> [!IMPORTANT]
+> 周一（11.3）会上**在线一键部署体验**，欢迎持续关注！
+
 ## ⚡ 项目概述
 
 “**微舆**” 是一个从0实现的创新型 多智能体 舆情分析系统，帮助大家破除信息茧房，还原舆情原貌，预测未来走向，辅助决策。用户只需像聊天一样提出分析需求，智能体开始全自动分析 国内外30+主流社媒 与 数百万条大众评论。

From 0aaf81ba067715c165a5ef88fbb7606362e634dc Mon Sep 17 00:00:00 2001
From: Doiiars <doiiars@qq.com>
Date: Mon, 3 Nov 2025 11:26:51 +0800
Subject: [PATCH 2/7] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dfetch=5Fnews=E6=97=A0?=
 =?UTF-8?q?=E6=B3=95=E6=AD=A3=E5=B8=B8=E8=8E=B7=E5=8F=96=E7=9A=84=E9=97=AE?=
 =?UTF-8?q?=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../BroadTopicExtraction/get_today_news.py    | 78 ++++++++++---------
 requirements.txt                              |  3 +-
 2 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/MindSpider/BroadTopicExtraction/get_today_news.py b/MindSpider/BroadTopicExtraction/get_today_news.py
index 2745381..972d441 100644
--- a/MindSpider/BroadTopicExtraction/get_today_news.py
+++ b/MindSpider/BroadTopicExtraction/get_today_news.py
@@ -12,6 +12,7 @@ import json
 from datetime import datetime, date
 from pathlib import Path
 from typing import List, Dict, Optional
+from loguru import logger
 
 # 添加项目根目录到路径
 project_root = Path(__file__).parent.parent
@@ -38,8 +39,7 @@ SOURCE_NAMES = {
     "wallstreetcn": "华尔街见闻",
     "thepaper": "澎湃新闻",
     "cls-hot": "财联社",
-    "xueqiu": "雪球热榜",
-    "kuaishou": "快手热榜"
+    "xueqiu": "雪球热榜"
 }
 
 class NewsCollector:
@@ -72,15 +72,25 @@ class NewsCollector:
     async def fetch_news(self, source: str) -> dict:
         """从指定源获取最新新闻"""
         url = f"{BASE_URL}/api/s?id={source}&latest"
-        headers = {"Accept": "application/json"}
+        headers = {
+            "Accept": "application/json, text/plain, */*",
+            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
+            "User-Agent": (
+                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+                "AppleWebKit/537.36 (KHTML, like Gecko) "
+                "Chrome/124.0.0.0 Safari/537.36"
+            ),
+            "Referer": BASE_URL,
+            "Connection": "keep-alive",
+        }
         
         try:
-            async with httpx.AsyncClient(timeout=30.0) as client:
+            async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
                 response = await client.get(url, headers=headers)
                 response.raise_for_status()
                 
                 # 解析JSON响应
-                data = json.loads(response.text)
+                data = response.json()
                 return {
                     "source": source,
                     "status": "success",
@@ -91,21 +101,21 @@ class NewsCollector:
             return {
                 "source": source,
                 "status": "timeout",
-                "error": "请求超时",
+                "error": f"请求超时: {source}({url})",
                 "timestamp": datetime.now().isoformat()
             }
         except httpx.HTTPStatusError as e:
             return {
                 "source": source,
                 "status": "http_error",
-                "error": f"HTTP错误: {e.response.status_code}",
+                "error": f"HTTP错误: {source}({url}) - {e.response.status_code}",
                 "timestamp": datetime.now().isoformat()
             }
         except Exception as e:
             return {
                 "source": source,
                 "status": "error",
-                "error": f"未知错误: {str(e)}",
+                "error": f"未知错误: {source}({url}) - {str(e)}",
                 "timestamp": datetime.now().isoformat()
             }
     
@@ -114,13 +124,13 @@ class NewsCollector:
         if sources is None:
             sources = list(SOURCE_NAMES.keys())
         
-        print(f"正在获取 {len(sources)} 个新闻源的最新内容...")
-        print("=" * 80)
+        logger.info(f"正在获取 {len(sources)} 个新闻源的最新内容...")
+        logger.info("=" * 80)
         
         results = []
         for source in sources:
             source_name = SOURCE_NAMES.get(source, source)
-            print(f"正在获取 {source_name} 的新闻...")
+            logger.info(f"正在获取 {source_name} 的新闻...")
             result = await self.fetch_news(source)
             results.append(result)
             
@@ -128,11 +138,11 @@ class NewsCollector:
                 data = result["data"]
                 if 'items' in data and isinstance(data['items'], list):
                     count = len(data['items'])
-                    print(f"✓ {source_name}: 获取成功，共 {count} 条新闻")
+                    logger.info(f"✓ {source_name}: 获取成功，共 {count} 条新闻")
                 else:
-                    print(f"✓ {source_name}: 获取成功")
+                    logger.info(f"✓ {source_name}: 获取成功")
             else:
-                print(f"✗ {source_name}: {result.get('error', '获取失败')}")
+                logger.error(f"✗ {source_name}: {result.get('error', '获取失败')}")
             
             # 避免请求过快
             await asyncio.sleep(0.5)
@@ -151,18 +161,21 @@ class NewsCollector:
         Returns:
             包含收集结果的字典
         """
-        print(f"开始收集每日热点新闻...")
-        print(f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        collection_summary_message = ""
+        collection_summary_message += "\n开始收集每日热点新闻...\n"
+        collection_summary_message += f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
         
         # 选择新闻源
         if sources is None:
             # 使用所有支持的新闻源
             sources = list(SOURCE_NAMES.keys())
         
-        print(f"将从 {len(sources)} 个新闻源收集数据:")
+        collection_summary_message += f"将从 {len(sources)} 个新闻源收集数据:\n"
         for source in sources:
             source_name = SOURCE_NAMES.get(source, source)
-            print(f"  - {source_name}")
+            collection_summary_message += f"  - {source_name}\n"
+        
+        logger.info(collection_summary_message)
         
         try:
             # 获取新闻数据
@@ -185,7 +198,7 @@ class NewsCollector:
             return processed_data
             
         except Exception as e:
-            print(f"收集新闻失败: {e}")
+            logger.exception(f"收集新闻失败: {e}")
             return {
                 'success': False,
                 'error': str(e),
@@ -255,35 +268,30 @@ class NewsCollector:
                 }
                 
         except Exception as e:
-            print(f"处理新闻项失败: {e}")
+            logger.exception(f"处理新闻项失败: {e}")
             return None
     
     def _print_collection_summary(self, data: Dict):
         """打印收集摘要"""
-        print("\n" + "=" * 50)
-        print("新闻收集摘要")
-        print("=" * 50)
-        
-        print(f"总新闻源: {data['total_sources']}")
-        print(f"成功源数: {data['successful_sources']}")
-        print(f"总新闻数: {data['total_news']}")
-        
+        collection_summary_message = ""
+        collection_summary_message += f"\n总新闻源: {data['total_sources']}\n"
+        collection_summary_message += f"成功源数: {data['successful_sources']}\n"
+        collection_summary_message += f"总新闻数: {data['total_news']}\n"
         if 'saved_count' in data:
-            print(f"已保存数: {data['saved_count']}")
-        
-        print("=" * 50)
+            collection_summary_message += f"已保存数: {data['saved_count']}\n"
+        logger.info(collection_summary_message)
     
     def get_today_news(self) -> List[Dict]:
         """获取今天的新闻"""
         try:
             return self.db_manager.get_daily_news(date.today())
         except Exception as e:
-            print(f"获取今日新闻失败: {e}")
+            logger.exception(f"获取今日新闻失败: {e}")
             return []
 
 async def main():
     """测试新闻收集器"""
-    print("测试新闻收集器...")
+    logger.info("测试新闻收集器...")
     
     async with NewsCollector() as collector:
         # 收集新闻
@@ -292,9 +300,9 @@ async def main():
         )
         
         if result['success']:
-            print(f"收集成功！共获取 {result['total_news']} 条新闻")
+            logger.info(f"收集成功！共获取 {result['total_news']} 条新闻")
         else:
-            print(f"收集失败: {result.get('error', '未知错误')}")
+            logger.error(f"收集失败: {result.get('error', '未知错误')}")
 
 if __name__ == "__main__":
     asyncio.run(main())
diff --git a/requirements.txt b/requirements.txt
index 6bee269..a1ae6c0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -71,4 +71,5 @@ flake8>=6.0.0
 
 # ===== Web服务器 =====
 fastapi==0.110.2
-uvicorn==0.29.0
\ No newline at end of file
+uvicorn==0.29.0
+loguru
\ No newline at end of file

From bda4343c48a04983a82288c40cd118b7c3ed59c7 Mon Sep 17 00:00:00 2001
From: ghmark675 <188834327+ghmark675@users.noreply.github.com>
Date: Mon, 3 Nov 2025 08:28:36 +0800
Subject: [PATCH 3/7] chore: stop tracking config.py

---
 .gitignore                     | 1 +
 README.md                      | 2 ++
 config.py => config.py.example | 0
 3 files changed, 3 insertions(+)
 rename config.py => config.py.example (100%)

diff --git a/.gitignore b/.gitignore
index 76eea29..ac6c0e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -295,6 +295,7 @@ secrets.json
 *.key
 *.pem
 *.crt
+config.py
 
 # API 密钥
 api_keys.txt
diff --git a/README.md b/README.md
index 598ea3c..790579b 100644
--- a/README.md
+++ b/README.md
@@ -223,6 +223,8 @@ playwright install chromium
 
 #### 4.1 配置API密钥
 
+复制一份 `config.py.example` 文件，命名为 `config.py`
+
 编辑 `config.py` 文件，填入您的API密钥（您也可以选择自己的模型、搜索代理，详情见config文件内）：
 
 ```python
diff --git a/config.py b/config.py.example
similarity index 100%
rename from config.py
rename to config.py.example

From 46b2f00a6eb7d92ef695a254df8c4ed3c44d3287 Mon Sep 17 00:00:00 2001
From: ghmark675 <188834327+ghmark675@users.noreply.github.com>
Date: Mon, 3 Nov 2025 08:38:09 +0800
Subject: [PATCH 4/7] docs(README-EN): Update configuration instructions

Instruct users to copy config.py.example to config.py for local setup.
---
 README-EN.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README-EN.md b/README-EN.md
index 0650bb5..d03bd09 100644
--- a/README-EN.md
+++ b/README-EN.md
@@ -218,6 +218,8 @@ playwright install chromium
 
 #### 4.1 Configure API Keys
 
+Copy the `config.py.example` file to `config.py`
+
 Edit the `config.py` file and fill in your API keys (you can also choose your own models and search proxies; see the config file for details):
 
 ```python

From 6fd897d82a9564081f60e53b5718b9eb9994f75a Mon Sep 17 00:00:00 2001
From: ghmark675 <ghmark675@163.com>
Date: Mon, 3 Nov 2025 12:16:17 +0800
Subject: [PATCH 5/7] chore(config): stop tracking MindSpider config.py

change it to config.py.example
---
 .gitignore                                  | 1 +
 MindSpider/{config.py => config.py.example} | 0
 2 files changed, 1 insertion(+)
 rename MindSpider/{config.py => config.py.example} (100%)

diff --git a/.gitignore b/.gitignore
index ac6c0e9..8582217 100644
--- a/.gitignore
+++ b/.gitignore
@@ -296,6 +296,7 @@ secrets.json
 *.pem
 *.crt
 config.py
+MindSpider/config.py
 
 # API 密钥
 api_keys.txt
diff --git a/MindSpider/config.py b/MindSpider/config.py.example
similarity index 100%
rename from MindSpider/config.py
rename to MindSpider/config.py.example

From dba7fa9902e676dbcec2aa65bdc694c433a37a68 Mon Sep 17 00:00:00 2001
From: ghmark675 <ghmark675@163.com>
Date: Mon, 3 Nov 2025 12:20:58 +0800
Subject: [PATCH 6/7] docs(README): Update configuration instructions

Instruct users to copy config.py.example to config.py for local setup.
---
 README-EN.md | 3 +++
 README.md    | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/README-EN.md b/README-EN.md
index d03bd09..57c117d 100644
--- a/README-EN.md
+++ b/README-EN.md
@@ -245,6 +245,9 @@ INSIGHT_ENGINE_MODEL_NAME = "kimi-k2-0711-preview"
 #### 4.2 Database Initialization
 
 **Option 1: Use Local Database**
+
+Use `MindSpider\config.py.example` as the configuration template: copy this file and rename the copy to `config.py`.
+
 ```bash
 # Local MySQL database initialization
 cd MindSpider
diff --git a/README.md b/README.md
index 790579b..a512efd 100644
--- a/README.md
+++ b/README.md
@@ -253,6 +253,8 @@ INSIGHT_ENGINE_MODEL_NAME = "kimi-k2-0711-preview"
 
 > MindSpider爬虫系统跟舆情系统是各自独立的，所以需要再去`MindSpider\config.py`配置一下
 
+配置模板可以参考`MindSpider\config.py.example`，可以复制该文件并命名为`config.py`
+
 ```bash
 # 本地MySQL数据库初始化
 cd MindSpider

From 5b125ea91ab9c1f22015133840f70f1a08e04628 Mon Sep 17 00:00:00 2001
From: ghmark675 <ghmark675@163.com>
Date: Mon, 3 Nov 2025 13:49:29 +0800
Subject: [PATCH 7/7] hotfix(database): fix `source_keyword` not in table
 bilibili_video

Fix: #51
---
 .../MediaCrawler/schema/tables.sql            | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/MindSpider/DeepSentimentCrawling/MediaCrawler/schema/tables.sql b/MindSpider/DeepSentimentCrawling/MediaCrawler/schema/tables.sql
index 7310625..f5d1899 100644
--- a/MindSpider/DeepSentimentCrawling/MediaCrawler/schema/tables.sql
+++ b/MindSpider/DeepSentimentCrawling/MediaCrawler/schema/tables.sql
@@ -455,19 +455,12 @@ CREATE TABLE tieba_comment
     KEY               `idx_tieba_comment_publish_time` (`publish_time`)
 ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='贴吧评论表';
 
--- 增加搜索来源关键字字段
-alter table bilibili_video
-    add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
-alter table douyin_aweme
-    add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
-alter table kuaishou_video
-    add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
-alter table weibo_note
-    add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
-alter table xhs_note
-    add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
-alter table tieba_note
-    add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
+alter table bilibili_video add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
+alter table douyin_aweme add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
+alter table kuaishou_video add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
+alter table weibo_note add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
+alter table xhs_note add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
+alter table tieba_note add column `source_keyword` varchar(255) default '' comment '搜索来源关键字';
 
 
 DROP TABLE IF EXISTS `weibo_creator`;