修复fetch_news无法正常获取的问题

This commit is contained in:
Doiiars
2025-11-03 11:26:51 +08:00
parent 7726be4526
commit 3ad807778c
2 changed files with 45 additions and 36 deletions
@@ -12,6 +12,7 @@ import json
from datetime import datetime, date from datetime import datetime, date
from pathlib import Path from pathlib import Path
from typing import List, Dict, Optional from typing import List, Dict, Optional
from loguru import logger
# 添加项目根目录到路径 # 添加项目根目录到路径
project_root = Path(__file__).parent.parent project_root = Path(__file__).parent.parent
@@ -38,8 +39,7 @@ SOURCE_NAMES = {
"wallstreetcn": "华尔街见闻", "wallstreetcn": "华尔街见闻",
"thepaper": "澎湃新闻", "thepaper": "澎湃新闻",
"cls-hot": "财联社", "cls-hot": "财联社",
"xueqiu": "雪球热榜", "xueqiu": "雪球热榜"
"kuaishou": "快手热榜"
} }
class NewsCollector: class NewsCollector:
@@ -72,15 +72,25 @@ class NewsCollector:
async def fetch_news(self, source: str) -> dict: async def fetch_news(self, source: str) -> dict:
"""从指定源获取最新新闻""" """从指定源获取最新新闻"""
url = f"{BASE_URL}/api/s?id={source}&latest" url = f"{BASE_URL}/api/s?id={source}&latest"
headers = {"Accept": "application/json"} headers = {
"Accept": "application/json, text/plain, */*",
"Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/124.0.0.0 Safari/537.36"
),
"Referer": BASE_URL,
"Connection": "keep-alive",
}
try: try:
async with httpx.AsyncClient(timeout=30.0) as client: async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
response = await client.get(url, headers=headers) response = await client.get(url, headers=headers)
response.raise_for_status() response.raise_for_status()
# 解析JSON响应 # 解析JSON响应
data = json.loads(response.text) data = response.json()
return { return {
"source": source, "source": source,
"status": "success", "status": "success",
@@ -91,21 +101,21 @@ class NewsCollector:
return { return {
"source": source, "source": source,
"status": "timeout", "status": "timeout",
"error": "请求超时", "error": f"请求超时: {source}({url})",
"timestamp": datetime.now().isoformat() "timestamp": datetime.now().isoformat()
} }
except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
return { return {
"source": source, "source": source,
"status": "http_error", "status": "http_error",
"error": f"HTTP错误: {e.response.status_code}", "error": f"HTTP错误: {source}({url}) - {e.response.status_code}",
"timestamp": datetime.now().isoformat() "timestamp": datetime.now().isoformat()
} }
except Exception as e: except Exception as e:
return { return {
"source": source, "source": source,
"status": "error", "status": "error",
"error": f"未知错误: {str(e)}", "error": f"未知错误: {source}({url}) - {str(e)}",
"timestamp": datetime.now().isoformat() "timestamp": datetime.now().isoformat()
} }
@@ -114,13 +124,13 @@ class NewsCollector:
if sources is None: if sources is None:
sources = list(SOURCE_NAMES.keys()) sources = list(SOURCE_NAMES.keys())
print(f"正在获取 {len(sources)} 个新闻源的最新内容...") logger.info(f"正在获取 {len(sources)} 个新闻源的最新内容...")
print("=" * 80) logger.info("=" * 80)
results = [] results = []
for source in sources: for source in sources:
source_name = SOURCE_NAMES.get(source, source) source_name = SOURCE_NAMES.get(source, source)
print(f"正在获取 {source_name} 的新闻...") logger.info(f"正在获取 {source_name} 的新闻...")
result = await self.fetch_news(source) result = await self.fetch_news(source)
results.append(result) results.append(result)
@@ -128,11 +138,11 @@ class NewsCollector:
data = result["data"] data = result["data"]
if 'items' in data and isinstance(data['items'], list): if 'items' in data and isinstance(data['items'], list):
count = len(data['items']) count = len(data['items'])
print(f"{source_name}: 获取成功,共 {count} 条新闻") logger.info(f"{source_name}: 获取成功,共 {count} 条新闻")
else: else:
print(f"{source_name}: 获取成功") logger.info(f"{source_name}: 获取成功")
else: else:
print(f"{source_name}: {result.get('error', '获取失败')}") logger.error(f"{source_name}: {result.get('error', '获取失败')}")
# 避免请求过快 # 避免请求过快
await asyncio.sleep(0.5) await asyncio.sleep(0.5)
@@ -151,18 +161,21 @@ class NewsCollector:
Returns: Returns:
包含收集结果的字典 包含收集结果的字典
""" """
print(f"开始收集每日热点新闻...") collection_summary_message = ""
print(f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") collection_summary_message += "\n开始收集每日热点新闻...\n"
collection_summary_message += f"时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
# 选择新闻源 # 选择新闻源
if sources is None: if sources is None:
# 使用所有支持的新闻源 # 使用所有支持的新闻源
sources = list(SOURCE_NAMES.keys()) sources = list(SOURCE_NAMES.keys())
print(f"将从 {len(sources)} 个新闻源收集数据:") collection_summary_message += f"将从 {len(sources)} 个新闻源收集数据:\n"
for source in sources: for source in sources:
source_name = SOURCE_NAMES.get(source, source) source_name = SOURCE_NAMES.get(source, source)
print(f" - {source_name}") collection_summary_message += f" - {source_name}\n"
logger.info(collection_summary_message)
try: try:
# 获取新闻数据 # 获取新闻数据
@@ -185,7 +198,7 @@ class NewsCollector:
return processed_data return processed_data
except Exception as e: except Exception as e:
print(f"收集新闻失败: {e}") logger.exception(f"收集新闻失败: {e}")
return { return {
'success': False, 'success': False,
'error': str(e), 'error': str(e),
@@ -255,35 +268,30 @@ class NewsCollector:
} }
except Exception as e: except Exception as e:
print(f"处理新闻项失败: {e}") logger.exception(f"处理新闻项失败: {e}")
return None return None
def _print_collection_summary(self, data: Dict): def _print_collection_summary(self, data: Dict):
"""打印收集摘要""" """打印收集摘要"""
print("\n" + "=" * 50) collection_summary_message = ""
print("新闻收集摘要") collection_summary_message += f"\n总新闻源: {data['total_sources']}\n"
print("=" * 50) collection_summary_message += f"成功源数: {data['successful_sources']}\n"
collection_summary_message += f"总新闻数: {data['total_news']}\n"
print(f"总新闻源: {data['total_sources']}")
print(f"成功源数: {data['successful_sources']}")
print(f"总新闻数: {data['total_news']}")
if 'saved_count' in data: if 'saved_count' in data:
print(f"已保存数: {data['saved_count']}") collection_summary_message += f"已保存数: {data['saved_count']}\n"
logger.info(collection_summary_message)
print("=" * 50)
def get_today_news(self) -> List[Dict]: def get_today_news(self) -> List[Dict]:
"""获取今天的新闻""" """获取今天的新闻"""
try: try:
return self.db_manager.get_daily_news(date.today()) return self.db_manager.get_daily_news(date.today())
except Exception as e: except Exception as e:
print(f"获取今日新闻失败: {e}") logger.exception(f"获取今日新闻失败: {e}")
return [] return []
async def main(): async def main():
"""测试新闻收集器""" """测试新闻收集器"""
print("测试新闻收集器...") logger.info("测试新闻收集器...")
async with NewsCollector() as collector: async with NewsCollector() as collector:
# 收集新闻 # 收集新闻
@@ -292,9 +300,9 @@ async def main():
) )
if result['success']: if result['success']:
print(f"收集成功!共获取 {result['total_news']} 条新闻") logger.info(f"收集成功!共获取 {result['total_news']} 条新闻")
else: else:
print(f"收集失败: {result.get('error', '未知错误')}") logger.error(f"收集失败: {result.get('error', '未知错误')}")
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(main()) asyncio.run(main())
+2 -1
View File
@@ -71,4 +71,5 @@ flake8>=6.0.0
# ===== Web服务器 ===== # ===== Web服务器 =====
fastapi==0.110.2 fastapi==0.110.2
uvicorn==0.29.0 uvicorn==0.29.0
loguru