更新部分爬虫以兼容本地运行及数据库存储
This commit is contained in:
@@ -119,6 +119,32 @@ class BiliDbStoreImplement(AbstractStore):
|
||||
content_item: content item dict
|
||||
"""
|
||||
video_id = content_item.get("video_id")
|
||||
if not video_id:
|
||||
return
|
||||
|
||||
# 关键词过滤:仅在落库时进行,仅对主贴/视频过滤,不过滤评论
|
||||
# 支持精确匹配和模糊匹配两种模式
|
||||
try:
|
||||
import sys
|
||||
from pathlib import Path
|
||||
project_root = Path(__file__).resolve().parents[4]
|
||||
if str(project_root) not in sys.path:
|
||||
sys.path.insert(0, str(project_root))
|
||||
from config import settings
|
||||
|
||||
title = content_item.get("title", "")
|
||||
desc = content_item.get("desc", "")
|
||||
content_text = title + " " + desc
|
||||
strict_keywords = getattr(settings, 'STRICT_KEYWORDS', None)
|
||||
fuzzy_keywords = getattr(settings, 'FUZZY_KEYWORDS', None)
|
||||
|
||||
if strict_keywords or fuzzy_keywords:
|
||||
if not utils.check_keyword_match_with_modes(content_text, strict_keywords, fuzzy_keywords):
|
||||
utils.logger.warning(f"[BilibiliDbStoreImplement.store_content] ❌ Filtered video {video_id} - content does not match any keyword")
|
||||
return
|
||||
except Exception as e:
|
||||
utils.logger.debug(f"[BilibiliDbStoreImplement.store_content] Failed to load keyword config: {e}")
|
||||
|
||||
# 确保 video_id 为整数类型,匹配数据库 BigInteger 字段
|
||||
if video_id is not None:
|
||||
video_id = int(video_id) if not isinstance(video_id, int) else video_id
|
||||
|
||||
Reference in New Issue
Block a user