本地化&2.0
This commit is contained in:
@@ -32,6 +32,7 @@ class KeywordManager:
|
||||
def __init__(self):
    """Initialise the keyword manager.

    Sets up the (initially empty) database engine slot, resolves the
    optional custom-keywords file path, and then calls ``connect()``.
    """
    # No engine exists yet, so the attribute is Optional rather than a
    # bare ``Engine``.
    self.engine: Optional[Engine] = None
    # ``None`` when no custom keyword file is configured.
    self.custom_keywords_path: Optional[Path] = self._resolve_custom_keywords_path()
    self.connect()
|
||||
|
||||
def connect(self):
|
||||
@@ -68,24 +69,31 @@ class KeywordManager:
|
||||
Returns:
|
||||
关键词列表
|
||||
"""
|
||||
if not getattr(settings, "ENABLE_KEYWORD_SEARCH", True):
|
||||
logger.info("关键词搜索已通过配置禁用,返回默认关键词列表")
|
||||
return self._limit_keywords(self._get_default_keywords(), max_keywords)
|
||||
|
||||
if not target_date:
|
||||
target_date = date.today()
|
||||
|
||||
if getattr(settings, "USE_DEFAULT_KEYWORDS_ONLY", False):
|
||||
logger.info("配置启用默认关键词模式,直接返回默认关键词")
|
||||
return self._limit_keywords(self._get_default_keywords(), max_keywords)
|
||||
|
||||
logger.info(f"正在获取 {target_date} 的关键词...")
|
||||
|
||||
# 优先使用自定义关键词
|
||||
custom_keywords = self._get_custom_keywords(target_date, max_keywords)
|
||||
if custom_keywords:
|
||||
return custom_keywords
|
||||
|
||||
# 首先尝试获取指定日期的关键词
|
||||
topics_data = self.get_daily_topics(target_date)
|
||||
|
||||
if topics_data and topics_data.get('keywords'):
|
||||
keywords = topics_data['keywords']
|
||||
logger.info(f"成功获取 {target_date} 的 {len(keywords)} 个关键词")
|
||||
|
||||
# 如果关键词太多,随机选择指定数量
|
||||
if len(keywords) > max_keywords:
|
||||
keywords = random.sample(keywords, max_keywords)
|
||||
logger.info(f"随机选择了 {max_keywords} 个关键词")
|
||||
|
||||
return keywords
|
||||
return self._limit_keywords(keywords, max_keywords)
|
||||
|
||||
# 如果没有当天的关键词,尝试获取最近几天的
|
||||
logger.info(f"{target_date} 没有关键词数据,尝试获取最近的关键词...")
|
||||
@@ -100,15 +108,14 @@ class KeywordManager:
|
||||
|
||||
# 去重并限制数量
|
||||
unique_keywords = list(set(all_keywords))
|
||||
if len(unique_keywords) > max_keywords:
|
||||
unique_keywords = random.sample(unique_keywords, max_keywords)
|
||||
limited_keywords = self._limit_keywords(unique_keywords, max_keywords)
|
||||
|
||||
logger.info(f"从最近7天的数据中获取到 {len(unique_keywords)} 个关键词")
|
||||
return unique_keywords
|
||||
logger.info(f"从最近7天的数据中获取到 {len(limited_keywords)} 个关键词")
|
||||
return limited_keywords
|
||||
|
||||
# 如果都没有,返回默认关键词
|
||||
logger.info("没有找到任何关键词数据,使用默认关键词")
|
||||
return self._get_default_keywords()
|
||||
return self._limit_keywords(self._get_default_keywords(), max_keywords)
|
||||
|
||||
def get_daily_topics(self, extract_date: date = None) -> Optional[Dict]:
|
||||
"""
|
||||
@@ -176,17 +183,75 @@ class KeywordManager:
|
||||
except Exception as e:
|
||||
logger.exception(f"获取最近话题分析失败: {e}")
|
||||
return []
|
||||
|
||||
def _resolve_custom_keywords_path(self) -> Optional[Path]:
    """Resolve the configured custom-keywords file to a usable path.

    Reads ``settings.CUSTOM_KEYWORDS_FILE``. A missing or empty setting
    yields ``None``. Otherwise ``~`` is expanded and a relative path is
    anchored at ``project_root``.

    Returns:
        The resolved path, or ``None`` when no file is configured.
    """
    configured = getattr(settings, "CUSTOM_KEYWORDS_FILE", None)
    if configured:
        candidate = Path(configured).expanduser()
        # Relative paths are interpreted relative to the project root.
        return candidate if candidate.is_absolute() else project_root / candidate
    return None
|
||||
|
||||
@staticmethod
|
||||
def _limit_keywords(keywords: List[str], max_keywords: int) -> List[str]:
|
||||
"""根据最大数量限制关键词"""
|
||||
if not keywords:
|
||||
return []
|
||||
if max_keywords and len(keywords) > max_keywords:
|
||||
keywords = random.sample(keywords, max_keywords)
|
||||
return keywords
|
||||
|
||||
def _get_custom_keywords(self, target_date: date, max_keywords: int) -> Optional[List[str]]:
    """Load keywords for ``target_date`` from the custom keywords file.

    Supported JSON layouts:
        1. A plain list: ``["AI", "大模型"]``
        2. A mapping keyed by ISO date, with optional ``"default"`` /
           ``"keywords"`` fallbacks:
           ``{"2025-11-26": ["AI"], "default": ["科技"]}``

    Args:
        target_date: Date whose ISO string is looked up first in a mapping.
        max_keywords: Limit passed on to ``_limit_keywords``.

    Returns:
        The cleaned, limited keyword list, or ``None`` when no file is
        configured, the file is missing or unreadable, or it yields no
        usable keywords.
    """
    path = self.custom_keywords_path
    if not path:
        return None
    if not path.exists():
        logger.warning(f"自定义关键词文件不存在: {path}")
        return None

    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        # Best effort: an unreadable file must not break keyword lookup.
        logger.error(f"读取自定义关键词文件失败({path}): {e}")
        return None

    keywords: Optional[List[str]] = None
    if isinstance(data, list):
        keywords = data
    elif isinstance(data, dict):
        # Lookup priority: exact date, then "default", then "keywords".
        # The first key holding a list wins, even if that list is empty.
        for key in (target_date.isoformat(), "default", "keywords"):
            candidate = data.get(key)
            if isinstance(candidate, list):
                keywords = candidate
                break

    if not keywords:
        return None

    # Keep only non-blank strings, with surrounding whitespace removed.
    stripped = (kw.strip() for kw in keywords if isinstance(kw, str))
    cleaned_keywords = [kw for kw in stripped if kw]
    if not cleaned_keywords:
        return None

    limited = self._limit_keywords(cleaned_keywords, max_keywords)
    logger.info(f"使用自定义关键词({path})共 {len(limited)} 个")
    return limited
|
||||
|
||||
def _get_default_keywords(self) -> List[str]:
|
||||
"""获取默认关键词列表"""
|
||||
return [
|
||||
"科技", "人工智能", "AI", "编程", "互联网",
|
||||
"创业", "投资", "理财", "股市", "经济",
|
||||
"教育", "学习", "考试", "大学", "就业",
|
||||
"健康", "养生", "运动", "美食", "旅游",
|
||||
"时尚", "美妆", "购物", "生活", "家居",
|
||||
"电影", "音乐", "游戏", "娱乐", "明星",
|
||||
"新闻", "热点", "社会", "政策", "环保"
|
||||
"F6智慧门店","南京爱福路汽车科技有限公司","汽车后市场","汽修店","新康众"
|
||||
]
|
||||
|
||||
def get_all_keywords_for_platforms(self, platforms: List[str], target_date: date = None,
|
||||
@@ -286,6 +351,16 @@ class KeywordManager:
|
||||
if not target_date:
|
||||
target_date = date.today()
|
||||
|
||||
# 当配置为仅使用默认关键词时,直接返回默认关键词的摘要,避免上层误判为“无数据”
|
||||
if getattr(settings, "USE_DEFAULT_KEYWORDS_ONLY", False):
|
||||
default_keywords = self._get_default_keywords()
|
||||
return {
|
||||
'date': target_date,
|
||||
'keywords_count': len(default_keywords),
|
||||
'summary': '使用默认关键词模式',
|
||||
'has_data': bool(default_keywords)
|
||||
}
|
||||
|
||||
topics_data = self.get_daily_topics(target_date)
|
||||
|
||||
if topics_data:
|
||||
|
||||
Reference in New Issue
Block a user