本地化&2.0

This commit is contained in:
z66
2025-12-02 14:01:39 +08:00
parent ec1baf539c
commit a9eda60493
15 changed files with 409 additions and 140 deletions
@@ -32,6 +32,7 @@ class KeywordManager:
def __init__(self):
"""Initialize the keyword manager."""
# No engine yet; presumably connect() below creates it (its body is not visible here).
self.engine: Engine = None
# Location of the custom keywords file derived from settings; None when not configured.
self.custom_keywords_path: Optional[Path] = self._resolve_custom_keywords_path()
self.connect()
def connect(self):
@@ -68,24 +69,31 @@ class KeywordManager:
Returns:
关键词列表
"""
if not getattr(settings, "ENABLE_KEYWORD_SEARCH", True):
logger.info("关键词搜索已通过配置禁用,返回默认关键词列表")
return self._limit_keywords(self._get_default_keywords(), max_keywords)
if not target_date:
target_date = date.today()
if getattr(settings, "USE_DEFAULT_KEYWORDS_ONLY", False):
logger.info("配置启用默认关键词模式,直接返回默认关键词")
return self._limit_keywords(self._get_default_keywords(), max_keywords)
logger.info(f"正在获取 {target_date} 的关键词...")
# 优先使用自定义关键词
custom_keywords = self._get_custom_keywords(target_date, max_keywords)
if custom_keywords:
return custom_keywords
# 首先尝试获取指定日期的关键词
topics_data = self.get_daily_topics(target_date)
if topics_data and topics_data.get('keywords'):
keywords = topics_data['keywords']
logger.info(f"成功获取 {target_date}{len(keywords)} 个关键词")
# 如果关键词太多,随机选择指定数量
if len(keywords) > max_keywords:
keywords = random.sample(keywords, max_keywords)
logger.info(f"随机选择了 {max_keywords} 个关键词")
return keywords
return self._limit_keywords(keywords, max_keywords)
# 如果没有当天的关键词,尝试获取最近几天的
logger.info(f"{target_date} 没有关键词数据,尝试获取最近的关键词...")
@@ -100,15 +108,14 @@ class KeywordManager:
# 去重并限制数量
unique_keywords = list(set(all_keywords))
if len(unique_keywords) > max_keywords:
unique_keywords = random.sample(unique_keywords, max_keywords)
limited_keywords = self._limit_keywords(unique_keywords, max_keywords)
logger.info(f"从最近7天的数据中获取到 {len(unique_keywords)} 个关键词")
return unique_keywords
logger.info(f"从最近7天的数据中获取到 {len(limited_keywords)} 个关键词")
return limited_keywords
# 如果都没有,返回默认关键词
logger.info("没有找到任何关键词数据,使用默认关键词")
return self._get_default_keywords()
return self._limit_keywords(self._get_default_keywords(), max_keywords)
def get_daily_topics(self, extract_date: date = None) -> Optional[Dict]:
"""
@@ -176,17 +183,75 @@ class KeywordManager:
except Exception as e:
logger.exception(f"获取最近话题分析失败: {e}")
return []
def _resolve_custom_keywords_path(self) -> Optional[Path]:
    """Work out where the custom keywords file lives.

    Reads ``settings.CUSTOM_KEYWORDS_FILE``. A missing or empty setting
    means the feature is off and ``None`` is returned. ``~`` is expanded,
    and a relative location is anchored at ``project_root``.

    Returns:
        The resolved path, or ``None`` when no file is configured.
    """
    configured = getattr(settings, "CUSTOM_KEYWORDS_FILE", None)
    if not configured:
        return None

    candidate = Path(configured).expanduser()
    # Absolute locations are used verbatim; everything else is project-relative.
    return candidate if candidate.is_absolute() else project_root / candidate
@staticmethod
def _limit_keywords(keywords: List[str], max_keywords: int) -> List[str]:
    """Cap a keyword list at ``max_keywords`` entries by random sampling.

    Args:
        keywords: Candidate keywords. ``None`` or an empty list yields ``[]``.
        max_keywords: Upper bound on the result size. ``None``, ``0`` or a
            negative value disables the cap.

    Returns:
        The input list itself when it already fits (or no cap applies),
        otherwise a new list of ``max_keywords`` randomly sampled entries.
    """
    if not keywords:
        return []
    # random.sample raises ValueError for a negative sample size, so only a
    # strictly positive cap that is smaller than the list triggers sampling.
    if max_keywords and 0 < max_keywords < len(keywords):
        return random.sample(keywords, max_keywords)
    return keywords
def _get_custom_keywords(self, target_date: date, max_keywords: int) -> Optional[List[str]]:
    """Load the keywords for ``target_date`` from the custom keywords JSON file.

    Supported file layouts:
        1. A flat list: ``["AI", "大模型"]``
        2. A mapping: ``{"2025-11-26": ["AI"], "default": ["科技"]}``.
           The ISO date key is tried first, then ``"default"``, then
           ``"keywords"``; the first key holding a list wins.

    Args:
        target_date: Date whose ISO string is looked up in a mapping file.
        max_keywords: Cap forwarded to ``_limit_keywords``.

    Returns:
        The cleaned, de-duplicated and capped keyword list, or ``None`` when
        no file is configured, the file is missing or unreadable, or it
        yields no usable keyword. ``None`` lets the caller fall through to
        its other keyword sources.
    """
    path = self.custom_keywords_path
    if not path:
        return None
    if not path.exists():
        logger.warning(f"自定义关键词文件不存在: {path}")
        return None

    # Best effort: any read or parse failure is logged and treated as
    # "no custom keywords" rather than aborting the caller.
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        logger.error(f"读取自定义关键词文件失败({path}): {e}")
        return None

    keywords: Optional[List[str]] = None
    if isinstance(data, list):
        keywords = data
    elif isinstance(data, dict):
        for key in (target_date.isoformat(), "default", "keywords"):
            if isinstance(data.get(key), list):
                keywords = data[key]
                break
    if not keywords:
        return None

    # Drop non-string and blank entries, then de-duplicate while keeping the
    # file order so a repeated entry is neither searched nor sampled twice.
    cleaned_keywords = list(dict.fromkeys(
        kw.strip() for kw in keywords if isinstance(kw, str) and kw.strip()
    ))
    if not cleaned_keywords:
        return None

    limited = self._limit_keywords(cleaned_keywords, max_keywords)
    logger.info(f"使用自定义关键词({path})共 {len(limited)}")
    return limited
def _get_default_keywords(self) -> List[str]:
    """Return the built-in fallback keyword list.

    Returns:
        A fresh list of 40 keywords: 35 general-interest topics followed by
        5 automotive-aftermarket terms.
    """
    # Every row ends with a comma: adjacent string literals without one are
    # silently concatenated into a single bogus keyword.
    return [
        "科技", "人工智能", "AI", "编程", "互联网",
        "创业", "投资", "理财", "股市", "经济",
        "教育", "学习", "考试", "大学", "就业",
        "健康", "养生", "运动", "美食", "旅游",
        "时尚", "美妆", "购物", "生活", "家居",
        "电影", "音乐", "游戏", "娱乐", "明星",
        "新闻", "热点", "社会", "政策", "环保",
        "F6智慧门店", "南京爱福路汽车科技有限公司", "汽车后市场", "汽修店", "新康众",
    ]
def get_all_keywords_for_platforms(self, platforms: List[str], target_date: date = None,
@@ -286,6 +351,16 @@ class KeywordManager:
if not target_date:
target_date = date.today()
# 当配置为仅使用默认关键词时,直接返回默认关键词的摘要,避免上层误判为“无数据”
if getattr(settings, "USE_DEFAULT_KEYWORDS_ONLY", False):
default_keywords = self._get_default_keywords()
return {
'date': target_date,
'keywords_count': len(default_keywords),
'summary': '使用默认关键词模式',
'has_data': bool(default_keywords)
}
topics_data = self.get_daily_topics(target_date)
if topics_data: