修复 news_id 冲突问题:在 news_id 与 topic_id 中加入日期以保证唯一性
This commit is contained in:
@@ -25,6 +25,7 @@ except ImportError:
|
||||
|
||||
from config import settings
|
||||
|
||||
|
||||
class DatabaseManager:
|
||||
"""数据库管理器"""
|
||||
|
||||
@@ -46,14 +47,15 @@ class DatabaseManager:
|
||||
except ModuleNotFoundError as e:
|
||||
missing: str = str(e)
|
||||
if "psycopg" in missing:
|
||||
logger.error("数据库连接失败: 未安装PostgreSQL驱动 psycopg。请安装: psycopg[binary]。参考指令:uv pip install psycopg[binary]")
|
||||
logger.error(
|
||||
"数据库连接失败: 未安装PostgreSQL驱动 psycopg。请安装: psycopg[binary]。参考指令:uv pip install psycopg[binary]")
|
||||
elif "pymysql" in missing:
|
||||
logger.error("数据库连接失败: 未安装MySQL驱动 pymysql。请安装: pymysql。参考指令:uv pip install pymysql")
|
||||
else:
|
||||
logger.error(f"数据库连接失败(缺少驱动): {e}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"数据库连接失败: {e}")
|
||||
logger.exception(f"数据库连接失败: {e}")
|
||||
raise
|
||||
|
||||
def close(self):
|
||||
@@ -97,7 +99,13 @@ class DatabaseManager:
|
||||
# 逐条插入,单条失败不影响后续(每条独立事务)
|
||||
for news_item in news_data:
|
||||
try:
|
||||
news_id = f"{news_item.get('source', 'unknown')}_{news_item.get('id', news_item.get('rank', 0))}"
|
||||
# news_item.get('id') 已经是完整的 news_id(格式:source_item_id)
|
||||
# 为了支持同一条新闻在不同日期出现,将 crawl_date 加入到 news_id 中
|
||||
base_news_id = news_item.get(
|
||||
'id') or f"{news_item.get('source', 'unknown')}_rank_{news_item.get('rank', 0)}"
|
||||
# 将日期格式化为字符串并加入到 news_id 中,确保全局唯一性
|
||||
news_id = f"{base_news_id}_{crawl_date.strftime('%Y%m%d')}"
|
||||
|
||||
title_val = (news_item.get("title", "") or "")
|
||||
if len(title_val) > 500:
|
||||
title_val = title_val[:500]
|
||||
@@ -124,7 +132,7 @@ class DatabaseManager:
|
||||
)
|
||||
saved_count += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"保存单条新闻失败: {e}")
|
||||
logger.exception(f"保存单条新闻失败: {e}")
|
||||
continue
|
||||
logger.info(f"成功保存 {saved_count} 条新闻记录")
|
||||
return saved_count
|
||||
@@ -174,17 +182,21 @@ class DatabaseManager:
|
||||
|
||||
try:
|
||||
keywords_json = json.dumps(keywords, ensure_ascii=False)
|
||||
# 为了支持外键引用,topic_id 需要全局唯一,所以将日期加入到 topic_id 中
|
||||
topic_id = f"summary_{extract_date.strftime('%Y%m%d')}"
|
||||
|
||||
with self.engine.begin() as conn:
|
||||
check = conn.execute(
|
||||
text("SELECT id FROM daily_topics WHERE extract_date = :d AND topic_id = :tid"),
|
||||
{"d": extract_date, "tid": "summary"},
|
||||
{"d": extract_date, "tid": topic_id},
|
||||
).first()
|
||||
if check:
|
||||
conn.execute(
|
||||
text(
|
||||
"UPDATE daily_topics SET keywords = :k, topic_description = :s, add_ts = :ts, last_modify_ts = :lmt, topic_name = :tn WHERE extract_date = :d AND topic_id = :tid"
|
||||
),
|
||||
{"k": keywords_json, "s": summary, "ts": current_timestamp, "lmt": current_timestamp, "d": extract_date, "tid": "summary", "tn": "每日新闻分析"},
|
||||
{"k": keywords_json, "s": summary, "ts": current_timestamp, "lmt": current_timestamp,
|
||||
"d": extract_date, "tid": topic_id, "tn": "每日新闻分析"},
|
||||
)
|
||||
logger.info(f"更新了 {extract_date} 的话题分析")
|
||||
else:
|
||||
@@ -192,7 +204,8 @@ class DatabaseManager:
|
||||
text(
|
||||
"INSERT INTO daily_topics (extract_date, topic_id, topic_name, keywords, topic_description, add_ts, last_modify_ts) VALUES (:d, :tid, :tn, :k, :s, :ts, :lmt)"
|
||||
),
|
||||
{"d": extract_date, "tid": "summary", "tn": "每日新闻分析", "k": keywords_json, "s": summary, "ts": current_timestamp, "lmt": current_timestamp},
|
||||
{"d": extract_date, "tid": topic_id, "tn": "每日新闻分析", "k": keywords_json, "s": summary,
|
||||
"ts": current_timestamp, "lmt": current_timestamp},
|
||||
)
|
||||
logger.info(f"保存了 {extract_date} 的话题分析")
|
||||
return True
|
||||
@@ -215,7 +228,8 @@ class DatabaseManager:
|
||||
|
||||
try:
|
||||
with self.engine.connect() as conn:
|
||||
result = conn.execute(text("SELECT * FROM daily_topics WHERE extract_date = :d"), {"d": extract_date}).mappings().first()
|
||||
result = conn.execute(text("SELECT * FROM daily_topics WHERE extract_date = :d"),
|
||||
{"d": extract_date}).mappings().first()
|
||||
if result:
|
||||
result = dict(result) # 转为可变dict以支持item赋值
|
||||
result["keywords"] = json.loads(result["keywords"]) if result.get("keywords") else []
|
||||
@@ -290,6 +304,7 @@ class DatabaseManager:
|
||||
logger.exception(f"获取统计摘要失败: {e}")
|
||||
return {"news_stats": [], "topics_stats": []}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 测试数据库管理器
|
||||
with DatabaseManager() as db:
|
||||
|
||||
Reference in New Issue
Block a user