f4fe4141d4
2. 修复数据库not null错误 3. 支持PG数据库 4. 规范环境变量及配置使用 5. 规范为uv安装 6. 使用loggru
127 lines
5.8 KiB
Python
127 lines
5.8 KiB
Python
"""
|
||
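MindSpider database ORM models (SQLAlchemy 2.x).

This module defines the ORM models for the MindSpider extension tables,
which are decoupled from the original MediaCrawler tables.

Where the data models are defined:
- this file (MindSpider/schema/models_sa.py)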
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Optional
|
||
from datetime import date
|
||
|
||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
||
from sqlalchemy import Integer, String, Text, BigInteger, Date, Float, ForeignKey, Index, UniqueConstraint
|
||
from sqlalchemy.schema import ForeignKeyConstraint
|
||
from sqlalchemy.orm import relationship
|
||
# Names exported by a star-import of this module: the declarative base
# followed by the four model classes defined below.
__all__ = [
    "Base", "DailyNews", "DailyTopic",
    "TopicNewsRelation", "CrawlingTask",
]
|
||
|
||
|
||
class Base(DeclarativeBase):
    """Declarative base class shared by every ORM model in this module."""
|
||
|
||
|
||
class DailyNews(Base):
    """ORM model mapped to the ``daily_news`` table.

    ``news_id`` has its own single-column unique constraint so that
    ``topic_news_relation.news_id`` can reference it as a foreign key.
    """

    __tablename__ = "daily_news"

    # Surrogate auto-incrementing integer primary key.
    id: Mapped[int] = mapped_column(Integer, autoincrement=True, primary_key=True)

    # Required identifying fields.
    news_id: Mapped[str] = mapped_column(String(128), nullable=False)
    source_platform: Mapped[str] = mapped_column(String(32), nullable=False)
    title: Mapped[str] = mapped_column(String(500), nullable=False)

    # Optional (nullable) descriptive fields.
    url: Mapped[Optional[str]] = mapped_column(String(512))
    description: Mapped[Optional[str]] = mapped_column(Text)
    extra_info: Mapped[Optional[str]] = mapped_column(Text)

    crawl_date: Mapped[date] = mapped_column(Date, nullable=False)
    rank_position: Mapped[Optional[int]] = mapped_column(Integer)

    # Required integer timestamps with no default: the writer must supply
    # them. NOTE(review): the unit (seconds vs. milliseconds) is not visible
    # in this file - confirm against the code that inserts rows.
    add_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)
    last_modify_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)

    __table_args__ = (
        # Unique constraint added so the column can be a foreign-key target.
        UniqueConstraint("news_id", name="uq_daily_news_id_unique"),
        # NOTE(review): with news_id already unique on its own, this composite
        # constraint cannot reject any additional rows - confirm both are wanted.
        UniqueConstraint(
            "news_id",
            "source_platform",
            "crawl_date",
            name="uq_daily_news_unique",
        ),
        Index("idx_daily_news_date", "crawl_date"),
        Index("idx_daily_news_platform", "source_platform"),
        Index("idx_daily_news_rank", "rank_position"),
    )
|
||
|
||
|
||
class DailyTopic(Base):
    """ORM model mapped to the ``daily_topics`` table.

    ``topic_id`` has its own single-column unique constraint so that
    ``topic_news_relation.topic_id`` and ``crawling_tasks.topic_id`` can
    reference it as a foreign key.
    """

    __tablename__ = "daily_topics"

    # Surrogate auto-incrementing integer primary key.
    id: Mapped[int] = mapped_column(Integer, autoincrement=True, primary_key=True)

    # Required identifying fields.
    topic_id: Mapped[str] = mapped_column(String(64), nullable=False)
    topic_name: Mapped[str] = mapped_column(String(255), nullable=False)

    # Optional (nullable) free-text fields.
    topic_description: Mapped[Optional[str]] = mapped_column(Text)
    keywords: Mapped[Optional[str]] = mapped_column(Text)

    extract_date: Mapped[date] = mapped_column(Date, nullable=False)
    relevance_score: Mapped[Optional[float]] = mapped_column(Float)

    # The two defaults below are SQLAlchemy-side defaults (``default=``, not
    # ``server_default=``); the columns themselves remain nullable.
    news_count: Mapped[Optional[int]] = mapped_column(Integer, default=0)
    processing_status: Mapped[Optional[str]] = mapped_column(
        String(16), default="pending"
    )

    # Required integer timestamps with no default: the writer must supply
    # them. NOTE(review): the unit is not visible in this file - confirm.
    add_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)
    last_modify_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)

    __table_args__ = (
        # Unique constraint added so the column can be a foreign-key target.
        UniqueConstraint("topic_id", name="uq_daily_topics_id_unique"),
        # NOTE(review): with topic_id already unique on its own, this composite
        # constraint cannot reject any additional rows - confirm both are wanted.
        UniqueConstraint("topic_id", "extract_date", name="uq_daily_topics_unique"),
        Index("idx_daily_topics_date", "extract_date"),
        Index("idx_daily_topics_status", "processing_status"),
        Index("idx_daily_topics_score", "relevance_score"),
        # Composite index over (extract_date, processing_status).
        Index("idx_topic_date_status", "extract_date", "processing_status"),
    )
|
||
|
||
|
||
class TopicNewsRelation(Base):
    """ORM model mapped to the ``topic_news_relation`` table.

    Each row links one ``daily_topics.topic_id`` to one
    ``daily_news.news_id``. Both foreign keys are declared with
    ``ON DELETE CASCADE``.
    """

    __tablename__ = "topic_news_relation"

    # Surrogate auto-incrementing integer primary key.
    id: Mapped[int] = mapped_column(Integer, autoincrement=True, primary_key=True)

    # Foreign-key columns; lengths match the referenced columns
    # (String(64) for topic_id, String(128) for news_id).
    topic_id: Mapped[str] = mapped_column(String(64), nullable=False)
    news_id: Mapped[str] = mapped_column(String(128), nullable=False)

    relation_score: Mapped[Optional[float]] = mapped_column(Float)
    extract_date: Mapped[date] = mapped_column(Date, nullable=False)

    # Required integer timestamp with no default: the writer must supply it.
    # NOTE(review): the unit is not visible in this file - confirm.
    add_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)

    __table_args__ = (
        # One row per (topic, news item, extraction date).
        UniqueConstraint(
            "topic_id",
            "news_id",
            "extract_date",
            name="uq_topic_news_unique",
        ),
        Index("idx_topic_news_topic", "topic_id"),
        Index("idx_topic_news_news", "news_id"),
        Index("idx_topic_news_date", "extract_date"),
        ForeignKeyConstraint(
            ["topic_id"], ["daily_topics.topic_id"], ondelete="CASCADE"
        ),
        ForeignKeyConstraint(
            ["news_id"], ["daily_news.news_id"], ondelete="CASCADE"
        ),
    )
|
||
|
||
|
||
class CrawlingTask(Base):
    """ORM model mapped to the ``crawling_tasks`` table.

    ``task_id`` is unique. ``topic_id`` references
    ``daily_topics.topic_id`` with ``ON DELETE CASCADE``.
    """

    __tablename__ = "crawling_tasks"

    # Surrogate auto-incrementing integer primary key.
    id: Mapped[int] = mapped_column(Integer, autoincrement=True, primary_key=True)

    # Required identifying fields.
    task_id: Mapped[str] = mapped_column(String(64), nullable=False)
    topic_id: Mapped[str] = mapped_column(String(64), nullable=False)
    platform: Mapped[str] = mapped_column(String(32), nullable=False)
    search_keywords: Mapped[str] = mapped_column(Text, nullable=False)

    # SQLAlchemy-side default (``default=``, not ``server_default=``); the
    # column itself remains nullable.
    task_status: Mapped[Optional[str]] = mapped_column(String(16), default="pending")

    # Nullable integer fields. NOTE(review): presumably timestamps in the
    # same unit as add_ts - confirm against the code that writes them.
    start_time: Mapped[Optional[int]] = mapped_column(BigInteger)
    end_time: Mapped[Optional[int]] = mapped_column(BigInteger)

    # Nullable counters with a SQLAlchemy-side default of 0.
    total_crawled: Mapped[Optional[int]] = mapped_column(Integer, default=0)
    success_count: Mapped[Optional[int]] = mapped_column(Integer, default=0)
    error_count: Mapped[Optional[int]] = mapped_column(Integer, default=0)

    # Optional (nullable) free-text fields.
    error_message: Mapped[Optional[str]] = mapped_column(Text)
    config_params: Mapped[Optional[str]] = mapped_column(Text)

    scheduled_date: Mapped[date] = mapped_column(Date, nullable=False)

    # Required integer timestamps with no default: the writer must supply
    # them. NOTE(review): the unit is not visible in this file - confirm.
    add_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)
    last_modify_ts: Mapped[int] = mapped_column(BigInteger, nullable=False)

    __table_args__ = (
        UniqueConstraint("task_id", name="uq_crawling_tasks_unique"),
        Index("idx_crawling_tasks_topic", "topic_id"),
        Index("idx_crawling_tasks_platform", "platform"),
        Index("idx_crawling_tasks_status", "task_status"),
        Index("idx_crawling_tasks_date", "scheduled_date"),
        # Composite index over (topic_id, platform, task_status).
        Index(
            "idx_task_topic_platform",
            "topic_id",
            "platform",
            "task_status",
        ),
        ForeignKeyConstraint(
            ["topic_id"], ["daily_topics.topic_id"], ondelete="CASCADE"
        ),
    )
|
||
|
||
|