1. 同步MediaCrawler为最新版本
2. 修复数据库NOT NULL错误
3. 支持PG数据库
4. 规范环境变量及配置使用
5. 规范为uv安装
6. 使用loguru
This commit is contained in:
@@ -15,7 +15,7 @@ from typing import Optional
|
||||
|
||||
import cmd_arg
|
||||
import config
|
||||
import db
|
||||
from database import db
|
||||
from base.base_crawler import AbstractCrawler
|
||||
from media_platform.bilibili import BilibiliCrawler
|
||||
from media_platform.douyin import DouYinCrawler
|
||||
@@ -24,6 +24,8 @@ from media_platform.tieba import TieBaCrawler
|
||||
from media_platform.weibo import WeiboCrawler
|
||||
from media_platform.xhs import XiaoHongShuCrawler
|
||||
from media_platform.zhihu import ZhihuCrawler
|
||||
from tools.async_file_writer import AsyncFileWriter
|
||||
from var import crawler_type_var
|
||||
|
||||
|
||||
class CrawlerFactory:
|
||||
@@ -50,20 +52,40 @@ class CrawlerFactory:
|
||||
crawler: Optional[AbstractCrawler] = None
|
||||
|
||||
|
||||
# persist-1<persist1@126.com>
|
||||
# 原因:增加 --init_db 功能,用于数据库初始化。
|
||||
# 副作用:无
|
||||
# 回滚策略:还原此文件。
|
||||
async def main():
|
||||
# Init crawler
|
||||
global crawler
|
||||
|
||||
# parse cmd
|
||||
await cmd_arg.parse_cmd()
|
||||
args = await cmd_arg.parse_cmd()
|
||||
|
||||
# init db
|
||||
if config.SAVE_DATA_OPTION in ["db", "sqlite"]:
|
||||
await db.init_db()
|
||||
if args.init_db:
|
||||
await db.init_db(args.init_db)
|
||||
print(f"Database {args.init_db} initialized successfully.")
|
||||
return # Exit the main function cleanly
|
||||
|
||||
|
||||
|
||||
crawler = CrawlerFactory.create_crawler(platform=config.PLATFORM)
|
||||
await crawler.start()
|
||||
|
||||
# Generate wordcloud after crawling is complete
|
||||
# Only for JSON save mode
|
||||
if config.SAVE_DATA_OPTION == "json" and config.ENABLE_GET_WORDCLOUD:
|
||||
try:
|
||||
file_writer = AsyncFileWriter(
|
||||
platform=config.PLATFORM,
|
||||
crawler_type=crawler_type_var.get()
|
||||
)
|
||||
await file_writer.generate_wordcloud_from_comments()
|
||||
except Exception as e:
|
||||
print(f"Error generating wordcloud: {e}")
|
||||
|
||||
|
||||
def cleanup():
|
||||
if crawler:
|
||||
|
||||
Reference in New Issue
Block a user