The framework has been restructured again, and the Flask framework has been abandoned.

2025-08-22 13:52:05 +08:00
parent 15b3a3343b
commit 0c31be4287
279 changed files with 2725 additions and 1648837 deletions
@@ -1,17 +0,0 @@
-# 声明：本代码仅供学习和研究目的使用。使用者应遵守以下原则：  
-# 1. 不得用于任何商业用途。  
-# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。  
-# 3. 不得进行大规模爬取或对平台造成运营干扰。  
-# 4. 应合理控制请求频率，避免给目标平台带来不必要的负担。   
-# 5. 不得用于任何非法或不当的用途。
-#   
-# 详细许可条款请参阅项目根目录下的LICENSE文件。  
-# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。  
-
-
-# -*- coding: utf-8 -*-
-# @Author  : relakkes@gmail.com
-# @Time    : 2023/12/2 18:36
-# @Desc    :
-
-from .core import *
@@ -1,553 +0,0 @@
-# 声明：本代码仅供学习和研究目的使用。使用者应遵守以下原则：
-# 1. 不得用于任何商业用途。
-# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
-# 3. 不得进行大规模爬取或对平台造成运营干扰。
-# 4. 应合理控制请求频率，避免给目标平台带来不必要的负担。
-# 5. 不得用于任何非法或不当的用途。
-#
-# 详细许可条款请参阅项目根目录下的LICENSE文件。
-# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
-
-# -*- coding: utf-8 -*-
-# @Author  : relakkes@gmail.com
-# @Time    : 2023/12/2 18:44
-# @Desc    : bilibili 请求客户端
-import asyncio
-import json
-import random
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
-from urllib.parse import urlencode
-
-import httpx
-from playwright.async_api import BrowserContext, Page
-
-import config
-from base.base_crawler import AbstractApiClient
-from tools import utils
-
-from .exception import DataFetchError
-from .field import CommentOrderType, SearchOrderType
-from .help import BilibiliSign
-
-
-class BilibiliClient(AbstractApiClient):
-
-    def __init__(
-        self,
-        timeout=60,  # long Bilibili videos need a larger timeout when media crawling is enabled
-        proxy=None,
-        *,
-        headers: Dict[str, str],
-        playwright_page: Page,
-        cookie_dict: Dict[str, str],
-    ):
-        """
-        Store the settings shared by every request this client sends.
-        :param timeout: timeout handed to httpx for each request
-        :param proxy: proxy handed to httpx.AsyncClient, or None
-        :param headers: headers sent with API requests
-        :param playwright_page: page whose localStorage is read by get_wbi_keys
-        :param cookie_dict: cookies as a name -> value mapping
-        """
-        self._host = "https://api.bilibili.com"
-        self.headers = headers
-        self.cookie_dict = cookie_dict
-        self.playwright_page = playwright_page
-        self.timeout = timeout
-        self.proxy = proxy
-
-    async def request(self, method, url, **kwargs) -> Any:
-        """
-        Send one HTTP request and unwrap the JSON envelope of the response.
-        :param method: HTTP method name
-        :param url: absolute request URL
-        :param kwargs: extra keyword arguments forwarded to httpx
-        :return: the "data" member of the decoded body ({} when it is absent)
-        :raises DataFetchError: when the body is not JSON or its "code" is not 0
-        """
-        async with httpx.AsyncClient(proxy=self.proxy) as http_client:
-            response = await http_client.request(method, url, timeout=self.timeout, **kwargs)
-
-        try:
-            payload: Dict = response.json()
-        except json.JSONDecodeError:
-            utils.logger.error(f"[BilibiliClient.request] Failed to decode JSON from response. status_code: {response.status_code}, response_text: {response.text}")
-            raise DataFetchError(f"Failed to decode JSON, content: {response.text}")
-
-        # Code 0 is the only success marker; anything else carries an error message.
-        if payload.get("code") == 0:
-            return payload.get("data", {})
-        raise DataFetchError(payload.get("message", "unkonw error"))
-
-    async def pre_request_data(self, req_data: Dict) -> Dict:
-        """
-        Sign request parameters before they are sent.
-        The signing keys come from get_wbi_keys, which looks for the localStorage entry wbi_img_urls; its value looks like:
-        https://i0.hdslb.com/bfs/wbi/7cd084941338484aae1ad9425b84077c.png-https://i0.hdslb.com/bfs/wbi/4932caff0ff746eab6f01bf08b70ac45.png
-        :param req_data: parameters to sign
-        :return: {} for empty input, otherwise the result of BilibiliSign.sign
-        """
-        if req_data:
-            wbi_keys = await self.get_wbi_keys()
-            return BilibiliSign(*wbi_keys).sign(req_data)
-        return {}
-
-    async def get_wbi_keys(self) -> Tuple[str, str]:
-        """
-        Get the current img_key and sub_key.
-        The two image URLs are read from the page's localStorage: either the combined
-        wbi_img_urls entry ("<img_url>-<sub_url>") or the wbi_img_url / wbi_sub_url pair.
-        When they are missing, or the combined entry does not split into exactly two
-        non-empty URLs, they are requested from /x/web-interface/nav instead.
-        :return: (img_key, sub_key), each the file name of its URL without the extension
-        """
-        local_storage = await self.playwright_page.evaluate("() => window.localStorage")
-        img_url: str = ""
-        sub_url: str = ""
-        wbi_img_urls = local_storage.get("wbi_img_urls", "")
-        if wbi_img_urls:
-            url_parts = wbi_img_urls.split("-")
-            # Unpacking blindly would raise on a malformed value; fall back to the API instead.
-            if len(url_parts) == 2:
-                img_url, sub_url = url_parts
-        else:
-            # Use the separately stored URLs as they are: joining them with "-" and
-            # splitting again breaks as soon as either URL contains a hyphen.
-            img_url = local_storage.get("wbi_img_url") or ""
-            sub_url = local_storage.get("wbi_sub_url") or ""
-        if not (img_url and sub_url):
-            resp = await self.request(method="GET", url=self._host + "/x/web-interface/nav")
-            img_url = resp['wbi_img']['img_url']
-            sub_url = resp['wbi_img']['sub_url']
-        img_key = img_url.rsplit('/', 1)[1].split('.')[0]
-        sub_key = sub_url.rsplit('/', 1)[1].split('.')[0]
-        return img_key, sub_key
-
-    async def get(self, uri: str, params=None, enable_params_sign: bool = True) -> Dict:
-        """
-        Issue a GET request against the API host.
-        :param uri: path appended to the API host
-        :param params: query parameters; passed through pre_request_data first when signing is enabled
-        :param enable_params_sign: whether params are signed before being sent
-        :return: whatever request returns for the final URL
-        """
-        query = await self.pre_request_data(params) if enable_params_sign else params
-        # Only a dict is serialised into a query string; any other value leaves the uri untouched.
-        path = f"{uri}?{urlencode(query)}" if isinstance(query, dict) else uri
-        return await self.request(method="GET", url=f"{self._host}{path}", headers=self.headers)
-
-    async def post(self, uri: str, data: dict) -> Dict:
-        """
-        Issue a POST request whose body is the signed data serialised as compact JSON.
-        :param uri: path appended to the API host
-        :param data: payload passed through pre_request_data before serialisation
-        :return: whatever request returns
-        """
-        signed = await self.pre_request_data(data)
-        body = json.dumps(signed, separators=(',', ':'), ensure_ascii=False)
-        target = f"{self._host}{uri}"
-        return await self.request(method="POST", url=target, data=body, headers=self.headers)
-
-    async def pong(self) -> bool:
-        """Query the nav endpoint and report whether its response marks the session as logged in."""
-        utils.logger.info("[BilibiliClient.pong] Begin pong bilibili...")
-        try:
-            nav_info = await self.get("/x/web-interface/nav")
-            if nav_info.get("isLogin"):
-                utils.logger.info("[BilibiliClient.pong] Use cache login state get web interface successfull!")
-                return True
-        except Exception as e:
-            # Any failure is reported as "not logged in" so the caller can start a login.
-            utils.logger.error(f"[BilibiliClient.pong] Pong bilibili failed: {e}, and try to login again...")
-            return False
-        return False
-
-    async def update_cookies(self, browser_context: BrowserContext):
-        """
-        Refresh the Cookie header and cookie_dict from the cookies of the browser context.
-        :param browser_context: context whose cookies are converted with utils.convert_cookies
-        """
-        browser_cookies = await browser_context.cookies()
-        cookie_str, cookie_dict = utils.convert_cookies(browser_cookies)
-        self.headers["Cookie"] = cookie_str
-        self.cookie_dict = cookie_dict
-
-    async def search_video_by_keyword(
-        self,
-        keyword: str,
-        page: int = 1,
-        page_size: int = 20,
-        order: SearchOrderType = SearchOrderType.DEFAULT,
-        pubtime_begin_s: int = 0,
-        pubtime_end_s: int = 0,
-    ) -> Dict:
-        """
-        Bilibili web video search api
-        :param keyword: search keyword
-        :param page: page number to request
-        :param page_size: number of results per page
-        :param order: result ordering, SearchOrderType.DEFAULT unless overridden
-        :param pubtime_begin_s: publish-time lower bound as a timestamp
-        :param pubtime_end_s: publish-time upper bound as a timestamp
-        :return: response data of the search endpoint
-        """
-        return await self.get(
-            "/x/web-interface/wbi/search/type",
-            {
-                "search_type": "video",
-                "keyword": keyword,
-                "page": page,
-                "page_size": page_size,
-                "order": order.value,
-                "pubtime_begin_s": pubtime_begin_s,
-                "pubtime_end_s": pubtime_end_s,
-            },
-        )
-
-    async def get_video_info(self, aid: Union[int, None] = None, bvid: Union[str, None] = None) -> Dict:
-        """
-        Bilibili web video detail api; pass either aid or bvid (aid wins when both are given).
-        :param aid: avid of the video
-        :param bvid: bvid of the video
-        :return: response data of the detail endpoint
-        :raises ValueError: when neither aid nor bvid is provided
-        """
-        if aid:
-            params = {"aid": aid}
-        elif bvid:
-            params = {"bvid": bvid}
-        else:
-            raise ValueError("请提供 aid 或 bvid 中的至少一个参数")
-        # This endpoint is called without parameter signing.
-        return await self.get("/x/web-interface/view/detail", params, enable_params_sign=False)
-
-    async def get_video_play_url(self, aid: int, cid: int) -> Dict:
-        """
-        Bilibili web video play url api
-        :param aid: avid of the video
-        :param cid: cid of the video
-        :return: response data of the play url endpoint
-        :raises ValueError: when aid or cid is missing or not positive
-        """
-        ids_are_valid = aid and cid and aid > 0 and cid > 0
-        if not ids_are_valid:
-            raise ValueError("aid 和 cid 必须存在")
-        play_params = {
-            "avid": aid,
-            "cid": cid,
-            "qn": 80,
-            "fourk": 1,
-            "fnval": 1,
-            "platform": "pc",
-        }
-        return await self.get("/x/player/wbi/playurl", play_params, enable_params_sign=True)
-
-    async def get_video_media(self, url: str) -> Union[bytes, None]:
-        """
-        Download the raw bytes behind a media URL.
-        :param url: media URL to fetch
-        :return: the response body, or None when the request fails or the reason phrase is not "OK"
-        """
-        async with httpx.AsyncClient(proxy=self.proxy) as http_client:
-            try:
-                response = await http_client.request("GET", url, timeout=self.timeout, headers=self.headers)
-                response.raise_for_status()
-                if response.reason_phrase == "OK":
-                    return response.content
-                utils.logger.error(f"[BilibiliClient.get_video_media] request {url} err, res:{response.text}")
-                return None
-            except httpx.HTTPError as exc:  # failures while calling httpx, e.g. connection errors or a non-2xx status raised by raise_for_status
-                utils.logger.error(f"[BilibiliClient.get_video_media] {exc.__class__.__name__} for {exc.request.url} - {exc}")  # keep the original exception class name to help debugging
-                return None
-
-    async def get_video_comments(
-        self,
-        video_id: str,
-        order_mode: CommentOrderType = CommentOrderType.DEFAULT,
-        next: int = 0,
-    ) -> Dict:
-        """get one page of top-level video comments
-        :param video_id: video ID
-        :param order_mode: comment ordering
-        :param next: cursor selecting the comment page
-        :return: response data of the comment endpoint
-        """
-        query = {
-            "oid": video_id,
-            "mode": order_mode.value,
-            "type": 1,
-            "ps": 20,
-            "next": next,
-        }
-        return await self.get("/x/v2/reply/wbi/main", query)
-
-    async def get_video_all_comments(
-        self,
-        video_id: str,
-        crawl_interval: float = 1.0,
-        is_fetch_sub_comments=False,
-        callback: Optional[Callable] = None,
-        max_count: int = 10,
-    ):
-        """
-        Collect the top-level comments of a video, optionally crawling their replies as well.
-        :param video_id: video ID
-        :param crawl_interval: delay awaited after each page
-        :param is_fetch_sub_comments: when true, the replies of every kept comment with rcount > 0
-            are crawled through get_video_all_level_two_comments
-        :param callback: awaited as callback(video_id, comment_list) for each page
-        :param max_count: maximum number of top-level comments collected for one video
-
-        :return: the collected top-level comments
-        """
-        result = []
-        is_end = False
-        next_page = 0
-        max_retries = 3
-        while not is_end and len(result) < max_count:
-            comments_res = None
-            for attempt in range(max_retries):
-                try:
-                    comments_res = await self.get_video_comments(video_id, CommentOrderType.DEFAULT, next_page)
-                    break  # Success
-                except DataFetchError as e:
-                    if attempt < max_retries - 1:
-                        # Exponential backoff with a little jitter before the next attempt.
-                        delay = 5 * (2**attempt) + random.uniform(0, 1)
-                        utils.logger.warning(f"[BilibiliClient.get_video_all_comments] Retrying video_id {video_id} in {delay:.2f}s... (Attempt {attempt + 1}/{max_retries})")
-                        await asyncio.sleep(delay)
-                    else:
-                        utils.logger.error(f"[BilibiliClient.get_video_all_comments] Max retries reached for video_id: {video_id}. Skipping comments. Error: {e}")
-                        is_end = True
-                        break
-            if not comments_res:
-                break
-
-            cursor_info: Dict = comments_res.get("cursor")
-            if not cursor_info:
-                utils.logger.warning(f"[BilibiliClient.get_video_all_comments] Could not find 'cursor' in response for video_id: {video_id}. Skipping.")
-                break
-
-            # "replies" may be present with a null value, so normalise it to a list.
-            comment_list: List[Dict] = comments_res.get("replies") or []
-
-            # Check that both is_end and next are present in the cursor.
-            if "is_end" not in cursor_info or "next" not in cursor_info:
-                utils.logger.warning(f"[BilibiliClient.get_video_all_comments] 'is_end' or 'next' not in cursor for video_id: {video_id}. Assuming end of comments.")
-                is_end = True
-            else:
-                is_end = cursor_info.get("is_end")
-                next_page = cursor_info.get("next")
-
-            if not isinstance(is_end, bool):
-                utils.logger.warning(f"[BilibiliClient.get_video_all_comments] 'is_end' is not a boolean for video_id: {video_id}. Assuming end of comments.")
-                is_end = True
-
-            # Trim to the remaining budget first so replies are only crawled for kept comments.
-            remaining = max_count - len(result)
-            if len(comment_list) > remaining:
-                comment_list = comment_list[:remaining]
-            if is_fetch_sub_comments:
-                for comment in comment_list:
-                    if (comment.get("rcount") or 0) > 0:
-                        await self.get_video_all_level_two_comments(video_id, comment['rpid'], CommentOrderType.DEFAULT, 10, crawl_interval, callback)
-            if callback:  # run the callback when one is supplied
-                await callback(video_id, comment_list)
-            await asyncio.sleep(crawl_interval)
-            # Always accumulate: otherwise max_count is never reached and nothing is
-            # returned when sub-comment crawling is enabled.
-            result.extend(comment_list)
-        return result
-
-    async def get_video_all_level_two_comments(
-        self,
-        video_id: str,
-        level_one_comment_id: int,
-        order_mode: CommentOrderType,
-        ps: int = 10,
-        crawl_interval: float = 1.0,
-        callback: Optional[Callable] = None,
-    ) -> None:
-        """
-        Page through every reply of one top-level comment, handing each page to the callback.
-        :param video_id: video ID
-        :param level_one_comment_id: ID of the top-level comment
-        :param order_mode: comment ordering
-        :param ps: number of replies per page
-        :param crawl_interval: delay awaited after each page
-        :param callback: awaited as callback(video_id, comment_list) for each page
-        :return: nothing; the replies are only delivered through the callback
-        """
-
-        pn = 1
-        while True:
-            result = await self.get_video_level_two_comments(video_id, level_one_comment_id, pn, ps, order_mode)
-            # "replies" may be present with a null value; never hand None to the callback.
-            comment_list: List[Dict] = result.get("replies") or []
-            if callback:  # run the callback when one is supplied
-                await callback(video_id, comment_list)
-            await asyncio.sleep(crawl_interval)
-            # Stop once the pages requested so far cover the reported total.
-            if int(result["page"]["count"]) <= pn * ps:
-                break
-
-            pn += 1
-
-    async def get_video_level_two_comments(
-        self,
-        video_id: str,
-        level_one_comment_id: int,
-        pn: int,
-        ps: int,
-        order_mode: CommentOrderType,
-    ) -> Dict:
-        """get one page of replies to a top-level comment
-        :param video_id: video ID
-        :param level_one_comment_id: ID of the top-level comment
-        :param pn: page number
-        :param ps: number of replies per page
-        :param order_mode: comment ordering
-
-        :return: response data of the reply endpoint
-        """
-        query = {
-            "oid": video_id,
-            "mode": order_mode.value,
-            "type": 1,
-            "ps": ps,
-            "pn": pn,
-            "root": level_one_comment_id,
-        }
-        return await self.get("/x/v2/reply/reply", query)
-
-    async def get_creator_videos(self, creator_id: str, pn: int, ps: int = 30, order_mode: SearchOrderType = SearchOrderType.LAST_PUBLISH) -> Dict:
-        """get one page of a creator's videos
-        :param creator_id: creator ID
-        :param pn: page number
-        :param ps: number of videos per page
-        :param order_mode: result ordering; an enum member or an already-resolved value
-
-        :return: response data of the creator video search endpoint
-        """
-        uri = "/x/space/wbi/arc/search"
-        post_data = {
-            "mid": creator_id,
-            "pn": pn,
-            "ps": ps,
-            # Send the enum's value, as the other request builders do; the enum object itself
-            # would be serialised by its repr-like str(). Plain values pass through unchanged.
-            "order": getattr(order_mode, "value", order_mode),
-        }
-        return await self.get(uri, post_data)
-
-    async def get_creator_info(self, creator_id: int) -> Dict:
-        """
-        get creator info
-        :param creator_id: creator ID
-        :return: response data of the account info endpoint
-        """
-        return await self.get("/x/space/wbi/acc/info", {"mid": creator_id})
-
-    async def get_creator_fans(
-        self,
-        creator_id: int,
-        pn: int,
-        ps: int = 24,
-    ) -> Dict:
-        """
-        get one page of a creator's fans
-        :param creator_id: creator ID
-        :param pn: page number
-        :param ps: number of entries per page
-        :return: response data of the fans endpoint
-        """
-        query = dict(vmid=creator_id, pn=pn, ps=ps, gaia_source="main_web")
-        return await self.get("/x/relation/fans", query)
-
-    async def get_creator_followings(
-        self,
-        creator_id: int,
-        pn: int,
-        ps: int = 24,
-    ) -> Dict:
-        """
-        get one page of the accounts a creator follows
-        :param creator_id: creator ID
-        :param pn: page number
-        :param ps: number of entries per page
-        :return: response data of the followings endpoint
-        """
-        query = dict(vmid=creator_id, pn=pn, ps=ps, gaia_source="main_web")
-        return await self.get("/x/relation/followings", query)
-
-    async def get_creator_dynamics(self, creator_id: int, offset: str = ""):
-        """
-        get one page of a creator's dynamics
-        :param creator_id: creator ID
-        :param offset: paging offset sent with the request
-        :return: response data of the space feed endpoint
-        """
-        query = {"offset": offset, "host_mid": creator_id, "platform": "web"}
-        return await self.get("/x/polymer/web-dynamic/v1/feed/space", query)
-
-    async def get_creator_all_fans(
-        self,
-        creator_info: Dict,
-        crawl_interval: float = 1.0,
-        callback: Optional[Callable] = None,
-        max_count: int = 100,
-    ) -> List:
-        """
-        Page through a creator's fans, starting at config.START_CONTACTS_PAGE.
-        :param creator_info: creator record; its "id" entry selects the creator
-        :param crawl_interval: delay awaited after each page
-        :param callback: awaited as callback(creator_info, fans_list) for each page
-        :param max_count: maximum number of fans collected for one creator
-
-        :return: the collected fans
-        """
-        creator_id = creator_info["id"]
-        collected = []
-        page_no = config.START_CONTACTS_PAGE
-        while len(collected) < max_count:
-            fans_res: Dict = await self.get_creator_fans(creator_id, pn=page_no)
-            page_items: List[Dict] = fans_res.get("list", [])
-            page_no += 1
-
-            # Never keep more than the remaining budget.
-            room = max_count - len(collected)
-            if len(page_items) > room:
-                page_items = page_items[:room]
-            if callback:  # run the callback when one is supplied
-                await callback(creator_info, page_items)
-            await asyncio.sleep(crawl_interval)
-            if not page_items:  # an empty page ends the crawl
-                break
-            collected.extend(page_items)
-        return collected
-
-    async def get_creator_all_followings(
-        self,
-        creator_info: Dict,
-        crawl_interval: float = 1.0,
-        callback: Optional[Callable] = None,
-        max_count: int = 100,
-    ) -> List:
-        """
-        Page through the accounts a creator follows, starting at config.START_CONTACTS_PAGE.
-        :param creator_info: creator record; its "id" entry selects the creator
-        :param crawl_interval: delay awaited after each page
-        :param callback: awaited as callback(creator_info, followings_list) for each page
-        :param max_count: maximum number of followings collected for one creator
-
-        :return: the collected followings
-        """
-        creator_id = creator_info["id"]
-        collected = []
-        page_no = config.START_CONTACTS_PAGE
-        while len(collected) < max_count:
-            followings_res: Dict = await self.get_creator_followings(creator_id, pn=page_no)
-            page_items: List[Dict] = followings_res.get("list", [])
-            page_no += 1
-
-            # Never keep more than the remaining budget.
-            room = max_count - len(collected)
-            if len(page_items) > room:
-                page_items = page_items[:room]
-            if callback:  # run the callback when one is supplied
-                await callback(creator_info, page_items)
-            await asyncio.sleep(crawl_interval)
-            if not page_items:  # an empty page ends the crawl
-                break
-            collected.extend(page_items)
-        return collected
-
-    async def get_creator_all_dynamics(
-        self,
-        creator_info: Dict,
-        crawl_interval: float = 1.0,
-        callback: Optional[Callable] = None,
-        max_count: int = 20,
-    ) -> List:
-        """
-        Page through a creator's dynamics by following the offset returned with each page.
-        :param creator_info: creator record; its "id" entry selects the creator
-        :param crawl_interval: delay awaited after each page
-        :param callback: awaited as callback(creator_info, dynamics_list) for each page
-        :param max_count: maximum number of dynamics collected for one creator
-
-        :return: the collected dynamics
-        """
-        creator_id = creator_info["id"]
-        collected = []
-        cursor = ""
-        more_pages = True
-        while more_pages and len(collected) < max_count:
-            page = await self.get_creator_dynamics(creator_id, cursor)
-            page_items: List[Dict] = page["items"]
-            more_pages = page["has_more"]
-            cursor = page["offset"]
-            # Never keep more than the remaining budget.
-            room = max_count - len(collected)
-            if len(page_items) > room:
-                page_items = page_items[:room]
-            if callback:
-                await callback(creator_info, page_items)
-            await asyncio.sleep(crawl_interval)
-            collected.extend(page_items)
-        return collected
@@ -1,657 +0,0 @@
-# 声明：本代码仅供学习和研究目的使用。使用者应遵守以下原则：
-# 1. 不得用于任何商业用途。
-# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
-# 3. 不得进行大规模爬取或对平台造成运营干扰。
-# 4. 应合理控制请求频率，避免给目标平台带来不必要的负担。
-# 5. 不得用于任何非法或不当的用途。
-#
-# 详细许可条款请参阅项目根目录下的LICENSE文件。
-# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
-
-# -*- coding: utf-8 -*-
-# @Author  : relakkes@gmail.com
-# @Time    : 2023/12/2 18:44
-# @Desc    : B站爬虫
-
-import asyncio
-import os
-import random
-from asyncio import Task
-from typing import Dict, List, Optional, Tuple, Union
-from datetime import datetime, timedelta
-import pandas as pd
-
-from playwright.async_api import (
-    BrowserContext,
-    BrowserType,
-    Page,
-    Playwright,
-    async_playwright,
-)
-from playwright._impl._errors import TargetClosedError
-
-import config
-from base.base_crawler import AbstractCrawler
-from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
-from store import bilibili as bilibili_store
-from tools import utils
-from tools.cdp_browser import CDPBrowserManager
-from var import crawler_type_var, source_keyword_var
-
-from .client import BilibiliClient
-from .exception import DataFetchError
-from .field import SearchOrderType
-from .login import BilibiliLogin
-
-
-class BilibiliCrawler(AbstractCrawler):
-    context_page: Page
-    bili_client: BilibiliClient
-    browser_context: BrowserContext
-    cdp_manager: Optional[CDPBrowserManager]
-
-    def __init__(self):
-        """Set the entry URL and user agent; no CDP manager exists until one is created."""
-        self.cdp_manager = None
-        self.user_agent = utils.get_user_agent()
-        self.index_url = "https://www.bilibili.com"
-
-    async def start(self):
-        """
-        Run one crawl: launch the browser, make sure the client is logged in, then
-        dispatch on config.CRAWLER_TYPE ("search", "detail" or "creator").
-        """
-        playwright_proxy_format, httpx_proxy_format = None, None
-        if config.ENABLE_IP_PROXY:
-            ip_proxy_pool = await create_ip_pool(config.IP_PROXY_POOL_COUNT, enable_validate_ip=True)
-            ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
-            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)
-
-        async with async_playwright() as playwright:
-            # Choose the launch mode according to the configuration
-            if config.ENABLE_CDP_MODE:
-                utils.logger.info("[BilibiliCrawler] 使用CDP模式启动浏览器")
-                self.browser_context = await self.launch_browser_with_cdp(
-                    playwright,
-                    playwright_proxy_format,
-                    self.user_agent,
-                    headless=config.CDP_HEADLESS,
-                )
-            else:
-                utils.logger.info("[BilibiliCrawler] 使用标准模式启动浏览器")
-                # Launch a browser context.
-                # NOTE(review): None is passed where the CDP branch passes playwright_proxy_format,
-                # so the browser looks unproxied in standard mode — confirm this is intended.
-                chromium = playwright.chromium
-                self.browser_context = await self.launch_browser(chromium, None, self.user_agent, headless=config.HEADLESS)
-            # stealth.min.js is a js script to prevent the website from detecting the crawler.
-            await self.browser_context.add_init_script(path="libs/stealth.min.js")
-            self.context_page = await self.browser_context.new_page()
-            await self.context_page.goto(self.index_url)
-
-            # Create a client to interact with the bilibili website.
-            self.bili_client = await self.create_bilibili_client(httpx_proxy_format)
-            # Log in only when the existing session is not accepted, then copy the fresh cookies into the client.
-            if not await self.bili_client.pong():
-                login_obj = BilibiliLogin(
-                    login_type=config.LOGIN_TYPE,
-                    login_phone="",  # your phone number
-                    browser_context=self.browser_context,
-                    context_page=self.context_page,
-                    cookie_str=config.COOKIES,
-                )
-                await login_obj.begin()
-                await self.bili_client.update_cookies(browser_context=self.browser_context)
-
-            crawler_type_var.set(config.CRAWLER_TYPE)
-            if config.CRAWLER_TYPE == "search":
-                await self.search()
-            elif config.CRAWLER_TYPE == "detail":
-                # Get the information and comments of the specified post
-                await self.get_specified_videos(config.BILI_SPECIFIED_ID_LIST)
-            elif config.CRAWLER_TYPE == "creator":
-                # CREATOR_MODE selects per-creator video crawling over the creator-details crawl.
-                if config.CREATOR_MODE:
-                    for creator_id in config.BILI_CREATOR_ID_LIST:
-                        await self.get_creator_videos(int(creator_id))
-                else:
-                    await self.get_all_creator_details(config.BILI_CREATOR_ID_LIST)
-            else:
-                # Unknown crawler types are ignored.
-                pass
-            utils.logger.info("[BilibiliCrawler.start] Bilibili Crawler finished ...")
-
-    async def search(self):
-        """
-        Run the search variant selected by config.BILI_SEARCH_MODE.
-        """
-        mode = config.BILI_SEARCH_MODE
-        if mode == "normal":
-            await self.search_by_keywords()
-            return
-        # The two time-range modes share one implementation and differ only in daily_limit.
-        if mode == "all_in_time_range":
-            await self.search_by_keywords_in_time_range(daily_limit=False)
-            return
-        if mode == "daily_limit_in_time_range":
-            await self.search_by_keywords_in_time_range(daily_limit=True)
-            return
-        utils.logger.warning(f"Unknown BILI_SEARCH_MODE: {config.BILI_SEARCH_MODE}")
-
-    @staticmethod
-    async def get_pubtime_datetime(
-        start: str = config.START_DAY,
-        end: str = config.END_DAY,
-    ) -> Tuple[str, str]:
-        """
-        Compute the publish-time window sent to the search API as pubtime_begin_s / pubtime_end_s.
-        ---
-        :param start: first publish day, YYYY-MM-DD
-        :param end: last publish day, YYYY-MM-DD
-        :return: (pubtime_begin_s, pubtime_end_s) as strings of integer timestamps
-        :raises ValueError: when start is later than end
-
-        Note
-        ---
-        - The window covers start through end, both days included.
-        - The begin value is 00:00:00 of start; the end value is the last second of end,
-          i.e. end + 1 day - 1 second. A single-day search (start == end) follows the same rule.
-            - Searching only 2024-01-05 gives datetime(2024, 1, 5, 0, 0) .. datetime(2024, 1, 5, 23, 59, 59)
-            - Searching 2024-01-05 - 2024-01-06 gives datetime(2024, 1, 5, 0, 0) .. datetime(2024, 1, 6, 23, 59, 59)
-        - The datetimes are naive, so timestamp() interprets them in the local timezone.
-        """
-        # Parse both bounds into datetime objects.
-        first_day: datetime = datetime.strptime(start, "%Y-%m-%d")
-        last_day: datetime = datetime.strptime(end, "%Y-%m-%d")
-        if first_day > last_day:
-            raise ValueError("Wrong time range, please check your start and end argument, to ensure that the start cannot exceed end")
-        # Move the upper bound to the last second of the final day.
-        window_end = last_day + timedelta(days=1) - timedelta(seconds=1)
-        # Convert back to timestamps.
-        return str(int(first_day.timestamp())), str(int(window_end.timestamp()))
-
-    async def search_by_keywords(self):
-        """
-        search bilibili video with keywords in normal mode
-
-        For every comma-separated keyword in config.KEYWORDS, result pages are requested from
-        config.START_PAGE onwards; each video's details are fetched and stored, and the comments
-        of the page's videos are crawled afterwards.
-        :return:
-        """
-        utils.logger.info("[BilibiliCrawler.search_by_keywords] Begin search bilibli keywords")
-        bili_limit_count = 20  # bilibili limit page fixed value
-        # Raise the configured maximum to at least one full page; this overwrites the config value.
-        if config.CRAWLER_MAX_NOTES_COUNT < bili_limit_count:
-            config.CRAWLER_MAX_NOTES_COUNT = bili_limit_count
-        start_page = config.START_PAGE  # start page number
-        for keyword in config.KEYWORDS.split(","):
-            source_keyword_var.set(keyword)
-            utils.logger.info(f"[BilibiliCrawler.search_by_keywords] Current search keyword: {keyword}")
-            page = 1
-            # Pages are counted from start_page, so the loop ends once the crawled pages cover the maximum.
-            while (page - start_page + 1) * bili_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
-                if page < start_page:
-                    utils.logger.info(f"[BilibiliCrawler.search_by_keywords] Skip page: {page}")
-                    page += 1
-                    continue
-
-                utils.logger.info(f"[BilibiliCrawler.search_by_keywords] search bilibili keyword: {keyword}, page: {page}")
-                video_id_list: List[str] = []
-                videos_res = await self.bili_client.search_video_by_keyword(
-                    keyword=keyword,
-                    page=page,
-                    page_size=bili_limit_count,
-                    order=SearchOrderType.DEFAULT,
-                    pubtime_begin_s=0,  # publish-time lower bound timestamp
-                    pubtime_end_s=0,  # publish-time upper bound timestamp
-                )
-                video_list: List[Dict] = videos_res.get("result")
-
-                if not video_list:
-                    utils.logger.info(f"[BilibiliCrawler.search_by_keywords] No more videos for '{keyword}', moving to next keyword.")
-                    break
-
-                semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-                task_list = []
-                # NOTE(review): if get_video_info_task is a coroutine function, this try only guards
-                # building the coroutine objects; errors raised while they run surface at gather — confirm.
-                try:
-                    task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
-                except Exception as e:
-                    utils.logger.warning(f"[BilibiliCrawler.search_by_keywords] error in the task list. The video for this page will not be included. {e}")
-                video_items = await asyncio.gather(*task_list)
-                for video_item in video_items:
-                    # Falsy results are skipped.
-                    if video_item:
-                        video_id_list.append(video_item.get("View").get("aid"))
-                        await bilibili_store.update_bilibili_video(video_item)
-                        await bilibili_store.update_up_info(video_item)
-                        await self.get_bilibili_video(video_item, semaphore)
-                page += 1
-                await self.batch_get_video_comments(video_id_list)
-
-    async def search_by_keywords_in_time_range(self, daily_limit: bool):
-        """
-        Search bilibili video with keywords in a given time range.
-        :param daily_limit: if True, strictly limit the number of notes per day and total.
-        """
-        utils.logger.info(f"[BilibiliCrawler.search_by_keywords_in_time_range] Begin search with daily_limit={daily_limit}")
-        bili_limit_count = 20
-        start_page = config.START_PAGE
-
-        for keyword in config.KEYWORDS.split(","):
-            source_keyword_var.set(keyword)
-            utils.logger.info(f"[BilibiliCrawler.search_by_keywords_in_time_range] Current search keyword: {keyword}")
-            total_notes_crawled_for_keyword = 0
-
-            for day in pd.date_range(start=config.START_DAY, end=config.END_DAY, freq="D"):
-                if (daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
-                    utils.logger.info(f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}', skipping remaining days.")
-                    break
-
-                if (not daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
-                    utils.logger.info(f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}', skipping remaining days.")
-                    break
-
-                pubtime_begin_s, pubtime_end_s = await self.get_pubtime_datetime(start=day.strftime("%Y-%m-%d"), end=day.strftime("%Y-%m-%d"))
-                page = 1
-                notes_count_this_day = 0
-
-                while True:
-                    if notes_count_this_day >= config.MAX_NOTES_PER_DAY:
-                        utils.logger.info(f"[BilibiliCrawler.search] Reached MAX_NOTES_PER_DAY limit for {day.ctime()}.")
-                        break
-                    if (daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
-                        utils.logger.info(f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}'.")
-                        break
-                    if (not daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
-                        break
-
-                    try:
-                        utils.logger.info(f"[BilibiliCrawler.search] search bilibili keyword: {keyword}, date: {day.ctime()}, page: {page}")
-                        video_id_list: List[str] = []
-                        videos_res = await self.bili_client.search_video_by_keyword(
-                            keyword=keyword,
-                            page=page,
-                            page_size=bili_limit_count,
-                            order=SearchOrderType.DEFAULT,
-                            pubtime_begin_s=pubtime_begin_s,
-                            pubtime_end_s=pubtime_end_s,
-                        )
-                        video_list: List[Dict] = videos_res.get("result")
-
-                        if not video_list:
-                            utils.logger.info(f"[BilibiliCrawler.search] No more videos for '{keyword}' on {day.ctime()}, moving to next day.")
-                            break
-
-                        semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-                        task_list = [self.get_video_info_task(aid=video_item.get("aid"), bvid="", semaphore=semaphore) for video_item in video_list]
-                        video_items = await asyncio.gather(*task_list)
-
-                        for video_item in video_items:
-                            if video_item:
-                                if (daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
-                                    break
-                                if (not daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT):
-                                    break
-                                if notes_count_this_day >= config.MAX_NOTES_PER_DAY:
-                                    break
-                                notes_count_this_day += 1
-                                total_notes_crawled_for_keyword += 1
-                                video_id_list.append(video_item.get("View").get("aid"))
-                                await bilibili_store.update_bilibili_video(video_item)
-                                await bilibili_store.update_up_info(video_item)
-                                await self.get_bilibili_video(video_item, semaphore)
-
-                        page += 1
-                        await self.batch_get_video_comments(video_id_list)
-
-                    except Exception as e:
-                        utils.logger.error(f"[BilibiliCrawler.search] Error searching on {day.ctime()}: {e}")
-                        break
-
-    async def batch_get_video_comments(self, video_id_list: List[str]):
-        """
-        batch get video comments
-        :param video_id_list:
-        :return:
-        """
-        if not config.ENABLE_GET_COMMENTS:
-            utils.logger.info(f"[BilibiliCrawler.batch_get_note_comments] Crawling comment mode is not enabled")
-            return
-
-        utils.logger.info(f"[BilibiliCrawler.batch_get_video_comments] video ids:{video_id_list}")
-        semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-        task_list: List[Task] = []
-        for video_id in video_id_list:
-            task = asyncio.create_task(self.get_comments(video_id, semaphore), name=video_id)
-            task_list.append(task)
-        await asyncio.gather(*task_list)
-
-    async def get_comments(self, video_id: str, semaphore: asyncio.Semaphore):
-        """
-        get comment for video id
-        :param video_id:
-        :param semaphore:
-        :return:
-        """
-        async with semaphore:
-            try:
-                utils.logger.info(f"[BilibiliCrawler.get_comments] begin get video_id: {video_id} comments ...")
-                await asyncio.sleep(random.uniform(0.5, 1.5))
-                await self.bili_client.get_video_all_comments(
-                    video_id=video_id,
-                    crawl_interval=random.random(),
-                    is_fetch_sub_comments=config.ENABLE_GET_SUB_COMMENTS,
-                    callback=bilibili_store.batch_update_bilibili_video_comments,
-                    max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
-                )
-
-            except DataFetchError as ex:
-                utils.logger.error(f"[BilibiliCrawler.get_comments] get video_id: {video_id} comment error: {ex}")
-            except Exception as e:
-                utils.logger.error(f"[BilibiliCrawler.get_comments] may be been blocked, err:{e}")
-                # Propagate the exception to be caught by the main loop
-                raise
-
-    async def get_creator_videos(self, creator_id: int):
-        """
-        get videos for a creator
-        :return:
-        """
-        ps = 30
-        pn = 1
-        while True:
-            result = await self.bili_client.get_creator_videos(creator_id, pn, ps)
-            video_bvids_list = [video["bvid"] for video in result["list"]["vlist"]]
-            await self.get_specified_videos(video_bvids_list)
-            if int(result["page"]["count"]) <= pn * ps:
-                break
-            await asyncio.sleep(random.random())
-            pn += 1
-
-    async def get_specified_videos(self, bvids_list: List[str]):
-        """
-        get specified videos info
-        :return:
-        """
-        semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-        task_list = [self.get_video_info_task(aid=0, bvid=video_id, semaphore=semaphore) for video_id in bvids_list]
-        video_details = await asyncio.gather(*task_list)
-        video_aids_list = []
-        for video_detail in video_details:
-            if video_detail is not None:
-                video_item_view: Dict = video_detail.get("View")
-                video_aid: str = video_item_view.get("aid")
-                if video_aid:
-                    video_aids_list.append(video_aid)
-                await bilibili_store.update_bilibili_video(video_detail)
-                await bilibili_store.update_up_info(video_detail)
-                await self.get_bilibili_video(video_detail, semaphore)
-        await self.batch_get_video_comments(video_aids_list)
-
-    async def get_video_info_task(self, aid: int, bvid: str, semaphore: asyncio.Semaphore) -> Optional[Dict]:
-        """
-        Get video detail task
-        :param aid:
-        :param bvid:
-        :param semaphore:
-        :return:
-        """
-        async with semaphore:
-            try:
-                result = await self.bili_client.get_video_info(aid=aid, bvid=bvid)
-                return result
-            except DataFetchError as ex:
-                utils.logger.error(f"[BilibiliCrawler.get_video_info_task] Get video detail error: {ex}")
-                return None
-            except KeyError as ex:
-                utils.logger.error(f"[BilibiliCrawler.get_video_info_task] have not fund note detail video_id:{bvid}, err: {ex}")
-                return None
-
-    async def get_video_play_url_task(self, aid: int, cid: int, semaphore: asyncio.Semaphore) -> Union[Dict, None]:
-        """
-        Get video play url
-        :param aid:
-        :param cid:
-        :param semaphore:
-        :return:
-        """
-        async with semaphore:
-            try:
-                result = await self.bili_client.get_video_play_url(aid=aid, cid=cid)
-                return result
-            except DataFetchError as ex:
-                utils.logger.error(f"[BilibiliCrawler.get_video_play_url_task] Get video play url error: {ex}")
-                return None
-            except KeyError as ex:
-                utils.logger.error(f"[BilibiliCrawler.get_video_play_url_task] have not fund play url from :{aid}|{cid}, err: {ex}")
-                return None
-
-    async def create_bilibili_client(self, httpx_proxy: Optional[str]) -> BilibiliClient:
-        """
-        create bilibili client
-        :param httpx_proxy: httpx proxy
-        :return: bilibili client
-        """
-        utils.logger.info("[BilibiliCrawler.create_bilibili_client] Begin create bilibili API client ...")
-        cookie_str, cookie_dict = utils.convert_cookies(await self.browser_context.cookies())
-        bilibili_client_obj = BilibiliClient(
-            proxy=httpx_proxy,
-            headers={
-                "User-Agent": self.user_agent,
-                "Cookie": cookie_str,
-                "Origin": "https://www.bilibili.com",
-                "Referer": "https://www.bilibili.com",
-                "Content-Type": "application/json;charset=UTF-8",
-            },
-            playwright_page=self.context_page,
-            cookie_dict=cookie_dict,
-        )
-        return bilibili_client_obj
-
-    async def launch_browser(
-        self,
-        chromium: BrowserType,
-        playwright_proxy: Optional[Dict],
-        user_agent: Optional[str],
-        headless: bool = True,
-    ) -> BrowserContext:
-        """
-        launch browser and create browser context
-        :param chromium: chromium browser
-        :param playwright_proxy: playwright proxy
-        :param user_agent: user agent
-        :param headless: headless mode
-        :return: browser context
-        """
-        utils.logger.info("[BilibiliCrawler.launch_browser] Begin create browser context ...")
-        if config.SAVE_LOGIN_STATE:
-            # feat issue #14
-            # we will save login state to avoid login every time
-            user_data_dir = os.path.join(os.getcwd(), "browser_data", config.USER_DATA_DIR % config.PLATFORM)  # type: ignore
-            browser_context = await chromium.launch_persistent_context(
-                user_data_dir=user_data_dir,
-                accept_downloads=True,
-                headless=headless,
-                proxy=playwright_proxy,  # type: ignore
-                viewport={
-                    "width": 1920,
-                    "height": 1080
-                },
-                user_agent=user_agent,
-            )
-            return browser_context
-        else:
-            # type: ignore
-            browser = await chromium.launch(headless=headless, proxy=playwright_proxy)
-            browser_context = await browser.new_context(viewport={"width": 1920, "height": 1080}, user_agent=user_agent)
-            return browser_context
-
-    async def launch_browser_with_cdp(
-        self,
-        playwright: Playwright,
-        playwright_proxy: Optional[Dict],
-        user_agent: Optional[str],
-        headless: bool = True,
-    ) -> BrowserContext:
-        """
-        使用CDP模式启动浏览器
-        """
-        try:
-            self.cdp_manager = CDPBrowserManager()
-            browser_context = await self.cdp_manager.launch_and_connect(
-                playwright=playwright,
-                playwright_proxy=playwright_proxy,
-                user_agent=user_agent,
-                headless=headless,
-            )
-
-            # 显示浏览器信息
-            browser_info = await self.cdp_manager.get_browser_info()
-            utils.logger.info(f"[BilibiliCrawler] CDP浏览器信息: {browser_info}")
-
-            return browser_context
-
-        except Exception as e:
-            utils.logger.error(f"[BilibiliCrawler] CDP模式启动失败，回退到标准模式: {e}")
-            # 回退到标准模式
-            chromium = playwright.chromium
-            return await self.launch_browser(chromium, playwright_proxy, user_agent, headless)
-
-    async def close(self):
-        """Close browser context"""
-        try:
-            # 如果使用CDP模式，需要特殊处理
-            if self.cdp_manager:
-                await self.cdp_manager.cleanup()
-                self.cdp_manager = None
-            elif self.browser_context:
-                await self.browser_context.close()
-            utils.logger.info("[BilibiliCrawler.close] Browser context closed ...")
-        except TargetClosedError:
-            utils.logger.warning("[BilibiliCrawler.close] Browser context was already closed.")
-        except Exception as e:
-            utils.logger.error(f"[BilibiliCrawler.close] An error occurred during close: {e}")
-
-    async def get_bilibili_video(self, video_item: Dict, semaphore: asyncio.Semaphore):
-        """
-        download bilibili video
-        :param video_item:
-        :param semaphore:
-        :return:
-        """
-        if not config.ENABLE_GET_MEIDAS:
-            utils.logger.info(f"[BilibiliCrawler.get_bilibili_video] Crawling image mode is not enabled")
-            return
-        video_item_view: Dict = video_item.get("View")
-        aid = video_item_view.get("aid")
-        cid = video_item_view.get("cid")
-        result = await self.get_video_play_url_task(aid, cid, semaphore)
-        if result is None:
-            utils.logger.info("[BilibiliCrawler.get_bilibili_video] get video play url failed")
-            return
-        durl_list = result.get("durl")
-        max_size = -1
-        video_url = ""
-        for durl in durl_list:
-            size = durl.get("size")
-            if size > max_size:
-                max_size = size
-                video_url = durl.get("url")
-        if video_url == "":
-            utils.logger.info("[BilibiliCrawler.get_bilibili_video] get video url failed")
-            return
-
-        content = await self.bili_client.get_video_media(video_url)
-        await asyncio.sleep(random.random())
-        if content is None:
-            return
-        extension_file_name = f"video.mp4"
-        await bilibili_store.store_video(aid, content, extension_file_name)
-
-    async def get_all_creator_details(self, creator_id_list: List[int]):
-        """
-        creator_id_list: get details for creator from creator_id_list
-        """
-        utils.logger.info(f"[BilibiliCrawler.get_creator_details] Crawling the detalis of creator")
-        utils.logger.info(f"[BilibiliCrawler.get_creator_details] creator ids:{creator_id_list}")
-
-        semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
-        task_list: List[Task] = []
-        try:
-            for creator_id in creator_id_list:
-                task = asyncio.create_task(self.get_creator_details(creator_id, semaphore), name=creator_id)
-                task_list.append(task)
-        except Exception as e:
-            utils.logger.warning(f"[BilibiliCrawler.get_all_creator_details] error in the task list. The creator will not be included. {e}")
-
-        await asyncio.gather(*task_list)
-
-    async def get_creator_details(self, creator_id: int, semaphore: asyncio.Semaphore):
-        """
-        get details for creator id
-        :param creator_id:
-        :param semaphore:
-        :return:
-        """
-        async with semaphore:
-            creator_unhandled_info: Dict = await self.bili_client.get_creator_info(creator_id)
-            creator_info: Dict = {
-                "id": creator_id,
-                "name": creator_unhandled_info.get("name"),
-                "sign": creator_unhandled_info.get("sign"),
-                "avatar": creator_unhandled_info.get("face"),
-            }
-        await self.get_fans(creator_info, semaphore)
-        await self.get_followings(creator_info, semaphore)
-        await self.get_dynamics(creator_info, semaphore)
-
-    async def get_fans(self, creator_info: Dict, semaphore: asyncio.Semaphore):
-        """
-        get fans for creator id
-        :param creator_info:
-        :param semaphore:
-        :return:
-        """
-        creator_id = creator_info["id"]
-        async with semaphore:
-            try:
-                utils.logger.info(f"[BilibiliCrawler.get_fans] begin get creator_id: {creator_id} fans ...")
-                await self.bili_client.get_creator_all_fans(
-                    creator_info=creator_info,
-                    crawl_interval=random.random(),
-                    callback=bilibili_store.batch_update_bilibili_creator_fans,
-                    max_count=config.CRAWLER_MAX_CONTACTS_COUNT_SINGLENOTES,
-                )
-
-            except DataFetchError as ex:
-                utils.logger.error(f"[BilibiliCrawler.get_fans] get creator_id: {creator_id} fans error: {ex}")
-            except Exception as e:
-                utils.logger.error(f"[BilibiliCrawler.get_fans] may be been blocked, err:{e}")
-
-    async def get_followings(self, creator_info: Dict, semaphore: asyncio.Semaphore):
-        """
-        get followings for creator id
-        :param creator_info:
-        :param semaphore:
-        :return:
-        """
-        creator_id = creator_info["id"]
-        async with semaphore:
-            try:
-                utils.logger.info(f"[BilibiliCrawler.get_followings] begin get creator_id: {creator_id} followings ...")
-                await self.bili_client.get_creator_all_followings(
-                    creator_info=creator_info,
-                    crawl_interval=random.random(),
-                    callback=bilibili_store.batch_update_bilibili_creator_followings,
-                    max_count=config.CRAWLER_MAX_CONTACTS_COUNT_SINGLENOTES,
-                )
-
-            except DataFetchError as ex:
-                utils.logger.error(f"[BilibiliCrawler.get_followings] get creator_id: {creator_id} followings error: {ex}")
-            except Exception as e:
-                utils.logger.error(f"[BilibiliCrawler.get_followings] may be been blocked, err:{e}")
-
-    async def get_dynamics(self, creator_info: Dict, semaphore: asyncio.Semaphore):
-        """
-        get dynamics for creator id
-        :param creator_info:
-        :param semaphore:
-        :return:
-        """
-        creator_id = creator_info["id"]
-        async with semaphore:
-            try:
-                utils.logger.info(f"[BilibiliCrawler.get_dynamics] begin get creator_id: {creator_id} dynamics ...")
-                await self.bili_client.get_creator_all_dynamics(
-                    creator_info=creator_info,
-                    crawl_interval=random.random(),
-                    callback=bilibili_store.batch_update_bilibili_creator_dynamics,
-                    max_count=config.CRAWLER_MAX_DYNAMICS_COUNT_SINGLENOTES,
-                )
-
-            except DataFetchError as ex:
-                utils.logger.error(f"[BilibiliCrawler.get_dynamics] get creator_id: {creator_id} dynamics error: {ex}")
-            except Exception as e:
-                utils.logger.error(f"[BilibiliCrawler.get_dynamics] may be been blocked, err:{e}")
@@ -1,25 +0,0 @@
-# 声明：本代码仅供学习和研究目的使用。使用者应遵守以下原则：  
-# 1. 不得用于任何商业用途。  
-# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。  
-# 3. 不得进行大规模爬取或对平台造成运营干扰。  
-# 4. 应合理控制请求频率，避免给目标平台带来不必要的负担。   
-# 5. 不得用于任何非法或不当的用途。
-#   
-# 详细许可条款请参阅项目根目录下的LICENSE文件。  
-# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。  
-
-
-# -*- coding: utf-8 -*-
-# @Author  : relakkes@gmail.com
-# @Time    : 2023/12/2 18:44
-# @Desc    :
-
-from httpx import RequestError
-
-
-class DataFetchError(RequestError):
-    """something error when fetch"""
-
-
-class IPBlockError(RequestError):
-    """fetch so fast that the server block us ip"""
@@ -1,45 +0,0 @@
-# 声明：本代码仅供学习和研究目的使用。使用者应遵守以下原则：  
-# 1. 不得用于任何商业用途。  
-# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。  
-# 3. 不得进行大规模爬取或对平台造成运营干扰。  
-# 4. 应合理控制请求频率，避免给目标平台带来不必要的负担。   
-# 5. 不得用于任何非法或不当的用途。
-#   
-# 详细许可条款请参阅项目根目录下的LICENSE文件。  
-# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。  
-
-
-# -*- coding: utf-8 -*-
-# @Author  : relakkes@gmail.com
-# @Time    : 2023/12/3 16:20
-# @Desc    :
-
-from enum import Enum
-
-
-class SearchOrderType(Enum):
-    # 综合排序
-    DEFAULT = ""
-
-    # 最多点击
-    MOST_CLICK = "click"
-
-    # 最新发布
-    LAST_PUBLISH = "pubdate"
-
-    # 最多弹幕
-    MOST_DANMU = "dm"
-
-    # 最多收藏
-    MOST_MARK = "stow"
-
-
-class CommentOrderType(Enum):
-    # 仅按热度
-    DEFAULT = 0
-
-    # 按热度+按时间
-    MIXED = 1
-
-    # 按时间
-    TIME = 2
@@ -1,81 +0,0 @@
-# 声明：本代码仅供学习和研究目的使用。使用者应遵守以下原则：  
-# 1. 不得用于任何商业用途。  
-# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。  
-# 3. 不得进行大规模爬取或对平台造成运营干扰。  
-# 4. 应合理控制请求频率，避免给目标平台带来不必要的负担。   
-# 5. 不得用于任何非法或不当的用途。
-#   
-# 详细许可条款请参阅项目根目录下的LICENSE文件。  
-# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。  
-
-
-    # -*- coding: utf-8 -*-
-# @Author  : relakkes@gmail.com
-# @Time    : 2023/12/2 23:26
-# @Desc    : bilibili 请求参数签名
-# 逆向实现参考：https://socialsisteryi.github.io/bilibili-API-collect/docs/misc/sign/wbi.html#wbi%E7%AD%BE%E5%90%8D%E7%AE%97%E6%B3%95
-import urllib.parse
-from hashlib import md5
-from typing import Dict
-
-from tools import utils
-
-
-class BilibiliSign:
-    def __init__(self, img_key: str, sub_key: str):
-        self.img_key = img_key
-        self.sub_key = sub_key
-        self.map_table = [
-            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
-            33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
-            61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
-            36, 20, 34, 44, 52
-        ]
-
-    def get_salt(self) -> str:
-        """
-        获取加盐的 key
-        :return:
-        """
-        salt = ""
-        mixin_key = self.img_key + self.sub_key
-        for mt in self.map_table:
-            salt += mixin_key[mt]
-        return salt[:32]
-
-    def sign(self, req_data: Dict) -> Dict:
-        """
-        请求参数中加上当前时间戳对请求参数中的key进行字典序排序
-        再将请求参数进行 url 编码集合 salt 进行 md5 就可以生成w_rid参数了
-        :param req_data:
-        :return:
-        """
-        current_ts = utils.get_unix_timestamp()
-        req_data.update({"wts": current_ts})
-        req_data = dict(sorted(req_data.items()))
-        req_data = {
-            # 过滤 value 中的 "!'()*" 字符
-            k: ''.join(filter(lambda ch: ch not in "!'()*", str(v)))
-            for k, v
-            in req_data.items()
-        }
-        query = urllib.parse.urlencode(req_data)
-        salt = self.get_salt()
-        wbi_sign = md5((query + salt).encode()).hexdigest()  # 计算 w_rid
-        req_data['w_rid'] = wbi_sign
-        return req_data
-
-
-if __name__ == '__main__':
-    _img_key = "7cd084941338484aae1ad9425b84077c"
-    _sub_key = "4932caff0ff746eab6f01bf08b70ac45"
-    _search_url = "__refresh__=true&_extra=&ad_resource=5654&category_id=&context=&dynamic_offset=0&from_source=&from_spmid=333.337&gaia_vtoken=&highlight=1&keyword=python&order=click&page=1&page_size=20&platform=pc&qv_id=OQ8f2qtgYdBV1UoEnqXUNUl8LEDAdzsD&search_type=video&single_column=0&source_tag=3&web_location=1430654"
-    _req_data = dict()
-    for params in _search_url.split("&"):
-        kvalues = params.split("=")
-        key = kvalues[0]
-        value = kvalues[1]
-        _req_data[key] = value
-    print("pre req_data", _req_data)
-    _req_data = BilibiliSign(img_key=_img_key, sub_key=_sub_key).sign(req_data={"aid":170001})
-    print(_req_data)
@@ -1,118 +0,0 @@
-# 声明：本代码仅供学习和研究目的使用。使用者应遵守以下原则：  
-# 1. 不得用于任何商业用途。  
-# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。  
-# 3. 不得进行大规模爬取或对平台造成运营干扰。  
-# 4. 应合理控制请求频率，避免给目标平台带来不必要的负担。   
-# 5. 不得用于任何非法或不当的用途。
-#   
-# 详细许可条款请参阅项目根目录下的LICENSE文件。  
-# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。  
-
-
-# -*- coding: utf-8 -*-
-# @Author  : relakkes@gmail.com
-# @Time    : 2023/12/2 18:44
-# @Desc    : bilibli登录实现类
-
-import asyncio
-import functools
-import sys
-from typing import Optional
-
-from playwright.async_api import BrowserContext, Page
-from tenacity import (RetryError, retry, retry_if_result, stop_after_attempt,
-                      wait_fixed)
-
-import config
-from base.base_crawler import AbstractLogin
-from tools import utils
-
-
-class BilibiliLogin(AbstractLogin):
-    def __init__(self,
-                 login_type: str,
-                 browser_context: BrowserContext,
-                 context_page: Page,
-                 login_phone: Optional[str] = "",
-                 cookie_str: str = ""
-                 ):
-        config.LOGIN_TYPE = login_type
-        self.browser_context = browser_context
-        self.context_page = context_page
-        self.login_phone = login_phone
-        self.cookie_str = cookie_str
-
-    async def begin(self):
-        """Start login bilibili"""
-        utils.logger.info("[BilibiliLogin.begin] Begin login Bilibili ...")
-        if config.LOGIN_TYPE == "qrcode":
-            await self.login_by_qrcode()
-        elif config.LOGIN_TYPE == "phone":
-            await self.login_by_mobile()
-        elif config.LOGIN_TYPE == "cookie":
-            await self.login_by_cookies()
-        else:
-            raise ValueError(
-                "[BilibiliLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...")
-
-    @retry(stop=stop_after_attempt(600), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
-    async def check_login_state(self) -> bool:
-        """
-            Check if the current login status is successful and return True otherwise return False
-            retry decorator will retry 20 times if the return value is False, and the retry interval is 1 second
-            if max retry times reached, raise RetryError
-        """
-        current_cookie = await self.browser_context.cookies()
-        _, cookie_dict = utils.convert_cookies(current_cookie)
-        if cookie_dict.get("SESSDATA", "") or cookie_dict.get("DedeUserID"):
-            return True
-        return False
-
-    async def login_by_qrcode(self):
-        """login bilibili website and keep webdriver login state"""
-        utils.logger.info("[BilibiliLogin.login_by_qrcode] Begin login bilibili by qrcode ...")
-
-        # click login button
-        login_button_ele = self.context_page.locator(
-            "xpath=//div[@class='right-entry__outside go-login-btn']//div"
-        )
-        await login_button_ele.click()
-        await asyncio.sleep(1)
-        # find login qrcode
-        qrcode_img_selector = "//div[@class='login-scan-box']//img"
-        base64_qrcode_img = await utils.find_login_qrcode(
-            self.context_page,
-            selector=qrcode_img_selector
-        )
-        if not base64_qrcode_img:
-            utils.logger.info("[BilibiliLogin.login_by_qrcode] login failed , have not found qrcode please check ....")
-            sys.exit()
-
-        # show login qrcode
-        partial_show_qrcode = functools.partial(utils.show_qrcode, base64_qrcode_img)
-        asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode)
-
-        utils.logger.info(f"[BilibiliLogin.login_by_qrcode] Waiting for scan code login, remaining time is 20s")
-        try:
-            await self.check_login_state()
-        except RetryError:
-            utils.logger.info("[BilibiliLogin.login_by_qrcode] Login bilibili failed by qrcode login method ...")
-            sys.exit()
-
-        wait_redirect_seconds = 5
-        utils.logger.info(
-            f"[BilibiliLogin.login_by_qrcode] Login successful then wait for {wait_redirect_seconds} seconds redirect ...")
-        await asyncio.sleep(wait_redirect_seconds)
-
-    async def login_by_mobile(self):
-        pass
-
-    async def login_by_cookies(self):
-        utils.logger.info("[BilibiliLogin.login_by_qrcode] Begin login bilibili by cookie ...")
-        for key, value in utils.convert_str_cookie_to_dict(self.cookie_str).items():
-            await self.browser_context.add_cookies([{
-                'name': key,
-                'value': value,
-                'domain': ".bilibili.com",
-                'path': "/"
-            }])