更新部分爬虫以兼容本地运行及数据库存储

This commit is contained in:
z66
2025-12-16 10:56:56 +08:00
parent a9eda60493
commit ff1ce2a3ba
28 changed files with 1394 additions and 126 deletions
@@ -49,8 +49,27 @@ class BilibiliClient(AbstractApiClient):
self.cookie_dict = cookie_dict
async def request(self, method, url, **kwargs) -> Any:
async with httpx.AsyncClient(proxy=self.proxy) as client:
response = await client.request(method, url, timeout=self.timeout, **kwargs)
"""
Basic HTTP request wrapper with retries for transient network errors.
"""
verify = getattr(config, "HTTPX_VERIFY", True)
# 优先使用传入 proxy,其次是 config.HTTPX_PROXY,最后走系统环境变量
proxy = self.proxy or getattr(config, "HTTPX_PROXY", "") or None
async with httpx.AsyncClient(proxy=proxy, timeout=self.timeout, verify=verify) as client:
# 简单重试,处理短暂的连接失败
last_exc: Optional[Exception] = None
for attempt in range(3):
try:
response = await client.request(method, url, **kwargs)
break
except httpx.HTTPError as e:
last_exc = e
if attempt == 2:
# 3rd failure -> give up
utils.logger.error(f"[BilibiliClient.request] Network error on {method} {url}: {repr(e)}")
raise DataFetchError(f"network error: {e}") from e
await asyncio.sleep(1)
try:
data: Dict = response.json()
except json.JSONDecodeError:
@@ -68,10 +68,23 @@ class BilibiliLogin(AbstractLogin):
return True
return False
async def _has_valid_login_cookie(self) -> bool:
    """
    Quickly check whether the current browser context already holds a login cookie.

    Used to skip the QR-code flow when a login state is already present.
    Only the presence of a non-empty ``SESSDATA`` or ``DedeUserID`` cookie is
    checked; the cookie is not verified against the server here.

    Returns:
        True if either cookie has a truthy value, otherwise False.
    """
    context_cookies = await self.browser_context.cookies()
    # convert_cookies returns a pair; only the dict form is needed here.
    _, cookie_map = utils.convert_cookies(context_cookies)
    # Same short-circuit order as the login markers are listed: SESSDATA first.
    if cookie_map.get("SESSDATA"):
        return True
    if cookie_map.get("DedeUserID"):
        return True
    return False
async def login_by_qrcode(self):
"""login bilibili website and keep webdriver login state"""
utils.logger.info("[BilibiliLogin.login_by_qrcode] Begin login bilibili by qrcode ...")
# 如果已经登录则直接跳过扫码流程
if await self._has_valid_login_cookie():
utils.logger.info("[BilibiliLogin.login_by_qrcode] 已检测到有效登录态,跳过扫码登录")
return
# click login button
login_button_ele = self.context_page.locator(
"xpath=//div[@class='right-entry__outside go-login-btn']//div"