更新部分爬虫以兼容本地运行及数据库存储

This commit is contained in:
z66
2025-12-16 10:56:56 +08:00
parent a9eda60493
commit ff1ce2a3ba
28 changed files with 1394 additions and 126 deletions
@@ -45,13 +45,51 @@ class KuaiShouClient(AbstractApiClient):
self.graphql = KuaiShouGraphQL()
async def request(self, method, url, **kwargs) -> Any:
    """Send an HTTP request, retrying network failures with a proxy fallback.

    The first attempt goes through ``self.proxy`` when one is configured;
    every later attempt connects directly. Only connection-level errors
    are retried, with a one-second pause between attempts.

    Args:
        method: HTTP method name handed to ``httpx.AsyncClient.request``.
        url: Target URL.
        **kwargs: Extra keyword arguments forwarded to
            ``httpx.AsyncClient.request``.

    Returns:
        The value stored under the ``"data"`` key of the JSON response,
        or ``{}`` when that key is absent.

    Raises:
        DataFetchError: If the JSON response has a truthy ``"errors"`` entry.
        httpx.NetworkError, httpx.ConnectTimeout: If the final attempt
            still fails at the network level.
        Exception: Any other failure is logged and re-raised immediately.
    """
    max_retries = 3
    # Proxy first (if set), then fall back to a direct connection.
    proxy_attempts: List[Optional[str]] = [self.proxy, None] if self.proxy else [None]

    for attempt in range(max_retries):
        # Attempts beyond the end of the list reuse its last entry (direct).
        proxy_to_use = proxy_attempts[min(attempt, len(proxy_attempts) - 1)]
        try:
            async with httpx.AsyncClient(proxy=proxy_to_use) as client:
                response = await client.request(method, url, timeout=self.timeout, **kwargs)
                data: Dict = response.json()
                errors = data.get("errors")
                if errors:
                    raise DataFetchError(errors)
                return data.get("data", {})
        except (httpx.ConnectError, httpx.ConnectTimeout, httpx.NetworkError) as e:
            utils.logger.warning(
                f"[KuaiShouClient.request] Network error (attempt {attempt+1}/{max_retries}) "
                f"proxy={proxy_to_use} url={url} err={e!r}"
            )
            if attempt < max_retries - 1:
                await asyncio.sleep(1)
                continue
            # Out of attempts: report once at error level and propagate.
            utils.logger.error(
                f"[KuaiShouClient.request] Network failed after {max_retries} attempts "
                f"proxy={proxy_to_use} url={url} err={e!r}"
            )
            raise
        except Exception as e:
            # Non-network failures (e.g. DataFetchError, invalid JSON) are not retried.
            utils.logger.error(
                f"[KuaiShouClient.request] Request failed proxy={proxy_to_use} url={url} err={e!r}"
            )
            raise
    # Not reached: every iteration above returns, retries, or raises.
async def get(self, uri: str, params=None) -> Dict:
final_uri = uri