更新部分爬虫以兼容本地运行及数据库存储
This commit is contained in:
@@ -34,7 +34,7 @@ class ZhiHuClient(AbstractApiClient):
 
     def __init__(
         self,
-        timeout=10,
+        timeout=30,  # 增加超时时间到30秒,避免请求卡住
         proxy=None,
         *,
         headers: Dict[str, str],
@@ -57,7 +57,8 @@ class ZhiHuClient(AbstractApiClient):
         """
         d_c0 = self.cookie_dict.get("d_c0")
         if not d_c0:
-            raise Exception("d_c0 not found in cookies")
+            utils.logger.error(f"[ZhiHuClient._pre_headers] d_c0 not found in cookies. Available cookies: {list(self.cookie_dict.keys())}")
+            raise Exception("d_c0 not found in cookies. Please make sure you have logged in and cookies are updated.")
         sign_res = sign(url, self.default_headers["cookie"])
         headers = self.default_headers.copy()
         headers['x-zst-81'] = sign_res["x-zst-81"]
@@ -184,6 +185,7 @@ class ZhiHuClient(AbstractApiClient):
         Returns:
 
         """
+        utils.logger.info(f"[ZhiHuClient.get_note_by_keyword] 开始搜索关键词: {keyword}, 页码: {page}")
         uri = "/api/v4/search_v3"
         params = {
             "gk_version": "gz-gaokao",
@@ -200,9 +202,16 @@ class ZhiHuClient(AbstractApiClient):
             "sort": sort.value,
             "vertical": note_type.value,
         }
-        search_res = await self.get(uri, params)
-        utils.logger.info(f"[ZhiHuClient.get_note_by_keyword] Search result: {search_res}")
-        return self._extractor.extract_contents_from_search(search_res)
+        try:
+            utils.logger.debug(f"[ZhiHuClient.get_note_by_keyword] 发送搜索请求: {uri}, params: {params}")
+            search_res = await self.get(uri, params)
+            utils.logger.info(f"[ZhiHuClient.get_note_by_keyword] 搜索请求成功,开始解析结果")
+            contents = self._extractor.extract_contents_from_search(search_res)
+            utils.logger.info(f"[ZhiHuClient.get_note_by_keyword] 解析完成,找到 {len(contents)} 条内容")
+            return contents
+        except Exception as e:
+            utils.logger.error(f"[ZhiHuClient.get_note_by_keyword] 搜索失败: {e}", exc_info=True)
+            raise
 
     async def get_root_comments(
         self,
Reference in New Issue
Block a user