# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。

# -*- coding: utf-8 -*-
from typing import List

import config
from base.base_crawler import AbstractStore
from model.m_zhihu import ZhihuComment, ZhihuContent, ZhihuCreator
from ._store_impl import (
    ZhihuCsvStoreImplement,
    ZhihuDbStoreImplement,
    ZhihuJsonStoreImplement,
    ZhihuSqliteStoreImplement,
)
from tools import utils
from var import source_keyword_var


class ZhihuStoreFactory:
    """Pick the Zhihu store implementation named by ``config.SAVE_DATA_OPTION``."""

    # Save-option name -> store implementation class.
    # "db" and "postgresql" resolve to the same implementation class.
    STORES = {
        "csv": ZhihuCsvStoreImplement,
        "db": ZhihuDbStoreImplement,
        "json": ZhihuJsonStoreImplement,
        "sqlite": ZhihuSqliteStoreImplement,
        "postgresql": ZhihuDbStoreImplement,
    }

    @staticmethod
    def create_store() -> AbstractStore:
        """
        Instantiate the store registered for ``config.SAVE_DATA_OPTION``.

        Returns:
            A new instance of the matching store class (one per call).

        Raises:
            ValueError: the configured option has no entry in ``STORES``.
        """
        implementation = ZhihuStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
        if implementation:
            return implementation()
        raise ValueError(
            "[ZhihuStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or postgresql ...")


async def batch_update_zhihu_contents(contents: List[ZhihuContent]):
    """
    Store a batch of Zhihu contents, one item at a time and in order.

    An exception raised while storing an item is not caught here, so it
    stops the batch and propagates to the caller.

    Args:
        contents: items to store; an empty or falsy value is a no-op.

    Returns:
        None
    """
    for entry in contents or ():
        await update_zhihu_content(entry)


async def update_zhihu_content(content_item: ZhihuContent):
    """
    Store a single Zhihu content item.

    The item's ``source_keyword`` is overwritten with the current value of
    ``source_keyword_var`` before it is dumped, and a ``last_modify_ts``
    entry is added to the dumped record.

    Args:
        content_item: the content to store (mutated: ``source_keyword``).

    Returns:
        None
    """
    content_item.source_keyword = source_keyword_var.get()

    record = content_item.model_dump()
    record["last_modify_ts"] = utils.get_current_timestamp()
    utils.logger.info(f"[store.zhihu.update_zhihu_content] zhihu content: {record}")

    store = ZhihuStoreFactory.create_store()
    await store.store_content(record)


async def batch_update_zhihu_note_comments(comments: List[ZhihuComment]):
    """
    Store a batch of Zhihu comments, continuing past individual failures.

    Each failure is logged with its traceback and counted; once the loop
    finishes, a summary is logged at warning level if anything failed and
    at info level otherwise.

    Args:
        comments: comments to store; an empty or falsy value is a no-op.

    Returns:
        None
    """
    if not comments:
        return

    saved = 0
    failed = 0
    for comment in comments:
        try:
            await update_zhihu_content_comment(comment)
        except Exception as exc:
            failed += 1
            comment_id = getattr(comment, "comment_id", "unknown")
            utils.logger.error(f"[store.zhihu.batch_update_zhihu_note_comments] 保存评论失败 (comment_id={comment_id}): {exc}", exc_info=True)
        else:
            saved += 1

    if failed > 0:
        utils.logger.warning(f"[store.zhihu.batch_update_zhihu_note_comments] 批量保存完成: 成功 {saved} 条, 失败 {failed} 条")
        return
    utils.logger.info(f"[store.zhihu.batch_update_zhihu_note_comments] 批量保存完成: 成功 {saved} 条")


async def update_zhihu_content_comment(comment_item: ZhihuComment):
    """
    Store a single Zhihu comment.

    Any exception raised while dumping, logging or storing is logged with
    its traceback and then re-raised unchanged.

    Args:
        comment_item: the comment to store.

    Returns:
        None
    """
    try:
        record = comment_item.model_dump()
        record["last_modify_ts"] = utils.get_current_timestamp()

        # Log only the comment id instead of the whole record, so that
        # encoding problems in the payload cannot break the log output.
        comment_id = record.get("comment_id", "unknown")
        utils.logger.debug(f"[store.zhihu.update_zhihu_note_comment] 准备保存评论: comment_id={comment_id}")

        store = ZhihuStoreFactory.create_store()
        await store.store_comment(record)
    except Exception as exc:
        # The dump itself may have failed, so read the id from the model.
        comment_id = getattr(comment_item, "comment_id", "unknown")
        utils.logger.error(f"[store.zhihu.update_zhihu_note_comment] 保存评论异常 (comment_id={comment_id}): {exc}", exc_info=True)
        raise


async def save_creator(creator: ZhihuCreator):
    """
    Store a Zhihu creator profile.

    Args:
        creator: the creator to store; a falsy value is a no-op.

    Returns:
        None
    """
    if creator:
        record = creator.model_dump()
        record["last_modify_ts"] = utils.get_current_timestamp()
        store = ZhihuStoreFactory.create_store()
        await store.store_creator(record)