本地化&2.0
This commit is contained in:
@@ -9,8 +9,8 @@
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
# 基础配置
|
||||
PLATFORM = "bili" # 平台,xhs | dy | ks | bili | wb | tieba | zhihu
|
||||
KEYWORDS = "电影鬼灭之刃,亲属想侵吞3姐妹亡父赔偿款,网警斩断侵害未成年人网络黑色产业链,2007年后出生的人不能在马尔代夫吸烟,沈月,是公主也是自己的骑士,以军虐囚视频,唐朝诡事录,广州地铁回应APP乘车码频繁弹窗广告,全红婵的减肥计划精确到克" # 关键词搜索配置,以英文逗号分隔
|
||||
PLATFORM = "zhihu" # 平台,xhs | dy | ks | bili | wb | tieba | zhihu
|
||||
KEYWORDS = "F6智慧门店,南京爱福路汽车科技有限公司,汽车后市场,汽修店,新康众" # 关键词搜索配置,以英文逗号分隔
|
||||
LOGIN_TYPE = "qrcode" # qrcode or phone or cookie
|
||||
COOKIES = ""
|
||||
CRAWLER_TYPE = "search" # 爬取类型,search(关键词搜索) | detail(帖子详情)| creator(创作者主页数据)
|
||||
@@ -61,7 +61,7 @@ BROWSER_LAUNCH_TIMEOUT = 30
|
||||
AUTO_CLOSE_BROWSER = True
|
||||
|
||||
# 数据保存类型选项配置,支持五种类型:csv、db、json、sqlite、postgresql, 最好保存到DB,有排重的功能。
|
||||
SAVE_DATA_OPTION = "postgresql" # csv or db or json or sqlite or postgresql
|
||||
SAVE_DATA_OPTION = "db" # csv or db or json or sqlite or postgresql
|
||||
|
||||
# 用户浏览器缓存的浏览器文件配置
|
||||
USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name
|
||||
@@ -70,7 +70,7 @@ USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name
|
||||
START_PAGE = 1
|
||||
|
||||
# 爬取视频/帖子的数量控制
|
||||
CRAWLER_MAX_NOTES_COUNT = 5
|
||||
CRAWLER_MAX_NOTES_COUNT = 50
|
||||
|
||||
# 并发爬虫数量控制
|
||||
MAX_CONCURRENCY_NUM = 1
|
||||
@@ -84,6 +84,11 @@ ENABLE_GET_COMMENTS = True
|
||||
# 爬取一级评论的数量控制(单视频/帖子)
|
||||
CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES = 20
|
||||
|
||||
# 是否对评论做去重及重复页跳出(针对贴吧等平台)
|
||||
ENABLE_COMMENT_DEDUP = True
|
||||
# 连续多少页没有新评论时中断评论循环
|
||||
COMMENT_DUP_BREAK_THRESHOLD = 2
|
||||
|
||||
# 是否开启爬二级评论模式, 默认不开启爬二级评论
|
||||
# 老版本项目使用了 db, 则需参考 schema/tables.sql line 287 增加表字段
|
||||
ENABLE_GET_SUB_COMMENTS = False
|
||||
|
||||
@@ -12,10 +12,10 @@
|
||||
import os
|
||||
|
||||
# mysql config - 使用MindSpider的数据库配置
|
||||
MYSQL_DB_PWD = "bettafish"
|
||||
MYSQL_DB_USER = "bettafish"
|
||||
MYSQL_DB_HOST = "127.0.0.1"
|
||||
MYSQL_DB_PORT = 5444
|
||||
MYSQL_DB_PWD = "123123"
|
||||
MYSQL_DB_USER = "intelligence"
|
||||
MYSQL_DB_HOST = "123.60.167.249"
|
||||
MYSQL_DB_PORT = 3306
|
||||
MYSQL_DB_NAME = "bettafish"
|
||||
|
||||
mysql_db_config = {
|
||||
@@ -48,7 +48,7 @@ sqlite_db_config = {
|
||||
POSTGRESQL_DB_PWD = os.getenv("POSTGRESQL_DB_PWD", "bettafish")
|
||||
POSTGRESQL_DB_USER = os.getenv("POSTGRESQL_DB_USER", "bettafish")
|
||||
POSTGRESQL_DB_HOST = os.getenv("POSTGRESQL_DB_HOST", "127.0.0.1")
|
||||
POSTGRESQL_DB_PORT = os.getenv("POSTGRESQL_DB_PORT", "5444")
|
||||
POSTGRESQL_DB_PORT = os.getenv("POSTGRESQL_DB_PORT", "5432")
|
||||
POSTGRESQL_DB_NAME = os.getenv("POSTGRESQL_DB_NAME", "bettafish")
|
||||
|
||||
postgresql_db_config = {
|
||||
|
||||
Reference in New Issue
Block a user