变更
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
# Scrapy settings for example project
|
||||
#
|
||||
# For simplicity, this file contains only the most important settings by
|
||||
# default. All the other settings are documented here:
|
||||
#
|
||||
# http://doc.scrapy.org/topics/settings.html
|
||||
#
|
||||
SPIDER_MODULES = ["example.spiders"]
|
||||
NEWSPIDER_MODULE = "example.spiders"
|
||||
|
||||
LOG_LEVEL = "WARNING"
|
||||
|
||||
USER_AGENT = "scrapy-redis (+https://github.com/rolando/scrapy-redis)"
|
||||
|
||||
#设置重复过滤器模块
|
||||
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
|
||||
#设置调度器,scrapy_redis具备与数据库交互的功能
|
||||
SCHEDULER = "scrapy_redis.scheduler.Scheduler"
|
||||
#设置当爬虫结束时是否保持redis数据库中的去重集合与任务队列
|
||||
SCHEDULER_PERSIST = True
|
||||
# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderPriorityQueue"
|
||||
# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderQueue"
|
||||
# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderStack"
|
||||
|
||||
ITEM_PIPELINES = {
|
||||
"example.pipelines.ExamplePipeline": 300,
|
||||
#当开启该管道,该管道将会把数据存到redis数据库中
|
||||
"scrapy_redis.pipelines.RedisPipeline": 400,
|
||||
}
|
||||
#设置redis数据库
|
||||
REDIS_URL = "redis://127.0.0.1:6379"
|
||||
|
||||
LOG_LEVEL = "DEBUG"
|
||||
|
||||
# Introduce an artifical delay to make use of parallelism. to speed up the
|
||||
# crawl.
|
||||
DOWNLOAD_DELAY = 1
|
||||
Reference in New Issue
Block a user