变更

2025-08-05 09:19:34 +08:00
commit 584548d006
1696 changed files with 53855 additions and 0 deletions
@@ -0,0 +1,18 @@
+skips:
+- B101
+- B105
+- B301
+- B303
+- B306
+- B307
+- B311
+- B320
+- B321
+- B324
+- B403
+- B404
+- B406
+- B410
+- B503
+- B603
+- B605
@@ -0,0 +1,35 @@
+[bumpversion]
+current_version = 0.9.1
+commit = False
+tag = False
+parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>\w+))?
+serialize = 
+	{major}.{minor}.{patch}-{release}
+	{major}.{minor}.{patch}
+
+[bumpversion:part:release]
+optional_value = placeholder
+values = 
+	a1
+	b1
+	rc1
+	placeholder
+
+[bumpversion:file:VERSION]
+search = {current_version}
+replace = {new_version}
+
+[bumpversion:file:src/scrapy_redis/__init__.py]
+search = __version__ = "{current_version}"
+replace = __version__ = "{new_version}"
+
+[bumpversion:file:.cookiecutterrc]
+search = version: {current_version}
+replace = version: {new_version}
+
+[bumpversion:file:HISTORY.rst]
+search = .. bumpversion marker
+replace = .. bumpversion marker
+	
+	{new_version} ({now:%Y-%m-%d})
+	------------------
@@ -0,0 +1,19 @@
+# Generated by cookiepatcher, a small shim around cookiecutter (pip install cookiepatcher)
+
+cookiecutter:
+    email: rolando at rmax.io
+    full_name: Rolando Espinoza
+    github_username: rolando
+    project_name: Scrapy-Redis
+    project_package: scrapy_redis
+    project_short_description: Redis-based components for Scrapy.
+    project_slug: scrapy-redis
+    pypi_username: rolando
+    use_codecov: y
+    use_cython: n
+    use_landscape: y
+    use_pypi_deployment_with_travis: n
+    use_pytest: y
+    use_requiresio: y
+    version: 0.9.1
+    year: 2011-2022
@@ -0,0 +1,25 @@
+[paths]
+source =
+    src
+
+[run]
+omit = setup.py
+branch = true
+source =
+    scrapy_redis
+    tests
+parallel = true
+
+[report]
+show_missing = true
+precision = 2
+omit = */__init__.py
+exclude_lines =
+    pragma: no cover
+    def __repr__
+    if self.debug:
+    if settings.DEBUG
+    raise AssertionError
+    raise NotImplementedError
+    if 0:
+    if __name__ == .__main__.:
@@ -0,0 +1,46 @@
+*.py[cod]
+*.swp
+*~
+
+.ropeproject
+
+# C extensions
+*.so
+
+# Packages
+*.egg
+*.egg-info
+dist
+build
+eggs
+parts
+bin
+var
+sdist
+develop-eggs
+.installed.cfg
+lib
+lib64
+__pycache__
+
+# Installer logs
+pip-log.txt
+
+# Unit test / coverage reports
+.coverage
+.tox
+nosetests.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# JetBrains PyCharm IDE
+/.idea/
+
+.venv
+.tags
@@ -0,0 +1,21 @@
+# http://editorconfig.org
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+insert_final_newline = true
+charset = utf-8
+end_of_line = lf
+
+[*.bat]
+indent_style = tab
+end_of_line = crlf
+
+[LICENSE]
+insert_final_newline = false
+
+[Makefile]
+indent_style = tab
@@ -0,0 +1,12 @@
+
+[flake8]
+
+max-line-length = 119
+ignore =
+    W503
+    P102
+    P103
+
+# Files skipped entirely. Entries are split on commas/whitespace, so an error
+# code written after a path is read as another path pattern, not as an ignore;
+# use per-file-ignores if only specific codes should be silenced.
+exclude =
+    tests/test_spiders.py
+    docs/conf.py
@@ -0,0 +1,3 @@
+# GitHub syntax highlighting
+pixi.lock linguist-language=YAML
+
@@ -0,0 +1,11 @@
+# Description
+
+Please describe your problem/feature request/bug
+
+# Steps to Reproduce
+
+Please offer the steps to reproduce your problem/bug
+
+# Error log
+
+Please provide the error message or a screenshot for better understanding.
@@ -0,0 +1,25 @@
+# Description
+
+Please include a summary of the changes and the related issue. Please also include relevant motivation and context. List any dependencies that are required for this change.
+
+Fixes #(issue)
+
+# How Has This Been Tested?
+
+Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration
+- [ ] pytest
+- [ ] Other test (please specify)
+
+# Test Configuration:
+- OS version:
+- Necessary Libraries (optional):
+
+# Checklist:
+- [ ] My code follows the style guidelines of this project
+- [ ] I have performed a self-review of my code
+- [ ] I have commented my code, particularly in hard-to-understand areas
+- [ ] I have made corresponding changes to the documentation
+- [ ] My changes generate no new warnings
+- [ ] I have added tests that prove my fix is effective or that my feature works
+- [ ] New and existing unit tests pass locally with my changes
+- [ ] Any dependent changes have been merged and published in downstream modules
@@ -0,0 +1,31 @@
+# This is GitHub Action for cross platform building
+name: build
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+jobs:
+  builds:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+        python-version: ["3.12"]
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Run build
+      env:
+        TOXENV: build
+      run: |
+        pip install -r requirements-tests.txt
+        tox
@@ -0,0 +1,41 @@
+# This is GitHub Action for linting and security check
+name: check
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+concurrency:
+  group: ${{github.workflow}}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  checks:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.12"]
+        env: [security, flake8]
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Run check
+      env:
+        TOXENV: ${{ matrix.env }}
+      run: |
+        pip install -r requirements-tests.txt
+        tox
+
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+    - uses: pre-commit/action@v3.0.0
@@ -0,0 +1,30 @@
+# This is GitHub Action for building the documentation
+name: docs
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+jobs:
+  builds:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.12"]
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Build docs
+      env:
+        TOXENV: docs
+      run: |
+        pip install -r requirements-tests.txt
+        tox
@@ -0,0 +1,43 @@
+# This is GitHub Action for tests
+name: test
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.12"]
+
+    services:
+      redis:
+        image: redis
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    container: python:${{ matrix.python-version }}
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Run pytest
+      env:
+        REDIS_HOST: redis
+        TOXENV: pytest
+        TOX_TESTENV_PASSENV: REDIS_HOST
+      run: |
+        pip install -r requirements-tests.txt
+        tox
@@ -0,0 +1,67 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+.venv
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# rope-vim
+.ropeproject
+
+# Extra
+.DS_Store
+.vscode
@@ -0,0 +1,2 @@
+[settings]
+profile = black
@@ -0,0 +1,36 @@
+repos:
+- repo: https://github.com/PyCQA/bandit
+  rev: 1.7.7
+  hooks:
+  - id: bandit
+    args: [-r, -c, .bandit.yml]
+- repo: https://github.com/PyCQA/flake8
+  rev: 7.0.0
+  hooks:
+  - id: flake8
+    additional_dependencies:
+    - flake8-bugbear
+    - flake8-comprehensions
+    - flake8-debugger
+    #- flake8-docstrings
+    - flake8-string-format
+    - flake8-type-checking
+- repo: https://github.com/psf/black.git
+  rev: 24.2.0
+  hooks:
+  - id: black
+- repo: https://github.com/pycqa/isort
+  rev: 5.13.2
+  hooks:
+  - id: isort
+- repo: https://github.com/adamchainz/blacken-docs
+  rev: 1.16.0
+  hooks:
+  - id: blacken-docs
+    additional_dependencies:
+    - black==24.2.0
+- repo: https://github.com/asottile/pyupgrade
+  rev: v3.15.2
+  hooks:
+  - id: pyupgrade
+    args: [--py38-plus, --keep-runtime-typing]
@@ -0,0 +1 @@
+3.10.13
@@ -0,0 +1,17 @@
+version: 2
+formats: all
+sphinx:
+  configuration: docs/conf.py
+  fail_on_warning: true
+
+build:
+  os: ubuntu-22.04
+  tools:
+    # For available versions, see:
+    # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python
+    python: "3.12"
+
+python:
+  install:
+    - requirements: docs/requirements.txt
+    - path: .
@@ -0,0 +1,68 @@
+language: python
+python: 3.5
+sudo: false
+
+services:
+  - redis-server
+
+env:
+  - TOXENV=py27-scrapyrel
+  - TOXENV=py34-scrapyrel
+  - TOXENV=py35-scrapyrel
+
+matrix:
+  fast_finish: true
+
+before_install:
+  - python --version
+  - uname -a
+  - lsb_release -a
+
+# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
+install: 
+  - pip install -U pip wheel
+  - pip install -U tox twine coverage
+  - virtualenv --version
+  - pip --version
+  - tox --version
+
+# command to run tests, e.g. python setup.py test
+script:
+  - tox -e $TOXENV --workdir $HOME/.tox
+
+after_success:
+  # Codecov requires a single .coverage and will run 'coverage xml' to
+  # generate the report.
+  - coverage combine
+  - bash <(curl -s https://codecov.io/bash)
+
+after_failure:
+  - more $HOME/.tox/log/* | cat
+  - more $HOME/.tox/*/log/* | cat
+
+before_cache:
+  - rm -fr $HOME/.cache/pip/log
+  - rm -fr $HOME/.tox/log/*
+  - rm -fr $HOME/.tox/*/log/*
+
+cache:
+  directories:
+    - $HOME/.cache/pip
+    - $HOME/.tox/
+
+# Only send e-mail when a build fails.
+notifications:
+  email:
+    on_success: never
+    on_failure: always
+
+deploy:
+  provider: pypi
+  distributions: "sdist bdist_wheel"
+  user: darkrho
+  password:
+    secure: "Pgcj+Otx9o2MxOuXibvz9LUd5DqlW0jaKDScVOAcFT+//U0esjRqY08bRFQlrSTXokJa6X/dVZlb2mQE8L4vr7mLFspRGO4FByK34L089/ETwsLKI2rks2zVbmPSyweL3sz88EXLKmYs7WsKtCnET67qra6hreKbO67ALAh5WWk="
+  on:
+    tags: true
+    all_branches: true
+    repo: rolando/scrapy-redis
+    condition: "$TOXENV == py35-scrapyrel"
@@ -0,0 +1,13 @@
+=======
+Credits
+=======
+
+Development Lead
+----------------
+
+* R Max Espinoza <hey at rmax.dev>
+
+Contributors
+------------
+
+None yet. Why not be the first?
@@ -0,0 +1,138 @@
+.. highlight:: shell
+
+============
+Contribution
+============
+
+Contributions are welcome, and they are greatly appreciated! Every
+little bit helps, and credit will always be given.
+
+You can contribute in many ways:
+
+Types of Contributions
+----------------------
+
+New to here
+~~~~~~~~~~~
+
+Any issue with good first issue tag on it is a great place to start! Feel free to ask any questions here.
+
+Don't know how to start
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Reviewing the codebase and open PRs is a good way to learn what's going on here!
+
+Report Bugs
+~~~~~~~~~~~
+
+Report bugs at https://github.com/rmax/scrapy-redis/issues.
+
+If you are reporting a bug, please include:
+
+* Your operating system name and version.
+* Any details about your local setup that might be helpful in troubleshooting.
+* Detailed steps to reproduce the bug.
+
+Fix Bugs
+~~~~~~~~
+
+Look through the GitHub issues for bugs. Anything tagged with "bug"
+is open to whoever wants to implement it.
+
+Implement Features & improvements
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Look through the GitHub issues for features. Anything tagged with "feature" or "improvments"
+is open to whoever wants to implement it.
+
+Write Documentation
+~~~~~~~~~~~~~~~~~~~
+
+Scrapy-Redis could always use more documentation, whether as part of the
+official Scrapy-Redis docs, in docstrings, or even on the web in blog posts,
+articles, and such.
+
+Submit Feedback
+~~~~~~~~~~~~~~~
+
+The best way to send feedback is to file an issue at https://github.com/rmax/scrapy-redis/issues.
+
+If you are proposing a feature:
+
+* Explain in detail how it would work.
+* Keep the scope as narrow as possible, to make it easier to implement.
+* Remember that this is a volunteer-driven project, and that contributions
+  are welcome :)
+
+Get Started!
+------------
+
+Ready to contribute? Here's how to set up `scrapy-redis` for local development.
+
+Setup environment
+~~~~~~~~~~~~~~~~~
+
+1. Fork the `scrapy-redis` repo on GitHub.
+2. Clone your fork locally::
+
+       git clone git@github.com:your_name_here/scrapy-redis.git
+
+3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
+
+       pip install virtualenv==20.0.23
+       virtualenv --python=/usr/bin/python3 ~/scrapy_redis
+       source ~/scrapy_redis/bin/activate
+       cd scrapy-redis/
+       pip install -r requirements-install.txt
+       pip install .
+
+4. Create a branch for local development::
+
+       git checkout -b name-of-your-bugfix-or-feature
+
+   Now you can make your changes locally.
+
+Setup testing environment
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox::
+
+       pip install -r requirements-tests.txt
+       flake8 src/ tests/
+       python -m pytest --ignore=setup.py
+       tox
+
+2. If the error `No module named scrapy_redis` shows up, make sure `scrapy-redis` from your branch is installed::
+   
+       pip install .
+
+3. Or change the import lines::
+
+       from scrapy_redis import xxx # from this
+       from src.scrapy_redis import xxx # to this
+
+4. Commit your changes and push your branch to GitHub::
+
+       git add .
+       git commit -m "Your detailed description of your changes."
+       git push origin name-of-your-bugfix-or-feature
+
+5. Submit a pull request through the GitHub website.
+
+Pull Request Guidelines
+-----------------------
+
+Before you submit a pull request, check that it meets these guidelines:
+
+1. The pull request should include tests.
+2. If the pull request adds functionality, the docs should be updated. Put
+   your new functionality into a function with a docstring, and add the
+   feature to the list in README.rst.
+3. Make sure that the tests pass for all supported Python versions.
+
+Tips
+----
+
+To run a subset of tests::
+
+    pytest tests/test_scrapy_redis
@@ -0,0 +1,16 @@
+FROM python:3.11-slim
+
+# Set working directory
+WORKDIR /app
+
+# Install the runtime and test dependencies (tox is expected to come from requirements-tests.txt)
+COPY requirements.txt .
+COPY requirements-tests.txt .
+RUN pip install -r requirements.txt -r requirements-tests.txt
+
+# Copy your project code
+COPY . .
+
+# Run Tox tests
+CMD ["tox"]
+
@@ -0,0 +1,136 @@
+=======
+History
+=======
+
+.. bumpversion marker
+
+0.9.1 (2024-07-06)
+------------------
+* Fixed docs build.
+
+0.9.0 (2024-07-06)
+------------------
+* Fixed ``Scheduler`` not compatible with BaseDupeFilter (#294)
+* Added precommit hooks.
+* Switched to Python 3.12 as default build version.
+
+0.8.0 (2024-07-03)
+------------------
+* Fixed request fingerprint method.
+* Fixed support for Scrapy 2.6+.
+* Fixed tox tests and github workflow.
+* Deprecated ``REDIS_START_URLS_BATCH_SIZE``.
+
+0.7.3 (2022-07-21)
+------------------
+* Move docs to GitHub Wiki
+* Update tox and support dynamic tests
+* Update support for json data
+* Refactor max idle time
+* Add support for python3.7~python3.10
+* Deprecate python2.x support
+
+0.7.2 (2021-12-27)
+------------------
+* Fix RedisStatsCollector._get_key()
+* Fix redis-py dependency version
+* Added maximum idle waiting time MAX_IDLE_TIME_BEFORE_CLOSE
+
+0.7.1 (2021-03-27)
+------------------
+* Fixes datetime parse error for redis-py 3.x.
+* Add support for stats extensions.
+
+0.7.1-rc1 (2021-03-27)
+----------------------
+* Fixes datetime parse error for redis-py 3.x.
+
+0.7.1-b1 (2021-03-22)
+---------------------
+* Add support for stats extensions.
+
+0.7.0-dev (unreleased)
+----------------------
+* Unreleased.
+
+0.6.8 (2017-02-14)
+------------------
+* Fixed automated release due to not matching registered email.
+
+0.6.7 (2016-12-27)
+------------------
+* Fixes bad formatting in logging message.
+
+0.6.6 (2016-12-20)
+------------------
+* Fixes wrong message on dupefilter duplicates.
+
+0.6.5 (2016-12-19)
+------------------
+* Fixed typo in default settings.
+
+0.6.4 (2016-12-18)
+------------------
+* Fixed data decoding in Python 3.x.
+* Added ``REDIS_ENCODING`` setting (default ``utf-8``).
+* Default to ``CONCURRENT_REQUESTS`` value for ``REDIS_START_URLS_BATCH_SIZE``.
+* Renamed queue classes to a proper naming convention (backwards compatible).
+
+0.6.3 (2016-07-03)
+------------------
+* Added ``REDIS_START_URLS_KEY`` setting.
+* Fixed spider method ``from_crawler`` signature.
+
+0.6.2 (2016-06-26)
+------------------
+* Support ``redis_cls`` parameter in ``REDIS_PARAMS`` setting.
+* Python 3.x compatibility fixed.
+* Added ``SCHEDULER_SERIALIZER`` setting.
+
+0.6.1 (2016-06-25)
+------------------
+* **Backwards incompatible change:** Require explicit ``DUPEFILTER_CLASS``
+  setting.
+* Added ``SCHEDULER_FLUSH_ON_START`` setting.
+* Added ``REDIS_START_URLS_AS_SET`` setting.
+* Added ``REDIS_ITEMS_KEY`` setting.
+* Added ``REDIS_ITEMS_SERIALIZER`` setting.
+* Added ``REDIS_PARAMS`` setting.
+* Added ``REDIS_START_URLS_BATCH_SIZE`` spider attribute to read start urls
+  in batches.
+* Added ``RedisCrawlSpider``.
+
+0.6.0 (2015-07-05)
+------------------
+* Updated code to be compatible with Scrapy 1.0.
+* Added `-a domain=...` option for example spiders.
+
+0.5.0 (2013-09-02)
+------------------
+* Added `REDIS_URL` setting to support Redis connection string.
+* Added `SCHEDULER_IDLE_BEFORE_CLOSE` setting to prevent the spider closing too
+  quickly when the queue is empty. Default value is zero keeping the previous
+  behavior.
+* Schedule preemptively requests on item scraped.
+* This version is the latest release compatible with Scrapy 0.24.x.
+
+0.4.0 (2013-04-19)
+------------------
+* Added `RedisSpider` and `RedisMixin` classes as building blocks for spiders
+  to be fed through a redis queue.
+* Added redis queue stats.
+* Let the encoder handle the item as it comes instead converting it to a dict.
+
+0.3.0 (2013-02-18)
+------------------
+* Added support for different queue classes.
+* Changed requests serialization from `marshal` to `cPickle`.
+
+0.2.0 (2013-02-17)
+------------------
+* Improved backward compatibility.
+* Added example project.
+
+0.1.0 (2011-09-01)
+------------------
+* First release on PyPI.
@@ -0,0 +1,19 @@
+Copyright (c) 2011-2024, R Max Espinoza
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,16 @@
+graft docs
+graft src
+graft tests
+graft example-project
+
+include *.in
+include *.ini
+include *.rst
+include *.txt
+
+include LICENSE
+include VERSION
+include Makefile
+
+global-exclude __pycache__ *.py[cod]
+global-exclude *.so *.dylib
@@ -0,0 +1,156 @@
+# Command-style targets that never produce a file of the same name.
+.PHONY: help clean-so clean-test clean-pyc clean-build clean-docs clean
+.PHONY: docs docs-build servedocs check check-manifest check-setup check-history lint
+.PHONY: test test-all coverage develop
+.PHONY: compile-reqs install-reqs
+.PHONY: release dist dist-upload install build-inplace
+# Python helper that opens the file given as argv[1] in the default browser.
+# It is exported so the shell can hand it to `python -c` via the environment.
+# On an interpreter without urllib.request the script exits with a hint
+# instead of continuing into a NameError on pathname2url.
+define BROWSER_PYSCRIPT
+import os, webbrowser, sys
+FAIL = "\033[91m"
+ENDC = "\033[0m"
+
+try:
+	from urllib.request import pathname2url
+except ImportError:
+	sys.exit(FAIL + "Python2 is deprecated, please upgrade your python >= 3.7" + ENDC)
+
+webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
+endef
+export BROWSER_PYSCRIPT
+BROWSER := python -c "$$BROWSER_PYSCRIPT"
+
+SPHINX_BUILD := html
+
+help:
+	@echo "check - check setup, code style, setup, etc"
+	@echo "check-manifest - check manifest"
+	@echo "check-setup - check setup"
+	@echo "check-history - check history"
+	@echo "clean - remove all build, test, coverage and Python artifacts"
+	@echo "clean-build - remove build artifacts"
+	@echo "clean-docs - remove docs artifacts"
+	@echo "clean-pyc - remove Python file artifacts"
+	@echo "clean-test - remove test and coverage artifacts"
+	@echo "clean-so - remove compiled extensions"
+	@echo "lint - check style with flake8"
+	@echo "test - run tests quickly with the default Python"
+	@echo "test-all - run tests on every Python version with tox"
+	@echo "coverage - check code coverage quickly with the default Python"
+	@echo "compile-reqs - compile requirements"
+	@echo "install-reqs - install requirements"
+	@echo "docs - generate Sphinx HTML documentation, including API docs"
+	@echo "dist-upload - package and upload a release"
+	@echo "release - bump release and push changes"
+	@echo "dist - package"
+	@echo "develop - install package in develop mode"
+	@echo "install - install the package to the active Python's site-packages"
+
+check: check-setup check-manifest check-history lint
+
+check-setup:
+	@echo "Checking package metadata (name, description, etc)"
+	python setup.py check --strict --metadata --restructuredtext
+
+check-manifest:
+	@echo "Checking MANIFEST.in"
+	check-manifest --ignore ".*"
+
+check-history:
+	@echo "Checking latest version in HISTORY"
+	VERSION=`cat VERSION`; grep "^$${VERSION}\b" HISTORY.rst
+
+clean: clean-build clean-docs clean-pyc clean-test clean-so
+
+clean-build:
+	rm -fr build/
+	rm -fr dist/
+	rm -fr .eggs/
+	find . -name '*.egg-info' -exec rm -fr {} +
+	find . -name '*.egg' -exec rm -f {} +
+
+clean-docs:
+	$(MAKE) -C docs clean
+
+clean-pyc:
+	find . -name '*.pyc' -exec rm -f {} +
+	find . -name '*.pyo' -exec rm -f {} +
+	find . -name '*~' -exec rm -f {} +
+	find . -name '__pycache__' -exec rm -fr {} +
+
+clean-test:
+	rm -fr .tox/
+	rm -f .coverage
+	rm -fr htmlcov/
+
+clean-so:
+	find . -name '*.so' -exec rm -f {} +
+
+lint:
+	flake8 src tests
+
+build-inplace:
+	python setup.py build_ext --inplace
+
+develop: clean
+	pip install -e .
+
+test: develop
+	pytest --ignore=setup.py
+
+test-all:
+	tox -v
+
+coverage: develop
+	coverage run -m pytest --ignore=setup.py
+	coverage combine
+	coverage report
+	coverage html
+	$(BROWSER) htmlcov/index.html
+
+docs-build: develop
+	rm -f docs/scrapy_redis.rst
+	rm -f docs/modules.rst
+	sphinx-apidoc -o docs/ src/scrapy_redis
+	$(MAKE) -C docs clean
+	$(MAKE) -C docs $(SPHINX_BUILD)
+
+docs: docs-build
+	$(BROWSER) docs/_build/$(SPHINX_BUILD)/index.html
+
+servedocs: docs
+	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
+
+release:
+	@echo "To do a release, follow the steps:"
+	@echo "- bumpversion release"
+	@echo "- Review and commit"
+	@echo "- git tag -a \`cat VERSION\`"
+	@echo "- git push --follow-tags"
+
+dist-upload: clean check dist
+	twine upload dist/*
+
+dist: clean
+	python setup.py sdist
+	python setup.py bdist_wheel
+	ls -l dist
+
+install: clean
+	pip install .
+
+REQUIREMENTS_IN := $(wildcard requirements*.in)
+.PHONY: $(REQUIREMENTS_IN)
+
+requirements%.txt: requirements%.in
+	pip-compile -v $< -o $@
+
+REQUIREMENTS_TXT := $(REQUIREMENTS_IN:.in=.txt)
+ifndef REQUIREMENTS_TXT
+REQUIREMENTS_TXT := $(wildcard requirements*.txt)
+endif
+
+# Rebuild every requirements*.txt from its requirements*.in source.
+# The emptiness checks expand the make variables directly: they are not
+# exported, so a shell-level REQUIREMENTS_TXT variable would always be empty.
+compile-reqs: $(REQUIREMENTS_TXT)
+	@if [ -z "$(strip $(REQUIREMENTS_IN))" ]; then echo "No 'requirements*.in' files. Nothing to do"; fi
+
+# Install every requirements*.txt file into the active environment.
+install-reqs:
+	@if [ -z "$(strip $(REQUIREMENTS_TXT))" ]; then echo "No 'requirements*.txt' files. Nothing to do"; fi
+	$(foreach req,$(REQUIREMENTS_TXT),pip install -r $(req);)
@@ -0,0 +1,110 @@
+============
+Scrapy-Redis
+============
+
+.. image:: https://readthedocs.org/projects/scrapy-redis/badge/?version=latest
+        :alt: Documentation Status
+        :target: https://readthedocs.org/projects/scrapy-redis/?badge=latest
+
+.. image:: https://img.shields.io/pypi/v/scrapy-redis.svg
+        :target: https://pypi.python.org/pypi/scrapy-redis
+
+.. image:: https://img.shields.io/pypi/pyversions/scrapy-redis.svg
+        :target: https://pypi.python.org/pypi/scrapy-redis
+
+.. image:: https://github.com/rmax/scrapy-redis/actions/workflows/builds.yml/badge.svg
+        :target: https://github.com/rmax/scrapy-redis/actions/workflows/builds.yml
+        
+.. image:: https://github.com/rmax/scrapy-redis/actions/workflows/checks.yml/badge.svg
+        :target: https://github.com/rmax/scrapy-redis/actions/workflows/checks.yml
+        
+.. image:: https://github.com/rmax/scrapy-redis/actions/workflows/tests.yml/badge.svg
+        :target: https://github.com/rmax/scrapy-redis/actions/workflows/tests.yml
+        
+.. image:: https://codecov.io/github/rmax/scrapy-redis/coverage.svg?branch=master
+        :alt: Coverage Status
+        :target: https://codecov.io/github/rmax/scrapy-redis
+
+.. image:: https://img.shields.io/badge/security-bandit-green.svg
+        :alt: Security Status
+        :target: https://github.com/rmax/scrapy-redis
+    
+Redis-based components for Scrapy.
+
+* Usage: https://github.com/rmax/scrapy-redis/wiki/Usage
+* Documentation: https://github.com/rmax/scrapy-redis/wiki.
+* Release: https://github.com/rmax/scrapy-redis/wiki/History
+* Contribution: https://github.com/rmax/scrapy-redis/wiki/Getting-Started
+* LICENSE: MIT license
+
+Features
+--------
+
+* Distributed crawling/scraping
+
+    You can start multiple spider instances that share a single redis queue.
+    Best suitable for broad multi-domain crawls.
+
+* Distributed post-processing
+
+    Scraped items get pushed into a redis queue, meaning that you can start as
+    many post-processing processes as needed, all sharing the items queue.
+
+* Scrapy plug-and-play components
+
+    Scheduler + Duplication Filter, Item Pipeline, Base Spiders.
+
+* In this forked version: added ``json`` supported data in Redis
+
+    data contains ``url``, ``meta`` and other optional parameters. ``meta`` is a nested json which contains sub-data.
+    this function extracts this data and sends another FormRequest with ``url``, ``meta`` and additional ``formdata``.
+
+    For example:
+
+    .. code-block:: json
+
+        { "url": "https://example.com", "meta": {"job-id":"123xsd", "start-date":"dd/mm/yy"}, "url_cookie_key":"fertxsas" }
+
+    this data can be accessed in `scrapy spider` through response.
+    like: `request.url`, `request.meta`, `request.cookies`
+    
+.. note:: These features cover the basic case of distributing the workload across multiple workers. If you need more features like URL expiration, advanced URL prioritization, etc., we suggest you take a look at the Frontera_ project.
+
+Requirements
+------------
+
+* Python 3.7+
+* Redis >= 5.0
+* ``Scrapy`` >=  2.0
+* ``redis-py`` >= 4.0
+
+Installation
+------------
+
+From pip 
+
+.. code-block:: bash
+
+    pip install scrapy-redis
+
+From GitHub
+
+.. code-block:: bash
+
+    git clone https://github.com/rmax/scrapy-redis.git
+    cd scrapy-redis
+    python setup.py install
+
+.. note:: To use the json supported data feature, please make sure you have not installed scrapy-redis through pip. If you already did, uninstall it first.
+  
+.. code-block:: bash
+
+    pip uninstall scrapy-redis
+
+Alternative Choice
+---------------------------
+
+Frontera_  is a web crawling framework consisting of `crawl frontier`_, and distribution/scaling primitives, allowing to build a large scale online web crawler.
+
+.. _Frontera: https://github.com/scrapinghub/frontera
+.. _crawl frontier: http://nlp.stanford.edu/IR-book/html/htmledition/the-url-frontier-1.html
@@ -0,0 +1,11 @@
+TODO
+====
+
+* Add SCRAPY_JOB global support (jobs sharing same SCRAPY_JOB share same queues).
+* Use a spider middleware instead of spider mixin. This will avoid the spider
+  idle signal hack.
+* Allow to use pubsub whenever appropriate.
+* Move example project to its own repository. Include different crawling use
+  cases (i.e.: producer/consumer).
+* Add pyrebloom dupefilter.
+* Warn and pass unserializable requests.
@@ -0,0 +1 @@
+0.9.1
@@ -0,0 +1,20 @@
+version: '3.8'
+
+services:
+  python:
+    build: .
+    command: tox -e security,flake8,pytest
+    environment:
+      REDIS_HOST: redis  # Use service name for hostname within docker network
+      REDIS_PORT: 6379
+      TOX_TESTENV_PASSENV: "REDIS_HOST REDIS_PORT"
+    volumes:
+      - ./:/app  # Mount your project directory into the container
+    depends_on:
+      - redis
+
+  redis:
+    image: redis:6.2-alpine
+    ports:
+      - "6379:6379"  # Map Redis port to host port
+
@@ -0,0 +1,177 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = _build
+
+# User-friendly check for sphinx-build
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+# Every target in this file is a command name, not a file to be produced;
+# declaring all of them phony keeps a same-named file or directory from
+# suppressing the build.
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info changes linkcheck doctest gettext xml pseudoxml
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+	@echo "  dirhtml    to make HTML files named index.html in directories"
+	@echo "  singlehtml to make a single large HTML file"
+	@echo "  pickle     to make pickle files"
+	@echo "  json       to make JSON files"
+	@echo "  htmlhelp   to make HTML files and a HTML help project"
+	@echo "  qthelp     to make HTML files and a qthelp project"
+	@echo "  devhelp    to make HTML files and a Devhelp project"
+	@echo "  epub       to make an epub"
+	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
+	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+	@echo "  text       to make text files"
+	@echo "  man        to make manual pages"
+	@echo "  texinfo    to make Texinfo files"
+	@echo "  info       to make Texinfo files and run them through makeinfo"
+	@echo "  gettext    to make PO message catalogs"
+	@echo "  changes    to make an overview of all changed/added/deprecated items"
+	@echo "  xml        to make Docutils-native XML files"
+	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
+	@echo "  linkcheck  to check all external links for integrity"
+	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+	rm -rf $(BUILDDIR)/*
+
+html:
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+	@echo
+	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/scrapy-redis.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/scrapy-redis.qhc"
+
+devhelp:
+	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/scrapy-redis"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/scrapy-redis"
+	@echo "# devhelp"
+
+epub:
+	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+	@echo
+	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make' in that directory to run these through (pdf)latex" \
+	      "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through platex and dvipdfmx..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+	@echo
+	@echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo
+	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+	@echo "Run \`make' in that directory to run these through makeinfo" \
+	      "(use \`make info' here to do that automatically)."
+
+# Build the Texinfo sources, then run them through makeinfo.
+# $(MAKE) is used instead of a literal `make` so command-line flags and the
+# jobserver are passed on to the sub-make, as the latexpdf targets already do.
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	$(MAKE) -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+	@echo
+	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
+
+xml:
+	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+	@echo
+	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+	@echo
+	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
@@ -0,0 +1 @@
+.. include:: ../AUTHORS.rst
@@ -0,0 +1,273 @@
+#!/usr/bin/env python
+#
+# scrapy-redis documentation build configuration file, created by
+# sphinx-quickstart on Tue Jul  9 22:26:36 2013.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import os
+import re
+
+# If extensions (or modules to document with autodoc) are in another
+# directory, add these directories to sys.path here. If the directory is
+# relative to the documentation root, use os.path.abspath to make it
+# absolute, like shown here.
+# sys.path.insert(0, os.path.abspath('.'))
+
+# Get the project root dir, which is the parent dir of this
+project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+# -- General configuration ---------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.viewcode",
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# The suffix of source filenames.
+source_suffix = ".rst"
+
+# The encoding of source files.
+# source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = "index"
+
+# General information about the project.
+project = "Scrapy-Redis"
+copyright = "2011-2024, R Max Espinoza"
+
+# The version info for the project you're documenting, acts as replacement
+# for |version| and |release|, also used in various other places throughout
+# the built documents.
+#
+# The full version, including alpha/beta/rc tags.
+release = open(os.path.join(project_root, "VERSION")).read().strip()
+# The short X.Y version.
+version = re.findall(r"\d+\.\d+\.\d+", release)[0]
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+# language = None
+
+# There are two options for replacing |today|: either, you set today to
+# some non-false value, then it is used:
+# today = ''
+# Else, today_fmt is used as the format for a strftime call.
+# today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ["_build"]
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+# default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+# add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+# add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+# show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = "sphinx"
+
+# A list of ignored prefixes for module index sorting.
+# modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built
+# documents.
+# keep_warnings = False
+
+
+# -- Options for HTML output -------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+html_theme = "default"
+
+# Theme options are theme-specific and customize the look and feel of a
+# theme further.  For a list of options available for each theme, see the
+# documentation.
+# html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+# html_theme_path = []
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+# html_title = None
+
+# A shorter title for the navigation bar.  Default is the same as
+# html_title.
+# html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the
+# top of the sidebar.
+# html_logo = None
+
+# The name of an image file (within the static path) to use as favicon
+# of the docs.  This file should be a Windows icon file (.ico) being
+# 16x16 or 32x32 pixels large.
+# html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets)
+# here, relative to this directory. They are copied after the builtin
+# static files, so a file named "default.css" will overwrite the builtin
+# "default.css".
+# html_static_path = ["_static"]
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page
+# bottom, using the given strftime format.
+# html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+# html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+# html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names
+# to template names.
+# html_additional_pages = {}
+
+# If false, no module index is generated.
+# html_domain_indices = True
+
+# If false, no index is generated.
+# html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+# html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+# html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer.
+# Default is True.
+# html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer.
+# Default is True.
+# html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages
+# will contain a <link> tag referring to it.  The value of this option
+# must be the base URL from which the finished HTML is served.
+# html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+# html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = "scrapy_redisdoc"
+
+
+# -- Options for LaTeX output ------------------------------------------
+
+latex_elements = {
+    #  The paper size ('letterpaper' or 'a4paper').
+    # 'papersize': 'letterpaper',
+    # The font size ('10pt', '11pt' or '12pt').
+    # 'pointsize': '10pt',
+    # Additional stuff for the LaTeX preamble.
+    # 'preamble': '',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass
+# [howto/manual]).
+latex_documents = [
+    (
+        "index",
+        "scrapy_redis.tex",
+        "Scrapy-Redis Documentation",
+        "R Max Espinoza",
+        "manual",
+    ),
+]
+
+# The name of an image file (relative to this directory) to place at
+# the top of the title page.
+# latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings
+# are parts, not chapters.
+# latex_use_parts = False
+
+# If true, show page references after internal links.
+# latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+# latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+# latex_appendices = []
+
+# If false, no module index is generated.
+# latex_domain_indices = True
+
+
+# -- Options for manual page output ------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    ("index", "scrapy_redis", "Scrapy-Redis Documentation", ["R Max Espinoza"], 1)
+]
+
+# If true, show URL addresses after external links.
+# man_show_urls = False
+
+
+# -- Options for Texinfo output ----------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (
+        "index",
+        "scrapy_redis",
+        "Scrapy-Redis Documentation",
+        "R Max Espinoza",
+        "scrapy-redis",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
+]
+
+# Documents to append as an appendix to all manuals.
+# texinfo_appendices = []
+
+# If false, no module index is generated.
+# texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+# texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+# texinfo_no_detailmenu = False
@@ -0,0 +1 @@
+.. include:: ../CONTRIBUTING.rst
@@ -0,0 +1 @@
+.. include:: ../HISTORY.rst
@@ -0,0 +1,27 @@
+.. scrapy-redis documentation master file, created by
+   sphinx-quickstart on Tue Jul  9 22:26:36 2013.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to Scrapy-Redis's documentation!
+========================================
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   readme
+   installation
+   modules
+   contributing
+   history
+   authors
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
@@ -0,0 +1,49 @@
+.. highlight:: shell
+
+============
+Installation
+============
+
+
+Stable release
+--------------
+
+To install Scrapy-Redis, run this command in your terminal:
+
+.. code-block:: console
+
+    pip install scrapy-redis
+
+If you don't have `pip`_ installed, this `Python installation guide`_ can guide
+you through the process.
+
+.. _pip: https://pip.pypa.io
+.. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/
+
+
+From sources
+------------
+
+The sources for Scrapy-Redis can be downloaded from the `Github repo`_.
+
+You can either clone the public repository:
+
+.. code-block:: console
+
+    git clone https://github.com/rolando/scrapy-redis
+
+Or download the `tarball`_:
+
+.. code-block:: console
+
+    curl  -OL https://github.com/rolando/scrapy-redis/tarball/master
+
+Once you have a copy of the source, you can install it with:
+
+.. code-block:: console
+
+    pip install -e .
+
+
+.. _Github repo: https://github.com/rolando/scrapy-redis
+.. _tarball: https://github.com/rolando/scrapy-redis/tarball/master
@@ -0,0 +1,242 @@
+@ECHO OFF
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set BUILDDIR=_build
+set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
+set I18NSPHINXOPTS=%SPHINXOPTS% .
+if NOT "%PAPER%" == "" (
+	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
+	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
+)
+
+if "%1" == "" goto help
+
+if "%1" == "help" (
+	:help
+	echo.Please use `make ^<target^>` where ^<target^> is one of
+	echo.  html       to make standalone HTML files
+	echo.  dirhtml    to make HTML files named index.html in directories
+	echo.  singlehtml to make a single large HTML file
+	echo.  pickle     to make pickle files
+	echo.  json       to make JSON files
+	echo.  htmlhelp   to make HTML files and a HTML help project
+	echo.  qthelp     to make HTML files and a qthelp project
+	echo.  devhelp    to make HTML files and a Devhelp project
+	echo.  epub       to make an epub
+	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+	echo.  text       to make text files
+	echo.  man        to make manual pages
+	echo.  texinfo    to make Texinfo files
+	echo.  gettext    to make PO message catalogs
+	echo.  changes    to make an overview over all changed/added/deprecated items
+	echo.  xml        to make Docutils-native XML files
+	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
+	echo.  linkcheck  to check all external links for integrity
+	echo.  doctest    to run all doctests embedded in the documentation if enabled
+	goto end
+)
+
+if "%1" == "clean" (
+	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
+	del /q /s %BUILDDIR%\*
+	goto end
+)
+
+
+%SPHINXBUILD% 2> nul
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+if "%1" == "html" (
+	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
+	goto end
+)
+
+if "%1" == "dirhtml" (
+	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
+	goto end
+)
+
+if "%1" == "singlehtml" (
+	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
+	goto end
+)
+
+if "%1" == "pickle" (
+	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can process the pickle files.
+	goto end
+)
+
+if "%1" == "json" (
+	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can process the JSON files.
+	goto end
+)
+
+if "%1" == "htmlhelp" (
+	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can run HTML Help Workshop with the ^
+.hhp project file in %BUILDDIR%/htmlhelp.
+	goto end
+)
+
+REM Build the Qt help sources, then print the follow-up commands to run.
+REM The collection file passed to "assistant" has the .qhc extension.
+if "%1" == "qthelp" (
+	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can run "qcollectiongenerator" with the ^
+.qhcp project file in %BUILDDIR%/qthelp, like this:
+	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\scrapy-redis.qhcp
+	echo.To view the help file:
+	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\scrapy-redis.qhc
+	goto end
+)
+
+if "%1" == "devhelp" (
+	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished.
+	goto end
+)
+
+if "%1" == "epub" (
+	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The epub file is in %BUILDDIR%/epub.
+	goto end
+)
+
+if "%1" == "latex" (
+	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
+	goto end
+)
+
+REM Build the LaTeX sources, then run "make all-pdf" inside the output
+REM directory. pushd/popd returns to the starting directory afterwards; a
+REM relative "cd %BUILDDIR%/.." would be resolved from inside the latex
+REM directory and fail.
+if "%1" == "latexpdf" (
+	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+	if errorlevel 1 exit /b 1
+	pushd %BUILDDIR%\latex
+	make all-pdf
+	popd
+	echo.
+	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
+	goto end
+)
+
+REM Build the LaTeX sources, then run "make all-pdf-ja" inside the output
+REM directory. pushd/popd returns to the starting directory afterwards; a
+REM relative "cd %BUILDDIR%/.." would be resolved from inside the latex
+REM directory and fail.
+if "%1" == "latexpdfja" (
+	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+	if errorlevel 1 exit /b 1
+	pushd %BUILDDIR%\latex
+	make all-pdf-ja
+	popd
+	echo.
+	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
+	goto end
+)
+
+if "%1" == "text" (
+	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The text files are in %BUILDDIR%/text.
+	goto end
+)
+
+if "%1" == "man" (
+	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The manual pages are in %BUILDDIR%/man.
+	goto end
+)
+
+if "%1" == "texinfo" (
+	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
+	goto end
+)
+
+if "%1" == "gettext" (
+	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
+	goto end
+)
+
+if "%1" == "changes" (
+	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.The overview file is in %BUILDDIR%/changes.
+	goto end
+)
+
+if "%1" == "linkcheck" (
+	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Link check complete; look for any errors in the above output ^
+or in %BUILDDIR%/linkcheck/output.txt.
+	goto end
+)
+
+if "%1" == "doctest" (
+	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Testing of doctests in the sources finished, look at the ^
+results in %BUILDDIR%/doctest/output.txt.
+	goto end
+)
+
+if "%1" == "xml" (
+	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The XML files are in %BUILDDIR%/xml.
+	goto end
+)
+
+if "%1" == "pseudoxml" (
+	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
+	goto end
+)
+
+:end
@@ -0,0 +1,7 @@
+API Reference
+=============
+
+.. toctree::
+   :maxdepth: 4
+
+   scrapy_redis
@@ -0,0 +1 @@
+.. include:: ../README.rst
@@ -0,0 +1,8 @@
+# These packages are required only for development and release management.
+Sphinx
+bumpversion
+check-manifest
+pip-tools
+twine
+watchdog
+wheel
@@ -0,0 +1,62 @@
+scrapy_redis package
+====================
+
+Submodules
+----------
+
+scrapy_redis.connection module
+------------------------------
+
+.. automodule:: scrapy_redis.connection
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+scrapy_redis.dupefilter module
+------------------------------
+
+.. automodule:: scrapy_redis.dupefilter
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+scrapy_redis.pipelines module
+-----------------------------
+
+.. automodule:: scrapy_redis.pipelines
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+scrapy_redis.queue module
+-------------------------
+
+.. automodule:: scrapy_redis.queue
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+scrapy_redis.scheduler module
+-----------------------------
+
+.. automodule:: scrapy_redis.scheduler
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+scrapy_redis.spiders module
+---------------------------
+
+.. automodule:: scrapy_redis.spiders
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: scrapy_redis
+    :members:
+    :undoc-members:
+    :show-inheritance:
@@ -0,0 +1,5 @@
+#@IgnoreInspection BashAddShebang
+# Python 3 base image: the example spiders use Python 3-only syntax
+# (argument-less super()) and scrapy-redis requires Python 3.7+.
+# The explicit steps below spell out what the retired "-onbuild" image
+# variant did implicitly (copy requirements, install them, copy the sources).
+FROM python:3.11
+
+WORKDIR /usr/src/app
+
+# Install dependencies first so this layer is cached across source changes.
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+ENTRYPOINT ["scrapy"]
+CMD ["crawl", "dmoz"]
@@ -0,0 +1,154 @@
+============================
+Scrapy Redis Example Project
+============================
+
+
+This directory contains an example Scrapy project integrated with scrapy-redis.
+By default, all items are sent to redis (key ``<spider>:items``). All spiders
+schedule requests through redis, so you can start additional spiders to speed
+up the crawling.
+
+Spiders
+-------
+
+* **dmoz**
+
+  This spider simply scrapes dmoz.org.
+
+* **myspider_redis**
+
+  This spider uses redis as a shared requests queue and uses
+  ``myspider:start_urls`` as start URLs seed. For each URL, the spider outputs
+  one item.
+
+* **mycrawler_redis**
+
+  This spider uses redis as a shared requests queue and uses
+  ``mycrawler:start_urls`` as start URLs seed. For each URL, the spider follows
+  all links.
+
+
+.. note::
+
+    All requests are persisted by default. You can clear the queue by using the
+    ``SCHEDULER_FLUSH_ON_START`` setting. For example: ``scrapy crawl dmoz -s
+    SCHEDULER_FLUSH_ON_START=1``.
+
+
+Running the example project
+---------------------------
+
+This example illustrates how to share a spider's requests queue
+across multiple spider instances, highly suitable for broad crawls.
+
+1. Make sure the scrapy_redis package is in your ``PYTHONPATH``
+
+2. Run the crawler for the first time, then stop it
+
+.. code-block:: bash
+
+    cd example-project
+    scrapy crawl dmoz
+    ... [dmoz] ...
+    ^C
+
+3. Run the crawler again to resume stopped crawling
+
+.. code-block:: bash
+
+    scrapy crawl dmoz
+    ... [dmoz] DEBUG: Resuming crawl (9019 requests scheduled)
+
+4. Start one or more additional scrapy crawlers
+
+.. code-block:: bash
+
+    scrapy crawl dmoz
+    ... [dmoz] DEBUG: Resuming crawl (8712 requests scheduled)
+
+5. Start one or more post-processing workers
+
+.. code-block:: bash
+
+    python process_items.py dmoz:items -v
+    ...
+    Processing: Kilani Giftware (http://www.dmoz.org/Computers/Shopping/Gifts/)
+    Processing: NinjaGizmos.com (http://www.dmoz.org/Computers/Shopping/Gifts/)
+    ...
+
+
+Feeding a Spider from Redis
+---------------------------
+
+The class ``scrapy_redis.spiders.RedisSpider`` enables a spider to read the
+urls from redis. The urls in the redis queue will be processed one
+after another, if the first request yields more requests, the spider
+will process those requests before fetching another url from redis.
+
+For example, create a file ``myspider.py`` with the code below:
+
+.. code-block:: python
+
+    from scrapy_redis.spiders import RedisSpider
+
+
+    class MySpider(RedisSpider):
+        name = "myspider"
+
+        def parse(self, response):
+            # do stuff
+            pass
+
+
+Then:
+
+1. run the spider
+
+.. code-block:: bash
+
+    scrapy runspider myspider.py
+
+2. push json data to redis
+
+.. code-block:: bash
+
+    redis-cli lpush myspider '{"url": "https://example.com", "meta": {"job-id":"123xsd", "start-date":"dd/mm/yy"}, "url_cookie_key":"fertxsas" }'
+
+
+.. note::
+
+    * These spiders rely on the spider idle signal to fetch start urls, hence
+      there may be a few seconds of delay between the time you push a new url
+      and the time the spider starts crawling it.
+
+    * Also, please pay attention to the JSON formatting.
+
+
+Processing items
+----------------
+
+The ``process_items.py`` script provides an example of consuming the items queue:
+
+.. code-block:: bash
+
+    python process_items.py --help
+
+
+Run via Docker
+--------------
+
+You require the following applications:
+
+* docker (https://docs.docker.com/installation/)
+* docker-compose (https://docs.docker.com/compose/install/)
+
+For implementation details see `Dockerfile` and `docker-compose.yml` and read
+official docker documentation.
+
+1. To start sample `example-project` (`-d` for daemon)::
+
+    docker-compose up
+
+2. To scale `crawler` (4 instances for example)::
+
+    docker-compose scale crawler=4
@@ -0,0 +1,9 @@
+redis:
+  image: redis
+  ports:
+   - "6379:6379" # added port for external db provisioning
+
+crawler:
+  build: .
+  links:
+    - redis:localhost
@@ -0,0 +1,24 @@
+# Define here the models for your scraped items
+#
+# See documentation in:
+# http://doc.scrapy.org/topics/items.html
+
+from scrapy.item import Field, Item
+from scrapy.loader import ItemLoader
+from scrapy.loader.processors import Join, MapCompose, TakeFirst
+
+
+# Item schema for the example project: one Field per supported key.
+class ExampleItem(Item):
+    name = Field()
+    description = Field()
+    link = Field()
+    # "crawled" and "spider" are the keys ExamplePipeline.process_item assigns.
+    crawled = Field()
+    spider = Field()
+    url = Field()
+
+
+# Item loader that populates ExampleItem instances.
+class ExampleLoader(ItemLoader):
+    default_item_class = ExampleItem
+    # Every input value is passed through .strip() to drop surrounding whitespace.
+    default_input_processor = MapCompose(lambda s: s.strip())
+    # Output processor applied to fields that do not declare their own.
+    default_output_processor = TakeFirst()
+    # Field-specific output processor for "description" (Scrapy's
+    # "<field>_out" naming convention), used instead of the default above.
+    description_out = Join()
@@ -0,0 +1,12 @@
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: http://doc.scrapy.org/topics/item-pipeline.html
+from datetime import datetime
+
+
+class ExamplePipeline:
+    def process_item(self, item, spider):
+        item["crawled"] = datetime.utcnow()
+        item["spider"] = spider.name
+        return item
@@ -0,0 +1,37 @@
+# Scrapy settings for example project
+#
+# For simplicity, this file contains only the most important settings by
+# default. All the other settings are documented here:
+#
+#     http://doc.scrapy.org/topics/settings.html
+#
+SPIDER_MODULES = ["example.spiders"]
+NEWSPIDER_MODULE = "example.spiders"
+
+LOG_LEVEL = "WARNING"
+
+USER_AGENT = "scrapy-redis (+https://github.com/rolando/scrapy-redis)"
+
+#设置重复过滤器模块
+DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
+#设置调度器，scrapy_redis具备与数据库交互的功能
+SCHEDULER = "scrapy_redis.scheduler.Scheduler"
+#设置当爬虫结束时是否保持redis数据库中的去重集合与任务队列
+SCHEDULER_PERSIST = True
+# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderPriorityQueue"
+# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderQueue"
+# SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.SpiderStack"
+
+ITEM_PIPELINES = {
+    "example.pipelines.ExamplePipeline": 300,
+    #当开启该管道，该管道将会把数据存到redis数据库中
+    "scrapy_redis.pipelines.RedisPipeline": 400,
+}
+#设置redis数据库
+REDIS_URL = "redis://127.0.0.1:6379"
+
+LOG_LEVEL = "DEBUG"
+
+# Introduce an artifical delay to make use of parallelism. to speed up the
+# crawl.
+DOWNLOAD_DELAY = 1
@@ -0,0 +1,8 @@
+# This package will contain the spiders of your Scrapy project
+#
+# To create the first spider for your project use this command:
+#
+#   scrapy genspider myspider myspider-domain.com
+#
+# For more info see:
+# http://doc.scrapy.org/topics/spiders.html
@@ -0,0 +1,26 @@
+from scrapy.linkextractors import LinkExtractor
+from scrapy.spiders import CrawlSpider, Rule
+
+
+class DmozSpider(CrawlSpider):
+    """Follow categories and extract links."""
+
+    name = "dmoz"
+    allowed_domains = ["dmoztools.net"]
+    start_urls = ["http://www.dmoztools.net/"]
+
+    rules = [
+        Rule(
+            LinkExtractor(restrict_css=(".top-cat", ".sub-cat", ".cat-item")),
+            callback="parse_directory",
+            follow=True,
+        ),
+    ]
+
+    def parse_directory(self, response):
+        for div in response.css(".title-and-desc"):
+            yield {
+                "name": div.css(".site-title::text").extract_first(),
+                "description": div.css(".site-descr::text").extract_first().strip(),
+                "link": div.css("a::attr(href)").extract_first(),
+            }
@@ -0,0 +1,28 @@
+from scrapy.linkextractors import LinkExtractor
+from scrapy.spiders import Rule
+
+from scrapy_redis.spiders import RedisCrawlSpider
+
+
+class MyCrawler(RedisCrawlSpider):
+    """Spider that reads urls from redis queue (myspider:start_urls)."""
+
+    name = "mycrawler_redis"
+    redis_key = "mycrawler:start_urls"
+
+    rules = (
+        # follow all links
+        Rule(LinkExtractor(), callback="parse_page", follow=True),
+    )
+
+    def __init__(self, *args, **kwargs):
+        # Dynamically define the allowed domains list.
+        domain = kwargs.pop("domain", "")
+        self.allowed_domains = filter(None, domain.split(","))
+        super().__init__(*args, **kwargs)
+
+    def parse_page(self, response):
+        return {
+            "name": response.css("title::text").extract_first(),
+            "url": response.url,
+        }
@@ -0,0 +1,20 @@
+from scrapy_redis.spiders import RedisSpider
+
+
+class MySpider(RedisSpider):
+    """Spider that reads urls from redis queue (myspider:start_urls)."""
+
+    name = "myspider_redis"
+    redis_key = "myspider:start_urls"
+
+    def __init__(self, *args, **kwargs):
+        # Dynamically define the allowed domains list.
+        domain = kwargs.pop("domain", "")
+        self.allowed_domains = filter(None, domain.split(","))
+        super().__init__(*args, **kwargs)
+
+    def parse(self, response):
+        return {
+            "name": response.css("title::text").extract_first(),
+            "url": response.url,
+        }
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+
+# -*- coding: utf-8 -*-
+"""A script to process items from a redis queue."""
+
+import argparse
+import json
+import logging
+import pprint
+import sys
+import time
+
+from scrapy_redis import get_redis
+
+logger = logging.getLogger("process_items")
+
+
+def process_items(r, keys, timeout, limit=0, log_every=1000, wait=0.1):
+    """Process items from a redis queue.
+
+    Parameters
+    ----------
+    r : Redis
+        Redis connection instance.
+    keys : list
+        List of keys to read the items from.
+    timeout: int
+        Read timeout.
+
+    """
+    limit = limit or float("inf")
+    processed = 0
+    while processed < limit:
+        # Change ``blpop`` to ``brpop`` to process as LIFO.
+        ret = r.blpop(keys, timeout)
+        # If data is found before the timeout then we consider we are done.
+        if ret is None:
+            time.sleep(wait)
+            continue
+
+        source, data = ret
+        try:
+            item = json.loads(data)
+        except Exception:
+            logger.exception("Failed to load item:\n%r", pprint.pformat(data))
+            continue
+
+        try:
+            name = item.get("name") or item.get("title")
+            url = item.get("url") or item.get("link")
+            logger.debug("[%s] Processing item: %s <%s>", source, name, url)
+        except KeyError:
+            logger.exception(
+                "[%s] Failed to process item:\n%r", source, pprint.pformat(item)
+            )
+            continue
+
+        processed += 1
+        if processed % log_every == 0:
+            logger.info("Processed %s items", processed)
+
+
+def main():
+    """Parse the command line, connect to redis and run ``process_items``.
+
+    Returns
+    -------
+    int
+        ``0`` on normal completion or keyboard interrupt, ``2`` when an
+        unhandled exception was logged.
+
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("key", help="Redis key where items are stored")
+    parser.add_argument("--host")
+    parser.add_argument("--port")
+    parser.add_argument("--timeout", type=int, default=5)
+    parser.add_argument("--limit", type=int, default=0)
+    parser.add_argument("--progress-every", type=int, default=100)
+    parser.add_argument("-v", "--verbose", action="store_true")
+
+    args = parser.parse_args()
+
+    # Only forward connection parameters that were actually given.
+    params = {}
+    if args.host:
+        params["host"] = args.host
+    if args.port:
+        params["port"] = args.port
+
+    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+
+    r = get_redis(**params)
+    # NOTE(review): the connection obtained here is only used to read ``host``
+    # and is not released back to the pool in this function - confirm intent.
+    host = r.connection_pool.get_connection("info").host
+    logger.info("Waiting for items in '%s' (server: %s)", args.key, host)
+    kwargs = {
+        "keys": [args.key],
+        "timeout": args.timeout,
+        "limit": args.limit,
+        "log_every": args.progress_every,
+    }
+    try:
+        process_items(r, **kwargs)
+        retcode = 0  # ok
+    except KeyboardInterrupt:
+        retcode = 0  # ok
+    except Exception:
+        logger.exception("Unhandled exception")
+        retcode = 2
+
+    return retcode
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,2 @@
+scrapy
+scrapy-redis
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# http://doc.scrapy.org/topics/scrapyd.html
+
+[settings]
+default = example.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = example
@@ -0,0 +1,125 @@
+[MASTER]
+persistent=no
+jobs=1  # >1 hides results
+suggestion-mode=yes  # guess common misconfiguration and emit user-friendly hints
+py-version = 3.11.3
+
+[MESSAGES CONTROL]
+disable=abstract-method,
+        anomalous-backslash-in-string,
+        arguments-differ,
+        arguments-renamed,
+        attribute-defined-outside-init,
+        bad-classmethod-argument,
+        bad-continuation,
+        bad-indentation,
+        bad-mcs-classmethod-argument,
+        bad-super-call,
+        bad-whitespace,
+        bare-except,
+        blacklisted-name,
+        broad-except,
+        c-extension-no-member,
+        catching-non-exception,
+        cell-var-from-loop,
+        comparison-with-callable,
+        consider-iterating-dictionary,
+        consider-using-dict-items,
+        consider-using-from-import,
+        consider-using-in,
+        consider-using-set-comprehension,
+        consider-using-sys-exit,
+        consider-using-with,
+        cyclic-import,
+        dangerous-default-value,
+        deprecated-method,
+        deprecated-module,
+        duplicate-code,  # https://github.com/PyCQA/pylint/issues/214
+        eval-used,
+        expression-not-assigned,
+        fixme,
+        function-redefined,
+        global-statement,
+        import-error,
+        import-outside-toplevel,
+        import-self,
+        inconsistent-return-statements,
+        inherit-non-class,
+        invalid-name,
+        invalid-overridden-method,
+        isinstance-second-argument-not-valid-type,
+        keyword-arg-before-vararg,
+        line-too-long,
+        logging-format-interpolation,
+        logging-not-lazy,
+        lost-exception,
+        method-hidden,
+        misplaced-comparison-constant,
+        missing-docstring,
+        missing-final-newline,
+        multiple-imports,
+        multiple-statements,
+        no-else-continue,
+        no-else-raise,
+        no-else-return,
+        no-init,
+        no-member,
+        no-method-argument,
+        no-name-in-module,
+        no-self-argument,
+        no-self-use,
+        no-value-for-parameter,
+        not-an-iterable,
+        not-callable,
+        pointless-statement,
+        pointless-string-statement,
+        protected-access,
+        raise-missing-from,
+        redefined-argument-from-local,
+        redefined-builtin,
+        redefined-outer-name,
+        reimported,
+        signature-differs,
+        singleton-comparison,
+        super-init-not-called,
+        super-with-arguments,
+        superfluous-parens,
+        too-few-public-methods,
+        too-many-ancestors,
+        too-many-arguments,
+        too-many-branches,
+        too-many-format-args,
+        too-many-function-args,
+        too-many-instance-attributes,
+        too-many-lines,
+        too-many-locals,
+        too-many-public-methods,
+        too-many-return-statements,
+        trailing-newlines,
+        trailing-whitespace,
+        unbalanced-tuple-unpacking,
+        undefined-variable,
+        undefined-loop-variable,
+        unexpected-special-method-signature,
+        ungrouped-imports,
+        unidiomatic-typecheck,
+        unnecessary-comprehension,
+        unnecessary-lambda,
+        unnecessary-pass,
+        unreachable,
+        unspecified-encoding,
+        unsupported-assignment-operation,
+        unsubscriptable-object,
+        unused-argument,
+        unused-import,
+        unused-private-member,
+        unused-variable,
+        unused-wildcard-import,
+        use-implicit-booleaness-not-comparison,
+        used-before-assignment,
+        useless-object-inheritance,  # Required for Python 2 support
+        useless-return,
+        useless-super-delegation,
+        wildcard-import,
+        wrong-import-order,
+        wrong-import-position
@@ -0,0 +1,11 @@
+[pytest]
+norecursedirs =
+    .*
+    dist
+    build
+python_files =
+    test_*.py
+    *_test.py
+    tests.py
+addopts =
+    -rxEfsw -v
@@ -0,0 +1,6 @@
+# These packages are required to run all the tests.
+flake8
+mock
+pytest>=6.0,<7
+pytest-cov
+tox>=4.0,<5
@@ -0,0 +1,3 @@
+scrapy>=2.6.0
+redis>=4.2
+six>=1.15
@@ -0,0 +1,6 @@
+[wheel]
+universal = 1
+
+[flake8]
+exclude = docs, tests
+max-line-length = 120
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+import io
+from pkgutil import walk_packages
+
+from setuptools import setup
+
+
+def find_packages(path):
+    """Return the names that ``walk_packages([path])`` flags as packages."""
+    # This method returns packages and subpackages as well.
+    return [name for _, name, is_pkg in walk_packages([path]) if is_pkg]
+
+
+def read_file(filename):
+    with open(filename) as fp:
+        return fp.read().strip()
+
+
+def read_rst(filename):
+    """Return the content of ``filename`` without ``.. comment::`` lines."""
+    # Ignore unsupported directives by pypi.
+    content = read_file(filename)
+    # Iterating the StringIO yields lines with their newline kept, so joining
+    # with "" reassembles the remaining text unchanged.
+    return "".join(
+        line for line in io.StringIO(content) if not line.startswith(".. comment::")
+    )
+
+
+def read_requirements(filename):
+    return [
+        line.strip()
+        for line in read_file(filename).splitlines()
+        if not line.startswith("#")
+    ]
+
+
+setup(
+    name="scrapy-redis",
+    version=read_file("VERSION"),
+    description="Redis-based components for Scrapy.",
+    long_description=read_rst("README.rst") + "\n\n" + read_rst("HISTORY.rst"),
+    author="R Max Espinoza",
+    author_email="hey@rmax.dev",
+    url="https://github.com/rmax/scrapy-redis",
+    packages=list(find_packages("src")),
+    package_dir={"": "src"},
+    install_requires=read_requirements("requirements.txt"),
+    include_package_data=True,
+    license="MIT",
+    keywords="scrapy-redis",
+    classifiers=[
+        "Development Status :: 4 - Beta",
+        "Intended Audience :: Developers",
+        "License :: OSI Approved :: MIT License",
+        "Natural Language :: English",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+    ],
+)
@@ -0,0 +1,5 @@
+from .connection import get_redis, get_redis_from_settings  # NOQA
+
+__author__ = "R Max Espinoza"
+__email__ = "hey at rmax.dev"
+__version__ = "0.9.1"
@@ -0,0 +1,97 @@
+from scrapy.utils.misc import load_object
+
+from . import defaults
+
+# Shortcut maps 'setting name' -> 'parameter name'.
+SETTINGS_PARAMS_MAP = {
+    "REDIS_URL": "url",
+    "REDIS_HOST": "host",
+    "REDIS_PORT": "port",
+    "REDIS_DB": "db",
+    "REDIS_ENCODING": "encoding",
+}
+
+SETTINGS_PARAMS_MAP["REDIS_DECODE_RESPONSES"] = "decode_responses"
+
+
+def get_redis_from_settings(settings):
+    """Returns a redis client instance from given Scrapy settings object.
+
+    This function uses ``get_redis`` to instantiate the client and uses
+    ``defaults.REDIS_PARAMS`` global as defaults values for the parameters. You
+    can override them using the ``REDIS_PARAMS`` setting.
+
+    Parameters
+    ----------
+    settings : Settings
+        A scrapy settings object. See the supported settings below.
+
+    Returns
+    -------
+    server
+        Redis client instance.
+
+    Other Parameters
+    ----------------
+    REDIS_URL : str, optional
+        Server connection URL.
+    REDIS_HOST : str, optional
+        Server host.
+    REDIS_PORT : str, optional
+        Server port.
+    REDIS_DB : int, optional
+        Server database
+    REDIS_ENCODING : str, optional
+        Data encoding.
+    REDIS_PARAMS : dict, optional
+        Additional client parameters.
+
+    Python 3 Only
+    ----------------
+    REDIS_DECODE_RESPONSES : bool, optional
+        Sets the `decode_responses` kwarg in Redis cls ctor
+
+    """
+    # Copy so the module-level defaults dict is never mutated.
+    params = defaults.REDIS_PARAMS.copy()
+    params.update(settings.getdict("REDIS_PARAMS"))
+    # XXX: Deprecate REDIS_* settings.
+    for source, dest in SETTINGS_PARAMS_MAP.items():
+        val = settings.get(source)
+        # Falsy values (None, "", 0, False) do not override ``params``.
+        if val:
+            params[dest] = val
+
+    # Allow ``redis_cls`` to be a path to a class.
+    if isinstance(params.get("redis_cls"), str):
+        params["redis_cls"] = load_object(params["redis_cls"])
+
+    return get_redis(**params)
+
+
+# Backwards compatible alias.
+from_settings = get_redis_from_settings
+
+
+def get_redis(**kwargs):
+    """Returns a redis client instance.
+
+    Parameters
+    ----------
+    redis_cls : class, optional
+        Defaults to ``redis.StrictRedis``.
+    url : str, optional
+        If given, ``redis_cls.from_url`` is used to instantiate the class.
+    **kwargs
+        Extra parameters to be passed to the ``redis_cls`` class.
+
+    Returns
+    -------
+    server
+        Redis client instance.
+
+    """
+    redis_cls = kwargs.pop("redis_cls", defaults.REDIS_CLS)
+    url = kwargs.pop("url", None)
+    if url:
+        return redis_cls.from_url(url, **kwargs)
+    else:
+        return redis_cls(**kwargs)
@@ -0,0 +1,29 @@
+import redis
+
+# For standalone use.
+# Key template; RFPDupeFilter.from_settings fills in ``timestamp``.
+DUPEFILTER_KEY = "dupefilter:%(timestamp)s"
+
+# Default key of RedisPipeline; formatted with the spider name.
+PIPELINE_KEY = "%(spider)s:items"
+
+STATS_KEY = "%(spider)s:stats"
+
+# Client class used by connection.get_redis when ``redis_cls`` is not given.
+REDIS_CLS = redis.StrictRedis
+REDIS_ENCODING = "utf-8"
+# Sane connection defaults.
+REDIS_PARAMS = {
+    "socket_timeout": 30,
+    "socket_connect_timeout": 30,
+    "retry_on_timeout": True,
+    "encoding": REDIS_ENCODING,
+}
+# Fallback for the CONCURRENT_REQUESTS setting when RedisMixin.setup_redis
+# determines ``redis_batch_size``.
+REDIS_CONCURRENT_REQUESTS = 16
+
+# Defaults for the Scheduler constructor arguments of the same purpose.
+SCHEDULER_QUEUE_KEY = "%(spider)s:requests"
+SCHEDULER_QUEUE_CLASS = "scrapy_redis.queue.PriorityQueue"
+SCHEDULER_DUPEFILTER_KEY = "%(spider)s:dupefilter"
+SCHEDULER_DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
+SCHEDULER_PERSIST = False
+# Fallback for the REDIS_START_URLS_KEY setting; formatted with the spider name.
+START_URLS_KEY = "%(name)s:start_urls"
+START_URLS_AS_SET = False
+START_URLS_AS_ZSET = False
+MAX_IDLE_TIME = 0
@@ -0,0 +1,169 @@
+import hashlib
+import json
+import logging
+import time
+
+from scrapy.dupefilters import BaseDupeFilter
+from scrapy.utils.python import to_unicode
+from w3lib.url import canonicalize_url
+
+from . import defaults
+from .connection import get_redis_from_settings
+
+logger = logging.getLogger(__name__)
+
+
+# TODO: Rename class to RedisDupeFilter.
+class RFPDupeFilter(BaseDupeFilter):
+    """Redis-based request duplicates filter.
+
+    This class can also be used with default Scrapy's scheduler.
+
+    """
+
+    logger = logger
+
+    def __init__(self, server, key, debug=False):
+        """Initialize the duplicates filter.
+
+        Parameters
+        ----------
+        server : redis.StrictRedis
+            The redis server instance.
+        key : str
+            Redis key Where to store fingerprints.
+        debug : bool, optional
+            Whether to log filtered requests.
+
+        """
+        self.server = server
+        self.key = key
+        self.debug = debug
+        self.logdupes = True
+
+    @classmethod
+    def from_settings(cls, settings):
+        """Returns an instance from given settings.
+
+        This uses by default the key ``dupefilter:<timestamp>``. When using the
+        ``scrapy_redis.scheduler.Scheduler`` class, this method is not used as
+        it needs to pass the spider name in the key.
+
+        Parameters
+        ----------
+        settings : scrapy.settings.Settings
+
+        Returns
+        -------
+        RFPDupeFilter
+            A RFPDupeFilter instance.
+
+
+        """
+        server = get_redis_from_settings(settings)
+        # XXX: This creates one-time key. needed to support to use this
+        # class as standalone dupefilter with scrapy's default scheduler
+        # if scrapy passes spider on open() method this wouldn't be needed
+        # TODO: Use SCRAPY_JOB env as default and fallback to timestamp.
+        key = defaults.DUPEFILTER_KEY % {"timestamp": int(time.time())}
+        debug = settings.getbool("DUPEFILTER_DEBUG")
+        return cls(server, key=key, debug=debug)
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        """Returns instance from crawler.
+
+        Parameters
+        ----------
+        crawler : scrapy.crawler.Crawler
+
+        Returns
+        -------
+        RFPDupeFilter
+            Instance of RFPDupeFilter.
+
+        """
+        return cls.from_settings(crawler.settings)
+
+    def request_seen(self, request):
+        """Returns True if request was already seen.
+
+        Parameters
+        ----------
+        request : scrapy.http.Request
+
+        Returns
+        -------
+        bool
+
+        """
+        fp = self.request_fingerprint(request)
+        # This returns the number of values added, zero if already exists.
+        added = self.server.sadd(self.key, fp)
+        return added == 0
+
+    def request_fingerprint(self, request):
+        """Returns a fingerprint for a given request.
+
+        Parameters
+        ----------
+        request : scrapy.http.Request
+
+        Returns
+        -------
+        str
+
+        """
+        fingerprint_data = {
+            "method": to_unicode(request.method),
+            "url": canonicalize_url(request.url),
+            "body": (request.body or b"").hex(),
+        }
+        fingerprint_json = json.dumps(fingerprint_data, sort_keys=True)
+        return hashlib.sha1(fingerprint_json.encode()).hexdigest()
+
+    @classmethod
+    def from_spider(cls, spider):
+        """Returns an instance configured from the spider's settings.
+
+        The key comes from the ``SCHEDULER_DUPEFILTER_KEY`` setting (falling
+        back to ``defaults.SCHEDULER_DUPEFILTER_KEY``) formatted with the
+        spider name.
+
+        Parameters
+        ----------
+        spider : scrapy.spiders.Spider
+
+        Returns
+        -------
+        RFPDupeFilter
+
+        """
+        settings = spider.settings
+        server = get_redis_from_settings(settings)
+        dupefilter_key = settings.get(
+            "SCHEDULER_DUPEFILTER_KEY", defaults.SCHEDULER_DUPEFILTER_KEY
+        )
+        key = dupefilter_key % {"spider": spider.name}
+        debug = settings.getbool("DUPEFILTER_DEBUG")
+        return cls(server, key=key, debug=debug)
+
+    def close(self, reason=""):
+        """Delete data on close. Called by Scrapy's scheduler.
+
+        Parameters
+        ----------
+        reason : str, optional
+
+        """
+        self.clear()
+
+    def clear(self):
+        """Clears fingerprints data."""
+        self.server.delete(self.key)
+
+    def log(self, request, spider):
+        """Logs given request.
+
+        Parameters
+        ----------
+        request : scrapy.http.Request
+        spider : scrapy.spiders.Spider
+
+        """
+        if self.debug:
+            msg = "Filtered duplicate request: %(request)s"
+            self.logger.debug(msg, {"request": request}, extra={"spider": spider})
+        elif self.logdupes:
+            msg = (
+                "Filtered duplicate request %(request)s"
+                " - no more duplicates will be shown"
+                " (see DUPEFILTER_DEBUG to show all duplicates)"
+            )
+            self.logger.debug(msg, {"request": request}, extra={"spider": spider})
+            self.logdupes = False
@@ -0,0 +1,14 @@
+"""A pickle wrapper module with protocol=-1 by default."""
+
+try:
+    import cPickle as pickle  # PY2
+except ImportError:
+    import pickle
+
+
+def loads(s):
+    """Deserialize ``s`` with ``pickle.loads`` and return the object."""
+    # NOTE(review): unpickling can execute arbitrary code; only safe when the
+    # data source is trusted - confirm who can write to the store being read.
+    return pickle.loads(s)
+
+
+def dumps(obj):
+    """Serialize ``obj`` with ``pickle.dumps`` using ``protocol=-1``."""
+    return pickle.dumps(obj, protocol=-1)
@@ -0,0 +1,73 @@
+from scrapy.utils.misc import load_object
+from scrapy.utils.serialize import ScrapyJSONEncoder
+from twisted.internet.threads import deferToThread
+
+from . import connection, defaults
+
+default_serialize = ScrapyJSONEncoder().encode
+
+
+class RedisPipeline:
+    """Pushes serialized item into a redis list/queue
+
+    Settings
+    --------
+    REDIS_ITEMS_KEY : str
+        Redis key where to store items.
+    REDIS_ITEMS_SERIALIZER : str
+        Object path to serializer function.
+
+    """
+
+    def __init__(
+        self, server, key=defaults.PIPELINE_KEY, serialize_func=default_serialize
+    ):
+        """Initialize pipeline.
+
+        Parameters
+        ----------
+        server : StrictRedis
+            Redis client instance.
+        key : str
+            Redis key where to store items.
+        serialize_func : callable
+            Items serializer function.
+
+        """
+        self.server = server
+        self.key = key
+        self.serialize = serialize_func
+
+    @classmethod
+    def from_settings(cls, settings):
+        """Build a pipeline from ``settings``.
+
+        ``REDIS_ITEMS_KEY`` and ``REDIS_ITEMS_SERIALIZER`` override the
+        constructor defaults only when they are set to a truthy value.
+        """
+        params = {
+            "server": connection.from_settings(settings),
+        }
+        if settings.get("REDIS_ITEMS_KEY"):
+            params["key"] = settings["REDIS_ITEMS_KEY"]
+        if settings.get("REDIS_ITEMS_SERIALIZER"):
+            # The setting holds an object path; resolve it to the callable.
+            params["serialize_func"] = load_object(settings["REDIS_ITEMS_SERIALIZER"])
+
+        return cls(**params)
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        """Build a pipeline from ``crawler.settings``."""
+        return cls.from_settings(crawler.settings)
+
+    def process_item(self, item, spider):
+        """Run ``_process_item`` through ``deferToThread`` and return its result."""
+        return deferToThread(self._process_item, item, spider)
+
+    def _process_item(self, item, spider):
+        """Serialize ``item``, ``rpush`` it to its redis key and return the item."""
+        key = self.item_key(item, spider)
+        data = self.serialize(item)
+        self.server.rpush(key, data)
+        return item
+
+    def item_key(self, item, spider):
+        """Returns redis key based on given spider.
+
+        Override this function to use a different key depending on the item
+        and/or spider.
+
+        """
+        return self.key % {"spider": spider.name}
@@ -0,0 +1,155 @@
+try:
+    from scrapy.utils.request import request_from_dict
+except ImportError:
+    from scrapy.utils.reqser import request_to_dict, request_from_dict
+
+from . import picklecompat
+
+
+class Base:
+    """Per-spider base queue class"""
+
+    def __init__(self, server, spider, key, serializer=None):
+        """Initialize per-spider redis queue.
+
+        Parameters
+        ----------
+        server : StrictRedis
+            Redis client instance.
+        spider : Spider
+            Scrapy spider instance.
+        key: str
+            Redis key where to put and get messages.
+        serializer : object
+            Serializer object with ``loads`` and ``dumps`` methods.
+
+        """
+        if serializer is None:
+            # Backward compatibility.
+            # TODO: deprecate pickle.
+            serializer = picklecompat
+        if not hasattr(serializer, "loads"):
+            raise TypeError(
+                f"serializer does not implement 'loads' function: {serializer}"
+            )
+        if not hasattr(serializer, "dumps"):
+            raise TypeError(
+                f"serializer does not implement 'dumps' function: {serializer}"
+            )
+
+        self.server = server
+        self.spider = spider
+        self.key = key % {"spider": spider.name}
+        self.serializer = serializer
+
+    def _encode_request(self, request):
+        """Encode a request object
+
+        The request is converted to a dict and serialized with
+        ``self.serializer.dumps``.
+        """
+        try:
+            obj = request.to_dict(spider=self.spider)
+        except AttributeError:
+            # NOTE(review): ``request_to_dict`` is only imported by the
+            # ImportError fallback at the top of this module - confirm it is
+            # defined whenever this branch can run.
+            obj = request_to_dict(request, self.spider)
+        return self.serializer.dumps(obj)
+
+    def _decode_request(self, encoded_request):
+        """Decode an request previously encoded"""
+        obj = self.serializer.loads(encoded_request)
+        return request_from_dict(obj, spider=self.spider)
+
+    def __len__(self):
+        """Return the length of the queue"""
+        raise NotImplementedError
+
+    def push(self, request):
+        """Push a request"""
+        raise NotImplementedError
+
+    def pop(self, timeout=0):
+        """Pop a request"""
+        raise NotImplementedError
+
+    def clear(self):
+        """Clear queue/stack"""
+        self.server.delete(self.key)
+
+
+class FifoQueue(Base):
+    """Per-spider FIFO queue"""
+
+    def __len__(self):
+        """Return the length of the queue"""
+        return self.server.llen(self.key)
+
+    def push(self, request):
+        """Push a request"""
+        self.server.lpush(self.key, self._encode_request(request))
+
+    def pop(self, timeout=0):
+        """Pop a request"""
+        if timeout > 0:
+            data = self.server.brpop(self.key, timeout)
+            if isinstance(data, tuple):
+                data = data[1]
+        else:
+            data = self.server.rpop(self.key)
+        if data:
+            return self._decode_request(data)
+
+
+class PriorityQueue(Base):
+    """Per-spider priority queue abstraction using redis' sorted set"""
+
+    def __len__(self):
+        """Return the length of the queue"""
+        return self.server.zcard(self.key)
+
+    def push(self, request):
+        """Push a request"""
+        data = self._encode_request(request)
+        # The score is the negated priority of the request.
+        score = -request.priority
+        # We don't use zadd method as the order of arguments change depending on
+        # whether the class is Redis or StrictRedis, and the option of using
+        # kwargs only accepts strings, not bytes.
+        self.server.execute_command("ZADD", self.key, score, data)
+
+    def pop(self, timeout=0):
+        """
+        Pop a request.
+
+        The ``timeout`` argument is not supported by this queue class.
+        """
+        # use atomic range/remove using multi/exec
+        pipe = self.server.pipeline()
+        pipe.multi()
+        pipe.zrange(self.key, 0, 0).zremrangebyrank(self.key, 0, 0)
+        # One result per queued command: the range reply, then the remove count.
+        results, count = pipe.execute()
+        if results:
+            return self._decode_request(results[0])
+
+
+class LifoQueue(Base):
+    """Per-spider LIFO queue."""
+
+    def __len__(self):
+        """Return the length of the stack"""
+        return self.server.llen(self.key)
+
+    def push(self, request):
+        """Push a request"""
+        self.server.lpush(self.key, self._encode_request(request))
+
+    def pop(self, timeout=0):
+        """Pop a request"""
+        if timeout > 0:
+            data = self.server.blpop(self.key, timeout)
+            if isinstance(data, tuple):
+                data = data[1]
+        else:
+            data = self.server.lpop(self.key)
+
+        if data:
+            return self._decode_request(data)
+
+
+# TODO: Deprecate the use of these names.
+SpiderQueue = FifoQueue
+SpiderStack = LifoQueue
+SpiderPriorityQueue = PriorityQueue
@@ -0,0 +1,182 @@
+import importlib
+
+from scrapy.utils.misc import load_object
+
+from . import connection, defaults
+
+
+# TODO: add SCRAPY_JOB support.
+class Scheduler:
+    """Redis-based scheduler
+
+    Settings
+    --------
+    SCHEDULER_PERSIST : bool (default: False)
+        Whether to persist or clear redis queue.
+    SCHEDULER_FLUSH_ON_START : bool (default: False)
+        Whether to flush redis queue on start.
+    SCHEDULER_IDLE_BEFORE_CLOSE : int (default: 0)
+        How many seconds to wait before closing if no message is received.
+    SCHEDULER_QUEUE_KEY : str
+        Scheduler redis key.
+    SCHEDULER_QUEUE_CLASS : str
+        Scheduler queue class.
+    SCHEDULER_DUPEFILTER_KEY : str
+        Scheduler dupefilter redis key.
+    SCHEDULER_DUPEFILTER_CLASS : str
+        Scheduler dupefilter class.
+    SCHEDULER_SERIALIZER : str
+        Scheduler serializer.
+
+    """
+
+    def __init__(
+        self,
+        server,
+        persist=False,
+        flush_on_start=False,
+        queue_key=defaults.SCHEDULER_QUEUE_KEY,
+        queue_cls=defaults.SCHEDULER_QUEUE_CLASS,
+        dupefilter=None,
+        dupefilter_key=defaults.SCHEDULER_DUPEFILTER_KEY,
+        dupefilter_cls=defaults.SCHEDULER_DUPEFILTER_CLASS,
+        idle_before_close=0,
+        serializer=None,
+    ):
+        """Initialize scheduler.
+
+        Parameters
+        ----------
+        server : Redis
+            The redis server instance.
+        persist : bool
+            Whether to keep the queue and dupefilter data when closing; they
+            are flushed on close only when this is false. Default is False.
+        flush_on_start : bool
+            Whether to flush requests on start. Default is False.
+        queue_key : str
+            Requests queue key.
+        queue_cls : str
+            Importable path to the queue class.
+        dupefilter: Dupefilter
+            Custom dupefilter instance.
+        dupefilter_key : str
+            Duplicates filter key.
+        dupefilter_cls : str
+            Importable path to the dupefilter class.
+        idle_before_close : int
+            Timeout passed to the queue's ``pop`` by ``next_request``.
+        serializer : object, optional
+            Serializer handed to the queue class when the spider is opened.
+
+        Raises
+        ------
+        TypeError
+            If ``idle_before_close`` is negative.
+
+        """
+        if idle_before_close < 0:
+            raise TypeError("idle_before_close cannot be negative")
+
+        self.server = server
+        self.persist = persist
+        self.flush_on_start = flush_on_start
+        self.queue_key = queue_key
+        self.queue_cls = queue_cls
+        self.df = dupefilter
+        self.dupefilter_cls = dupefilter_cls
+        self.dupefilter_key = dupefilter_key
+        self.idle_before_close = idle_before_close
+        self.serializer = serializer
+        # Only set by ``from_crawler``.
+        self.stats = None
+
+    def __len__(self):
+        return len(self.queue)
+
+    @classmethod
+    def from_settings(cls, settings):
+        kwargs = {
+            "persist": settings.getbool("SCHEDULER_PERSIST"),
+            "flush_on_start": settings.getbool("SCHEDULER_FLUSH_ON_START"),
+            "idle_before_close": settings.getint("SCHEDULER_IDLE_BEFORE_CLOSE"),
+        }
+
+        # If these values are missing, it means we want to use the defaults.
+        optional = {
+            # TODO: Use custom prefixes for this settings to note that are
+            # specific to scrapy-redis.
+            "queue_key": "SCHEDULER_QUEUE_KEY",
+            "queue_cls": "SCHEDULER_QUEUE_CLASS",
+            "dupefilter_key": "SCHEDULER_DUPEFILTER_KEY",
+            # We use the default setting name to keep compatibility.
+            "dupefilter_cls": "DUPEFILTER_CLASS",
+            "serializer": "SCHEDULER_SERIALIZER",
+        }
+        for name, setting_name in optional.items():
+            val = settings.get(setting_name)
+            if val:
+                kwargs[name] = val
+
+        dupefilter_cls = load_object(kwargs["dupefilter_cls"])
+        if not hasattr(dupefilter_cls, "from_spider"):
+            kwargs["dupefilter"] = dupefilter_cls.from_settings(settings)
+
+        # Support serializer as a path to a module.
+        if isinstance(kwargs.get("serializer"), str):
+            kwargs["serializer"] = importlib.import_module(kwargs["serializer"])
+
+        server = connection.from_settings(settings)
+        # Ensure the connection is working.
+        server.ping()
+
+        return cls(server=server, **kwargs)
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        instance = cls.from_settings(crawler.settings)
+        # FIXME: for now, stats are only supported from this constructor
+        instance.stats = crawler.stats
+        return instance
+
+    def open(self, spider):
+        self.spider = spider
+
+        try:
+            self.queue = load_object(self.queue_cls)(
+                server=self.server,
+                spider=spider,
+                key=self.queue_key % {"spider": spider.name},
+                serializer=self.serializer,
+            )
+        except TypeError as e:
+            raise ValueError(
+                f"Failed to instantiate queue class '{self.queue_cls}': {e}"
+            )
+
+        if not self.df:
+            self.df = load_object(self.dupefilter_cls).from_spider(spider)
+
+        if self.flush_on_start:
+            self.flush()
+        # notice if there are requests already in the queue to resume the crawl
+        if len(self.queue):
+            spider.log(f"Resuming crawl ({len(self.queue)} requests scheduled)")
+
+    def close(self, reason):
+        if not self.persist:
+            self.flush()
+
+    def flush(self):
+        self.df.clear()
+        self.queue.clear()
+
+    def enqueue_request(self, request):
+        if not request.dont_filter and self.df.request_seen(request):
+            self.df.log(request, self.spider)
+            return False
+        if self.stats:
+            self.stats.inc_value("scheduler/enqueued/redis", spider=self.spider)
+        self.queue.push(request)
+        return True
+
+    def next_request(self):
+        block_pop_timeout = self.idle_before_close
+        request = self.queue.pop(block_pop_timeout)
+        if request and self.stats:
+            self.stats.inc_value("scheduler/dequeued/redis", spider=self.spider)
+        return request
+
+    def has_pending_requests(self):
+        return len(self) > 0
@@ -0,0 +1,297 @@
+import json
+import time
+from collections.abc import Iterable
+
+from scrapy import FormRequest, signals
+from scrapy import version_info as scrapy_version
+from scrapy.exceptions import DontCloseSpider
+from scrapy.spiders import CrawlSpider, Spider
+
+from scrapy_redis.utils import TextColor
+
+from . import connection, defaults
+from .utils import bytes_to_str, is_dict
+
+
+class RedisMixin:
+    """Mixin class to implement reading urls from a redis queue."""
+
+    redis_key = None
+    redis_batch_size = None
+    redis_encoding = None
+
+    # Redis client placeholder.
+    server = None
+
+    # Idle start time
+    spider_idle_start_time = int(time.time())
+    max_idle_time = None
+
+    def start_requests(self):
+        """Returns a batch of start requests from redis."""
+        return self.next_requests()
+
+    def setup_redis(self, crawler=None):
+        """Setup redis connection and idle signal.
+
+        This should be called after the spider has set its crawler object.
+        """
+        if self.server is not None:
+            return
+
+        if crawler is None:
+            # We allow optional crawler argument to keep backwards
+            # compatibility.
+            # XXX: Raise a deprecation warning.
+            crawler = getattr(self, "crawler", None)
+
+        if crawler is None:
+            raise ValueError("crawler is required")
+
+        settings = crawler.settings
+
+        if self.redis_key is None:
+            self.redis_key = settings.get(
+                "REDIS_START_URLS_KEY",
+                defaults.START_URLS_KEY,
+            )
+
+        self.redis_key = self.redis_key % {"name": self.name}
+
+        if not self.redis_key.strip():
+            raise ValueError("redis_key must not be empty")
+
+        if self.redis_batch_size is None:
+            self.redis_batch_size = settings.getint(
+                "CONCURRENT_REQUESTS", defaults.REDIS_CONCURRENT_REQUESTS
+            )
+
+        try:
+            self.redis_batch_size = int(self.redis_batch_size)
+        except (TypeError, ValueError):
+            raise ValueError("redis_batch_size must be an integer")
+
+        if self.redis_encoding is None:
+            self.redis_encoding = settings.get(
+                "REDIS_ENCODING", defaults.REDIS_ENCODING
+            )
+
+        self.logger.info(
+            "Reading start URLs from redis key '%(redis_key)s' "
+            "(batch size: %(redis_batch_size)s, encoding: %(redis_encoding)s)",
+            self.__dict__,
+        )
+
+        self.server = connection.from_settings(crawler.settings)
+
+        if settings.getbool("REDIS_START_URLS_AS_SET", defaults.START_URLS_AS_SET):
+            self.fetch_data = self.server.spop
+            self.count_size = self.server.scard
+        elif settings.getbool("REDIS_START_URLS_AS_ZSET", defaults.START_URLS_AS_ZSET):
+            self.fetch_data = self.pop_priority_queue
+            self.count_size = self.server.zcard
+        else:
+            self.fetch_data = self.pop_list_queue
+            self.count_size = self.server.llen
+
+        if self.max_idle_time is None:
+            self.max_idle_time = settings.get(
+                "MAX_IDLE_TIME_BEFORE_CLOSE", defaults.MAX_IDLE_TIME
+            )
+
+        try:
+            self.max_idle_time = int(self.max_idle_time)
+        except (TypeError, ValueError):
+            raise ValueError("max_idle_time must be an integer")
+
+        # The idle signal is called when the spider has no requests left,
+        # that's when we will schedule new requests from redis queue
+        crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
+
+    def pop_list_queue(self, redis_key, batch_size):
+        with self.server.pipeline() as pipe:
+            pipe.lrange(redis_key, 0, batch_size - 1)
+            pipe.ltrim(redis_key, batch_size, -1)
+            datas, _ = pipe.execute()
+        return datas
+
+    def pop_priority_queue(self, redis_key, batch_size):
+        with self.server.pipeline() as pipe:
+            pipe.zrevrange(redis_key, 0, batch_size - 1)
+            pipe.zremrangebyrank(redis_key, -batch_size, -1)
+            datas, _ = pipe.execute()
+        return datas
+
+    def next_requests(self):
+        """Returns a request to be scheduled or none."""
+        # XXX: Do we need to use a timeout here?
+        found = 0
+        datas = self.fetch_data(self.redis_key, self.redis_batch_size)
+        for data in datas:
+            reqs = self.make_request_from_data(data)
+            if isinstance(reqs, Iterable):
+                for req in reqs:
+                    yield req
+                    # XXX: should be here?
+                    found += 1
+                    self.logger.info(f"start req url:{req.url}")
+            elif reqs:
+                yield reqs
+                found += 1
+            else:
+                self.logger.debug(f"Request not made from data: {data}")
+
+        if found:
+            self.logger.debug(f"Read {found} requests from '{self.redis_key}'")
+
+    def make_request_from_data(self, data):
+        """Returns a `Request` instance for data coming from Redis.
+
+        Overriding this function to support the `json` requested `data` that contains
+        `url` ,`meta` and other optional parameters. `meta` is a nested json which contains sub-data.
+
+        Along with:
+        After accessing the data, sending the FormRequest with `url`, `meta` and addition `formdata`, `method`
+
+        For example:
+
+        .. code:: json
+
+            {
+                "url": "https://example.com",
+                "meta": {
+                    "job-id":"123xsd",
+                    "start-date":"dd/mm/yy",
+                },
+                "url_cookie_key":"fertxsas",
+                "method":"POST",
+            }
+
+        If `url` is empty, return `[]`. So you should verify the `url` in the data.
+        If `method` is empty, the request object will set method to 'GET', optional.
+        If `meta` is empty, the request object will set `meta` to an empty dictionary, optional.
+
+        This json supported data can be accessed from 'scrapy.spider' through response.
+        'request.url', 'request.meta', 'request.cookies', 'request.method'
+
+        Parameters
+        ----------
+        data : bytes
+            Message from redis.
+
+        """
+        formatted_data = bytes_to_str(data, self.redis_encoding)
+
+        if is_dict(formatted_data):
+            parameter = json.loads(formatted_data)
+        else:
+            self.logger.warning(
+                f"{TextColor.WARNING}WARNING: String request is deprecated, please use JSON data format. "
+                f"Detail information, please check https://github.com/rmax/scrapy-redis#features{TextColor.ENDC}"
+            )
+            return FormRequest(formatted_data, dont_filter=True)
+
+        if parameter.get("url", None) is None:
+            self.logger.warning(
+                f"{TextColor.WARNING}The data from Redis has no url key in push data{TextColor.ENDC}"
+            )
+            return []
+
+        url = parameter.pop("url")
+        method = parameter.pop("method").upper() if "method" in parameter else "GET"
+        metadata = parameter.pop("meta") if "meta" in parameter else {}
+
+        return FormRequest(
+            url, dont_filter=True, method=method, formdata=parameter, meta=metadata
+        )
+
+    def schedule_next_requests(self):
+        """Schedules a request if available"""
+        # TODO: While there is capacity, schedule a batch of redis requests.
+        for req in self.next_requests():
+            # see https://github.com/scrapy/scrapy/issues/5994
+            if scrapy_version >= (2, 6):
+                self.crawler.engine.crawl(req)
+            else:
+                self.crawler.engine.crawl(req, spider=self)
+
+    def spider_idle(self):
+        """
+        Schedules a request if available, otherwise waits.
+        or close spider when waiting seconds > MAX_IDLE_TIME_BEFORE_CLOSE.
+        MAX_IDLE_TIME_BEFORE_CLOSE will not affect SCHEDULER_IDLE_BEFORE_CLOSE.
+        """
+        if self.server is not None and self.count_size(self.redis_key) > 0:
+            self.spider_idle_start_time = int(time.time())
+
+        self.schedule_next_requests()
+
+        idle_time = int(time.time()) - self.spider_idle_start_time
+        if self.max_idle_time != 0 and idle_time >= self.max_idle_time:
+            return
+        raise DontCloseSpider
+
+
+class RedisSpider(RedisMixin, Spider):
+    """Spider that reads urls from redis queue when idle.
+
+    Attributes
+    ----------
+    redis_key : str (default: REDIS_START_URLS_KEY)
+        Redis key where to fetch start URLs from..
+    redis_batch_size : int (default: CONCURRENT_REQUESTS)
+        Number of messages to fetch from redis on each attempt.
+    redis_encoding : str (default: REDIS_ENCODING)
+        Encoding to use when decoding messages from redis queue.
+
+    Settings
+    --------
+    REDIS_START_URLS_KEY : str (default: "<spider.name>:start_urls")
+        Default Redis key where to fetch start URLs from..
+    REDIS_START_URLS_BATCH_SIZE : int (deprecated by CONCURRENT_REQUESTS)
+        Default number of messages to fetch from redis on each attempt.
+    REDIS_START_URLS_AS_SET : bool (default: False)
+        Use SET operations to retrieve messages from the redis queue. If False,
+        the messages are retrieve using the LPOP command.
+    REDIS_ENCODING : str (default: "utf-8")
+        Default encoding to use when decoding messages from redis queue.
+
+    """
+
+    @classmethod
+    def from_crawler(cls, crawler, *args, **kwargs):
+        obj = super().from_crawler(crawler, *args, **kwargs)
+        obj.setup_redis(crawler)
+        return obj
+
+
+class RedisCrawlSpider(RedisMixin, CrawlSpider):
+    """Spider that reads urls from redis queue when idle.
+
+    Attributes
+    ----------
+    redis_key : str (default: REDIS_START_URLS_KEY)
+        Redis key where to fetch start URLs from..
+    redis_batch_size : int (default: CONCURRENT_REQUESTS)
+        Number of messages to fetch from redis on each attempt.
+    redis_encoding : str (default: REDIS_ENCODING)
+        Encoding to use when decoding messages from redis queue.
+
+    Settings
+    --------
+    REDIS_START_URLS_KEY : str (default: "<spider.name>:start_urls")
+        Default Redis key where to fetch start URLs from..
+    REDIS_START_URLS_BATCH_SIZE : int (deprecated by CONCURRENT_REQUESTS)
+        Default number of messages to fetch from redis on each attempt.
+    REDIS_START_URLS_AS_SET : bool (default: True)
+        Use SET operations to retrieve messages from the redis queue.
+    REDIS_ENCODING : str (default: "utf-8")
+        Default encoding to use when decoding messages from redis queue.
+
+    """
+
+    @classmethod
+    def from_crawler(cls, crawler, *args, **kwargs):
+        obj = super().from_crawler(crawler, *args, **kwargs)
+        obj.setup_redis(crawler)
+        return obj
@@ -0,0 +1,90 @@
+from datetime import datetime
+
+from scrapy.statscollectors import StatsCollector
+
+from .connection import from_settings as redis_from_settings
+from .defaults import SCHEDULER_PERSIST, STATS_KEY
+from .utils import convert_bytes_to_str
+
+
+class RedisStatsCollector(StatsCollector):
+    """
+    Stats Collector based on Redis
+    """
+
+    def __init__(self, crawler, spider=None):
+        super().__init__(crawler)
+        self.server = redis_from_settings(crawler.settings)
+        self.spider = spider
+        self.spider_name = spider.name if spider else crawler.spidercls.name
+        self.stats_key = crawler.settings.get("STATS_KEY", STATS_KEY)
+        self.persist = crawler.settings.get("SCHEDULER_PERSIST", SCHEDULER_PERSIST)
+
+    def _get_key(self, spider=None):
+        """Return the hash name of stats"""
+        if spider:
+            return self.stats_key % {"spider": spider.name}
+        if self.spider:
+            return self.stats_key % {"spider": self.spider.name}
+        return self.stats_key % {"spider": self.spider_name or "scrapy"}
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler)
+
+    @classmethod
+    def from_spider(cls, spider):
+        return cls(spider.crawler)
+
+    def get_value(self, key, default=None, spider=None):
+        """Return the value of hash stats"""
+        if self.server.hexists(self._get_key(spider), key):
+            return int(self.server.hget(self._get_key(spider), key))
+        else:
+            return default
+
+    def get_stats(self, spider=None):
+        """Return the all of the values of hash stats"""
+        stats = self.server.hgetall(self._get_key(spider))
+        if stats:
+            return convert_bytes_to_str(stats)
+        return {}
+
+    def set_value(self, key, value, spider=None):
+        """Set the value according to hash key of stats"""
+        if isinstance(value, datetime):
+            value = value.timestamp()
+        self.server.hset(self._get_key(spider), key, value)
+
+    def set_stats(self, stats, spider=None):
+        """Set all the hash stats"""
+        self.server.hmset(self._get_key(spider), stats)
+
+    def inc_value(self, key, count=1, start=0, spider=None):
+        """Set increment of value according to key"""
+        if not self.server.hexists(self._get_key(spider), key):
+            self.set_value(key, start)
+        self.server.hincrby(self._get_key(spider), key, count)
+
+    def max_value(self, key, value, spider=None):
+        """Set max value between current and new value"""
+        self.set_value(key, max(self.get_value(key, value), value))
+
+    def min_value(self, key, value, spider=None):
+        """Set min value between current and new value"""
+        self.set_value(key, min(self.get_value(key, value), value))
+
+    def clear_stats(self, spider=None):
+        """Clear all the hash stats"""
+        self.server.delete(self._get_key(spider))
+
+    def open_spider(self, spider):
+        """Set spider to self"""
+        if spider:
+            self.spider = spider
+
+    def close_spider(self, spider, reason):
+        """Clear spider and clear stats"""
+        self.spider = None
+        if not self.persist:
+            self.clear_stats(spider)
@@ -0,0 +1,44 @@
+import json
+from json import JSONDecodeError
+
+import six
+
+
+class TextColor:
+    """ANSI escape sequences used to style text written to a terminal."""
+
+    # Colour codes (SGR 9x parameters).
+    HEADER = "\033[95m"
+    OKBLUE = "\033[94m"
+    OKCYAN = "\033[96m"
+    OKGREEN = "\033[92m"
+    WARNING = "\033[93m"
+    FAIL = "\033[91m"
+    # SGR 0: reset; append after styled text.
+    ENDC = "\033[0m"
+    # SGR 1 and SGR 4 text attributes.
+    BOLD = "\033[1m"
+    UNDERLINE = "\033[4m"
+
+
+def bytes_to_str(s, encoding="utf-8"):
+    """Returns a str if a bytes object is given."""
+    if six.PY3 and isinstance(s, bytes):
+        return s.decode(encoding)
+    return s
+
+
+def is_dict(string_content):
+    """Try load string_content as json, if failed, return False, else return True."""
+    try:
+        json.loads(string_content)
+    except JSONDecodeError:
+        return False
+    return True
+
+
+def convert_bytes_to_str(data, encoding="utf-8"):
+    """Convert a dict's keys & values from `bytes` to `str`
+    or convert bytes to str"""
+    if isinstance(data, bytes):
+        return data.decode(encoding)
+    if isinstance(data, dict):
+        return dict(map(convert_bytes_to_str, data.items()))
+    elif isinstance(data, tuple):
+        return map(convert_bytes_to_str, data)
+    return data
@@ -0,0 +1,69 @@
+from unittest import mock
+
+from scrapy.settings import Settings
+
+from scrapy_redis import defaults
+from scrapy_redis.connection import from_settings, get_redis, get_redis_from_settings
+
+
+class TestGetRedis:
+
+    def test_default_instance(self):
+        server = get_redis()
+        assert isinstance(server, defaults.REDIS_CLS)
+
+    def test_custom_class(self):
+        client_cls = mock.Mock()
+        server = get_redis(param="foo", redis_cls=client_cls)
+        assert server is client_cls.return_value
+        client_cls.assert_called_with(param="foo")
+
+    def test_from_url(self):
+        client_cls = mock.Mock()
+        url = "redis://localhost"
+        server = get_redis(redis_cls=client_cls, url=url, param="foo")
+        assert server is client_cls.from_url.return_value
+        client_cls.from_url.assert_called_with(url, param="foo")
+
+
+class TestFromSettings:
+
+    def setup(self):
+        self.redis_cls = mock.Mock()
+        self.expected_params = {
+            "timeout": 0,
+            "flag": False,
+        }
+        self.settings = Settings(
+            {
+                "REDIS_PARAMS": dict(self.expected_params, redis_cls=self.redis_cls),
+            }
+        )
+
+    def test_redis_cls_default(self):
+        server = from_settings(Settings())
+        assert isinstance(server, defaults.REDIS_CLS)
+
+    def test_redis_cls_custom_path(self):
+        self.settings["REDIS_PARAMS"]["redis_cls"] = "unittest.mock.Mock"
+        server = from_settings(self.settings)
+        assert isinstance(server, mock.Mock)
+
+    def test_default_params(self):
+        server = from_settings(self.settings)
+        assert server is self.redis_cls.return_value
+        self.redis_cls.assert_called_with(
+            **dict(defaults.REDIS_PARAMS, **self.expected_params)
+        )
+
+    def test_override_default_params(self):
+        for key, _ in defaults.REDIS_PARAMS.items():
+            self.expected_params[key] = self.settings["REDIS_PARAMS"][key] = object()
+
+        server = from_settings(self.settings)
+        assert server is self.redis_cls.return_value
+        self.redis_cls.assert_called_with(**self.expected_params)
+
+
+def test_get_server_from_settings_alias():
+    """``from_settings`` and ``get_redis_from_settings`` must be the same object."""
+    assert from_settings is get_redis_from_settings
@@ -0,0 +1,108 @@
+from unittest import mock
+
+from scrapy.http import Request
+from scrapy.settings import Settings
+
+from scrapy_redis.dupefilter import RFPDupeFilter
+
+
+def get_redis_mock():
+    server = mock.Mock()
+
+    def sadd(key, fp, added=0, db={}):  # noqa: mutable db
+        fingerprints = db.setdefault(key, set())
+        if fp not in fingerprints:
+            fingerprints.add(fp)
+            added += 1
+        return added
+
+    server.sadd = sadd
+
+    return server
+
+
+class TestRFPDupeFilter:
+
+    def setup(self):
+        self.server = get_redis_mock()
+        self.key = "dupefilter:1"
+        self.df = RFPDupeFilter(self.server, self.key)
+
+    def test_request_seen(self):
+        req = Request("http://example.com")
+
+        def same_request():
+            assert not self.df.request_seen(req)
+            assert self.df.request_seen(req)
+
+        def diff_method():
+            diff_method = Request("http://example.com", method="POST")
+            assert self.df.request_seen(req)
+            assert not self.df.request_seen(diff_method)
+
+        def diff_url():
+            diff_url = Request("http://example2.com")
+            assert self.df.request_seen(req)
+            assert not self.df.request_seen(diff_url)
+
+        same_request()
+        diff_method()
+        diff_url()
+
+    def test_overridable_request_fingerprinter(self):
+        req = Request("http://example.com")
+        self.df.request_fingerprint = mock.Mock(wraps=self.df.request_fingerprint)
+        assert not self.df.request_seen(req)
+        self.df.request_fingerprint.assert_called_with(req)
+
+    def test_clear_deletes(self):
+        self.df.clear()
+        self.server.delete.assert_called_with(self.key)
+
+    def test_close_calls_clear(self):
+        self.df.clear = mock.Mock(wraps=self.df.clear)
+        self.df.close()
+        self.df.close(reason="foo")
+        assert self.df.clear.call_count == 2
+
+
+def test_log_dupes():
+    def _test(df, dupes, logcount):
+        df.logger.debug = mock.Mock(wraps=df.logger.debug)
+        for _ in range(dupes):
+            req = Request("http://example")
+            df.log(req, spider=mock.Mock())
+        assert df.logger.debug.call_count == logcount
+
+    server = get_redis_mock()
+
+    df_quiet = RFPDupeFilter(server, "foo")  # debug=False
+    _test(df_quiet, 5, 1)
+
+    df_debug = RFPDupeFilter(server, "foo", debug=True)
+    _test(df_debug, 5, 5)
+
+
+@mock.patch("scrapy_redis.dupefilter.get_redis_from_settings")
+class TestFromMethods:
+
+    def setup(self):
+        self.settings = Settings(
+            {
+                "DUPEFILTER_DEBUG": True,
+            }
+        )
+
+    def test_from_settings(self, get_redis_from_settings):
+        df = RFPDupeFilter.from_settings(self.settings)
+        self.assert_dupefilter(df, get_redis_from_settings)
+
+    def test_from_crawler(self, get_redis_from_settings):
+        crawler = mock.Mock(settings=self.settings)
+        df = RFPDupeFilter.from_crawler(crawler)
+        self.assert_dupefilter(df, get_redis_from_settings)
+
+    def assert_dupefilter(self, df, get_redis_from_settings):
+        assert df.server is get_redis_from_settings.return_value
+        assert df.key.startswith("dupefilter:")
+        assert df.debug  # true
@@ -0,0 +1,7 @@
+import scrapy_redis
+
+
+def test_package_metadata():
+    assert scrapy_redis.__author__
+    assert scrapy_redis.__email__
+    assert scrapy_redis.__version__
@@ -0,0 +1,18 @@
+from scrapy_redis import picklecompat
+
+
+def test_picklecompat():
+    obj = {
+        "_encoding": "utf-8",
+        "body": "",
+        "callback": "_response_downloaded",
+        "cookies": {},
+        "dont_filter": False,
+        "errback": None,
+        "headers": {"Referer": ["http://www.dmoz.org/"]},
+        "meta": {"depth": 1, "link_text": "Fran\xe7ais", "rule": 0},
+        "method": "GET",
+        "priority": 0,
+        "url": "http://www.dmoz.org/World/Fran%C3%A7ais/",
+    }
+    assert obj == picklecompat.loads(picklecompat.dumps(obj))
@@ -0,0 +1,38 @@
+from unittest import mock
+
+from scrapy import Spider
+from scrapy.http import Request
+
+from scrapy_redis.queue import Base
+
+
+class TestBaseQueue:
+
+    queue_cls = Base
+
+    def setup(self):
+        self.server = mock.Mock()
+        self.spider = Spider(name="foo")
+        self.spider.parse_method = lambda x: x
+        self.key = "key"
+        self.q = self.queue_cls(self.server, self.spider, self.key)
+
+    def test_encode_decode_requests(self, q=None):
+        if q is None:
+            q = self.q
+        req = Request(
+            "http://example.com", callback=self.spider.parse, meta={"foo": "bar"}
+        )
+        out = q._decode_request(q._encode_request(req))
+        assert req.url == out.url
+        assert req.meta == out.meta
+        assert req.callback == out.callback
+
+    def test_custom_serializer(self):
+        serializer = mock.Mock()
+        serializer.dumps = mock.Mock(side_effect=lambda x: x)
+        serializer.loads = mock.Mock(side_effect=lambda x: x)
+        q = Base(self.server, self.spider, self.key, serializer=serializer)
+        self.test_encode_decode_requests(q)
+        assert serializer.dumps.call_count == 1
+        assert serializer.loads.call_count == 1
@@ -0,0 +1,296 @@
+import os
+from unittest import TestCase, mock
+
+import redis
+from scrapy import Request, Spider
+from scrapy.settings import Settings
+from scrapy.utils.test import get_crawler
+
+from scrapy_redis import connection
+from scrapy_redis.dupefilter import RFPDupeFilter
+from scrapy_redis.queue import FifoQueue, LifoQueue, PriorityQueue
+from scrapy_redis.scheduler import Scheduler
+
+# allow test settings from environment
+REDIS_HOST = os.environ.get("REDIS_HOST", "localhost")
+REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
+
+
+def get_spider(*args, **kwargs):
+    crawler = get_crawler(
+        spidercls=kwargs.pop("spidercls", None),
+        settings_dict=kwargs.pop("settings_dict", None),
+    )
+    return crawler._create_spider(*args, **kwargs)
+
+
+class RedisTestMixin:
+
+    @property
+    def server(self):
+        if not hasattr(self, "_redis"):
+            self._redis = redis.Redis(REDIS_HOST, REDIS_PORT)
+        return self._redis
+
+    def clear_keys(self, prefix):
+        keys = self.server.keys(prefix + "*")
+        if keys:
+            self.server.delete(*keys)
+
+
+class DupeFilterTest(RedisTestMixin, TestCase):
+
+    def setUp(self):
+        self.key = "scrapy_redis:tests:dupefilter:"
+        self.df = RFPDupeFilter(self.server, self.key)
+
+    def tearDown(self):
+        self.clear_keys(self.key)
+
+    def test_dupe_filter(self):
+        req = Request("http://example.com")
+
+        self.assertFalse(self.df.request_seen(req))
+        self.assertTrue(self.df.request_seen(req))
+
+        self.df.close("nothing")
+
+
+class QueueTestMixin(RedisTestMixin):
+
+    queue_cls = None
+
+    def setUp(self):
+        self.spider = get_spider(name="myspider")
+        self.key = f"scrapy_redis:tests:{self.spider.name}:queue"
+        self.q = self.queue_cls(self.server, Spider("myspider"), self.key)
+
+    def tearDown(self):
+        self.clear_keys(self.key)
+
+    def test_clear(self):
+        self.assertEqual(len(self.q), 0)
+
+        for i in range(10):
+            # XXX: can't use same url for all requests as SpiderPriorityQueue
+            # uses redis' set implemention and we will end with only one
+            # request in the set and thus failing the test. It should be noted
+            # that when using SpiderPriorityQueue it acts as a request
+            # duplication filter whenever the serielized requests are the same.
+            # This might be unwanted on repetitive requests to the same page
+            # even with dont_filter=True flag.
+            req = Request(f"http://example.com/?page={i}")
+            self.q.push(req)
+        self.assertEqual(len(self.q), 10)
+
+        self.q.clear()
+        self.assertEqual(len(self.q), 0)
+
+
+class FifoQueueTest(QueueTestMixin, TestCase):
+
+    queue_cls = FifoQueue
+
+    def test_queue(self):
+        req1 = Request("http://example.com/page1")
+        req2 = Request("http://example.com/page2")
+
+        self.q.push(req1)
+        self.q.push(req2)
+
+        out1 = self.q.pop()
+        out2 = self.q.pop(timeout=1)
+
+        self.assertEqual(out1.url, req1.url)
+        self.assertEqual(out2.url, req2.url)
+
+
+class PriorityQueueTest(QueueTestMixin, TestCase):
+
+    queue_cls = PriorityQueue
+
+    def test_queue(self):
+        req1 = Request("http://example.com/page1", priority=100)
+        req2 = Request("http://example.com/page2", priority=50)
+        req3 = Request("http://example.com/page2", priority=200)
+
+        self.q.push(req1)
+        self.q.push(req2)
+        self.q.push(req3)
+
+        out1 = self.q.pop()
+        out2 = self.q.pop(timeout=0)
+        out3 = self.q.pop(timeout=1)
+
+        self.assertEqual(out1.url, req3.url)
+        self.assertEqual(out2.url, req1.url)
+        self.assertEqual(out3.url, req2.url)
+
+
+class LifoQueueTest(QueueTestMixin, TestCase):
+
+    queue_cls = LifoQueue
+
+    def test_queue(self):
+        req1 = Request("http://example.com/page1")
+        req2 = Request("http://example.com/page2")
+
+        self.q.push(req1)
+        self.q.push(req2)
+
+        out1 = self.q.pop()
+        out2 = self.q.pop(timeout=1)
+
+        self.assertEqual(out1.url, req2.url)
+        self.assertEqual(out2.url, req1.url)
+
+
+class SchedulerTest(RedisTestMixin, TestCase):
+    """Exercises ``Scheduler`` against the redis server used by the tests."""
+
+    def setUp(self):
+        # All keys share one prefix so tearDown can remove them together.
+        self.key_prefix = "scrapy_redis:tests:"
+        self.queue_key = self.key_prefix + "%(spider)s:requests"
+        self.dupefilter_key = self.key_prefix + "%(spider)s:dupefilter"
+        self.spider = get_spider(
+            name="myspider",
+            settings_dict={
+                "REDIS_HOST": REDIS_HOST,
+                "REDIS_PORT": REDIS_PORT,
+                "SCHEDULER_QUEUE_KEY": self.queue_key,
+                "SCHEDULER_DUPEFILTER_KEY": self.dupefilter_key,
+                "SCHEDULER_FLUSH_ON_START": False,
+                "SCHEDULER_PERSIST": False,
+                "SCHEDULER_SERIALIZER": "pickle",
+                "DUPEFILTER_CLASS": "scrapy_redis.dupefilter.RFPDupeFilter",
+            },
+        )
+        self.scheduler = Scheduler.from_crawler(self.spider.crawler)
+
+    def tearDown(self):
+        self.clear_keys(self.key_prefix)
+
+    def test_scheduler(self):
+        """Enqueue, duplicate filtering and dequeue on a non-persistent scheduler."""
+        # default no persist
+        self.assertFalse(self.scheduler.persist)
+
+        self.scheduler.open(self.spider)
+        self.assertEqual(len(self.scheduler), 0)
+
+        req = Request("http://example.com")
+        self.scheduler.enqueue_request(req)
+        self.assertTrue(self.scheduler.has_pending_requests())
+        self.assertEqual(len(self.scheduler), 1)
+
+        # dupefilter in action
+        self.scheduler.enqueue_request(req)
+        self.assertEqual(len(self.scheduler), 1)
+
+        out = self.scheduler.next_request()
+        self.assertEqual(out.url, req.url)
+
+        self.assertFalse(self.scheduler.has_pending_requests())
+        self.assertEqual(len(self.scheduler), 0)
+
+        self.scheduler.close("finish")
+
+    def test_scheduler_persistent(self):
+        """Queued requests survive close/open while ``persist`` is true."""
+        # TODO: Improve this test to avoid the need to check for log messages.
+        self.spider.log = mock.Mock(spec=self.spider.log)
+
+        self.scheduler.persist = True
+        self.scheduler.open(self.spider)
+
+        # Nothing is queued yet, so no resume message is logged.
+        self.assertEqual(self.spider.log.call_count, 0)
+
+        self.scheduler.enqueue_request(Request("http://example.com/page1"))
+        self.scheduler.enqueue_request(Request("http://example.com/page2"))
+
+        self.assertTrue(self.scheduler.has_pending_requests())
+        self.scheduler.close("finish")
+
+        # Reopening finds both requests still queued.
+        self.scheduler.open(self.spider)
+        self.spider.log.assert_has_calls(
+            [
+                mock.call("Resuming crawl (2 requests scheduled)"),
+            ]
+        )
+        self.assertEqual(len(self.scheduler), 2)
+
+        # With persistence off, close flushes the queue.
+        self.scheduler.persist = False
+        self.scheduler.close("finish")
+
+        self.assertEqual(len(self.scheduler), 0)
+
+
+class ConnectionTest(TestCase):
+
+    # We can get a connection from just REDIS_URL.
+    def test_redis_url(self):
+        settings = Settings(
+            {
+                "REDIS_URL": "redis://foo:bar@localhost:9001/42",
+            }
+        )
+
+        server = connection.from_settings(settings)
+        connect_args = server.connection_pool.connection_kwargs
+
+        self.assertEqual(connect_args["host"], "localhost")
+        self.assertEqual(connect_args["port"], 9001)
+        self.assertEqual(connect_args["password"], "bar")
+        self.assertEqual(connect_args["db"], 42)
+
+    # We can get a connection from REDIS_HOST/REDIS_PORT.
+    def test_redis_host_port(self):
+        settings = Settings(
+            {
+                "REDIS_HOST": "localhost",
+                "REDIS_PORT": 9001,
+            }
+        )
+
+        server = connection.from_settings(settings)
+        connect_args = server.connection_pool.connection_kwargs
+
+        self.assertEqual(connect_args["host"], "localhost")
+        self.assertEqual(connect_args["port"], 9001)
+
+    # REDIS_URL takes precedence over REDIS_HOST/REDIS_PORT.
+    def test_redis_url_precedence(self):
+        settings = Settings(
+            {
+                "REDIS_HOST": "baz",
+                "REDIS_PORT": 1337,
+                "REDIS_URL": "redis://foo:bar@localhost:9001/42",
+            }
+        )
+
+        server = connection.from_settings(settings)
+        connect_args = server.connection_pool.connection_kwargs
+
+        self.assertEqual(connect_args["host"], "localhost")
+        self.assertEqual(connect_args["port"], 9001)
+        self.assertEqual(connect_args["password"], "bar")
+        self.assertEqual(connect_args["db"], 42)
+
+    # We fallback to REDIS_HOST/REDIS_PORT if REDIS_URL is None.
+    def test_redis_host_port_fallback(self):
+        settings = Settings(
+            {"REDIS_HOST": "baz", "REDIS_PORT": 1337, "REDIS_URL": None}
+        )
+
+        server = connection.from_settings(settings)
+        connect_args = server.connection_pool.connection_kwargs
+
+        self.assertEqual(connect_args["host"], "baz")
+        self.assertEqual(connect_args["port"], 1337)
+
+    # We use default values for REDIS_HOST/REDIS_PORT.
+    def test_redis_default(self):
+        settings = Settings()
+
+        server = connection.from_settings(settings)
+        connect_args = server.connection_pool.connection_kwargs
+
+        self.assertEqual(connect_args["host"], "localhost")
+        self.assertEqual(connect_args["port"], 6379)
@@ -0,0 +1,197 @@
+import contextlib
+import os
+from unittest import mock
+
+import pytest
+from scrapy import signals
+from scrapy.exceptions import DontCloseSpider
+from scrapy.settings import Settings
+
+from scrapy_redis.spiders import RedisCrawlSpider, RedisSpider
+
+REDIS_HOST = os.environ.get("REDIS_HOST", "localhost")
+REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
+
+
+@contextlib.contextmanager
+def flushall(server):
+    try:
+        yield
+    finally:
+        server.flushall()
+
+
+class MySpider(RedisSpider):
+    """Bare ``RedisSpider`` subclass used as a fixture by the tests in this module."""
+
+    name = "myspider"
+
+
+class MyCrawlSpider(RedisCrawlSpider):
+    """Bare ``RedisCrawlSpider`` subclass used as a fixture by the tests in this module."""
+
+    # Same spider name as ``MySpider``.
+    name = "myspider"
+
+
+def get_crawler(**kwargs):
+    return mock.Mock(
+        settings=Settings(
+            {
+                "REDIS_HOST": REDIS_HOST,
+                "REDIS_PORT": REDIS_PORT,
+            }
+        ),
+        **kwargs,
+    )
+
+
+class TestRedisMixin_setup_redis:
+
+    def setup(self):
+        self.myspider = MySpider()
+
+    def test_crawler_required(self):
+        with pytest.raises(ValueError) as excinfo:
+            self.myspider.setup_redis()
+        assert "crawler" in str(excinfo.value)
+
+    def test_requires_redis_key(self):
+        self.myspider.crawler = get_crawler()
+        self.myspider.redis_key = ""
+        with pytest.raises(ValueError) as excinfo:
+            self.myspider.setup_redis()
+        assert "redis_key" in str(excinfo.value)
+
+    def test_invalid_batch_size(self):
+        self.myspider.redis_batch_size = "x"
+        self.myspider.crawler = get_crawler()
+        with pytest.raises(ValueError) as excinfo:
+            self.myspider.setup_redis()
+        assert "redis_batch_size" in str(excinfo.value)
+
+    def test_invalid_idle_time(self):
+        self.myspider.max_idle_time = "x"
+        self.myspider.crawler = get_crawler()
+        with pytest.raises(ValueError) as excinfo:
+            self.myspider.setup_redis()
+        assert "max_idle_time" in str(excinfo.value)
+
+    @mock.patch("scrapy_redis.spiders.connection")
+    def test_via_from_crawler(self, connection):
+        server = connection.from_settings.return_value = mock.Mock()
+        crawler = get_crawler()
+        myspider = MySpider.from_crawler(crawler)
+        assert myspider.server is server
+        connection.from_settings.assert_called_with(crawler.settings)
+        crawler.signals.connect.assert_called_with(
+            myspider.spider_idle, signal=signals.spider_idle
+        )
+        # Second call does nothing.
+        server = myspider.server
+        crawler.signals.connect.reset_mock()
+        myspider.setup_redis()
+        assert myspider.server is server
+        assert crawler.signals.connect.call_count == 0
+
+
+@pytest.mark.parametrize(
+    "spider_cls",
+    [
+        MySpider,
+        MyCrawlSpider,
+    ],
+)
+def test_from_crawler_with_spider_arguments(spider_cls):
+    crawler = get_crawler()
+    spider = spider_cls.from_crawler(
+        crawler,
+        "foo",
+        redis_key="key:%(name)s",
+        redis_batch_size="2000",
+        max_idle_time="100",
+    )
+    assert spider.name == "foo"
+    assert spider.redis_key == "key:foo"
+    assert spider.redis_batch_size == 2000
+    assert spider.max_idle_time == 100
+
+
+class MockRequest(mock.Mock):
+    def __init__(self, url, **kwargs):
+        super().__init__()
+        self.url = url
+
+    def __eq__(self, other):
+        return self.url == other.url
+
+    def __hash__(self):
+        return hash(self.url)
+
+    def __repr__(self):
+        return f"<{self.__class__.__name__}({self.url})>"
+
+
+@pytest.mark.parametrize(
+    "spider_cls",
+    [
+        MySpider,
+        MyCrawlSpider,
+    ],
+)
+@pytest.mark.parametrize("start_urls_as_zset", [False, True])
+@pytest.mark.parametrize("start_urls_as_set", [False, True])
+@mock.patch("scrapy.spiders.Request", MockRequest)
+def test_consume_urls_from_redis(start_urls_as_zset, start_urls_as_set, spider_cls):
+    """Start URLs stored under the redis key are consumed in batches.
+
+    Twice ``batch_size`` URLs are written to ``redis_key`` as a set, a sorted
+    set or a list, depending on the two flags.  ``start_requests`` must yield
+    the first batch, and the following ``spider_idle`` calls must hand the
+    remaining URLs to ``crawler.engine.crawl``.
+
+    NOTE(review): ``spider.server`` is not mocked here, so this presumably
+    needs a reachable Redis at REDIS_HOST/REDIS_PORT -- confirm.
+    """
+    batch_size = 5
+    redis_key = "start:urls"
+    crawler = get_crawler()
+    crawler.settings.setdict(
+        {
+            "REDIS_HOST": REDIS_HOST,
+            "REDIS_PORT": REDIS_PORT,
+            "REDIS_START_URLS_KEY": redis_key,
+            "REDIS_START_URLS_AS_ZSET": start_urls_as_zset,
+            "REDIS_START_URLS_AS_SET": start_urls_as_set,
+            "CONCURRENT_REQUESTS": batch_size,
+        }
+    )
+    spider = spider_cls.from_crawler(crawler)
+    # The server is flushed when this block exits, even if an assertion fails.
+    with flushall(spider.server):
+        urls = [f"http://example.com/{i}" for i in range(batch_size * 2)]
+        reqs = []
+        # Choose the write command for the container type.  The set flag is
+        # tested first, so it wins when both flags are true.
+        if start_urls_as_set:
+            server_put = spider.server.sadd
+        elif start_urls_as_zset:
+
+            # Wrapper giving zadd the same (key, value) call shape as
+            # sadd/rpush; every URL is stored with score 0.
+            def server_put(key, value):
+                spider.server.zadd(key, {value: 0})
+
+        else:
+            server_put = spider.server.rpush
+        for url in urls:
+            server_put(redis_key, url)
+            reqs.append(MockRequest(url))
+
+        # First call is to start requests.
+        start_requests = list(spider.start_requests())
+        if start_urls_as_zset or start_urls_as_set:
+            # For sets only the batch size and membership are checked, not order.
+            assert len(start_requests) == batch_size
+            assert {r.url for r in start_requests}.issubset(r.url for r in reqs)
+        else:
+            # For the list the first batch must come back in insertion order.
+            assert start_requests == reqs[:batch_size]
+
+        # Second call is to spider idle method.
+        with pytest.raises(DontCloseSpider):
+            spider.spider_idle()
+        # Process remaining requests in the queue.
+        with pytest.raises(DontCloseSpider):
+            spider.spider_idle()
+
+        # Last batch was passed to crawl.
+        assert crawler.engine.crawl.call_count == batch_size
+
+        if start_urls_as_zset or start_urls_as_set:
+            # Expect exactly the requests that start_requests did not return.
+            crawler.engine.crawl.assert_has_calls(
+                [mock.call(req) for req in reqs if req not in start_requests],
+                any_order=True,
+            )
+        else:
+            crawler.engine.crawl.assert_has_calls(
+                [mock.call(req) for req in reqs[batch_size:]]
+            )
@@ -0,0 +1,7 @@
+from scrapy_redis.utils import bytes_to_str
+
+
+def test_bytes_to_str():
+    assert bytes_to_str(b"foo") == "foo"
+    # This char is the same in bytes or latin1.
+    assert bytes_to_str(b"\xc1", "latin1") == "\xc1"
@@ -0,0 +1,90 @@
+[tox]
+# tox 4 or newer is required; `minversion` states the same floor as `requires`.
+requires =
+    tox>=4
+# Environments run by a plain `tox` invocation.  The last entry expands to
+# one environment per Python / Scrapy / redis-py combination.
+envlist =
+    docs
+    security
+    flake8
+    py{38,39,310,311,312}-scrapy{26,27,28,29,210,211}-redis{42,43,44,45,46,50}
+minversion = 4.0.0
+
+# Dependency list shared between environments; referenced as {[base]deps}.
+[base]
+deps = 
+    -r requirements-tests.txt
+    -r requirements.txt
+    setuptools
+
+# Default test environment.  Lines prefixed with a factor (`py38:`,
+# `scrapy26:`, `redis42:`, ...) apply only to environments whose name
+# contains that factor.
+[testenv]
+basepython =
+    py38: python3.8
+    py39: python3.9
+    py310: python3.10
+    py311: python3.11
+    py312: python3.12
+# Shared base dependencies plus the Scrapy / redis-py release selected by
+# the environment name.
+deps = 
+    {[base]deps}
+    scrapy26: scrapy~=2.6.0
+    scrapy27: scrapy~=2.7.0
+    scrapy28: scrapy~=2.8.0
+    scrapy29: scrapy~=2.9.0
+    scrapy210: scrapy~=2.10.0
+    scrapy211: scrapy~=2.11.0
+    redis42: redis~=4.2.0
+    redis43: redis~=4.3.0
+    redis44: redis~=4.4.0
+    redis45: redis~=4.5.0
+    redis46: redis~=4.6.0
+    redis50: redis~=5.0.0
+# Forward the Redis location from the calling environment to the tests.
+passenv =
+    REDIS_HOST
+    REDIS_PORT
+# The coverage options are commented out in this command.
+commands = 
+    python -m pytest # --cov-report term --cov=scrapy_redis
+
+# Lint docs, src and tests with flake8, ignoring W503, E265 and E731.
+[testenv:flake8]
+basepython =
+    python3.12
+deps =
+    {[base]deps}
+commands =
+    flake8 --ignore=W503,E265,E731 docs src tests
+
+# Recursive bandit scan of src/ and tests/, configured by .bandit.yml.
+[testenv:security]
+basepython =
+    python3.12
+deps =
+    bandit~=1.7.3
+commands =
+    bandit -r -c .bandit.yml src/ tests/
+
+# Python 3.12 test run that, unlike the default [testenv] command, reports
+# coverage for scrapy_redis.
+[testenv:pytest]
+basepython =
+    python3.12
+deps = 
+    {[testenv]deps}
+passenv =
+    REDIS_HOST
+    REDIS_PORT
+commands =
+    python -m pytest --cov-report term --cov=scrapy_redis
+
+# Build the project with `python -m build` (the `build` package is added to
+# the shared base dependencies).
+[testenv:build]
+basepython =
+    python3.12
+deps = 
+    {[base]deps}
+    build
+commands =
+    python -m build
+
+# Build the HTML documentation through the Makefile in docs/.
+[testenv:docs]
+basepython =
+    python3.12
+deps = 
+    {[base]deps}
+    -r docs/requirements.txt
+# `make` is an external command, so it is listed here to let tox run it.
+allowlist_externals =
+    make
+commands =
+    # Same command as readthedocs
+    make -C docs html SPHINXOPTS="-T -W --keep-going -D language=en"