26 lines
824 B
Python
26 lines
824 B
Python
import scrapy
|
|
|
|
|
|
class GameSpider(scrapy.Spider):
    """Spider that collects game names from the 4399.com landing page."""

    name = "game"
    allowed_domains = ["4399.com"]
    start_urls = ["http://4399.com/"]

    def parse(self, response):
        """Yield one item per ``<li>`` entry of the game list.

        Args:
            response: The downloaded page for a URL in ``start_urls``.

        Yields:
            dict: ``{"name": names}`` where ``names`` is the list of text
            nodes found in the entry's direct ``<a>`` children (the list is
            empty when the entry has no such text).
        """
        # Extract the data block by block: select every <li> first, then
        # query each one relatively, so each entry becomes its own item.
        # NOTE(review): the path is positional (div[10]/div[1]/div[1]), so it
        # will stop matching if the page layout shifts.
        game_entries = response.xpath(
            '//*[@id="skinbody"]/div[10]/div[1]/div[1]/ul/li'
        )
        for game_entry in game_entries:
            # extract() returns every matching text node as a list.
            game_names = game_entry.xpath('./a/text()').extract()
            # The item must be yielded for the data to be passed on to the
            # item pipelines.
            yield {"name": game_names}
|