32 lines
805 B
Python
32 lines
805 B
Python
# Fetch the page source
|
|
# Extract the data
|
|
|
|
import requests
|
|
from lxml import etree
|
|
|
|
# Scrape one zbj.com service-search results page and print the price and
# title text found under each matched provider container.
url = 'https://www.zbj.com/search/service/?kw=sass&r=2'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 '
                  'Safari/537.36 Edg/107.0.1418.35'}

# Fetch the page. The response is used as a context manager so the
# connection is released on every path, and a timeout keeps an
# unresponsive server from hanging the script forever.
try:
    with requests.get(url, headers=headers, timeout=10) as r:
        r.raise_for_status()
        # Let requests guess the charset from the body instead of trusting
        # the (possibly missing) Content-Type header.
        r.encoding = r.apparent_encoding
        page_source = r.text
except requests.RequestException as exc:
    # Without a usable response there is nothing to parse; stop here
    # instead of failing later on a missing/invalid response object.
    print('异常', exc)
    raise SystemExit(1) from exc

# Parse the HTML. etree.HTML may return None when the document is empty,
# so fall back to an empty result list in that case.
html = etree.HTML(page_source)

# Locate the provider container(s).
# NOTE(review): the path is purely positional and ends in div[4], so it
# matches at most one node per parent rather than one node per provider;
# confirm against the live page markup.
if html is None:
    divs = []
else:
    divs = html.xpath('//*[@id="__layout"]/div/div[3]/div/div[3]/div[4]')

for div in divs:
    # Each xpath() call returns a list of text nodes (possibly empty).
    price = div.xpath('./div[1]/div[1]/div/div[2]/div[1]/span/text()')
    print(price)
    title = div.xpath('./div/div/div/div[2]/div[2]/a[1]/text()')
    print(title)
|