66 lines
2.0 KiB
Python
66 lines
2.0 KiB
Python
import requests
|
|
import csv
|
|
import numpy as np
|
|
import os
|
|
import random
|
|
from .settings import navAddr
|
|
from requests.exceptions import RequestException
|
|
|
|
# 初始化导航数据文件
|
|
def init():
    """Create the navigation CSV file with its header row if it is missing.

    When a file already exists at ``navAddr`` nothing is touched; otherwise
    a new file is written containing only the column names
    ``typeName``, ``gid`` and ``containerid``.
    """
    # An existing file is left untouched so earlier rows are not lost.
    if os.path.exists(navAddr):
        return

    # newline='' lets the csv module control line endings itself.
    with open(navAddr, 'w', encoding='utf-8', newline='') as handle:
        csv.writer(handle).writerow(['typeName', 'gid', 'containerid'])
|
|
|
|
# 写入导航数据
|
|
def write(row):
    """Append a single row to the navigation CSV file at ``navAddr``.

    Args:
        row: Iterable of cell values, passed unchanged to
            ``csv.writer.writerow``.
    """
    # Append mode keeps the header and all previously written rows.
    with open(navAddr, mode='a', encoding='utf-8', newline='') as handle:
        csv.writer(handle).writerow(row)
|
|
|
|
# 获取数据,支持多账号
|
|
def fetchData(url, headers_list):
    """Fetch ``url`` and return the ``data -> modules`` part of its JSON reply.

    One entry of ``headers_list`` is chosen at random and sent as the request
    headers.

    Args:
        url: Address requested with HTTP GET (10 second timeout).
        headers_list: Sequence of header dicts to choose from. When it is
            empty or ``None`` the request is sent without custom headers
            instead of failing inside ``random.choice``.

    Returns:
        The value stored under ``['data']['modules']`` of the decoded JSON
        body, or ``None`` when the request raises, the status code is not
        200, the body is not valid JSON, or the payload lacks that path.
    """
    # random.choice raises IndexError on an empty sequence.
    headers = random.choice(headers_list) if headers_list else None

    try:
        response = requests.get(url, headers=headers, timeout=10)
    except RequestException as e:
        print(f"请求失败:{e}")
        return None

    if response.status_code != 200:
        return None

    try:
        payload = response.json()
    except ValueError as e:
        # Every JSON decoding error raised by response.json() is a ValueError
        # subclass, regardless of the installed requests version.
        print(f"请求失败:{e}")
        return None

    # Walk the payload defensively: an error reply may not carry
    # 'data' / 'modules', and indexing it directly would raise.
    data = payload.get('data') if isinstance(payload, dict) else None
    modules = data.get('modules') if isinstance(data, dict) else None
    if modules is None:
        print("请求失败:响应中缺少 data.modules")
    return modules
|
|
|
|
# 解析导航数据
|
|
def readJson(response):
    """Store every navigation entry found in ``response`` via ``write``.

    Args:
        response: Iterable of module mappings. A module is processed only
            when it has both a ``type`` and a ``typeName`` key; each of its
            ``modules`` entries that has both ``id`` and ``containerid``
            produces one row ``[typeName, id, containerid]``.
            ``None`` is treated as an empty iterable.

    Returns:
        None. Rows are emitted through ``write`` as a side effect.
    """
    for module in response or []:
        if 'type' not in module or 'typeName' not in module:
            continue

        typeName = module['typeName']

        # A module may have no 'modules' key, or a null one; treat both as
        # "no submodules" rather than raising KeyError/TypeError.
        for submodule in module.get('modules') or []:
            if 'id' in submodule and 'containerid' in submodule:
                write([typeName, submodule['id'], submodule['containerid']])
|
|
|
|
# 启动爬虫
|
|
def start(headers_list):
    """Run the navigation crawl once.

    Ensures the CSV file exists, downloads the navigation data and, when
    something truthy came back, hands it to ``readJson`` for storage.

    Args:
        headers_list: Header dicts forwarded to ``fetchData``.
    """
    hot_nav_url = 'https://weibo.com/ajax/side/hot'

    init()

    modules = fetchData(hot_nav_url, headers_list)
    if not modules:
        # Nothing usable was returned (None or empty), so nothing to store.
        return

    readJson(modules)
|
|
|
|
if __name__ == '__main__':
    # Script entry point: crawl using placeholder account headers.
    # Both accounts share the same browser identification string.
    firefox_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0'

    # One header dict per account; replace the cookie placeholders before use.
    headers_list = [
        {'Cookie': cookie, 'User-Agent': firefox_ua}
        for cookie in ('your_cookie_here', 'another_cookie_here')
    ]

    start(headers_list)
|