77 lines
4.2 KiB
Python
77 lines
4.2 KiB
Python
import json
import os
import time
from datetime import datetime, timedelta

import pandas as pd
import requests
from tqdm import tqdm
|
|
|
|
# Session cookie captured from a logged-in browser session.
# NOTE(review): this value carries login tokens; keeping it in source leaks
# credentials and forces a code edit whenever the session expires. Prefer
# supplying a fresh value through the YUNXIU_COOKIE environment variable; the
# embedded value is kept only as a backward-compatible fallback.
_DEFAULT_COOKIE = "__jdv=135409966|direct|-|none|-|1746495871709; flash=3_fmYwX89tb2teRGeQ4SfQL-pKDSmxglqPBgD7qIuZXmReygIf2eagag6hUJes8tPuEqjJP9l1l2qwsJs1XTuyRzJEICXIDkXx6lzysw2bis8pDhQYXP31QXbNPfa5XiJ4K1NeoIjs2jKwQARaXdqu8cg2xJrLj74CClfK1rz_kwOPy__dMtZGSq-S13Lc; pin=jd_gEFaqZlekSYM; unick=jd_28m3wapraiha3z; thor=9ADBD5C0241E6AA7507DB6E6ECED6657690C3A23BC73A7FDFF4C59CFB5DC19D09141D8C81ECE715B83D420382764AC40A51836A77298AA3BA872D825E72E86FB855E24FFB4A3D661C3FF1A5388EB47EECE339286A888E357FCE532D1A8DB26AC4195C7CDF94F989D5EF977BAA810BB8FE80486E89D9A34C4464471AE9CFCDB55975C9092DB060D12120A2A864B48F07D2EF235B6C34DC65D4D9A5CA6CF1AD0B5; light_key=AASBKE7rOxgWQziEhC_QY6ya8T1mkut_nO-Zdj9XQRxIxK-0ug40Vcgbs_HMus6W8doYmG03; JD_UUID=ed02c724-9458-4fef-b74e-ff66090efca9; yunxiupin=jd_gEFaqZlekSYM; UUID=b1715a1a-b8f8-4d6b-b29f-b13da6358062; SESSION_USER_NAME=%E4%BD%95%E4%BA%9A%E5%B3%B0; __jda=154799550.17464958717091671508260.1746495872.1746495872.1746499819.2; __jdc=154799550; __jdb=154799550.8.17464958717091671508260|2.1746499819"

# Request headers sent with every list query; they mirror what the browser
# sends for an XMLHttpRequest issued from the account page.
headers = {
    # An unset or empty YUNXIU_COOKIE falls back to the embedded cookie.
    "cookie": os.environ.get("YUNXIU_COOKIE") or _DEFAULT_COOKIE,
    "referer": "https://www.yunxiu.com/legend/account",
    "sec-ch-ua": "\"Not(A:Brand\";v=\"99\", \"Microsoft Edge\";v=\"133\", \"Chromium\";v=\"133\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0",
    "x-requested-with": "XMLHttpRequest",
}
|
|
|
|
# Collected vehicle records, one dict per row, across all pages and windows.
all_data = []

# Value sent as the `_` query parameter; seeded from the `date` parameter
# seen on the vehicle query page (presumably a cache-buster - confirm).
number = 1746500626772
page = 1

# Retry policy for a single page request.
max_retries = 5  # maximum number of attempts
backoff_factor = 1  # multiplier for the exponential backoff delay

# Date range to walk: from now back `years_back` 365-day years.
years_back = 10
current_date = datetime.now()
start_date = current_date
end_date = start_date - timedelta(days=365 * years_back)
|
|
|
|
def _fetch_page_json(url, page):
    """Download one result page and return its decoded JSON body.

    Makes up to ``max_retries`` attempts, sleeping
    ``backoff_factor * 2 ** attempt`` seconds between failed attempts.

    Args:
        url: Fully built list-endpoint URL for the page.
        page: Page number; used only in the progress messages.

    Returns:
        The decoded JSON payload, or ``None`` when every attempt failed.
    """
    for attempt in range(max_retries):
        try:
            # A timeout keeps one stalled connection from hanging the export.
            response = requests.get(url=url, headers=headers, timeout=30)
            response.raise_for_status()  # raises HTTPError on 4xx/5xx
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError also covers a non-JSON body on requests versions
            # whose JSON decode error is not a RequestException.
            print(f"第{page}页请求失败: {e}")
            if attempt < max_retries - 1:
                sleep_time = backoff_factor * (2 ** attempt)
                print(f"准备第{attempt + 1}次重试,等待{sleep_time}秒...")
                time.sleep(sleep_time)
            else:
                print(f"达到最大重试次数,跳过第{page}页")
    return None


# Walk backwards from the newest date in 365-day windows, paging through each
# window until the API reports no further pages.
# The comparison is inclusive, so the window that starts exactly at end_date
# is fetched as well (one window more than the configured span).
# NOTE(review): consecutive windows share their boundary date; if the API
# treats both bounds as inclusive, rows dated on a boundary are fetched twice.
# Confirm and de-duplicate downstream if so.
while start_date >= end_date:
    search_startTime = (start_date - timedelta(days=365)).strftime('%Y-%m-%d')
    search_endTime = start_date.strftime('%Y-%m-%d')
    print(f'search_startTime: {search_startTime}, search_endTime: {search_endTime}')
    start_date -= timedelta(days=365)  # step one window further into the past

    try:
        for page in tqdm(range(1, 1001)):
            url = f"https://www.yunxiu.com/legend/archives/carinfo/locate/list?page={page}&size=12&search_startTime={search_startTime}&search_endTime={search_endTime}&_={number}"
            payload = _fetch_page_json(url, page)
            number += 1  # use a fresh `_` value for the next request

            if payload is not None:
                # `data`, `content` and `totalPages` may be missing or null.
                data = payload.get("data") or {}
                for item in data.get("content") or []:
                    # Keep every field except the nested licence list.
                    base_info = {k: v for k, v in item.items() if k != "licenseList"}
                    all_data.append(base_info)

                total_pages = data.get("totalPages") or 0
                if page >= total_pages:
                    # Last page reached: stop without requesting the page
                    # after it just to detect the end.
                    max_len = total_pages + 1
                    print(max_len)
                    break
            # A page that failed every retry is skipped; paging continues.

            time.sleep(1)  # throttle between page requests
    except Exception as e:
        # Best effort per window: report the failure and move on to the next
        # window rather than silently discarding it. KeyboardInterrupt and
        # SystemExit are deliberately not caught.
        print(f"{search_startTime} ~ {search_endTime} 区间处理失败,已跳过: {e!r}")
|
|
|
|
# Turn the collected records into a table and write it out as CSV.
output_csv = r"D:\Idea Project\F6+宜搭+其它(1)\new\文件输出\京东云修数据导出-13914209863 -车辆.csv"
df1 = pd.DataFrame(all_data)
df1.to_csv(output_csv)
|