Files
F6--/张阳脚本/竞品系统数据导出/京东云修.py
T
2026-01-30 11:28:35 +08:00

77 lines
4.2 KiB
Python

import requests
import json
import time
import pandas as pd
from tqdm import tqdm
from datetime import datetime, timedelta
# HTTP headers attached to the requests this script sends to www.yunxiu.com.
# The sec-ch-ua / sec-fetch-* / user-agent / x-requested-with entries describe
# an XMLHttpRequest issued by Microsoft Edge 133 on Windows.
# NOTE(review): the "cookie" value looks like a logged-in browser session
# (pin / thor / light_key tokens) pasted in verbatim. Presumably it expires and
# has to be refreshed by hand — confirm, and consider loading it from the
# environment or a local, untracked file instead of keeping it in the source.
headers = {
"cookie":"__jdv=135409966|direct|-|none|-|1746495871709; flash=3_fmYwX89tb2teRGeQ4SfQL-pKDSmxglqPBgD7qIuZXmReygIf2eagag6hUJes8tPuEqjJP9l1l2qwsJs1XTuyRzJEICXIDkXx6lzysw2bis8pDhQYXP31QXbNPfa5XiJ4K1NeoIjs2jKwQARaXdqu8cg2xJrLj74CClfK1rz_kwOPy__dMtZGSq-S13Lc; pin=jd_gEFaqZlekSYM; unick=jd_28m3wapraiha3z; thor=9ADBD5C0241E6AA7507DB6E6ECED6657690C3A23BC73A7FDFF4C59CFB5DC19D09141D8C81ECE715B83D420382764AC40A51836A77298AA3BA872D825E72E86FB855E24FFB4A3D661C3FF1A5388EB47EECE339286A888E357FCE532D1A8DB26AC4195C7CDF94F989D5EF977BAA810BB8FE80486E89D9A34C4464471AE9CFCDB55975C9092DB060D12120A2A864B48F07D2EF235B6C34DC65D4D9A5CA6CF1AD0B5; light_key=AASBKE7rOxgWQziEhC_QY6ya8T1mkut_nO-Zdj9XQRxIxK-0ug40Vcgbs_HMus6W8doYmG03; JD_UUID=ed02c724-9458-4fef-b74e-ff66090efca9; yunxiupin=jd_gEFaqZlekSYM; UUID=b1715a1a-b8f8-4d6b-b29f-b13da6358062; SESSION_USER_NAME=%E4%BD%95%E4%BA%9A%E5%B3%B0; __jda=154799550.17464958717091671508260.1746495872.1746495872.1746499819.2; __jdc=154799550; __jdb=154799550.8.17464958717091671508260|2.1746499819",
# Page the request claims to originate from.
"referer": "https://www.yunxiu.com/legend/account",
"sec-ch-ua": "\"Not(A:Brand\";v=\"99\", \"Microsoft Edge\";v=\"133\", \"Chromium\";v=\"133\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\"",
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0",
# Marks the request as an AJAX call.
"x-requested-with": "XMLHttpRequest"
}
# ---------------------------------------------------------------------------
# Crawl configuration and state.
#
# The vehicle list endpoint is queried in 365-day windows, walking backwards
# from "now". Every window is paged through (12 records per page) and each
# record, minus its "licenseList" field, is appended to ``all_data``.
# ---------------------------------------------------------------------------
all_data = []  # one dict per vehicle record, accumulated over all windows/pages
# Value sent as the `_` query parameter and bumped after every page request;
# the initial value was taken from the `date` parameter on the vehicle-query page.
number = 1746500626772
page = 1
max_retries = 5  # maximum number of attempts per page
backoff_factor = 1  # base of the exponential back-off, in seconds
request_timeout = 30  # seconds; without a timeout a stalled connection blocks forever
current_date = datetime.now()
years_back = 10
end_date = current_date - timedelta(days=365 * years_back)
start_date = current_date


def _fetch_page(url, page):
    """Download one result page and return its decoded JSON body.

    The request is attempted up to ``max_retries`` times. After a failed
    attempt (network error, timeout, 4xx/5xx status, or a body that is not
    valid JSON) the function sleeps ``backoff_factor * 2 ** attempt`` seconds
    before trying again.

    Args:
        url: Fully built request URL.
        page: Page number, used only in the progress/error messages.

    Returns:
        The decoded JSON payload, or ``None`` when every attempt failed.
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url=url, headers=headers, timeout=request_timeout)
            response.raise_for_status()  # raises HTTPError on a 4xx/5xx status
            return response.json()
        # ValueError covers JSON decode failures on requests versions whose
        # decode error is not a RequestException subclass.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"{page}页请求失败: {e}")
            if attempt < max_retries - 1:
                sleep_time = backoff_factor * (2 ** attempt)  # 1, 2, 4, 8, ... seconds
                print(f"准备第{attempt + 1}次重试,等待{sleep_time}秒...")
                time.sleep(sleep_time)
            else:
                print(f"达到最大重试次数,跳过第{page}")
    return None


# start_date takes the values now, now-365d, ..., now-3650d (the bound is
# inclusive), so years_back + 1 windows are queried in total.
# NOTE(review): consecutive windows share a boundary date; if the endpoint
# treats both bounds as inclusive, records dated exactly on a boundary are
# fetched twice — confirm, and de-duplicate if so.
while start_date >= end_date:
    search_startTime = (start_date - timedelta(days=365)).strftime('%Y-%m-%d')
    search_endTime = start_date.strftime('%Y-%m-%d')
    print(f'search_startTime: {search_startTime}, search_endTime: {search_endTime}')
    start_date -= timedelta(days=365)  # step back one window; done up front so the loop always advances
    try:
        for page in tqdm(range(1, 1001)):
            url = f"https://www.yunxiu.com/legend/archives/carinfo/locate/list?page={page}&size=12&search_startTime={search_startTime}&search_endTime={search_endTime}&_={number}"
            number += 1
            payload = _fetch_page(url, page)
            if payload is not None:  # None: all retries failed, the page is skipped
                data = payload.get("data") or {}
                if not data:
                    # Make an empty/absent payload visible instead of failing silently.
                    print(f"{page}页响应中没有data数据,可能登录已失效")
                total_pages = data.get("totalPages") or 0
                # The first page past the reported total ends this window. `>`
                # (rather than `==`) still terminates if the total shrinks
                # while the window is being paged.
                if page > total_pages:
                    print(total_pages + 1)
                    break
                for item in data.get("content") or []:
                    all_data.append({k: v for k, v in item.items() if k != "licenseList"})
            time.sleep(1)  # throttle: at most one page per second
        else:
            # The loop ran through all 1000 pages without reaching the end.
            print(f"{search_startTime} ~ {search_endTime} 超过1000页,后续数据未导出")
    except Exception as exc:
        # Best effort: report the failure and move on to the next window.
        # KeyboardInterrupt / SystemExit are deliberately not caught.
        print(f"{search_startTime} ~ {search_endTime} 区间处理失败,已跳过: {exc!r}")
# Write every collected record to one CSV file, using pandas' default
# index column and encoding.
output_csv = r"D:\Idea Project\F6+宜搭+其它(1)\new\文件输出\京东云修数据导出-13914209863 -车辆.csv"
df1 = pd.DataFrame(data=all_data)
df1.to_csv(path_or_buf=output_csv)