Files
F6--/张阳脚本/竞品系统数据导出/神汽链导出.ipynb
T
2026-04-09 10:19:09 +08:00

238 lines
9.1 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "a84cbd49a7363225",
"metadata": {
"ExecuteTime": {
"end_time": "2026-03-31T01:58:35.248297Z",
"start_time": "2026-03-31T01:47:30.843891Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"正在抓取数据: 100%|██████████| 65/65 [15:41<00:00, 14.49s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ 全部完成!共抓取 23540 条数据,已保存至 D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\神汽链10年历史数据1.xlsx\n"
]
}
],
"source": [
"import requests\n",
"from tqdm import tqdm\n",
"import pandas as pd\n",
"from datetime import datetime, timedelta\n",
"import time\n",
"import os\n",
"\n",
"# 配置部分\n",
"COOKIES = {\n",
" 'JSESSIONID': 'NDliNDQ0OTYtMzg2NC00ZTAwLWEzMjgtNWE1YmIzMzRjOTMx',\n",
"}\n",
"\n",
"HEADERS = {\n",
" 'Accept': 'application/json, text/plain, */*',\n",
" 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
" 'Connection': 'keep-alive',\n",
" 'Content-Type': 'application/json;charset=UTF-8',\n",
" 'Origin': 'https://www.sqzone.com',\n",
" 'Referer': 'https://www.sqzone.com/launa/web/workOrder/woManage',\n",
" 'Sec-Fetch-Dest': 'empty',\n",
" 'Sec-Fetch-Mode': 'cors',\n",
" 'Sec-Fetch-Site': 'same-origin',\n",
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36 Edg/146.0.0.0',\n",
" 'X-Requested-With': 'XMLHttpRequest',\n",
" 'appName': 'SQLINK',\n",
" 'sec-ch-ua': '\"Chromium\";v=\"146\", \"Not-A.Brand\";v=\"24\", \"Microsoft Edge\";v=\"146\"',\n",
" 'sec-ch-ua-mobile': '?0',\n",
" 'sec-ch-ua-platform': '\"Windows\"',\n",
" # 'Cookie': 'JSESSIONID=NDliNDQ0OTYtMzg2NC00ZTAwLWEzMjgtNWE1YmIzMzRjOTMx',\n",
"}\n",
"\n",
"\n",
"URL = 'https://www.sqzone.com/launa/pc/dataCenter/queryShopTurnoverInfo'\n",
"OUTPUT_FILE = r'D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\神汽链10年历史数据1.xlsx'\n",
"\n",
"# 确保输出目录存在\n",
"os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)\n",
"\n",
"all_data = []\n",
"\n",
"def get_month_range(start_year, start_month, end_year, end_month):\n",
" dates = []\n",
" current = datetime(start_year, start_month, 1)\n",
" end = datetime(end_year, end_month, 1)\n",
" while current <= end:\n",
" next_month = (current.replace(day=28) + timedelta(days=4)).replace(day=1)\n",
" last_day = next_month - timedelta(days=1)\n",
" dates.append((\n",
" current.strftime('%Y-%m-%d'),\n",
" last_day.strftime('%Y-%m-%d')\n",
" ))\n",
" current = next_month\n",
" return dates\n",
"\n",
"def fetch_page_with_retry(start_date, end_date, page_num, max_retries=3):\n",
" \"\"\"带重试机制的单页请求函数\"\"\"\n",
" json_data = {\n",
" 'keyword': '',\n",
" 'pageSize': 50,\n",
" 'curPage': page_num,\n",
" 'shopId': '',\n",
" 'payId': [],\n",
" 'payStatus': '',\n",
" 'startTime': start_date,\n",
" 'endTime': end_date,\n",
" }\n",
"\n",
" for attempt in range(max_retries):\n",
" try:\n",
" response = requests.post(\n",
" URL,\n",
" cookies=COOKIES,\n",
" headers=HEADERS,\n",
" json=json_data,\n",
" timeout=15 # 增加超时设置,防止无限等待\n",
" )\n",
"\n",
" # 检查状态码\n",
" if response.status_code == 200:\n",
" res_json = response.json()\n",
" # 检查业务逻辑是否成功 (假设成功时有 'data' 字段)\n",
" if res_json.get('data'):\n",
" return res_json['data'].get('contents', [])\n",
" else:\n",
" # 可能是业务错误,打印并视为本次失败,尝试重试\n",
" print(f\"第{page_num}页业务返回异常: {res_json}\")\n",
" else:\n",
" print(f\"第{page_num}页HTTP错误: {response.status_code}\")\n",
"\n",
" except requests.exceptions.RequestException as e:\n",
" print(f\"第{page_num}页网络请求失败 (尝试 {attempt+1}/{max_retries}): {e}\")\n",
"\n",
" # 如果失败,等待一段时间后重试 (指数退避)\n",
" if attempt < max_retries - 1:\n",
" wait_time = (attempt + 1) * 2 # 2秒, 4秒, 6秒...\n",
" print(f\" -> 等待 {wait_time} 秒后重试...\")\n",
" time.sleep(wait_time)\n",
"\n",
" # 所有重试都失败\n",
" return None\n",
"\n",
"# 生成日期范围\n",
"month_ranges = get_month_range(2020, 11, 2026, 3)\n",
"\n",
"for start_date, end_date in tqdm(month_ranges, desc=\"正在抓取数据\"):\n",
" page = 1\n",
" while True:\n",
" data_list = fetch_page_with_retry(start_date, end_date, page, max_retries=3)\n",
"\n",
" # 如果重试多次后仍然失败 (返回 None),可以选择跳过该页或终止\n",
" if data_list is None:\n",
" print(f\"⚠️ 严重警告: {start_date}-{end_date} 第{page}页 多次重试失败,跳过该页继续下一页。\")\n",
" # 这里选择 break 跳出当前月份的循环,或者可以根据需求选择 continue 尝试下一页\n",
" # 为了数据安全,通常建议记录日志后 break 当前月份,防止数据错乱\n",
" break\n",
"\n",
" if not data_list:\n",
" # 正常结束:没有更多数据\n",
" # print(f\"{start_date}-{end_date} 数据已爬取完毕\")\n",
" break\n",
"\n",
" # 处理数据\n",
" for data in data_list:\n",
" # 安全获取 partsViews,防止某些记录没有该字段\n",
" parts = data.get('partsViews', [])\n",
" customer_info = {k: v for k, v in data.items() if k != 'partsViews'}\n",
"\n",
" if parts:\n",
" for part in parts:\n",
" record = {**customer_info, **part}\n",
" all_data.append(record)\n",
" else:\n",
" # 如果没有配件视图,是否也要保留主记录?视业务需求而定\n",
" # 这里假设必须有配件信息才保留,如果需要保留主记录请取消下面注释\n",
" # all_data.append(customer_info)\n",
" pass\n",
"\n",
" page += 1\n",
" # 正常翻页等待\n",
" time.sleep(1)\n",
"\n",
" # 可选:每完成一个月保存一次,防止程序运行几天后崩溃导致前功尽弃\n",
" if len(all_data) > 0 and len(all_data) % 5000 == 0:\n",
" temp_df = pd.DataFrame(all_data)\n",
" temp_df.to_excel(OUTPUT_FILE.replace('.xlsx', '_temp.xlsx'), index=False)\n",
" print(\"临时保存已完成\")\n",
"\n",
"# 最终保存\n",
"if all_data:\n",
" ndf = pd.DataFrame(all_data)\n",
" ndf.to_excel(OUTPUT_FILE, index=False)\n",
" print(f\"✅ 全部完成!共抓取 {len(all_data)} 条数据,已保存至 {OUTPUT_FILE}\")\n",
"else:\n",
" print(\"❌ 未抓取到任何数据。\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d2abfb70a61d82a0",
"metadata": {
"ExecuteTime": {
"end_time": "2026-03-31T03:05:38.539504Z",
"start_time": "2026-03-31T03:05:27.394303900Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ 全部完成!共抓取 15906 条数据,已保存至 D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\神汽链10年历史数据1.xlsx\n"
]
}
],
"source": [
"# 最终保存\n",
"if all_data:\n",
" ndf = pd.DataFrame(all_data)\n",
" ndf.to_excel(OUTPUT_FILE, index=False)\n",
" print(f\"✅ 全部完成!共抓取 {len(all_data)} 条数据,已保存至 {OUTPUT_FILE}\")\n",
"else:\n",
" print(\"❌ 未抓取到任何数据。\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "F6+宜搭+其它",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}