{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "sqzone-turnover-intro",
   "metadata": {},
   "source": [
    "# Shop turnover history export\n",
    "\n",
    "Fetches the paginated `queryShopTurnoverInfo` results month by month, flattens each record's `partsViews` list into one row per part, and writes everything to a single Excel file.\n",
    "\n",
    "**Before running**\n",
    "\n",
    "- Set the environment variable `SQZONE_JSESSIONID` to the `JSESSIONID` cookie of a logged-in browser session. The credential is deliberately not stored in the notebook.\n",
    "- Optionally set `SQZONE_OUTPUT_FILE` to change where the Excel file is written.\n",
    "\n",
    "The second code cell re-saves whatever is currently held in `all_data` without fetching again."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a84cbd49a7363225",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "from datetime import datetime, timedelta\n",
    "\n",
    "import pandas as pd\n",
    "import requests\n",
    "from tqdm import tqdm\n",
    "\n",
    "# ---------------------------------------------------------------------------\n",
    "# Configuration\n",
    "# ---------------------------------------------------------------------------\n",
    "# The session cookie is a live credential, so it is read from the environment\n",
    "# rather than stored in the notebook. Set SQZONE_JSESSIONID to the JSESSIONID\n",
    "# value of a logged-in browser session before running this cell.\n",
    "JSESSIONID = os.environ.get('SQZONE_JSESSIONID', '').strip()\n",
    "if not JSESSIONID:\n",
    "    raise RuntimeError('未设置环境变量 SQZONE_JSESSIONID,无法访问接口。')\n",
    "\n",
    "COOKIES = {\n",
    "    'JSESSIONID': JSESSIONID,\n",
    "}\n",
    "\n",
    "HEADERS = {\n",
    "    'Accept': 'application/json, text/plain, */*',\n",
    "    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
    "    'Connection': 'keep-alive',\n",
    "    'Content-Type': 'application/json;charset=UTF-8',\n",
    "    'Origin': 'https://www.sqzone.com',\n",
    "    'Referer': 'https://www.sqzone.com/launa/web/workOrder/woManage',\n",
    "    'Sec-Fetch-Dest': 'empty',\n",
    "    'Sec-Fetch-Mode': 'cors',\n",
    "    'Sec-Fetch-Site': 'same-origin',\n",
    "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36 Edg/146.0.0.0',\n",
    "    'X-Requested-With': 'XMLHttpRequest',\n",
    "    'appName': 'SQLINK',\n",
    "    'sec-ch-ua': '\"Chromium\";v=\"146\", \"Not-A.Brand\";v=\"24\", \"Microsoft Edge\";v=\"146\"',\n",
    "    'sec-ch-ua-mobile': '?0',\n",
    "    'sec-ch-ua-platform': '\"Windows\"',\n",
    "}\n",
    "\n",
    "URL = 'https://www.sqzone.com/launa/pc/dataCenter/queryShopTurnoverInfo'\n",
    "\n",
    "# Default output path; set SQZONE_OUTPUT_FILE to write somewhere else.\n",
    "OUTPUT_FILE = os.environ.get(\n",
    "    'SQZONE_OUTPUT_FILE',\n",
    "    r'D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\神汽链10年历史数据1.xlsx',\n",
    ")\n",
    "# Checkpoint file written next to the final output: '<name>_temp<ext>'.\n",
    "_output_root, _output_ext = os.path.splitext(OUTPUT_FILE)\n",
    "TEMP_FILE = f'{_output_root}_temp{_output_ext}'\n",
    "\n",
    "PAGE_SIZE = 50            # rows requested per page\n",
    "PAGE_DELAY_SECONDS = 1    # pause between consecutive page requests\n",
    "CHECKPOINT_EVERY = 5000   # write TEMP_FILE once this many new rows have accumulated\n",
    "FIRST_MONTH = (2020, 11)  # (year, month) of the first month to fetch\n",
    "LAST_MONTH = (2026, 3)    # (year, month) of the last month to fetch, inclusive\n",
    "\n",
    "# Make sure the output directory exists (dirname is '' for a bare file name,\n",
    "# and os.makedirs('') would raise).\n",
    "_output_dir = os.path.dirname(OUTPUT_FILE)\n",
    "if _output_dir:\n",
    "    os.makedirs(_output_dir, exist_ok=True)\n",
    "\n",
    "\n",
    "# ---------------------------------------------------------------------------\n",
    "# Helpers\n",
    "# ---------------------------------------------------------------------------\n",
    "def get_month_range(start_year, start_month, end_year, end_month):\n",
    "    \"\"\"Return the first and last day of every month in an inclusive range.\n",
    "\n",
    "    Args:\n",
    "        start_year, start_month: first month of the range.\n",
    "        end_year, end_month: last month of the range (included).\n",
    "\n",
    "    Returns:\n",
    "        A list of (first_day, last_day) tuples of 'YYYY-MM-DD' strings, one per\n",
    "        month in chronological order. Empty if the start is after the end.\n",
    "    \"\"\"\n",
    "    dates = []\n",
    "    current = datetime(start_year, start_month, 1)\n",
    "    end = datetime(end_year, end_month, 1)\n",
    "    while current <= end:\n",
    "        # Day 28 exists in every month and 28 + 4 days always lands in the\n",
    "        # following month; replace(day=1) then snaps to its first day.\n",
    "        next_month = (current.replace(day=28) + timedelta(days=4)).replace(day=1)\n",
    "        last_day = next_month - timedelta(days=1)\n",
    "        dates.append((\n",
    "            current.strftime('%Y-%m-%d'),\n",
    "            last_day.strftime('%Y-%m-%d'),\n",
    "        ))\n",
    "        current = next_month\n",
    "    return dates\n",
    "\n",
    "\n",
    "def fetch_page_with_retry(start_date, end_date, page_num, max_retries=3):\n",
    "    \"\"\"Request one result page, retrying when the request fails.\n",
    "\n",
    "    Args:\n",
    "        start_date: value sent as 'startTime'.\n",
    "        end_date: value sent as 'endTime'.\n",
    "        page_num: value sent as 'curPage'.\n",
    "        max_retries: maximum number of attempts.\n",
    "\n",
    "    Returns:\n",
    "        The list found under data.contents (an empty list when it is missing\n",
    "        or null), or None when every attempt failed.\n",
    "    \"\"\"\n",
    "    json_data = {\n",
    "        'keyword': '',\n",
    "        'pageSize': PAGE_SIZE,\n",
    "        'curPage': page_num,\n",
    "        'shopId': '',\n",
    "        'payId': [],\n",
    "        'payStatus': '',\n",
    "        'startTime': start_date,\n",
    "        'endTime': end_date,\n",
    "    }\n",
    "\n",
    "    for attempt in range(max_retries):\n",
    "        try:\n",
    "            response = requests.post(\n",
    "                URL,\n",
    "                cookies=COOKIES,\n",
    "                headers=HEADERS,\n",
    "                json=json_data,\n",
    "                timeout=15,  # never wait indefinitely on a stalled connection\n",
    "            )\n",
    "\n",
    "            if response.status_code == 200:\n",
    "                res_json = response.json()\n",
    "                # A non-empty 'data' object is taken as the success marker.\n",
    "                data = res_json.get('data') if isinstance(res_json, dict) else None\n",
    "                if isinstance(data, dict) and data:\n",
    "                    # 'contents' may be present but null; that is an empty\n",
    "                    # page, not a failed request.\n",
    "                    return data.get('contents') or []\n",
    "                # Probably a business-level error: log it and retry.\n",
    "                print(f'第{page_num}页业务返回异常: {res_json}')\n",
    "            else:\n",
    "                print(f'第{page_num}页HTTP错误: {response.status_code}')\n",
    "\n",
    "        # ValueError covers JSON decode failures on requests versions whose\n",
    "        # decode error is not a RequestException subclass.\n",
    "        except (requests.exceptions.RequestException, ValueError) as e:\n",
    "            print(f'第{page_num}页网络请求失败 (尝试 {attempt+1}/{max_retries}): {e}')\n",
    "\n",
    "        # Linear back-off before the next attempt: 2s, 4s, 6s, ...\n",
    "        if attempt < max_retries - 1:\n",
    "            wait_time = (attempt + 1) * 2\n",
    "            print(f'   -> 等待 {wait_time} 秒后重试...')\n",
    "            time.sleep(wait_time)\n",
    "\n",
    "    # Every attempt failed.\n",
    "    return None\n",
    "\n",
    "\n",
    "def flatten_record(record):\n",
    "    \"\"\"Merge a record's top-level fields into each entry of its 'partsViews'.\n",
    "\n",
    "    Returns one flat dict per part, with the part's keys overriding top-level\n",
    "    keys of the same name. A record whose 'partsViews' is missing, null or\n",
    "    empty produces no rows (main records without parts are not kept).\n",
    "    \"\"\"\n",
    "    base_fields = {k: v for k, v in record.items() if k != 'partsViews'}\n",
    "    return [{**base_fields, **part} for part in (record.get('partsViews') or [])]\n",
    "\n",
    "\n",
    "def save_results(records, path):\n",
    "    \"\"\"Write records to an Excel file at path and report the outcome.\n",
    "\n",
    "    Returns True when a file was written, False when records is empty.\n",
    "    \"\"\"\n",
    "    if not records:\n",
    "        print('❌ 未抓取到任何数据。')\n",
    "        return False\n",
    "    pd.DataFrame(records).to_excel(path, index=False)\n",
    "    print(f'✅ 全部完成!共抓取 {len(records)} 条数据,已保存至 {path}')\n",
    "    return True\n",
    "\n",
    "\n",
    "# ---------------------------------------------------------------------------\n",
    "# Fetch every month\n",
    "# ---------------------------------------------------------------------------\n",
    "all_data = []\n",
    "failed_ranges = []           # (start_date, end_date, page) where fetching gave up\n",
    "rows_at_last_checkpoint = 0  # len(all_data) when TEMP_FILE was last written\n",
    "\n",
    "month_ranges = get_month_range(*FIRST_MONTH, *LAST_MONTH)\n",
    "\n",
    "for start_date, end_date in tqdm(month_ranges, desc='正在抓取数据'):\n",
    "    page = 1\n",
    "    while True:\n",
    "        data_list = fetch_page_with_retry(start_date, end_date, page, max_retries=3)\n",
    "\n",
    "        if data_list is None:\n",
    "            # All retries failed. Stop this month rather than continue with a\n",
    "            # gap in the middle, and remember it so it can be re-fetched.\n",
    "            print(f'⚠️ 严重警告: {start_date}-{end_date} 第{page}页 多次重试失败,已终止该月份的抓取。')\n",
    "            failed_ranges.append((start_date, end_date, page))\n",
    "            break\n",
    "\n",
    "        if not data_list:\n",
    "            # Normal end of the month: no more rows.\n",
    "            break\n",
    "\n",
    "        for data in data_list:\n",
    "            all_data.extend(flatten_record(data))\n",
    "\n",
    "        page += 1\n",
    "        # Pause between pages.\n",
    "        time.sleep(PAGE_DELAY_SECONDS)\n",
    "\n",
    "    # Checkpoint after a month once enough new rows have accumulated, so a\n",
    "    # crash late in the run does not lose everything fetched so far.\n",
    "    if len(all_data) - rows_at_last_checkpoint >= CHECKPOINT_EVERY:\n",
    "        pd.DataFrame(all_data).to_excel(TEMP_FILE, index=False)\n",
    "        rows_at_last_checkpoint = len(all_data)\n",
    "        print('临时保存已完成')\n",
    "\n",
    "# Final save.\n",
    "save_results(all_data, OUTPUT_FILE)\n",
    "\n",
    "if failed_ranges:\n",
    "    print(f'⚠️ 以下 {len(failed_ranges)} 个月份抓取不完整,需要重新抓取:')\n",
    "    for failed_start, failed_end, failed_page in failed_ranges:\n",
    "        print(f'   {failed_start} ~ {failed_end}(从第{failed_page}页起缺失)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2abfb70a61d82a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-save the rows currently held in all_data (for example after an\n",
    "# interrupted run) without fetching anything again.\n",
    "save_results(all_data, OUTPUT_FILE)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "F6+宜搭+其它",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}