Files
F6--/张阳脚本/竞品系统数据导出/大唛云管理平台.ipynb
T
2026-01-30 11:28:35 +08:00

556 lines
28 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "## 订单明细",
"id": "8bdaf70d574d1868"
},
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2026-01-20T09:45:36.313844400Z",
"start_time": "2026-01-20T09:38:55.425173700Z"
}
},
"source": [
"import pandas as pd\n",
"import requests\n",
"from tqdm import tqdm\n",
"from bs4 import BeautifulSoup\n",
"import time\n",
"\n",
"# Export the order-detail report page by page and save every data row to Excel.\n",
"# NOTE(review): 'sessionID' and '_' below are fixed values, presumably tied to one browser session - confirm before re-running.\n",
"REPORT_URL = 'http://rp.chezizhu.com/ReportServer'\n",
"ORDER_PAGE_COUNT = 1302  # pages 1..1302 of the report are requested\n",
"\n",
"# Headers, column names and skip markers never change, so they are built once instead of once per page.\n",
"headers = {\n",
"    'Accept': 'text/html, */*; q=0.01',\n",
"    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
"    'Connection': 'keep-alive',\n",
"    'Referer': 'http://rp.chezizhu.com/ReportServer?reportlet=czz/order/order_detail_md.cpt&servicer_id=HyBKFgc2uCNtqs59aDZhc2&database=chezizhu_14',\n",
"    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0',\n",
"    'X-Requested-With': 'XMLHttpRequest',\n",
"}\n",
"\n",
"# Column names, in the order the cells appear in a data row.\n",
"keys = [\n",
"    \"单号\", \"用料\", \"图片\", \"客户姓名\", \"客户电话\", \"车牌号码\", \"车型\",\n",
"    \"产品金额\", \"服务费\", \"上门费\", \"折扣金额\", \"折扣原因\", \"实收金额\",\n",
"    \"结算方式\", \"新老用户\", \"订单来源\", \"下单方式\", \"服务类型\", \"服务商\",\n",
"    \"技师\", \"下单时间\", \"预约时间\", \"完成时间\", \"评价状态\", \"工单状态\",\n",
"    \"订单状态\", \"全车检测\", \"备注\"\n",
"]\n",
"\n",
"# A row is skipped when its first cell contains any of these texts (header, total, title and footnote rows).\n",
"skip_markers = (\"单号\", \"总计\", \"订单列表\", \"注:红色订单为实收金额有误订单\")\n",
"\n",
"all_data_list = []\n",
"for i in tqdm(range(1, ORDER_PAGE_COUNT + 1)):\n",
"    time.sleep(0.2)  # pause between page requests\n",
"\n",
"    params = {\n",
"        '_': '1768901546535',\n",
"        '__boxModel__': 'true',\n",
"        'op': 'page_content',\n",
"        'sessionID': '99282',\n",
"        'pn': i,\n",
"        '__fit__': 'false',\n",
"    }\n",
"\n",
"    # The timeout stops one stalled connection from hanging the whole export;\n",
"    # rows collected so far stay in all_data_list if it raises.\n",
"    res = requests.get(REPORT_URL, params=params, headers=headers, verify=False, timeout=30)\n",
"\n",
"    soup = BeautifulSoup(res.text, 'lxml')\n",
"    for tr in soup.find_all('tr'):\n",
"        tds = tr.find_all('td')\n",
"        if not tds:\n",
"            # A <tr> without any <td> has no first cell to inspect and no data to keep.\n",
"            continue\n",
"        first_cell = tds[0].text\n",
"        if any(marker in first_cell for marker in skip_markers):\n",
"            continue\n",
"\n",
"        # zip() stops at the shorter sequence, so a short row simply lacks its trailing columns.\n",
"        order_dict = dict(zip(keys, [td.text for td in tds]))\n",
"        all_data_list.append(order_dict)\n",
"\n",
"df = pd.DataFrame(all_data_list)\n",
"df.to_excel(r'D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\大唛云订单明细-18135844455.xlsx', index=False)\n"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1302/1302 [06:34<00:00, 3.30it/s]\n"
]
}
],
"execution_count": 6
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2026-01-20T09:34:49.479362100Z",
"start_time": "2026-01-20T09:34:49.180981Z"
}
},
"cell_type": "code",
"source": [
"# Write the rows currently held in all_data_list to the order-detail workbook.\n",
"order_detail_path = r'D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\大唛云订单明细-18135844455.xlsx'\n",
"df = pd.DataFrame(all_data_list)\n",
"df.to_excel(order_detail_path, index=False)"
],
"id": "a848bd6b4137743e",
"outputs": [],
"execution_count": 4
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## 历史维修记录",
"id": "271853920f6210f"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-07-14T11:44:43.656887Z",
"start_time": "2025-07-14T09:38:43.922512Z"
}
},
"cell_type": "code",
"source": [
"import requests\n",
"import re\n",
"import time\n",
"\n",
"timestamp_ms = int(time.time() * 1000)\n",
"\n",
"import pandas as pd\n",
"import requests\n",
"from tqdm import tqdm\n",
"from bs4 import BeautifulSoup\n",
"from urllib.parse import urlencode, quote\n",
"import time\n",
"from requests.exceptions import RequestException\n",
"\n",
"# Load the order list from the order-detail workbook.\n",
"order_detail_xlsx = r\"D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\大唛云订单明细-18135844455.xlsx\"\n",
"df = pd.read_excel(order_detail_xlsx)\n",
"\n",
"# Labels of the single-value fields looked up on each order's detail page.\n",
"BASE_FIELDS = [\n",
"    '保险公司',\n",
"    '保险到期',\n",
"    '年检日期',\n",
"    '车架号',\n",
"    '购车日期',\n",
"    '服务日期',\n",
"    '服务里程',\n",
"    '下次服务日期',\n",
"    '下次服务里程',\n",
"]\n",
"\n",
"# Two result frames: per-order base information and per-row consumption items.\n",
"service_items_df = pd.DataFrame()  # consumption item details\n",
"base_info_df = df.copy()  # base information, starting from the order list\n",
"\n",
"\n",
"def make_request_with_retry(url, headers, max_retries=10, retry_delay=1):\n",
"    \"\"\"GET ``url`` and try again when the request fails.\n",
"\n",
"    Args:\n",
"        url: Address to request.\n",
"        headers: HTTP headers sent with every attempt.\n",
"        max_retries: Total number of attempts.\n",
"        retry_delay: Seconds to sleep after a failed attempt.\n",
"\n",
"    Returns:\n",
"        The response of the first attempt that completes with a non-error\n",
"        status, or ``None`` when ``max_retries`` is not positive.\n",
"\n",
"    Raises:\n",
"        RequestException: Re-raised from the final failed attempt.\n",
"    \"\"\"\n",
"    final_attempt = max_retries - 1\n",
"    attempt = 0\n",
"    while attempt < max_retries:\n",
"        try:\n",
"            response = requests.get(url=url, headers=headers, timeout=10)\n",
"            response.raise_for_status()\n",
"        except RequestException as err:\n",
"            if attempt == final_attempt:\n",
"                raise\n",
"            print(f\"请求失败,第 {attempt + 1} 次重试... 错误: {str(err)}\")\n",
"            time.sleep(retry_delay)\n",
"        else:\n",
"            return response\n",
"        attempt += 1\n",
"    return None\n",
"\n",
"\n",
"def extract_session_id(html_content):\n",
"    \"\"\"Return the session IDs found in the page's inline scripts.\n",
"\n",
"    Two patterns are searched in every matching ``<script>`` tag:\n",
"    ``FR.SessionMgr.register('<digits>'`` and ``currentSessionID = '<digits>'``.\n",
"    Only the first occurrence of each pattern per script is used.\n",
"\n",
"    Args:\n",
"        html_content: HTML text of the report page.\n",
"\n",
"    Returns:\n",
"        list[str]: Distinct session IDs in order of first appearance; empty\n",
"        when no script matches.\n",
"    \"\"\"\n",
"    soup = BeautifulSoup(html_content, \"html.parser\")\n",
"    # 'string=' is the current name of the deprecated 'text=' keyword.\n",
"    scripts = soup.find_all(\"script\", string=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
"\n",
"    # A dict keeps insertion order, so element [0] is the same on every run;\n",
"    # converting a set to a list gave an arbitrary order.\n",
"    session_ids = {}\n",
"    for script in scripts:\n",
"        # Session ID passed to the register call\n",
"        register_match = re.search(r\"FR\\.SessionMgr\\.register\\('(\\d+)'\", script.text)\n",
"        if register_match:\n",
"            session_ids.setdefault(register_match.group(1), None)\n",
"\n",
"        # Session ID assigned to the currentSessionID variable\n",
"        current_match = re.search(r\"currentSessionID\\s*=\\s*'(\\d+)'\", script.text)\n",
"        if current_match:\n",
"            session_ids.setdefault(current_match.group(1), None)\n",
"\n",
"    return list(session_ids)\n",
"\n",
"\n",
"# For every order: open its single-order report, read the base fields and the\n",
"# consumption-item table, and retry the whole order when anything fails.\n",
"ORDER_MAX_ATTEMPTS = 10  # attempts per order before it is recorded as failed\n",
"# Item table columns, left to right; rows with fewer cells get '' for the missing ones.\n",
"ITEM_COLUMNS = ['套餐', '服务项目', '产品名称', '产品型号', '数量', '规格', '单位', '单价', '产品小计', '工时', '合计']\n",
"service_item_records = []  # one dict per consumption-item row, across all orders\n",
"\n",
"for index, row in tqdm(list(df.iterrows()), desc=\"处理订单\"):\n",
"    order_number = row[\"单号\"]\n",
"\n",
"    timestamp_ms = int(time.time() * 1000)\n",
"    for attempt in range(ORDER_MAX_ATTEMPTS):\n",
"        try:\n",
"            # Step 1: request the report for this order number; the returned page carries the session ID.\n",
"            url = (\n",
"                    \"http://rp.chezizhu.com/ReportServer?reportlet=/czz/order/order.cpt&__parameters__={%22_%22:%22\" + f\"{timestamp_ms}\" + \"%22,%22PINGJIA%22:[5b][5d],%22DISCOUNT_DESC%22:%22%22,%22B%22:%222011-05-01%22,%22LABELPLATE_NUMBER%22:%22%25E8%25BD%25A6%25E7%2589%258C%25E5%258F%25B7%22,%22LABELUSER_NAME%22:%22%25E5%25A7%2593%25E5%2590%258D%22,%22E%22:%222025-05-07%22,%22SERVICER%22:[5b][5d],%22CAR_NAME%22:%22%22,%22ORDER_SERVICE_TYPE%22:%22%22,%22order_number%22:%22\" + order_number + \"%22,%22USER_MOBILE%22:%22%22,%22LABELUSER_STATE%22:%22%25E6%2596%25B0%25E8%2580%2581%25E7%2594%25A8%25E6%2588%25B7%22,%22LABELCAR_NAME%22:%22%25E8%25BD%25A6%25E5%259E%258B%22,%22SERVICER_ID%22:%22683XGWqnyn66mE1oa8Gkzb%22,%22LABELORDER_SOURCE%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E6%259D%25A5%25E6%25BA%2590%22,%22LABELE%22:%22%25E8%2587%25B3%22,%22LABELSERVER%22:%22%25E9%2597%25A8%25E5%25BA%2597%22,%22LABELGONGDAN_STATUS%22:%22%25E5%25B7%25A5%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22LABELORDER_STATUS%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22ORDER_STATUS%22:[5b][5d],%22CMD%22:%22parameters_d%22,%22PAY_METHOD%22:%22%22,%22LABELUSER_MOBILE%22:%22%25E5%25AE%25A2%25E6%2588%25B7%25E7%2594%25B5%25E8%25AF%259D%22,%22LABELPINGJIA%22:%22%25E8%25AF%2584%25E4%25BB%25B7%25E7%258A%25B6%25E6%2580%2581%22,%22USER_STATE%22:[5b][5d],%22LABELRP_TYPE%22:%22%25E6%258A%25A5%25E8%25A1%25A8%25E7%25B1%25BB%25E5%259E%258B%22,%22LABELPAY_METHOD%22:%22%25E7%25BB%2593%25E7%25AE%2597%25E6%2596%25B9%25E5%25BC%258F%22,%22USER_NAME%22:%22%22,%22REPORTNAME%22:%22czz%252Forder%252Forder_detail_md.cpt%22,%22DISCOUNT_TYPE%22:%22%22,%22DATE_TYPE%22:%22%25E5%25AE%258C%25E6%2588%2590%25E6%2597%25A5%25E6%259C%259F%22,%22LABELDISCOUNT_DESC%22:%22%25E6%258A%2598%25E6%2589%25A3%25E5%258E%259F%25E5%259B%25A0%22,%22GONGDAN_STATUS%22:[5b][5d],%22__STREAMCLOSED__%22:%22%22,%22__pi__%22:%22true%22,%22CHANNEL%22:%22%22,%22DATABASE%22:%22chezizhu_14%22,%22ORDER_SOURCE%22:[5b][5d],%22LABELORDER_NUMBER%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E5%258F%25B7%22,%22PLATE_NUMBER%22:%22%22,%22RP_TYPE%22:%22%25E9%25AB%2598%25E7%25BA%25A7%25E6%2590%259C%25E7%25B4%25A2%22}\"\n",
"            )\n",
"\n",
"            headers = {\n",
"                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',\n",
"                'Accept-Encoding': 'gzip, deflate',\n",
"                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
"                'Cache-Control': 'max-age=0',\n",
"                'Connection': 'keep-alive',\n",
"                'Cookie': 'td_cookie=2912605941',\n",
"                'Host': 'rp.chezizhu.com',\n",
"                'Referer': 'http://rp.chezizhu.com/ReportServer?reportlet=czz/order/order_detail_md.cpt&servicer_id=CJRuYehLSqLqy4snS5bLoo&database=chezizhu_14',\n",
"                'Upgrade-Insecure-Requests': '1',\n",
"                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0'\n",
"            }\n",
"\n",
"            # Without a timeout this request could block forever; a timeout now raises and the order is retried.\n",
"            res = requests.get(url, headers=headers, timeout=10)\n",
"\n",
"            # An empty result makes session_ids[0] raise, which the except below turns into a retry.\n",
"            session_ids = extract_session_id(res.text)\n",
"\n",
"            # Step 2: request page 1 of the report content for that session.\n",
"            url = f\"http://rp.chezizhu.com/ReportServer?_={timestamp_ms}&__boxModel__=true&op=page_content&sessionID={session_ids[0]}&pn=1&__fit__=false\"\n",
"\n",
"            headers = {\n",
"                'Accept': '*/*',\n",
"                'Accept-Encoding': 'gzip, deflate',\n",
"                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
"                'Connection': 'keep-alive',\n",
"                'Cookie': 'td_cookie=2912605941',\n",
"                'Host': 'rp.chezizhu.com',\n",
"                \"Referer\": \"http://rp.chezizhu.com/ReportServer?reportlet=/czz/order/order.cpt&__parameters__={%22_%22:%22\" + f\"{timestamp_ms}\" + \"%22,%22PINGJIA%22:[5b][5d],%22DISCOUNT_DESC%22:%22%22,%22B%22:%222010-07-01%22,%22LABELPLATE_NUMBER%22:%22%25E8%25BD%25A6%25E7%2589%258C%25E5%258F%25B7%22,%22LABELUSER_NAME%22:%22%25E5%25A7%2593%25E5%2590%258D%22,%22E%22:%222025-07-14%22,%22SERVICER%22:[5b][5d],%22CAR_NAME%22:%22%22,%22ORDER_SERVICE_TYPE%22:%22%22,%22order_number%22:%22\" + order_number + \"%22,%22USER_MOBILE%22:%22%22,%22LABELUSER_STATE%22:%22%25E6%2596%25B0%25E8%2580%2581%25E7%2594%25A8%25E6%2588%25B7%22,%22LABELCAR_NAME%22:%22%25E8%25BD%25A6%25E5%259E%258B%22,%22SERVICER_ID%22:%22CJRuYehLSqLqy4snS5bLoo%22,%22LABELORDER_SOURCE%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E6%259D%25A5%25E6%25BA%2590%22,%22LABELE%22:%22%25E8%2587%25B3%22,%22LABELSERVER%22:%22%25E9%2597%25A8%25E5%25BA%2597%22,%22LABELGONGDAN_STATUS%22:%22%25E5%25B7%25A5%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22LABELORDER_STATUS%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22ORDER_STATUS%22:[5b][5d],%22CMD%22:%22parameters_d%22,%22PAY_METHOD%22:%22%22,%22LABELUSER_MOBILE%22:%22%25E5%25AE%25A2%25E6%2588%25B7%25E7%2594%25B5%25E8%25AF%259D%22,%22LABELPINGJIA%22:%22%25E8%25AF%2584%25E4%25BB%25B7%25E7%258A%25B6%25E6%2580%2581%22,%22USER_STATE%22:[5b][5d],%22LABELRP_TYPE%22:%22%25E6%258A%25A5%25E8%25A1%25A8%25E7%25B1%25BB%25E5%259E%258B%22,%22LABELPAY_METHOD%22:%22%25E7%25BB%2593%25E7%25AE%2597%25E6%2596%25B9%25E5%25BC%258F%22,%22USER_NAME%22:%22%22,%22REPORTNAME%22:%22czz%252Forder%252Forder_detail_md.cpt%22,%22DISCOUNT_TYPE%22:%22%22,%22DATE_TYPE%22:%22%25E5%25AE%258C%25E6%2588%2590%25E6%2597%25A5%25E6%259C%259F%22,%22LABELDISCOUNT_DESC%22:%22%25E6%258A%2598%25E6%2589%25A3%25E5%258E%259F%25E5%259B%25A0%22,%22GONGDAN_STATUS%22:[5b][5d],%22__STREAMCLOSED__%22:%22%22,%22__pi__%22:%22true%22,%22CHANNEL%22:%22%22,%22DATABASE%22:%22chezizhu_14%22,%22ORDER_SOURCE%22:[5b][5d],%22LABELORDER_NUMBER%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E5%258F%25B7%22,%22PLATE_NUMBER%22:%22%22,%22RP_TYPE%22:%22%25E9%25AB%2598%25E7%25BA%25A7%25E6%2590%259C%25E7%25B4%25A2%22}\",\n",
"                \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0\",\n",
"                \"X-Requested-With\": \"XMLHttpRequest\"\n",
"            }\n",
"\n",
"            # This request goes through the retrying helper.\n",
"            res = make_request_with_retry(url, headers)\n",
"\n",
"            soup = BeautifulSoup(res.text, 'lxml')\n",
"\n",
"            # ========== 1. Base information (one row per order) ==========\n",
"            base_info = {'单号': order_number}  # keep the original order number\n",
"\n",
"            for field in BASE_FIELDS:\n",
"                # The value is taken from the <td> that follows the label <td>.\n",
"                field_td = soup.find('td', string=field)\n",
"                value_td = field_td.find_next('td') if field_td else None\n",
"                base_info[field] = value_td.get_text(strip=True) if value_td else None\n",
"\n",
"            # Write the fields into this order's row, creating columns on first use.\n",
"            for col in base_info:\n",
"                if col not in base_info_df.columns:\n",
"                    base_info_df[col] = None\n",
"                base_info_df.at[index, col] = base_info[col]\n",
"\n",
"            # ========== 2. Consumption items (several rows per order) ==========\n",
"            # Rows are gathered in a per-attempt list and committed only after the whole\n",
"            # table parsed, so a failure halfway cannot leave duplicates behind on retry.\n",
"            order_items = []\n",
"            consumption_header = soup.find(lambda tag: tag.name == 'td' and '消费项目' in tag.text)\n",
"            table = consumption_header.find_parent('table') if consumption_header else None\n",
"            if table:\n",
"                item_rows = table.find_all('tr')[1:]  # skip the header row\n",
"\n",
"                # 'item_row' avoids reusing the name of the outer loop's 'row'.\n",
"                for row_idx, item_row in enumerate(item_rows, 1):\n",
"                    cells = [td.get_text(strip=True) for td in item_row.find_all('td')]\n",
"                    if len(cells) < 6:\n",
"                        continue\n",
"                    cells += [''] * (len(ITEM_COLUMNS) - len(cells))\n",
"                    # zip() ignores any cells beyond the known columns.\n",
"                    order_items.append({'单号': order_number, '行号': row_idx, **dict(zip(ITEM_COLUMNS, cells))})\n",
"\n",
"            service_item_records.extend(order_items)\n",
"            break  # success: leave the retry loop\n",
"\n",
"        except Exception as e:\n",
"            if attempt == ORDER_MAX_ATTEMPTS - 1:  # the last attempt failed as well\n",
"                print(f\"订单 {order_number} 处理失败,已达最大重试次数。错误: {str(e)}\")\n",
"                # Record the failed order number\n",
"                with open('failed_orders.txt', 'a', encoding='utf-8') as f:\n",
"                    f.write(f\"{order_number}\\n\")\n",
"            else:\n",
"                time.sleep(1)  # wait one second before the next attempt\n",
"\n",
"# Build the item frame once; concatenating one row at a time copied the whole frame for every row.\n",
"service_items_df = pd.DataFrame(service_item_records)\n",
"\n",
"# ========== 3. Save the results ==========\n",
"has_items = not service_items_df.empty\n",
"\n",
"# Option 1: two related workbooks, one for base information and one for the items.\n",
"base_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\订单基本信息.xlsx'\n",
"items_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\消费项目明细.xlsx'\n",
"for frame, target in ((base_info_df, base_output), (service_items_df, items_output)):\n",
"    frame.to_excel(target, index=False)\n",
"\n",
"# Option 2: one wide table in which every item row repeats its order's base information.\n",
"if has_items:\n",
"    merged_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\历史维修记录明细-古交腾飞路店13593189858.xlsx'\n",
"    merged_df = pd.merge(base_info_df, service_items_df, on='单号', how='left')\n",
"    merged_df.to_excel(merged_output, index=False)\n",
"\n",
"# Report where each file was written.\n",
"summary_lines = [\n",
"    \"\\n处理完成!\",\n",
"    f\"- 基本信息保存至: {base_output}\",\n",
"    f\"- 消费项目保存至: {items_output}\",\n",
"]\n",
"if has_items:\n",
"    summary_lines.append(f\"- 合并数据保存至: {merged_output}\")\n",
"for line in summary_lines:\n",
"    print(line)"
],
"id": "ed2755053dd426c",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"处理订单: 0%| | 0/10868 [00:00<?, ?it/s]C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_4532\\3409432529.py:48: DeprecationWarning: The 'text' argument to find()-type methods is deprecated. Use 'string' instead.\n",
" scripts = soup.find_all(\"script\", text=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
"处理订单: 0%| | 1/10868 [00:00<28:50, 6.28it/s]C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_4532\\3409432529.py:48: DeprecationWarning: The 'text' argument to find()-type methods is deprecated. Use 'string' instead.\n",
" scripts = soup.find_all(\"script\", text=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
"处理订单: 18%|█▊ | 1974/10868 [09:20<50:33, 2.93it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"请求失败,第 1 次重试... 错误: HTTPConnectionPool(host='rp.chezizhu.com', port=80): Max retries exceeded with url: /ReportServer?_=1752486488556&__boxModel__=true&op=page_content&sessionID=26250&pn=1&__fit__=false (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x000002C3E3298E90>, 'Connection to rp.chezizhu.com timed out. (connect timeout=10)'))\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"处理订单: 100%|██████████| 10868/10868 [2:03:15<00:00, 1.47it/s] \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"处理完成!\n",
"- 基本信息保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\订单基本信息.xlsx\n",
"- 消费项目保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\消费项目明细.xlsx\n",
"- 合并数据保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\历史维修记录明细-古交腾飞路店13593189858.xlsx\n"
]
}
],
"execution_count": 12
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## 库存 (页面已支持导出)",
"id": "5e7ac3ee321a3549"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-16T09:51:20.409717Z",
"start_time": "2025-04-16T09:51:13.116335Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import requests\n",
"from tqdm import tqdm\n",
"from bs4 import BeautifulSoup\n",
"from urllib.parse import urlencode, quote\n",
"import time\n",
"from requests.exceptions import RequestException\n",
"\n",
"# Export the inventory list page by page and save it to Excel.\n",
"url = \"http://sp.chezizhu.com/alliance/store/list.htm\"\n",
"# NOTE(review): the JSESSIONID cookie is hard-coded; presumably it must be replaced with a live session value before re-running - confirm.\n",
"header = {\n",
"    \"content-type\": \"application/x-www-form-urlencoded; charset=UTF-8\",\n",
"    \"cookie\": \"JSESSIONID=28FFBA865A6CAAA76CB8CDB6A34B5886\",\n",
"    \"host\": \"sp.chezizhu.com\",\n",
"    \"origin\": \"http://sp.chezizhu.com\",\n",
"    \"referer\": \"http://sp.chezizhu.com/alliance/index.htm\",\n",
"    \"user-agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0\",\n",
"    \"x-requested-with\": \"XMLHttpRequest\"\n",
"}\n",
"\n",
"INVENTORY_PAGE_COUNT = 102  # pages 1..102 are requested, 20 records per page\n",
"# Record keys to export; the same list names the output columns, so the two cannot drift apart.\n",
"INVENTORY_COLUMNS = [\"tp_number\", \"damai_id\", \"prod_name\", \"price\", \"standard\", \"unit\", \"count\", \"check_count\",\n",
"                     \"check_date\"]\n",
"\n",
"all_data = []\n",
"for i in tqdm(range(1, INVENTORY_PAGE_COUNT + 1)):\n",
"    # 'payload' and 'record' are separate names; one name used to serve as both the request body and the loop variable.\n",
"    payload = f'pageNum={i}&pageSize=20&dataType=json&keywords='\n",
"\n",
"    # The timeout stops a stalled connection from hanging the export.\n",
"    res = requests.post(url, headers=header, data=payload, timeout=30)\n",
"    data_list = res.json().get(\"list\", [])\n",
"\n",
"    for record in data_list:\n",
"        # .get() yields None for a key the record does not have.\n",
"        all_data.append([record.get(column) for column in INVENTORY_COLUMNS])\n",
"\n",
"df = pd.DataFrame(all_data, columns=INVENTORY_COLUMNS)\n",
"df.to_excel(r\"D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\库存.xlsx\", index=False)\n",
"\n",
"\n",
"\n"
],
"id": "95616ee8b933e5ce",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 102/102 [00:07<00:00, 14.43it/s]\n"
]
}
],
"execution_count": 52
},
{
"metadata": {},
"cell_type": "markdown",
"source": "",
"id": "70a45b885b2739c"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## 销售明细",
"id": "7df1375e5debe404"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-07-15T01:34:31.895645Z",
"start_time": "2025-07-15T01:32:17.223368Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import requests\n",
"from tqdm import tqdm\n",
"from bs4 import BeautifulSoup\n",
"\n",
"# The retry handler below needs these two names; they were not imported in this cell,\n",
"# so on a fresh kernel the first request error would have ended in a NameError.\n",
"import time\n",
"from requests.exceptions import RequestException\n",
"\n",
"# Export the sales-detail report page by page and save every data row to Excel.\n",
"# NOTE(review): 'sessionID' and '_' in the URL are fixed values, presumably tied to one browser session - confirm before re-running.\n",
"SALES_PAGE_COUNT = 1389  # pages 1..1389 of the report are requested\n",
"SALES_MAX_RETRIES = 10  # attempts per page before the error is re-raised\n",
"\n",
"all_data_list = []\n",
"keys = [\"门店\", \"订单编号\", \"产品类型\", \"产品名称\", \"型号\", \"数量\", \"采购单价\", \"销售单价\",\n",
"        \"采购金额\", \"销售金额\", \"毛利润\", \"下单人\", \"接单人\", \"车牌号\"]\n",
"reversed_keys = list(reversed(keys))\n",
"\n",
"# The headers are the same for every page, so they are built once.\n",
"header = {\n",
"    \"Referer\": \"http://rp.chezizhu.com/ReportServer?reportlet=czz/storage/prod_sale_dtl_jm.cpt&servicer_id=CJRuYehLSqLqy4snS5bLoo&hideexport=true&database=chezizhu_14\",\n",
"    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0\",\n",
"    \"X-Requested-With\": \"XMLHttpRequest\"\n",
"}\n",
"\n",
"for i in tqdm(range(1, SALES_PAGE_COUNT + 1)):\n",
"    url = f\"http://rp.chezizhu.com/ReportServer?_=1752541860351&__boxModel__=true&op=page_content&sessionID=91877&pn={i}&__fit__=false\"\n",
"\n",
"    # Bounded retry with a timeout: a stalled or unreachable server no longer loops forever.\n",
"    # Rows collected so far stay in all_data_list if the last attempt raises.\n",
"    for attempt in range(1, SALES_MAX_RETRIES + 1):\n",
"        try:\n",
"            res = requests.post(url, headers=header, timeout=30)\n",
"            break\n",
"        except RequestException as e:\n",
"            if attempt == SALES_MAX_RETRIES:\n",
"                raise\n",
"            print(f\"请求失败,正在重试... 错误信息:{str(e)}\")\n",
"            time.sleep(1)\n",
"    soup = BeautifulSoup(res.text, \"html.parser\")\n",
"\n",
"    # Data rows only: rows containing a <th> are excluded.\n",
"    for tr in soup.select('tr:not(:has(th))'):\n",
"        tds = [td.get_text(strip=True) for td in tr.find_all(\"td\")]\n",
"\n",
"        # Cells are matched to keys from the right-hand side: reverse, pad with \"\" up to\n",
"        # the key count, then cut off anything beyond it.\n",
"        reversed_tds = (list(reversed(tds)) + [\"\"] * len(reversed_keys))[:len(reversed_keys)]\n",
"\n",
"        row_data = dict(zip(reversed_keys, reversed_tds))\n",
"        all_data_list.append(row_data)\n",
"\n",
"df = pd.DataFrame(all_data_list)\n",
"df.to_excel(r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\大唛云销售明细.xlsx', index=False)"
],
"id": "b3100f77cc1061b1",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1389/1389 [02:09<00:00, 10.70it/s]\n"
]
}
],
"execution_count": 14
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-29T07:16:14.900349Z",
"start_time": "2025-04-29T07:16:07.255065Z"
}
},
"cell_type": "code",
"source": [
"# Write the rows currently held in all_data_list to the store-specific sales workbook.\n",
"sales_detail_path = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\大唛云销售明细-古交腾飞路店13593189858.xlsx'\n",
"df = pd.DataFrame(all_data_list)\n",
"df.to_excel(sales_detail_path, index=False)"
],
"id": "7671bfd65cc413c7",
"outputs": [],
"execution_count": 9
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}