{
|
||
"cells": [
|
||
{
|
||
"metadata": {},
|
||
"cell_type": "markdown",
|
||
"source": "## 订单明细",
|
||
"id": "8bdaf70d574d1868"
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"ExecuteTime": {
|
||
"end_time": "2026-01-20T09:45:36.313844400Z",
|
||
"start_time": "2026-01-20T09:38:55.425173700Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import requests\n",
|
||
"from tqdm import tqdm\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"import time\n",
|
||
"\n",
|
||
"all_data_list = []\n",
"\n",
"# Column names of the order-detail report, in the order the td cells appear in a row.\n",
"keys = [\n",
"    \"单号\", \"用料\", \"图片\", \"客户姓名\", \"客户电话\", \"车牌号码\", \"车型\",\n",
"    \"产品金额\", \"服务费\", \"上门费\", \"折扣金额\", \"折扣原因\", \"实收金额\",\n",
"    \"结算方式\", \"新老用户\", \"订单来源\", \"下单方式\", \"服务类型\", \"服务商\",\n",
"    \"技师\", \"下单时间\", \"预约时间\", \"完成时间\", \"评价状态\", \"工单状态\",\n",
"    \"订单状态\", \"全车检测\", \"备注\"\n",
"]\n",
"\n",
"# A row whose first cell contains one of these markers is skipped instead of being stored as an order.\n",
"skip_markers = (\"单号\", \"总计\", \"订单列表\", \"注:红色订单为实收金额有误订单\")\n",
"\n",
"# The headers are the same for every page, so they are built once instead of on every iteration.\n",
"headers = {\n",
"    'Accept': 'text/html, */*; q=0.01',\n",
"    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
"    'Connection': 'keep-alive',\n",
"    'Referer': 'http://rp.chezizhu.com/ReportServer?reportlet=czz/order/order_detail_md.cpt&servicer_id=HyBKFgc2uCNtqs59aDZhc2&database=chezizhu_14',\n",
"    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0',\n",
"    'X-Requested-With': 'XMLHttpRequest',\n",
"}\n",
"\n",
"for i in tqdm(range(1, 1303)):\n",
"    time.sleep(0.2)  # pause between page requests\n",
"\n",
"    # NOTE(review): '_' and 'sessionID' are hard-coded; presumably they were copied from a live\n",
"    # browser session and have to be refreshed before a new run - confirm.\n",
"    params = {\n",
"        '_': '1768901546535',\n",
"        '__boxModel__': 'true',\n",
"        'op': 'page_content',\n",
"        'sessionID': '99282',\n",
"        'pn': i,  # page number\n",
"        '__fit__': 'false',\n",
"    }\n",
"\n",
"    # verify=False is kept from the original call; the timeout stops a stalled connection\n",
"    # from blocking the loop forever.\n",
"    res = requests.get('http://rp.chezizhu.com/ReportServer', params=params, headers=headers, verify=False, timeout=30)\n",
"\n",
"    soup = BeautifulSoup(res.text, 'lxml')\n",
"    for tr in soup.find_all('tr'):\n",
"        tds = tr.find_all('td')\n",
"        # A row without td cells has nothing to store, and tds[0] would raise IndexError on it.\n",
"        if not tds:\n",
"            continue\n",
"        first_cell = tds[0].text\n",
"        if any(marker in first_cell for marker in skip_markers):\n",
"            continue\n",
"\n",
"        # zip() pairs column names with cell texts by position and stops at the shorter of the two.\n",
"        order_dict = dict(zip(keys, [td.text for td in tds]))\n",
"        all_data_list.append(order_dict)\n",
"\n",
"df = pd.DataFrame(all_data_list)\n",
"df.to_excel(r'D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\大唛云订单明细-18135844455.xlsx', index=False)\n"
|
||
],
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"100%|██████████| 1302/1302 [06:34<00:00, 3.30it/s]\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 6
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2026-01-20T09:34:49.479362100Z",
|
||
"start_time": "2026-01-20T09:34:49.180981Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Write the rows currently held in all_data_list to the order-detail workbook.\n",
"order_detail_path = r'D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\大唛云订单明细-18135844455.xlsx'\n",
"df = pd.DataFrame(all_data_list)\n",
"df.to_excel(order_detail_path, index=False)"
|
||
],
|
||
"id": "a848bd6b4137743e",
|
||
"outputs": [],
|
||
"execution_count": 4
|
||
},
|
||
{
|
||
"metadata": {},
|
||
"cell_type": "markdown",
|
||
"source": "## 历史维修记录",
|
||
"id": "271853920f6210f"
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-07-14T11:44:43.656887Z",
|
||
"start_time": "2025-07-14T09:38:43.922512Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"import requests\n",
|
||
"import re\n",
|
||
"import time\n",
|
||
"\n",
|
||
"timestamp_ms = int(time.time() * 1000)\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"import requests\n",
|
||
"from tqdm import tqdm\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"from urllib.parse import urlencode, quote\n",
|
||
"import time\n",
|
||
"from requests.exceptions import RequestException\n",
|
||
"\n",
|
||
"# Read the order list from the order-detail workbook.\n",
"# '单号' is forced to str: the loop below concatenates it into the request URL, and a numeric\n",
"# dtype inferred by pandas for all-digit order numbers would raise TypeError there.\n",
"df = pd.read_excel(\n",
"    r\"D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\大唛云订单明细-18135844455.xlsx\",\n",
"    dtype={'单号': str},\n",
")\n",
"\n",
"# Labels of the single-value fields that are looked up on every order page.\n",
"BASE_FIELDS = [\n",
"    '保险公司', '保险到期', '年检日期', '车架号',\n",
"    '购车日期', '服务日期', '服务里程',\n",
"    '下次服务日期', '下次服务里程'\n",
"]\n",
"\n",
"# Two frames hold the two kinds of result.\n",
"base_info_df = df.copy()  # one row per order: the columns read above plus the BASE_FIELDS values\n",
"service_items_df = pd.DataFrame()  # one row per consumption item\n",
|
||
"\n",
|
||
"\n",
|
||
"def make_request_with_retry(url, headers, max_retries=10, retry_delay=1, timeout=10):\n",
"    \"\"\"Send a GET request and retry when it fails.\n",
"\n",
"    Args:\n",
"        url: Address to request.\n",
"        headers: HTTP headers sent with the request.\n",
"        max_retries: Maximum number of attempts, the first one included.\n",
"        retry_delay: Seconds to sleep between two attempts.\n",
"        timeout: Value passed to requests.get as its timeout (seconds).\n",
"\n",
"    Returns:\n",
"        The response of the first attempt for which raise_for_status() does not raise,\n",
"        or None when max_retries is smaller than 1 and no attempt is made.\n",
"\n",
"    Raises:\n",
"        RequestException: The error of the last attempt once every attempt has failed.\n",
"    \"\"\"\n",
"    for attempt in range(max_retries):\n",
"        try:\n",
"            res = requests.get(url=url, headers=headers, timeout=timeout)\n",
"            res.raise_for_status()  # an HTTP error status counts as a failed attempt\n",
"            return res\n",
"        except RequestException as e:\n",
"            # Out of attempts: hand the last error to the caller.\n",
"            if attempt == max_retries - 1:\n",
"                raise\n",
"            print(f\"请求失败,第 {attempt + 1} 次重试... 错误: {str(e)}\")\n",
"            time.sleep(retry_delay)\n",
"    return None\n",
|
||
"\n",
|
||
"\n",
|
||
"def extract_session_id(html_content):\n",
"    \"\"\"Collect the session ids embedded in the script tags of a report page.\n",
"\n",
"    Args:\n",
"        html_content: HTML text of the page.\n",
"\n",
"    Returns:\n",
"        List of unique session id strings (digits only) in the order they are first found;\n",
"        empty when no script matches.\n",
"    \"\"\"\n",
"    soup = BeautifulSoup(html_content, \"html.parser\")\n",
"    # `string=` replaces the `text=` argument, which BeautifulSoup reports as deprecated.\n",
"    scripts = soup.find_all(\"script\", string=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
"\n",
"    # The two places an id is read from: the register call and the variable assignment.\n",
"    patterns = (\n",
"        r\"FR\\.SessionMgr\\.register\\('(\\d+)'\",\n",
"        r\"currentSessionID\\s*=\\s*'(\\d+)'\",\n",
"    )\n",
"\n",
"    # A list with a membership check keeps first-seen order, so a caller that takes\n",
"    # element [0] gets a stable result (the set used before has no defined order).\n",
"    session_ids = []\n",
"    for script in scripts:\n",
"        for pattern in patterns:\n",
"            match = re.search(pattern, script.text)\n",
"            if match and match.group(1) not in session_ids:\n",
"                session_ids.append(match.group(1))\n",
"\n",
"    return session_ids\n",
|
||
"\n",
|
||
"\n",
|
||
"# Labels of the consumption-item columns, in the order the cells appear in an item row.\n",
"ITEM_COLUMNS = ['套餐', '服务项目', '产品名称', '产品型号', '数量', '规格', '单位', '单价', '产品小计', '工时', '合计']\n",
"MAX_ATTEMPTS = 10  # attempts per order before it is written to failed_orders.txt\n",
"\n",
"# Headers of the first request; they do not depend on the order, so they are built once.\n",
"report_headers = {\n",
"    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',\n",
"    'Accept-Encoding': 'gzip, deflate',\n",
"    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
"    'Cache-Control': 'max-age=0',\n",
"    'Connection': 'keep-alive',\n",
"    'Cookie': 'td_cookie=2912605941',\n",
"    'Host': 'rp.chezizhu.com',\n",
"    'Referer': 'http://rp.chezizhu.com/ReportServer?reportlet=czz/order/order_detail_md.cpt&servicer_id=CJRuYehLSqLqy4snS5bLoo&database=chezizhu_14',\n",
"    'Upgrade-Insecure-Requests': '1',\n",
"    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0'\n",
"}\n",
"\n",
"# Item rows are collected in a list and appended to service_items_df once after the loop;\n",
"# a pd.concat per row copies every earlier row again.\n",
"service_item_rows = []\n",
"\n",
"for index, row in tqdm(list(df.iterrows()), desc=\"处理订单\"):\n",
"    order_number = row[\"单号\"]\n",
"    timestamp_ms = int(time.time() * 1000)\n",
"\n",
"    for attempt in range(MAX_ATTEMPTS):\n",
"        try:\n",
"            # ---------- request 1: open the order report; its page carries the session id ----------\n",
"            report_url = (\n",
"                \"http://rp.chezizhu.com/ReportServer?reportlet=/czz/order/order.cpt&__parameters__={%22_%22:%22\" + f\"{timestamp_ms}\" + \"%22,%22PINGJIA%22:[5b][5d],%22DISCOUNT_DESC%22:%22%22,%22B%22:%222011-05-01%22,%22LABELPLATE_NUMBER%22:%22%25E8%25BD%25A6%25E7%2589%258C%25E5%258F%25B7%22,%22LABELUSER_NAME%22:%22%25E5%25A7%2593%25E5%2590%258D%22,%22E%22:%222025-05-07%22,%22SERVICER%22:[5b][5d],%22CAR_NAME%22:%22%22,%22ORDER_SERVICE_TYPE%22:%22%22,%22order_number%22:%22\" + order_number + \"%22,%22USER_MOBILE%22:%22%22,%22LABELUSER_STATE%22:%22%25E6%2596%25B0%25E8%2580%2581%25E7%2594%25A8%25E6%2588%25B7%22,%22LABELCAR_NAME%22:%22%25E8%25BD%25A6%25E5%259E%258B%22,%22SERVICER_ID%22:%22683XGWqnyn66mE1oa8Gkzb%22,%22LABELORDER_SOURCE%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E6%259D%25A5%25E6%25BA%2590%22,%22LABELE%22:%22%25E8%2587%25B3%22,%22LABELSERVER%22:%22%25E9%2597%25A8%25E5%25BA%2597%22,%22LABELGONGDAN_STATUS%22:%22%25E5%25B7%25A5%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22LABELORDER_STATUS%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22ORDER_STATUS%22:[5b][5d],%22CMD%22:%22parameters_d%22,%22PAY_METHOD%22:%22%22,%22LABELUSER_MOBILE%22:%22%25E5%25AE%25A2%25E6%2588%25B7%25E7%2594%25B5%25E8%25AF%259D%22,%22LABELPINGJIA%22:%22%25E8%25AF%2584%25E4%25BB%25B7%25E7%258A%25B6%25E6%2580%2581%22,%22USER_STATE%22:[5b][5d],%22LABELRP_TYPE%22:%22%25E6%258A%25A5%25E8%25A1%25A8%25E7%25B1%25BB%25E5%259E%258B%22,%22LABELPAY_METHOD%22:%22%25E7%25BB%2593%25E7%25AE%2597%25E6%2596%25B9%25E5%25BC%258F%22,%22USER_NAME%22:%22%22,%22REPORTNAME%22:%22czz%252Forder%252Forder_detail_md.cpt%22,%22DISCOUNT_TYPE%22:%22%22,%22DATE_TYPE%22:%22%25E5%25AE%258C%25E6%2588%2590%25E6%2597%25A5%25E6%259C%259F%22,%22LABELDISCOUNT_DESC%22:%22%25E6%258A%2598%25E6%2589%25A3%25E5%258E%259F%25E5%259B%25A0%22,%22GONGDAN_STATUS%22:[5b][5d],%22__STREAMCLOSED__%22:%22%22,%22__pi__%22:%22true%22,%22CHANNEL%22:%22%22,%22DATABASE%22:%22chezizhu_14%22,%22ORDER_SOURCE%22:[5b][5d],%22LABELORDER_NUMBER%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E5%258F%25B7%22,%22PLATE_NUMBER%22:%22%22,%22RP_TYPE%22:%22%25E9%25AB%2598%25E7%25BA%25A7%25E6%2590%259C%25E7%25B4%25A2%22}\"\n",
"            )\n",
"\n",
"            # Goes through the same retry/timeout helper as the second request\n",
"            # (it used to be a bare requests.get without a timeout).\n",
"            res = make_request_with_retry(report_url, report_headers)\n",
"\n",
"            session_ids = extract_session_id(res.text)\n",
"            # Name the real problem instead of failing with an IndexError on session_ids[0].\n",
"            if not session_ids:\n",
"                raise ValueError(\"no sessionID found in the report page\")\n",
"\n",
"            # ---------- request 2: first content page of that session ----------\n",
"            content_url = f\"http://rp.chezizhu.com/ReportServer?_={timestamp_ms}&__boxModel__=true&op=page_content&sessionID={session_ids[0]}&pn=1&__fit__=false\"\n",
"\n",
"            content_headers = {\n",
"                'Accept': '*/*',\n",
"                'Accept-Encoding': 'gzip, deflate',\n",
"                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
"                'Connection': 'keep-alive',\n",
"                'Cookie': 'td_cookie=2912605941',\n",
"                'Host': 'rp.chezizhu.com',\n",
"                \"Referer\": \"http://rp.chezizhu.com/ReportServer?reportlet=/czz/order/order.cpt&__parameters__={%22_%22:%22\" + f\"{timestamp_ms}\" + \"%22,%22PINGJIA%22:[5b][5d],%22DISCOUNT_DESC%22:%22%22,%22B%22:%222010-07-01%22,%22LABELPLATE_NUMBER%22:%22%25E8%25BD%25A6%25E7%2589%258C%25E5%258F%25B7%22,%22LABELUSER_NAME%22:%22%25E5%25A7%2593%25E5%2590%258D%22,%22E%22:%222025-07-14%22,%22SERVICER%22:[5b][5d],%22CAR_NAME%22:%22%22,%22ORDER_SERVICE_TYPE%22:%22%22,%22order_number%22:%22\" + order_number + \"%22,%22USER_MOBILE%22:%22%22,%22LABELUSER_STATE%22:%22%25E6%2596%25B0%25E8%2580%2581%25E7%2594%25A8%25E6%2588%25B7%22,%22LABELCAR_NAME%22:%22%25E8%25BD%25A6%25E5%259E%258B%22,%22SERVICER_ID%22:%22CJRuYehLSqLqy4snS5bLoo%22,%22LABELORDER_SOURCE%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E6%259D%25A5%25E6%25BA%2590%22,%22LABELE%22:%22%25E8%2587%25B3%22,%22LABELSERVER%22:%22%25E9%2597%25A8%25E5%25BA%2597%22,%22LABELGONGDAN_STATUS%22:%22%25E5%25B7%25A5%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22LABELORDER_STATUS%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22ORDER_STATUS%22:[5b][5d],%22CMD%22:%22parameters_d%22,%22PAY_METHOD%22:%22%22,%22LABELUSER_MOBILE%22:%22%25E5%25AE%25A2%25E6%2588%25B7%25E7%2594%25B5%25E8%25AF%259D%22,%22LABELPINGJIA%22:%22%25E8%25AF%2584%25E4%25BB%25B7%25E7%258A%25B6%25E6%2580%2581%22,%22USER_STATE%22:[5b][5d],%22LABELRP_TYPE%22:%22%25E6%258A%25A5%25E8%25A1%25A8%25E7%25B1%25BB%25E5%259E%258B%22,%22LABELPAY_METHOD%22:%22%25E7%25BB%2593%25E7%25AE%2597%25E6%2596%25B9%25E5%25BC%258F%22,%22USER_NAME%22:%22%22,%22REPORTNAME%22:%22czz%252Forder%252Forder_detail_md.cpt%22,%22DISCOUNT_TYPE%22:%22%22,%22DATE_TYPE%22:%22%25E5%25AE%258C%25E6%2588%2590%25E6%2597%25A5%25E6%259C%259F%22,%22LABELDISCOUNT_DESC%22:%22%25E6%258A%2598%25E6%2589%25A3%25E5%258E%259F%25E5%259B%25A0%22,%22GONGDAN_STATUS%22:[5b][5d],%22__STREAMCLOSED__%22:%22%22,%22__pi__%22:%22true%22,%22CHANNEL%22:%22%22,%22DATABASE%22:%22chezizhu_14%22,%22ORDER_SOURCE%22:[5b][5d],%22LABELORDER_NUMBER%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E5%258F%25B7%22,%22PLATE_NUMBER%22:%22%22,%22RP_TYPE%22:%22%25E9%25AB%2598%25E7%25BA%25A7%25E6%2590%259C%25E7%25B4%25A2%22}\",\n",
"                \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0\",\n",
"                \"X-Requested-With\": \"XMLHttpRequest\"\n",
"            }\n",
"\n",
"            res = make_request_with_retry(content_url, content_headers)\n",
"            soup = BeautifulSoup(res.text, 'lxml')\n",
"\n",
"            # ---------- 1. base info (one value per field) ----------\n",
"            base_info = {'单号': order_number}  # keep the original order number\n",
"            for field in BASE_FIELDS:\n",
"                # The value is read from the td that follows the label cell.\n",
"                field_td = soup.find('td', string=f'{field}:')\n",
"                value_td = field_td.find_next('td') if field_td else None\n",
"                base_info[field] = value_td.get_text(strip=True) if value_td else None\n",
"\n",
"            # Write the values into this order's row, creating a column the first time it is seen.\n",
"            for col, value in base_info.items():\n",
"                if col not in base_info_df.columns:\n",
"                    base_info_df[col] = None\n",
"                base_info_df.at[index, col] = value\n",
"\n",
"            # ---------- 2. consumption items (several rows) ----------\n",
"            order_items = []\n",
"            consumption_header = soup.find(lambda tag: tag.name == 'td' and '消费项目' in tag.text)\n",
"            table = consumption_header.find_parent('table') if consumption_header else None\n",
"            if table:\n",
"                # [1:] skips the header row; `item_row` no longer shadows the outer `row`.\n",
"                for row_idx, item_row in enumerate(table.find_all('tr')[1:], 1):\n",
"                    cols = item_row.find_all('td')\n",
"                    if len(cols) >= 6:\n",
"                        texts = [cell.get_text(strip=True) for cell in cols]\n",
"                        # Missing trailing cells become ''; zip() ignores cells beyond ITEM_COLUMNS.\n",
"                        texts += [''] * (len(ITEM_COLUMNS) - len(texts))\n",
"                        item = {'单号': order_number, '行号': row_idx}\n",
"                        item.update(zip(ITEM_COLUMNS, texts))\n",
"                        order_items.append(item)\n",
"\n",
"            # Items are kept only after the whole order has been parsed.\n",
"            service_item_rows.extend(order_items)\n",
"            break  # success: leave the retry loop\n",
"\n",
"        except Exception as e:\n",
"            if attempt == MAX_ATTEMPTS - 1:  # the last attempt failed as well\n",
"                print(f\"订单 {order_number} 处理失败,已达最大重试次数。错误: {str(e)}\")\n",
"                # Record the failed order; an explicit encoding keeps the file independent of the OS locale.\n",
"                with open('failed_orders.txt', 'a', encoding='utf-8') as f:\n",
"                    f.write(f\"{order_number}\\n\")\n",
"            time.sleep(1)  # wait one second before the next attempt\n",
"\n",
"# Append all collected item rows to service_items_df in one step.\n",
"if service_item_rows:\n",
"    service_items_df = pd.concat([service_items_df, pd.DataFrame(service_item_rows)], ignore_index=True)\n",
|
||
"\n",
|
||
"# ========== 3. Merge and save ==========\n",
"# Option 1: two related workbooks, one per frame.\n",
"base_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\订单基本信息.xlsx'\n",
"items_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\消费项目明细.xlsx'\n",
"for frame, target in ((base_info_df, base_output), (service_items_df, items_output)):\n",
"    frame.to_excel(target, index=False)\n",
"\n",
"# Option 2: one wide workbook in which every item row repeats the base columns of its order.\n",
"# It is only written when at least one item row exists.\n",
"has_items = not service_items_df.empty\n",
"if has_items:\n",
"    merged_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\历史维修记录明细-古交腾飞路店13593189858.xlsx'\n",
"    merged_df = base_info_df.merge(service_items_df, on='单号', how='left')\n",
"    merged_df.to_excel(merged_output, index=False)\n",
"\n",
"# Report where each workbook went.\n",
"summary = [\n",
"    \"\\n处理完成!\",\n",
"    f\"- 基本信息保存至: {base_output}\",\n",
"    f\"- 消费项目保存至: {items_output}\",\n",
"]\n",
"if has_items:\n",
"    summary.append(f\"- 合并数据保存至: {merged_output}\")\n",
"for line in summary:\n",
"    print(line)"
|
||
],
|
||
"id": "ed2755053dd426c",
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"处理订单: 0%| | 0/10868 [00:00<?, ?it/s]C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_4532\\3409432529.py:48: DeprecationWarning: The 'text' argument to find()-type methods is deprecated. Use 'string' instead.\n",
|
||
" scripts = soup.find_all(\"script\", text=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
|
||
"处理订单: 0%| | 1/10868 [00:00<28:50, 6.28it/s]C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_4532\\3409432529.py:48: DeprecationWarning: The 'text' argument to find()-type methods is deprecated. Use 'string' instead.\n",
|
||
" scripts = soup.find_all(\"script\", text=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
|
||
"处理订单: 18%|█▊ | 1974/10868 [09:20<50:33, 2.93it/s] "
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"请求失败,第 1 次重试... 错误: HTTPConnectionPool(host='rp.chezizhu.com', port=80): Max retries exceeded with url: /ReportServer?_=1752486488556&__boxModel__=true&op=page_content&sessionID=26250&pn=1&__fit__=false (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x000002C3E3298E90>, 'Connection to rp.chezizhu.com timed out. (connect timeout=10)'))\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"处理订单: 100%|██████████| 10868/10868 [2:03:15<00:00, 1.47it/s] \n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n",
|
||
"处理完成!\n",
|
||
"- 基本信息保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\订单基本信息.xlsx\n",
|
||
"- 消费项目保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\消费项目明细.xlsx\n",
|
||
"- 合并数据保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\历史维修记录明细-古交腾飞路店13593189858.xlsx\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 12
|
||
},
|
||
{
|
||
"metadata": {},
|
||
"cell_type": "markdown",
|
||
"source": "## 库存 (页面已支持导出)",
|
||
"id": "5e7ac3ee321a3549"
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-16T09:51:20.409717Z",
|
||
"start_time": "2025-04-16T09:51:13.116335Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import requests\n",
|
||
"from tqdm import tqdm\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"from urllib.parse import urlencode, quote\n",
|
||
"import time\n",
|
||
"from requests.exceptions import RequestException\n",
|
||
"\n",
|
||
"url = \"http://sp.chezizhu.com/alliance/store/list.htm\"\n",
"# NOTE(review): the JSESSIONID cookie is a hard-coded session credential; it should not be\n",
"# shared with the notebook and presumably has to be refreshed before a new run - confirm.\n",
"header = {\n",
"    \"content-type\": \"application/x-www-form-urlencoded; charset=UTF-8\",\n",
"    \"cookie\": \"JSESSIONID=28FFBA865A6CAAA76CB8CDB6A34B5886\",\n",
"    \"host\": \"sp.chezizhu.com\",\n",
"    \"origin\": \"http://sp.chezizhu.com\",\n",
"    \"referer\": \"http://sp.chezizhu.com/alliance/index.htm\",\n",
"    \"user-agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0\",\n",
"    \"x-requested-with\": \"XMLHttpRequest\"\n",
"}\n",
"\n",
"# Fields copied from every stock record; the same names are the columns of the export.\n",
"STOCK_FIELDS = [\"tp_number\", \"damai_id\", \"prod_name\", \"price\", \"standard\", \"unit\", \"count\", \"check_count\",\n",
"                \"check_date\"]\n",
"\n",
"all_data = []\n",
"for i in tqdm(range(1, 103)):\n",
"    # `payload` and `record` replace the single name `data`, which used to hold first the\n",
"    # request body and then each parsed record.\n",
"    payload = f'pageNum={i}&pageSize=20&dataType=json&keywords='\n",
"\n",
"    # The timeout stops a stalled connection from blocking the loop forever.\n",
"    res = requests.post(url, headers=header, data=payload, timeout=30)\n",
"    # `or []` also covers a response whose \"list\" value is null.\n",
"    data_list = res.json().get(\"list\") or []\n",
"\n",
"    for record in data_list:\n",
"        # A field missing from the record becomes None, as with the individual .get() calls before.\n",
"        all_data.append([record.get(field) for field in STOCK_FIELDS])\n",
"\n",
"df = pd.DataFrame(all_data, columns=STOCK_FIELDS)\n",
"df.to_excel(r\"D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\库存.xlsx\", index=False)\n",
|
||
"\n",
|
||
"\n",
|
||
"\n"
|
||
],
|
||
"id": "95616ee8b933e5ce",
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"100%|██████████| 102/102 [00:07<00:00, 14.43it/s]\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 52
|
||
},
|
||
{
|
||
"metadata": {},
|
||
"cell_type": "markdown",
|
||
"source": "",
|
||
"id": "70a45b885b2739c"
|
||
},
|
||
{
|
||
"metadata": {},
|
||
"cell_type": "markdown",
|
||
"source": "## 销售明细",
|
||
"id": "7df1375e5debe404"
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-07-15T01:34:31.895645Z",
|
||
"start_time": "2025-07-15T01:32:17.223368Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"import time\n",
"\n",
"import pandas as pd\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from requests.exceptions import RequestException\n",
"from tqdm import tqdm\n",
"\n",
"# `time` and `RequestException` are used by the retry loop below; they are imported here so\n",
"# that this cell does not depend on another cell having imported them first.\n",
"\n",
"all_data_list = []\n",
"keys = [\"门店\", \"订单编号\", \"产品类型\", \"产品名称\", \"型号\", \"数量\", \"采购单价\", \"销售单价\",\n",
"        \"采购金额\", \"销售金额\", \"毛利润\", \"下单人\", \"接单人\", \"车牌号\"]\n",
"# Cells are matched to column names from the right-hand side, so both lists are reversed.\n",
"reversed_keys = list(reversed(keys))\n",
"\n",
"MAX_RETRIES = 10  # attempts per page before the error is raised\n",
"\n",
"# The headers are the same for every page, so they are built once.\n",
"header = {\n",
"    \"Referer\": \"http://rp.chezizhu.com/ReportServer?reportlet=czz/storage/prod_sale_dtl_jm.cpt&servicer_id=CJRuYehLSqLqy4snS5bLoo&hideexport=true&database=chezizhu_14\",\n",
"    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0\",\n",
"    \"X-Requested-With\": \"XMLHttpRequest\"\n",
"}\n",
"\n",
"for i in tqdm(range(1, 1390)):\n",
"    # NOTE(review): `_` and sessionID are hard-coded; presumably they were copied from a live\n",
"    # browser session and have to be refreshed before a new run - confirm.\n",
"    url = f\"http://rp.chezizhu.com/ReportServer?_=1752541860351&__boxModel__=true&op=page_content&sessionID=91877&pn={i}&__fit__=false\"\n",
"\n",
"    for attempt in range(1, MAX_RETRIES + 1):\n",
"        try:\n",
"            res = requests.post(url, headers=header, timeout=30)\n",
"            break\n",
"        except RequestException as e:\n",
"            # Give up after the last attempt instead of retrying forever.\n",
"            if attempt == MAX_RETRIES:\n",
"                raise\n",
"            print(f\"请求失败,正在重试... 错误信息:{str(e)}\")\n",
"            time.sleep(1)\n",
"    soup = BeautifulSoup(res.text, \"html.parser\")\n",
"\n",
"    # Only rows that contain no th cell are read.\n",
"    for tr in soup.select('tr:not(:has(th))'):\n",
"        tds = [td.get_text(strip=True) for td in tr.find_all(\"td\")]\n",
"        # A row without td cells would otherwise be stored as a record of empty strings.\n",
"        if not tds:\n",
"            continue\n",
"        reversed_tds = list(reversed(tds))\n",
"\n",
"        # Pad with '' when cells are missing and drop surplus cells, so that exactly\n",
"        # len(reversed_keys) values remain.\n",
"        width = len(reversed_keys)\n",
"        reversed_tds = (reversed_tds + [\"\"] * width)[:width]\n",
"\n",
"        row_data = dict(zip(reversed_keys, reversed_tds))\n",
"        all_data_list.append(row_data)\n",
"\n",
"df = pd.DataFrame(all_data_list)\n",
"df.to_excel(r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\大唛云销售明细.xlsx', index=False)"
|
||
],
|
||
"id": "b3100f77cc1061b1",
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"100%|██████████| 1389/1389 [02:09<00:00, 10.70it/s]\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 14
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-29T07:16:14.900349Z",
|
||
"start_time": "2025-04-29T07:16:07.255065Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Write the rows currently held in all_data_list to the per-store sales workbook.\n",
"sales_detail_path = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\大唛云销售明细-古交腾飞路店13593189858.xlsx'\n",
"df = pd.DataFrame(all_data_list)\n",
"df.to_excel(sales_detail_path, index=False)"
|
||
],
|
||
"id": "7671bfd65cc413c7",
|
||
"outputs": [],
|
||
"execution_count": 9
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "2.7.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|