411 lines
24 KiB
Plaintext
411 lines
24 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-15T03:14:27.994103Z",
|
||
"start_time": "2025-05-15T02:03:56.503917Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"import requests\n",
|
||
"import re\n",
|
||
"import time\n",
|
||
"\n",
|
||
"timestamp_ms = int(time.time() * 1000)\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"import requests\n",
|
||
"from tqdm import tqdm\n",
|
||
"from bs4 import BeautifulSoup\n",
|
||
"from urllib.parse import urlencode, quote\n",
|
||
"import time\n",
|
||
"from requests.exceptions import RequestException\n",
|
||
"\n",
|
"# Load the exported order list; the loop further down looks up each row by its '单号' value.\n",
"df = pd.read_excel(\n",
"    r\"D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\大唛云订单明细 (2).xlsx\"\n",
")\n",
"\n",
"# Labels of the single-value fields that are read from each order's report page.\n",
"BASE_FIELDS = [\n",
"    '保险公司',\n",
"    '保险到期',\n",
"    '年检日期',\n",
"    '车架号',\n",
"    '购车日期',\n",
"    '服务日期',\n",
"    '服务里程',\n",
"    '下次服务日期',\n",
"    '下次服务里程',\n",
"]\n",
"\n",
"# Two result tables are kept separately:\n",
"#   service_items_df - one row per service-item line, starts empty\n",
"#   base_info_df     - one row per order, starts as a copy of the input sheet\n",
"service_items_df = pd.DataFrame()\n",
"base_info_df = df.copy()\n",
|
||
"\n",
|
||
"\n",
|
"def make_request_with_retry(url, headers, max_retries=10, retry_delay=1):\n",
"    \"\"\"GET ``url`` with ``headers``, retrying when the request fails.\n",
"\n",
"    Every attempt uses a 10-second timeout and counts an HTTP error status as\n",
"    a failure (``raise_for_status``). After a failed attempt a message is\n",
"    printed and the function sleeps ``retry_delay`` seconds before the next\n",
"    try; the exception from the final attempt is re-raised unchanged.\n",
"\n",
"    Returns:\n",
"        The successful response, or ``None`` when ``max_retries`` is below 1\n",
"        (no request is sent in that case).\n",
"    \"\"\"\n",
"    for attempt_no in range(1, max_retries + 1):\n",
"        try:\n",
"            response = requests.get(url=url, headers=headers, timeout=10)\n",
"            response.raise_for_status()\n",
"        except RequestException as err:\n",
"            is_last_attempt = attempt_no == max_retries\n",
"            if is_last_attempt:\n",
"                raise\n",
"            print(f\"请求失败,第 {attempt_no} 次重试... 错误: {str(err)}\")\n",
"            time.sleep(retry_delay)\n",
"        else:\n",
"            return response\n",
"    return None\n",
|
||
"\n",
|
||
"\n",
|
"def extract_session_id(html_content):\n",
"    \"\"\"Collect the session IDs embedded in a report page's inline scripts.\n",
"\n",
"    Only <script> elements whose text mentions ``FR.SessionMgr.register`` or\n",
"    ``currentSessionID`` are inspected. From each one the digits are taken out\n",
"    of ``FR.SessionMgr.register('<digits>'`` and ``currentSessionID = '<digits>'``.\n",
"\n",
"    Args:\n",
"        html_content: HTML markup of the page.\n",
"\n",
"    Returns:\n",
"        A list of the distinct ID strings in the order they are first seen\n",
"        (within one script the register call is checked before the\n",
"        assignment). The list is empty when nothing matches.\n",
"    \"\"\"\n",
"    soup = BeautifulSoup(html_content, \"html.parser\")\n",
"    # `string=` is the current name of the deprecated `text=` keyword.\n",
"    scripts = soup.find_all(\"script\", string=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
"\n",
"    id_patterns = (\n",
"        r\"FR\\.SessionMgr\\.register\\('(\\d+)'\",  # session ID in the register call\n",
"        r\"currentSessionID\\s*=\\s*'(\\d+)'\",  # session ID in the variable assignment\n",
"    )\n",
"\n",
"    # A dict is used as an ordered set: the caller takes element [0], and the\n",
"    # iteration order of a plain set of strings is arbitrary.\n",
"    session_ids = {}\n",
"    for script in scripts:\n",
"        script_text = script.text\n",
"        for pattern in id_patterns:\n",
"            found = re.search(pattern, script_text)\n",
"            if found:\n",
"                session_ids[found.group(1)] = None\n",
"\n",
"    return list(session_ids)\n",
|
||
"\n",
|
||
"\n",
|
"# ========== Scrape every order ==========\n",
"MAX_ATTEMPTS = 10      # tries per order before it is logged to failed_orders.txt\n",
"REQUEST_TIMEOUT = 10   # seconds allowed for the report-page request\n",
"\n",
"# (field name, cell index) pairs for one row of the items table, listed from\n",
"# the last cell to the first. Cell index 4 is not read.\n",
"FIELD_ORDER = [\n",
"    ('合计', 10),\n",
"    ('工时', 9),\n",
"    ('产品小计', 8),\n",
"    ('单价', 7),\n",
"    ('单位', 6),\n",
"    ('规格', 5),\n",
"    ('产品型号', 3),\n",
"    ('产品名称', 2),\n",
"    ('服务项目', 1),\n",
"    ('套餐', 0),\n",
"]\n",
"\n",
"# Fields that are left empty, rather than copied from the previous row, when a\n",
"# row has no cell at their index.\n",
"DO_NOT_INHERIT_FIELDS = {\n",
"    '产品小计', '单价',\n",
"    '单位', '规格', '产品型号', '产品名称'\n",
"}\n",
"\n",
"# Headers of the first request (the report page whose scripts carry the session ID).\n",
"REPORT_PAGE_HEADERS = {\n",
"    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',\n",
"    'Accept-Encoding': 'gzip, deflate',\n",
"    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
"    'Cache-Control': 'max-age=0',\n",
"    'Connection': 'keep-alive',\n",
"    'Cookie': 'td_cookie=2912605941',\n",
"    'Host': 'rp.chezizhu.com',\n",
"    'Referer': 'http://rp.chezizhu.com/ReportServer?reportlet=czz/order/order_detail_md.cpt&servicer_id=683XGWqnyn66mE1oa8Gkzb&database=chezizhu_14',\n",
"    'Upgrade-Insecure-Requests': '1',\n",
"    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0'\n",
"}\n",
"\n",
"\n",
"def _fetch_order_soup(order_number, timestamp_ms):\n",
"    \"\"\"Download page 1 of one order's detail report and return it parsed.\n",
"\n",
"    Two requests are sent: the report page, whose inline scripts carry the\n",
"    session ID, and then the ``op=page_content`` call for that session.\n",
"\n",
"    Raises:\n",
"        requests.exceptions.RequestException: a request failed or returned an\n",
"            error status.\n",
"        ValueError: no session ID was found in the report page.\n",
"    \"\"\"\n",
"    order_no = str(order_number)  # the URL is joined with +, which needs a str\n",
"\n",
"    url = (\n",
"        \"http://rp.chezizhu.com/ReportServer?reportlet=/czz/order/order.cpt&__parameters__={%22_%22:%22\" + f\"{timestamp_ms}\" + \"%22,%22PINGJIA%22:[5b][5d],%22DISCOUNT_DESC%22:%22%22,%22B%22:%222011-05-01%22,%22LABELPLATE_NUMBER%22:%22%25E8%25BD%25A6%25E7%2589%258C%25E5%258F%25B7%22,%22LABELUSER_NAME%22:%22%25E5%25A7%2593%25E5%2590%258D%22,%22E%22:%222025-05-07%22,%22SERVICER%22:[5b][5d],%22CAR_NAME%22:%22%22,%22ORDER_SERVICE_TYPE%22:%22%22,%22order_number%22:%22\" + order_no + \"%22,%22USER_MOBILE%22:%22%22,%22LABELUSER_STATE%22:%22%25E6%2596%25B0%25E8%2580%2581%25E7%2594%25A8%25E6%2588%25B7%22,%22LABELCAR_NAME%22:%22%25E8%25BD%25A6%25E5%259E%258B%22,%22SERVICER_ID%22:%22683XGWqnyn66mE1oa8Gkzb%22,%22LABELORDER_SOURCE%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E6%259D%25A5%25E6%25BA%2590%22,%22LABELE%22:%22%25E8%2587%25B3%22,%22LABELSERVER%22:%22%25E9%2597%25A8%25E5%25BA%2597%22,%22LABELGONGDAN_STATUS%22:%22%25E5%25B7%25A5%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22LABELORDER_STATUS%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22ORDER_STATUS%22:[5b][5d],%22CMD%22:%22parameters_d%22,%22PAY_METHOD%22:%22%22,%22LABELUSER_MOBILE%22:%22%25E5%25AE%25A2%25E6%2588%25B7%25E7%2594%25B5%25E8%25AF%259D%22,%22LABELPINGJIA%22:%22%25E8%25AF%2584%25E4%25BB%25B7%25E7%258A%25B6%25E6%2580%2581%22,%22USER_STATE%22:[5b][5d],%22LABELRP_TYPE%22:%22%25E6%258A%25A5%25E8%25A1%25A8%25E7%25B1%25BB%25E5%259E%258B%22,%22LABELPAY_METHOD%22:%22%25E7%25BB%2593%25E7%25AE%2597%25E6%2596%25B9%25E5%25BC%258F%22,%22USER_NAME%22:%22%22,%22REPORTNAME%22:%22czz%252Forder%252Forder_detail_md.cpt%22,%22DISCOUNT_TYPE%22:%22%22,%22DATE_TYPE%22:%22%25E5%25AE%258C%25E6%2588%2590%25E6%2597%25A5%25E6%259C%259F%22,%22LABELDISCOUNT_DESC%22:%22%25E6%258A%2598%25E6%2589%25A3%25E5%258E%259F%25E5%259B%25A0%22,%22GONGDAN_STATUS%22:[5b][5d],%22__STREAMCLOSED__%22:%22%22,%22__pi__%22:%22true%22,%22CHANNEL%22:%22%22,%22DATABASE%22:%22chezizhu_14%22,%22ORDER_SOURCE%22:[5b][5d],%22LABELORDER_NUMBER%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E5%258F%25B7%22,%22PLATE_NUMBER%22:%22%22,%22RP_TYPE%22:%22%25E9%25AB%2598%25E7%25BA%25A7%25E6%2590%259C%25E7%25B4%25A2%22}\"\n",
"    )\n",
"    # An explicit timeout keeps a stalled connection from blocking the whole run.\n",
"    res = requests.get(url, headers=REPORT_PAGE_HEADERS, timeout=REQUEST_TIMEOUT)\n",
"    res.raise_for_status()\n",
"\n",
"    session_ids = extract_session_id(res.text)\n",
"    if not session_ids:\n",
"        raise ValueError(f'订单 {order_number} 的报表页面中未找到 sessionID')\n",
"\n",
"    url = f\"http://rp.chezizhu.com/ReportServer?_={timestamp_ms}&__boxModel__=true&op=page_content&sessionID={session_ids[0]}&pn=1&__fit__=false\"\n",
"\n",
"    headers = {\n",
"        'Accept': '*/*',\n",
"        'Accept-Encoding': 'gzip, deflate',\n",
"        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
"        'Connection': 'keep-alive',\n",
"        'Cookie': 'td_cookie=2912605941',\n",
"        'Host': 'rp.chezizhu.com',\n",
"        \"Referer\": \"http://rp.chezizhu.com/ReportServer?reportlet=/czz/order/order.cpt&__parameters__={%22_%22:%22\" + f\"{timestamp_ms}\" + \"%22,%22PINGJIA%22:[5b][5d],%22DISCOUNT_DESC%22:%22%22,%22B%22:%222021-05-01%22,%22LABELPLATE_NUMBER%22:%22%25E8%25BD%25A6%25E7%2589%258C%25E5%258F%25B7%22,%22LABELUSER_NAME%22:%22%25E5%25A7%2593%25E5%2590%258D%22,%22E%22:%222025-05-06%22,%22SERVICER%22:[5b][5d],%22CAR_NAME%22:%22%22,%22ORDER_SERVICE_TYPE%22:%22%22,%22order_number%22:%22\"\n",
"                   + order_no +\n",
"                   \"%22,%22USER_MOBILE%22:%22%22,%22LABELUSER_STATE%22:%22%25E6%2596%25B0%25E8%2580%2581%25E7%2594%25A8%25E6%2588%25B7%22,%22LABELCAR_NAME%22:%22%25E8%25BD%25A6%25E5%259E%258B%22,%22SERVICER_ID%22:%22683XGWqnyn66mE1oa8Gkzb%22,%22LABELORDER_SOURCE%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E6%259D%25A5%25E6%25BA%2590%22,%22LABELE%22:%22%25E8%2587%25B3%22,%22LABELSERVER%22:%22%25E9%2597%25A8%25E5%25BA%2597%22,%22LABELGONGDAN_STATUS%22:%22%25E5%25B7%25A5%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22LABELORDER_STATUS%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22ORDER_STATUS%22:[5b][5d],%22CMD%22:%22parameters_d%22,%22PAY_METHOD%22:%22%22,%22LABELUSER_MOBILE%22:%22%25E5%25AE%25A2%25E6%2588%25B7%25E7%2594%25B5%25E8%25AF%259D%22,%22LABELPINGJIA%22:%22%25E8%25AF%2584%25E4%25BB%25B7%25E7%258A%25B6%25E6%2580%2581%22,%22USER_STATE%22:[5b][5d],%22LABELRP_TYPE%22:%22%25E6%258A%25A5%25E8%25A1%25A8%25E7%25B1%25BB%25E5%259E%258B%22,%22LABELPAY_METHOD%22:%22%25E7%25BB%2593%25E7%25AE%2597%25E6%2596%25B9%25E5%25BC%258F%22,%22USER_NAME%22:%22%22,%22REPORTNAME%22:%22czz%252Forder%252Forder_detail_md.cpt%22,%22DISCOUNT_TYPE%22:%22%22,%22DATE_TYPE%22:%22%25E5%25AE%258C%25E6%2588%2590%25E6%2597%25A5%25E6%259C%259F%22,%22LABELDISCOUNT_DESC%22:%22%25E6%258A%2598%25E6%2589%25A3%25E5%258E%259F%25E5%259B%25A0%22,%22GONGDAN_STATUS%22:[5b][5d],%22__STREAMCLOSED__%22:%22%22,%22__pi__%22:%22true%22,%22CHANNEL%22:%22%22,%22DATABASE%22:%22chezizhu_14%22,%22ORDER_SOURCE%22:[5b][5d],%22LABELORDER_NUMBER%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E5%258F%25B7%22,%22PLATE_NUMBER%22:%22%22,%22RP_TYPE%22:%22%25E9%25AB%2598%25E7%25BA%25A7%25E6%2590%259C%25E7%25B4%25A2%22}\",\n",
"        \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0\",\n",
"        \"X-Requested-With\": \"XMLHttpRequest\"\n",
"    }\n",
"\n",
"    res = make_request_with_retry(url, headers)\n",
"    return BeautifulSoup(res.text, 'lxml')\n",
"\n",
"\n",
"def _parse_base_info(soup, order_number):\n",
"    \"\"\"Return {'单号': order_number, <field>: text or None} for every entry of BASE_FIELDS.\n",
"\n",
"    A field's value is the text of the <td> that follows the <td> whose string\n",
"    is exactly '<field>:'; it is None when either cell is missing.\n",
"    \"\"\"\n",
"    base_info = {'单号': order_number}  # keep the original order number\n",
"    for field in BASE_FIELDS:\n",
"        field_td = soup.find('td', string=f'{field}:')\n",
"        value_td = field_td.find_next('td') if field_td else None\n",
"        base_info[field] = value_td.get_text(strip=True) if value_td else None\n",
"    return base_info\n",
"\n",
"\n",
"def _parse_service_items(soup, order_number):\n",
"    \"\"\"Return one dict per usable row of the table that contains a '消费项目' cell.\n",
"\n",
"    The table's first <tr> is skipped as the header and rows with fewer than 7\n",
"    cells are ignored. '行号' is the row's position counted from 1 after the\n",
"    header, so ignored rows still use up a number. An empty list is returned\n",
"    when the cell or its table is not found.\n",
"    \"\"\"\n",
"    items = []\n",
"    consumption_header = soup.find(lambda tag: tag.name == 'td' and '消费项目' in tag.text)\n",
"    table = consumption_header.find_parent('table') if consumption_header else None\n",
"    if not table:\n",
"        return items\n",
"\n",
"    last_item = {}  # previous accepted row, the source of inherited values\n",
"    for row_idx, tr in enumerate(table.find_all('tr')[1:], 1):\n",
"        cols = tr.find_all('td')\n",
"        if len(cols) < 7:\n",
"            continue\n",
"\n",
"        item = {\n",
"            '单号': order_number,\n",
"            '行号': row_idx,\n",
"        }\n",
"        for field_name, col_index in FIELD_ORDER:\n",
"            if col_index < len(cols):\n",
"                # The cell exists: take its text.\n",
"                item[field_name] = cols[col_index].get_text(strip=True)\n",
"            elif field_name in DO_NOT_INHERIT_FIELDS:\n",
"                item[field_name] = ''\n",
"            else:\n",
"                # The cell is missing: reuse the previous row's value.\n",
"                item[field_name] = last_item.get(field_name, '')\n",
"\n",
"        last_item = item\n",
"        items.append(item)\n",
"    return items\n",
"\n",
"\n",
"# Item rows are collected as plain dicts and turned into a DataFrame once, after\n",
"# the loop; concatenating onto a DataFrame per row copies the whole table each time.\n",
"item_records = []\n",
"\n",
"for index, row in tqdm(df.iterrows(), total=len(df), desc=\"处理订单\"):\n",
"    order_number = row[\"单号\"]\n",
"    timestamp_ms = int(time.time() * 1000)\n",
"\n",
"    for attempt in range(MAX_ATTEMPTS):\n",
"        try:\n",
"            soup = _fetch_order_soup(order_number, timestamp_ms)\n",
"\n",
"            # ========== 1. Base information (one row per order) ==========\n",
"            base_info = _parse_base_info(soup, order_number)\n",
"            for col, value in base_info.items():\n",
"                if col not in base_info_df.columns:\n",
"                    base_info_df[col] = None\n",
"                base_info_df.at[index, col] = value\n",
"\n",
"            # ========== 2. Service items (several rows per order) ==========\n",
"            # Added only after everything above succeeded, so a retried\n",
"            # attempt cannot leave duplicate rows behind.\n",
"            item_records.extend(_parse_service_items(soup, order_number))\n",
"            break  # success: leave the retry loop\n",
"\n",
"        except Exception as e:\n",
"            if attempt == MAX_ATTEMPTS - 1:  # the last attempt failed as well\n",
"                print(f\"订单 {order_number} 处理失败,已达最大重试次数。错误: {str(e)}\")\n",
"                # Record the failed order so it can be re-run later.\n",
"                with open('failed_orders.txt', 'a', encoding='utf-8') as f:\n",
"                    f.write(f\"{order_number}\\n\")\n",
"            else:\n",
"                time.sleep(1)  # wait one second before the next attempt\n",
"\n",
"service_items_df = pd.DataFrame(item_records)\n",
|
||
"\n",
|
"# ========== 3. Merge and save ==========\n",
"# Option 1 (recommended): write the two related tables to separate workbooks.\n",
"base_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\订单基本信息.xlsx'\n",
"items_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\消费项目明细.xlsx'\n",
"for frame, target in ((base_info_df, base_output), (service_items_df, items_output)):\n",
"    frame.to_excel(target, index=False)\n",
"\n",
"report_lines = [\n",
"    \"\\n处理完成!\",\n",
"    f\"- 基本信息保存至: {base_output}\",\n",
"    f\"- 消费项目保存至: {items_output}\",\n",
"]\n",
"\n",
"# Option 2: one wide table in which every item row repeats its order's base\n",
"# information. It is written only when at least one item row was collected.\n",
"if not service_items_df.empty:\n",
"    merged_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\历史维修记录明细-15635606677.xlsx'\n",
"    merged_df = base_info_df.merge(service_items_df, on='单号', how='left')\n",
"    merged_df.to_excel(merged_output, index=False)\n",
"    report_lines.append(f\"- 合并数据保存至: {merged_output}\")\n",
"\n",
"# The summary is printed only after every file has been written.\n",
"for line in report_lines:\n",
"    print(line)"
|
||
],
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"处理订单: 0%| | 0/7946 [00:00<?, ?it/s]C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_12220\\1424788091.py:48: DeprecationWarning: The 'text' argument to find()-type methods is deprecated. Use 'string' instead.\n",
|
||
" scripts = soup.find_all(\"script\", text=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
|
||
"处理订单: 0%| | 1/7946 [00:00<24:49, 5.34it/s]C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_12220\\1424788091.py:48: DeprecationWarning: The 'text' argument to find()-type methods is deprecated. Use 'string' instead.\n",
|
||
" scripts = soup.find_all(\"script\", text=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
|
||
"处理订单: 31%|███ | 2441/7946 [12:04<34:15, 2.68it/s]"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"请求失败,第 1 次重试... 错误: HTTPConnectionPool(host='rp.chezizhu.com', port=80): Max retries exceeded with url: /ReportServer?_=1747275363838&__boxModel__=true&op=page_content&sessionID=7058&pn=1&__fit__=false (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000024EEAFAB080>, 'Connection to rp.chezizhu.com timed out. (connect timeout=10)'))\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"处理订单: 36%|███▌ | 2851/7946 [15:04<36:00, 2.36it/s] "
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"请求失败,第 1 次重试... 错误: HTTPConnectionPool(host='rp.chezizhu.com', port=80): Max retries exceeded with url: /ReportServer?_=1747275543553&__boxModel__=true&op=page_content&sessionID=55535&pn=1&__fit__=false (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000024EEE97B0E0>, 'Connection to rp.chezizhu.com timed out. (connect timeout=10)'))\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"处理订单: 48%|████▊ | 3824/7946 [22:42<34:53, 1.97it/s] "
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"请求失败,第 1 次重试... 错误: HTTPConnectionPool(host='rp.chezizhu.com', port=80): Max retries exceeded with url: /ReportServer?_=1747276001354&__boxModel__=true&op=page_content&sessionID=78866&pn=1&__fit__=false (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000024EE97A0A10>, 'Connection to rp.chezizhu.com timed out. (connect timeout=10)'))\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"处理订单: 95%|█████████▌| 7578/7946 [1:03:49<04:32, 1.35it/s] "
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"请求失败,第 1 次重试... 错误: HTTPConnectionPool(host='rp.chezizhu.com', port=80): Max retries exceeded with url: /ReportServer?_=1747278468348&__boxModel__=true&op=page_content&sessionID=24705&pn=1&__fit__=false (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x0000024EEE909580>, 'Connection to rp.chezizhu.com timed out. (connect timeout=10)'))\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"处理订单: 100%|██████████| 7946/7946 [1:08:39<00:00, 1.93it/s]\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n",
|
||
"处理完成!\n",
|
||
"- 基本信息保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\订单基本信息.xlsx\n",
|
||
"- 消费项目保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\消费项目明细.xlsx\n",
|
||
"- 合并数据保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\历史维修记录明细-15635606677.xlsx\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 4
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-09T02:12:23.607378Z",
|
||
"start_time": "2025-05-09T02:12:23.588256Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
"# ========== 3. Merge and save ==========\n",
"# This cell uses base_info_df and service_items_df without defining them, so the\n",
"# scraping cell has to run first; the output saved with this cell is the\n",
"# NameError raised when base_info_df did not exist yet.\n",
"#\n",
"# Option 1 (recommended): write the two related tables to separate workbooks.\n",
"base_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\订单基本信息.xlsx'\n",
"items_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\消费项目明细.xlsx'\n",
"for frame, target in ((base_info_df, base_output), (service_items_df, items_output)):\n",
"    frame.to_excel(target, index=False)\n",
"\n",
"report_lines = [\n",
"    \"\\n处理完成!\",\n",
"    f\"- 基本信息保存至: {base_output}\",\n",
"    f\"- 消费项目保存至: {items_output}\",\n",
"]\n",
"\n",
"# Option 2: one wide table in which every item row repeats its order's base\n",
"# information. It is written only when at least one item row exists.\n",
"if not service_items_df.empty:\n",
"    merged_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\历史维修记录明细-古交腾飞路店13593189858.xlsx'\n",
"    merged_df = base_info_df.merge(service_items_df, on='单号', how='left')\n",
"    merged_df.to_excel(merged_output, index=False)\n",
"    report_lines.append(f\"- 合并数据保存至: {merged_output}\")\n",
"\n",
"# The summary is printed only after every file has been written.\n",
"for line in report_lines:\n",
"    print(line)"
|
||
],
|
||
"id": "2dad90815fd16898",
|
||
"outputs": [
|
||
{
|
||
"ename": "NameError",
|
||
"evalue": "name 'base_info_df' is not defined",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
|
||
"\u001B[1;31mNameError\u001B[0m Traceback (most recent call last)",
|
||
"Cell \u001B[1;32mIn[3], line 5\u001B[0m\n\u001B[0;32m 3\u001B[0m base_output \u001B[38;5;241m=\u001B[39m \u001B[38;5;124mr\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mD:\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mIdea Project\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mF6+宜搭+其它(1)\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mnew\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124m文件输出\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124m订单基本信息.xlsx\u001B[39m\u001B[38;5;124m'\u001B[39m\n\u001B[0;32m 4\u001B[0m items_output \u001B[38;5;241m=\u001B[39m \u001B[38;5;124mr\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mD:\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mIdea Project\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mF6+宜搭+其它(1)\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mnew\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124m文件输出\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124m消费项目明细.xlsx\u001B[39m\u001B[38;5;124m'\u001B[39m\n\u001B[1;32m----> 5\u001B[0m base_info_df\u001B[38;5;241m.\u001B[39mto_excel(base_output, index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\n\u001B[0;32m 6\u001B[0m service_items_df\u001B[38;5;241m.\u001B[39mto_excel(items_output, index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\n\u001B[0;32m 8\u001B[0m \u001B[38;5;66;03m# 方法2:合并为一个宽表(每行重复基本信息)\u001B[39;00m\n",
|
||
"\u001B[1;31mNameError\u001B[0m: name 'base_info_df' is not defined"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 3
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 2
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython2",
|
||
"version": "2.7.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|