F6--/张阳脚本/竞品系统数据导出/大唛云管理平台.ipynb

{
 "cells": [
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## 订单明细",
   "id": "8bdaf70d574d1868"
  },
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2026-01-20T09:45:36.313844400Z",
     "start_time": "2026-01-20T09:38:55.425173700Z"
    }
   },
   "source": [
    "import time\n",
    "\n",
    "import pandas as pd\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from tqdm import tqdm\n",
    "\n",
    "# Request settings copied from a browser session of the order-detail report.\n",
    "# NOTE(review): SESSION_ID and the '_' parameter look session-specific;\n",
    "# presumably both (and TOTAL_PAGES) must be refreshed before a re-run.\n",
    "REPORT_URL = 'http://rp.chezizhu.com/ReportServer'\n",
    "SESSION_ID = '99282'\n",
    "TOTAL_PAGES = 1302  # report pages 1..TOTAL_PAGES are fetched\n",
    "REQUEST_TIMEOUT = 30  # seconds; a stalled connection fails instead of hanging the cell\n",
    "OUTPUT_PATH = r'D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\大唛云订单明细-18135844455.xlsx'\n",
    "\n",
    "# Column names assigned, in order, to the cells of each data row.\n",
    "keys = [\n",
    "    \"单号\", \"用料\", \"图片\", \"客户姓名\", \"客户电话\", \"车牌号码\", \"车型\",\n",
    "    \"产品金额\", \"服务费\", \"上门费\", \"折扣金额\", \"折扣原因\", \"实收金额\",\n",
    "    \"结算方式\", \"新老用户\", \"订单来源\", \"下单方式\", \"服务类型\", \"服务商\",\n",
    "    \"技师\", \"下单时间\", \"预约时间\", \"完成时间\", \"评价状态\", \"工单状态\",\n",
    "    \"订单状态\", \"全车检测\", \"备注\"\n",
    "]\n",
    "\n",
    "# A row whose first cell contains one of these texts is a header, total,\n",
    "# title or footnote row and is skipped.\n",
    "SKIP_MARKERS = (\"单号\", \"总计\", \"订单列表\", \"注：红色订单为实收金额有误订单\")\n",
    "\n",
    "# The headers do not change between pages, so they are built once.\n",
    "headers = {\n",
    "    'Accept': 'text/html, */*; q=0.01',\n",
    "    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
    "    'Connection': 'keep-alive',\n",
    "    'Referer': 'http://rp.chezizhu.com/ReportServer?reportlet=czz/order/order_detail_md.cpt&servicer_id=HyBKFgc2uCNtqs59aDZhc2&database=chezizhu_14',\n",
    "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0',\n",
    "    'X-Requested-With': 'XMLHttpRequest',\n",
    "}\n",
    "\n",
    "all_data_list = []\n",
    "for i in tqdm(range(1, TOTAL_PAGES + 1)):\n",
    "    time.sleep(0.2)  # small pause between page requests\n",
    "\n",
    "    params = {\n",
    "        '_': '1768901546535',\n",
    "        '__boxModel__': 'true',\n",
    "        'op': 'page_content',\n",
    "        'sessionID': SESSION_ID,\n",
    "        'pn': i,\n",
    "        '__fit__': 'false',\n",
    "    }\n",
    "\n",
    "    res = requests.get(REPORT_URL, params=params, headers=headers, verify=False, timeout=REQUEST_TIMEOUT)\n",
    "    # Stop on an HTTP error instead of silently exporting a file with missing pages;\n",
    "    # rows collected so far remain available in all_data_list.\n",
    "    res.raise_for_status()\n",
    "\n",
    "    soup = BeautifulSoup(res.text, 'lxml')\n",
    "    for tr in soup.find_all('tr'):\n",
    "        tds = tr.find_all('td')\n",
    "        if not tds:\n",
    "            # A <tr> without <td> cells has no first cell to inspect and no data\n",
    "            continue\n",
    "        first_cell = tds[0].text\n",
    "        if any(marker in first_cell for marker in SKIP_MARKERS):\n",
    "            continue\n",
    "\n",
    "        # zip() stops at the shorter sequence, so extra cells are dropped and\n",
    "        # short rows simply lack the trailing keys\n",
    "        order_dict = dict(zip(keys, [td.text for td in tds]))\n",
    "        all_data_list.append(order_dict)\n",
    "\n",
    "df = pd.DataFrame(all_data_list)\n",
    "df.to_excel(OUTPUT_PATH, index=False)\n"
   ],
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1302/1302 [06:34<00:00,  3.30it/s]\n"
     ]
    }
   ],
   "execution_count": 6
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-01-20T09:34:49.479362100Z",
     "start_time": "2026-01-20T09:34:49.180981Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Write the rows currently held in `all_data_list` to the order-detail workbook.\n",
    "# This cell imports and defines nothing itself: it relies on `pd` and\n",
    "# `all_data_list` already existing in the kernel.\n",
    "df = pd.DataFrame(all_data_list)\n",
    "df.to_excel(r'D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\大唛云订单明细-18135844455.xlsx', index=False)"
   ],
   "id": "a848bd6b4137743e",
   "outputs": [],
   "execution_count": 4
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## 历史维修记录",
   "id": "271853920f6210f"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-14T11:44:43.656887Z",
     "start_time": "2025-07-14T09:38:43.922512Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import requests\n",
    "import re\n",
    "import time\n",
    "\n",
    "timestamp_ms = int(time.time() * 1000)\n",
    "\n",
    "import pandas as pd\n",
    "import requests\n",
    "from tqdm import tqdm\n",
    "from bs4 import BeautifulSoup\n",
    "from urllib.parse import urlencode, quote\n",
    "import time\n",
    "from requests.exceptions import RequestException\n",
    "\n",
    "# Read the order-detail Excel file; the loop below looks up each row's 单号\n",
    "df = pd.read_excel(r\"D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\大唛云订单明细-18135844455.xlsx\")\n",
    "\n",
    "# Labels of the single-value fields to extract from each order page\n",
    "BASE_FIELDS = [\n",
    "    '保险公司', '保险到期', '年检日期', '车架号',\n",
    "    '购车日期', '服务日期', '服务里程',\n",
    "    '下次服务日期', '下次服务里程'\n",
    "]\n",
    "\n",
    "# Two DataFrames hold the two kinds of extracted data\n",
    "base_info_df = df.copy()  # order-level fields; starts as a copy of the input rows\n",
    "service_items_df = pd.DataFrame()  # consumption-item detail rows, appended per order\n",
    "\n",
    "\n",
    "def make_request_with_retry(url, headers, max_retries=10, retry_delay=1):\n",
    "    \"\"\"GET ``url`` and return the response, retrying when the request fails.\n",
    "\n",
    "    Each attempt uses a 10-second timeout and treats an HTTP error status\n",
    "    as a failure (``raise_for_status``).\n",
    "\n",
    "    Args:\n",
    "        url: Address to request.\n",
    "        headers: HTTP headers sent with every attempt.\n",
    "        max_retries: Total number of attempts.\n",
    "        retry_delay: Seconds to sleep after a failed attempt.\n",
    "\n",
    "    Returns:\n",
    "        The successful response, or ``None`` when ``max_retries`` allows no\n",
    "        attempt at all.\n",
    "\n",
    "    Raises:\n",
    "        RequestException: Re-raised from the final attempt if it also fails.\n",
    "    \"\"\"\n",
    "    final_attempt = max_retries - 1\n",
    "    for attempt in range(max_retries):\n",
    "        try:\n",
    "            response = requests.get(url=url, headers=headers, timeout=10)\n",
    "            response.raise_for_status()\n",
    "        except RequestException as e:\n",
    "            if attempt == final_attempt:\n",
    "                # Out of attempts: let the caller see the last error\n",
    "                raise\n",
    "            print(f\"请求失败，第 {attempt + 1} 次重试... 错误: {str(e)}\")\n",
    "            time.sleep(retry_delay)\n",
    "        else:\n",
    "            return response\n",
    "    return None\n",
    "\n",
    "\n",
    "def extract_session_id(html_content):\n",
    "    \"\"\"Return the report session IDs found in the page's inline scripts.\n",
    "\n",
    "    Only <script> tags whose content mentions ``FR.SessionMgr.register`` or\n",
    "    ``currentSessionID`` are inspected. From each one, the numeric ID in the\n",
    "    first ``FR.SessionMgr.register('<id>'`` call and in the first\n",
    "    ``currentSessionID = '<id>'`` assignment is collected.\n",
    "\n",
    "    Args:\n",
    "        html_content: HTML text of the report page.\n",
    "\n",
    "    Returns:\n",
    "        A list of distinct ID strings in the order they were first found,\n",
    "        so ``result[0]`` is the same on every run; empty when no ID is found.\n",
    "    \"\"\"\n",
    "    soup = BeautifulSoup(html_content, \"html.parser\")\n",
    "    # `string=` is the supported name of the deprecated `text=` filter argument\n",
    "    scripts = soup.find_all(\"script\", string=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
    "\n",
    "    id_patterns = (\n",
    "        r\"FR\\.SessionMgr\\.register\\('(\\d+)'\",  # ID passed to the register call\n",
    "        r\"currentSessionID\\s*=\\s*'(\\d+)'\",  # ID assigned to the variable\n",
    "    )\n",
    "\n",
    "    # A dict keeps insertion order and drops duplicates; a set would make\n",
    "    # the order of the returned list arbitrary.\n",
    "    found = {}\n",
    "    for script in scripts:\n",
    "        script_text = script.text\n",
    "        for pattern in id_patterns:\n",
    "            match = re.search(pattern, script_text)\n",
    "            if match:\n",
    "                found.setdefault(match.group(1), None)\n",
    "\n",
    "    return list(found)\n",
    "\n",
    "\n",
    "for index, row in tqdm(list(df.iterrows()), desc=\"处理订单\"):\n",
    "    order_number = row[\"单号\"]\n",
    "    success = False\n",
    "    # print(f\"正在处理订单 {order_number}\")\n",
    "\n",
    "    timestamp_ms = int(time.time() * 1000)\n",
    "    for attempt in range(10):  # 最大重试次数\n",
    "        try:\n",
    "            url = (\n",
    "                    \"http://rp.chezizhu.com/ReportServer?reportlet=/czz/order/order.cpt&__parameters__={%22_%22:%22\" + f\"{timestamp_ms}\" + \"%22,%22PINGJIA%22:[5b][5d],%22DISCOUNT_DESC%22:%22%22,%22B%22:%222011-05-01%22,%22LABELPLATE_NUMBER%22:%22%25E8%25BD%25A6%25E7%2589%258C%25E5%258F%25B7%22,%22LABELUSER_NAME%22:%22%25E5%25A7%2593%25E5%2590%258D%22,%22E%22:%222025-05-07%22,%22SERVICER%22:[5b][5d],%22CAR_NAME%22:%22%22,%22ORDER_SERVICE_TYPE%22:%22%22,%22order_number%22:%22\" + order_number + \"%22,%22USER_MOBILE%22:%22%22,%22LABELUSER_STATE%22:%22%25E6%2596%25B0%25E8%2580%2581%25E7%2594%25A8%25E6%2588%25B7%22,%22LABELCAR_NAME%22:%22%25E8%25BD%25A6%25E5%259E%258B%22,%22SERVICER_ID%22:%22683XGWqnyn66mE1oa8Gkzb%22,%22LABELORDER_SOURCE%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E6%259D%25A5%25E6%25BA%2590%22,%22LABELE%22:%22%25E8%2587%25B3%22,%22LABELSERVER%22:%22%25E9%2597%25A8%25E5%25BA%2597%22,%22LABELGONGDAN_STATUS%22:%22%25E5%25B7%25A5%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22LABELORDER_STATUS%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22ORDER_STATUS%22:[5b][5d],%22CMD%22:%22parameters_d%22,%22PAY_METHOD%22:%22%22,%22LABELUSER_MOBILE%22:%22%25E5%25AE%25A2%25E6%2588%25B7%25E7%2594%25B5%25E8%25AF%259D%22,%22LABELPINGJIA%22:%22%25E8%25AF%2584%25E4%25BB%25B7%25E7%258A%25B6%25E6%2580%2581%22,%22USER_STATE%22:[5b][5d],%22LABELRP_TYPE%22:%22%25E6%258A%25A5%25E8%25A1%25A8%25E7%25B1%25BB%25E5%259E%258B%22,%22LABELPAY_METHOD%22:%22%25E7%25BB%2593%25E7%25AE%2597%25E6%2596%25B9%25E5%25BC%258F%22,%22USER_NAME%22:%22%22,%22REPORTNAME%22:%22czz%252Forder%252Forder_detail_md.cpt%22,%22DISCOUNT_TYPE%22:%22%22,%22DATE_TYPE%22:%22%25E5%25AE%258C%25E6%2588%2590%25E6%2597%25A5%25E6%259C%259F%22,%22LABELDISCOUNT_DESC%22:%22%25E6%258A%2598%25E6%2589%25A3%25E5%258E%259F%25E5%259B%25A0%22,%22GONGDAN_STATUS%22:[5b][5d],%22__STREAMCLOSED__%22:%22%22,%22__pi__%22:%22true%22,%22CHANNEL%22:%22%22,%22DATABASE%22:%22chezizhu_14%22,%22ORDER_SOURCE%22:[5
b][5d],%22LABELORDER_NUMBER%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E5%258F%25B7%22,%22PLATE_NUMBER%22:%22%22,%22RP_TYPE%22:%22%25E9%25AB%2598%25E7%25BA%25A7%25E6%2590%259C%25E7%25B4%25A2%22}\"\n",
    "            )\n",
    "\n",
    "            headers = {\n",
    "                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',\n",
    "                'Accept-Encoding': 'gzip, deflate',\n",
    "                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
    "                'Cache-Control': 'max-age=0',\n",
    "                'Connection': 'keep-alive',\n",
    "                'Cookie': 'td_cookie=2912605941',\n",
    "                'Host': 'rp.chezizhu.com',\n",
    "                'Referer': 'http://rp.chezizhu.com/ReportServer?reportlet=czz/order/order_detail_md.cpt&servicer_id=CJRuYehLSqLqy4snS5bLoo&database=chezizhu_14',\n",
    "                'Upgrade-Insecure-Requests': '1',\n",
    "                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0'\n",
    "            }\n",
    "\n",
    "            # Open the report for this order; the session ID is extracted from its HTML.\n",
    "            # The timeout keeps one stalled connection from hanging the whole run, and an\n",
    "            # HTTP error status is raised so the surrounding retry loop tries again.\n",
    "            res = requests.get(url, headers=headers, timeout=10)\n",
    "            res.raise_for_status()\n",
    "\n",
    "            session_ids = extract_session_id(res.text)\n",
    "            if not session_ids:\n",
    "                # Descriptive error for the retry loop's handler instead of a bare IndexError\n",
    "                raise ValueError(f\"no session ID found in the report page for order {order_number}\")\n",
    "\n",
    "            # ========== Page-content request ==========\n",
    "            url = f\"http://rp.chezizhu.com/ReportServer?_={timestamp_ms}&__boxModel__=true&op=page_content&sessionID={session_ids[0]}&pn=1&__fit__=false\"\n",
    "\n",
    "            headers = {\n",
    "                'Accept': '*/*',\n",
    "                'Accept-Encoding': 'gzip, deflate',\n",
    "                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
    "                'Connection': 'keep-alive',\n",
    "                'Cookie': 'td_cookie=2912605941',\n",
    "                'Host': 'rp.chezizhu.com',\n",
    "                \"Referer\": \"http://rp.chezizhu.com/ReportServer?reportlet=/czz/order/order.cpt&__parameters__={%22_%22:%22\" + f\"{timestamp_ms}\" + \"%22,%22PINGJIA%22:[5b][5d],%22DISCOUNT_DESC%22:%22%22,%22B%22:%222010-07-01%22,%22LABELPLATE_NUMBER%22:%22%25E8%25BD%25A6%25E7%2589%258C%25E5%258F%25B7%22,%22LABELUSER_NAME%22:%22%25E5%25A7%2593%25E5%2590%258D%22,%22E%22:%222025-07-14%22,%22SERVICER%22:[5b][5d],%22CAR_NAME%22:%22%22,%22ORDER_SERVICE_TYPE%22:%22%22,%22order_number%22:%22\" + order_number + \"%22,%22USER_MOBILE%22:%22%22,%22LABELUSER_STATE%22:%22%25E6%2596%25B0%25E8%2580%2581%25E7%2594%25A8%25E6%2588%25B7%22,%22LABELCAR_NAME%22:%22%25E8%25BD%25A6%25E5%259E%258B%22,%22SERVICER_ID%22:%22CJRuYehLSqLqy4snS5bLoo%22,%22LABELORDER_SOURCE%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E6%259D%25A5%25E6%25BA%2590%22,%22LABELE%22:%22%25E8%2587%25B3%22,%22LABELSERVER%22:%22%25E9%2597%25A8%25E5%25BA%2597%22,%22LABELGONGDAN_STATUS%22:%22%25E5%25B7%25A5%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22LABELORDER_STATUS%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E7%258A%25B6%25E6%2580%2581%22,%22ORDER_STATUS%22:[5b][5d],%22CMD%22:%22parameters_d%22,%22PAY_METHOD%22:%22%22,%22LABELUSER_MOBILE%22:%22%25E5%25AE%25A2%25E6%2588%25B7%25E7%2594%25B5%25E8%25AF%259D%22,%22LABELPINGJIA%22:%22%25E8%25AF%2584%25E4%25BB%25B7%25E7%258A%25B6%25E6%2580%2581%22,%22USER_STATE%22:[5b][5d],%22LABELRP_TYPE%22:%22%25E6%258A%25A5%25E8%25A1%25A8%25E7%25B1%25BB%25E5%259E%258B%22,%22LABELPAY_METHOD%22:%22%25E7%25BB%2593%25E7%25AE%2597%25E6%2596%25B9%25E5%25BC%258F%22,%22USER_NAME%22:%22%22,%22REPORTNAME%22:%22czz%252Forder%252Forder_detail_md.cpt%22,%22DISCOUNT_TYPE%22:%22%22,%22DATE_TYPE%22:%22%25E5%25AE%258C%25E6%2588%2590%25E6%2597%25A5%25E6%259C%259F%22,%22LABELDISCOUNT_DESC%22:%22%25E6%258A%2598%25E6%2589%25A3%25E5%258E%259F%25E5%259B%25A0%22,%22GONGDAN_STATUS%22:[5b][5d],%22__STREAMCLOSED__%22:%22%22,%22__pi__%22:%22true%22,%22CHANNEL%22:%22%22,%22DATABASE%22:%22chezizhu_14%22,%22ORDER_SOU
RCE%22:[5b][5d],%22LABELORDER_NUMBER%22:%22%25E8%25AE%25A2%25E5%258D%2595%25E5%258F%25B7%22,%22PLATE_NUMBER%22:%22%22,%22RP_TYPE%22:%22%25E9%25AB%2598%25E7%25BA%25A7%25E6%2590%259C%25E7%25B4%25A2%22}\",\n",
    "                \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0\",\n",
    "                \"X-Requested-With\": \"XMLHttpRequest\"\n",
    "            }\n",
    "\n",
    "            # Fetch the page content through the retrying request helper\n",
    "            res = make_request_with_retry(url, headers)\n",
    "\n",
    "            soup = BeautifulSoup(res.text, 'lxml')\n",
    "\n",
    "            # ========== 1. Order-level fields (one value each) ==========\n",
    "            base_info = {'单号': order_number}  # keep the original order number\n",
    "\n",
    "            for field in BASE_FIELDS:\n",
    "                # The value is taken from the <td> that follows the '<label>：' cell\n",
    "                field_td = soup.find('td', string=f'{field}：')\n",
    "                value_td = field_td.find_next('td') if field_td else None\n",
    "                base_info[field] = value_td.get_text(strip=True) if value_td else None\n",
    "\n",
    "            # Write the values into this order's row, creating columns on first use\n",
    "            for col, value in base_info.items():\n",
    "                if col not in base_info_df.columns:\n",
    "                    base_info_df[col] = None\n",
    "                base_info_df.at[index, col] = value\n",
    "\n",
    "            # ========== 2. Consumption items (several rows per order) ==========\n",
    "            item_columns = ('套餐', '服务项目', '产品名称', '产品型号', '数量', '规格',\n",
    "                            '单位', '单价', '产品小计', '工时', '合计')\n",
    "            order_items = []\n",
    "            consumption_header = soup.find(lambda tag: tag.name == 'td' and '消费项目' in tag.text)\n",
    "            table = consumption_header.find_parent('table') if consumption_header else None\n",
    "            if table:\n",
    "                # [1:] skips the header row. The loop variable is not called `row`,\n",
    "                # so the outer DataFrame row is left intact.\n",
    "                for row_idx, item_tr in enumerate(table.find_all('tr')[1:], 1):\n",
    "                    cells = [td.get_text(strip=True) for td in item_tr.find_all('td')]\n",
    "                    if len(cells) < 6:\n",
    "                        continue  # rows with fewer than 6 cells are not item rows\n",
    "                    # Pad short rows with '' so every item carries all columns;\n",
    "                    # zip() ignores any cells beyond the last column.\n",
    "                    cells += [''] * (len(item_columns) - len(cells))\n",
    "                    item = {'单号': order_number, '行号': row_idx}\n",
    "                    item.update(zip(item_columns, cells))\n",
    "                    order_items.append(item)\n",
    "\n",
    "            # Append this order's items with a single concat, and only once the whole\n",
    "            # page has been parsed, so a retried attempt cannot leave duplicate rows.\n",
    "            if order_items:\n",
    "                service_items_df = pd.concat(\n",
    "                    [service_items_df, pd.DataFrame(order_items)],\n",
    "                    ignore_index=True\n",
    "                )\n",
    "\n",
    "            success = True\n",
    "            break  # success: leave the retry loop\n",
    "\n",
    "        except Exception as e:\n",
    "            if attempt == 9:  # the final attempt failed as well\n",
    "                print(f\"订单 {order_number} 处理失败，已达最大重试次数。错误: {str(e)}\")\n",
    "                # Record the failed order; UTF-8 is explicit so the log does not\n",
    "                # depend on the platform's default encoding\n",
    "                with open('failed_orders.txt', 'a', encoding='utf-8') as f:\n",
    "                    f.write(f\"{order_number}\\n\")\n",
    "            else:\n",
    "                time.sleep(1)  # wait one second before the next attempt\n",
    "\n",
    "# ========== 3. Save the results ==========\n",
    "# Option 1: two related workbooks that share the 单号 column\n",
    "base_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\订单基本信息.xlsx'\n",
    "items_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\消费项目明细.xlsx'\n",
    "base_info_df.to_excel(base_output, index=False)\n",
    "service_items_df.to_excel(items_output, index=False)\n",
    "\n",
    "# Option 2: one wide workbook, written only when at least one item row was\n",
    "# collected; the left join keeps every order and repeats its fields per item\n",
    "if not service_items_df.empty:\n",
    "    merged_output = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\历史维修记录明细-古交腾飞路店13593189858.xlsx'\n",
    "    merged_df = base_info_df.merge(service_items_df, how='left', on='单号')\n",
    "    merged_df.to_excel(merged_output, index=False)\n",
    "\n",
    "# Report where each file was written\n",
    "print(\"\\n处理完成！\")\n",
    "print(f\"- 基本信息保存至: {base_output}\")\n",
    "print(f\"- 消费项目保存至: {items_output}\")\n",
    "if not service_items_df.empty:\n",
    "    print(f\"- 合并数据保存至: {merged_output}\")"
   ],
   "id": "ed2755053dd426c",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "处理订单:   0%|          | 0/10868 [00:00<?, ?it/s]C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_4532\\3409432529.py:48: DeprecationWarning: The 'text' argument to find()-type methods is deprecated. Use 'string' instead.\n",
      "  scripts = soup.find_all(\"script\", text=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
      "处理订单:   0%|          | 1/10868 [00:00<28:50,  6.28it/s]C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_4532\\3409432529.py:48: DeprecationWarning: The 'text' argument to find()-type methods is deprecated. Use 'string' instead.\n",
      "  scripts = soup.find_all(\"script\", text=re.compile(r\"(FR\\.SessionMgr\\.register|currentSessionID)\"))\n",
      "处理订单:  18%|█▊        | 1974/10868 [09:20<50:33,  2.93it/s]  "
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "请求失败，第 1 次重试... 错误: HTTPConnectionPool(host='rp.chezizhu.com', port=80): Max retries exceeded with url: /ReportServer?_=1752486488556&__boxModel__=true&op=page_content&sessionID=26250&pn=1&__fit__=false (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x000002C3E3298E90>, 'Connection to rp.chezizhu.com timed out. (connect timeout=10)'))\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "处理订单: 100%|██████████| 10868/10868 [2:03:15<00:00,  1.47it/s] \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "处理完成！\n",
      "- 基本信息保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\订单基本信息.xlsx\n",
      "- 消费项目保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\消费项目明细.xlsx\n",
      "- 合并数据保存至: D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\历史维修记录明细-古交腾飞路店13593189858.xlsx\n"
     ]
    }
   ],
   "execution_count": 12
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## 库存 （页面已支持导出）",
   "id": "5e7ac3ee321a3549"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-16T09:51:20.409717Z",
     "start_time": "2025-04-16T09:51:13.116335Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "import requests\n",
    "from tqdm import tqdm\n",
    "from bs4 import BeautifulSoup\n",
    "from urllib.parse import urlencode, quote\n",
    "import time\n",
    "from requests.exceptions import RequestException\n",
    "\n",
    "url = \"http://sp.chezizhu.com/alliance/store/list.htm\"\n",
    "\n",
    "# The session cookie can be supplied through the CHEZIZHU_JSESSIONID environment\n",
    "# variable so it need not be edited into the notebook; the previously captured\n",
    "# value remains the fallback.\n",
    "session_cookie = os.environ.get(\"CHEZIZHU_JSESSIONID\", \"28FFBA865A6CAAA76CB8CDB6A34B5886\")\n",
    "\n",
    "header = {\n",
    "    \"content-type\": \"application/x-www-form-urlencoded; charset=UTF-8\",\n",
    "    \"cookie\": f\"JSESSIONID={session_cookie}\",\n",
    "    \"host\": \"sp.chezizhu.com\",\n",
    "    \"origin\": \"http://sp.chezizhu.com\",\n",
    "    \"referer\": \"http://sp.chezizhu.com/alliance/index.htm\",\n",
    "    \"user-agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0\",\n",
    "    \"x-requested-with\": \"XMLHttpRequest\"\n",
    "}\n",
    "\n",
    "# Keys read from every item of the response's \"list\"; also the output column order.\n",
    "INVENTORY_FIELDS = [\"tp_number\", \"damai_id\", \"prod_name\", \"price\", \"standard\", \"unit\", \"count\", \"check_count\",\n",
    "                    \"check_date\"]\n",
    "\n",
    "all_data = []\n",
    "for i in tqdm(range(1, 103)):\n",
    "    # Form-encoded request body for page i (20 items per page)\n",
    "    payload = f'pageNum={i}&pageSize=20&dataType=json&keywords='\n",
    "\n",
    "    res = requests.post(url, headers=header, data=payload, timeout=30)\n",
    "    res.raise_for_status()  # fail loudly rather than export an incomplete inventory\n",
    "\n",
    "    # `or []` also covers a response where \"list\" is present but null\n",
    "    page_items = res.json().get(\"list\") or []\n",
    "\n",
    "    for item in page_items:\n",
    "        # Missing keys become None, exactly like individual .get() calls\n",
    "        all_data.append([item.get(field) for field in INVENTORY_FIELDS])\n",
    "\n",
    "df = pd.DataFrame(all_data, columns=INVENTORY_FIELDS)\n",
    "df.to_excel(r\"D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\库存.xlsx\", index=False)\n",
    "\n",
    "\n",
    "\n"
   ],
   "id": "95616ee8b933e5ce",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 102/102 [00:07<00:00, 14.43it/s]\n"
     ]
    }
   ],
   "execution_count": 52
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "",
   "id": "70a45b885b2739c"
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## 销售明细",
   "id": "7df1375e5debe404"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-15T01:34:31.895645Z",
     "start_time": "2025-07-15T01:32:17.223368Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import time\n",
    "\n",
    "import pandas as pd\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from requests.exceptions import RequestException\n",
    "from tqdm import tqdm\n",
    "\n",
    "# `time` and `RequestException` are imported here because the retry loop below\n",
    "# uses them; without these imports the cell only worked when an earlier cell\n",
    "# had already been run in the same kernel.\n",
    "\n",
    "all_data_list = []\n",
    "keys = [\"门店\", \"订单编号\", \"产品类型\", \"产品名称\", \"型号\", \"数量\", \"采购单价\", \"销售单价\",\n",
    "        \"采购金额\", \"销售金额\", \"毛利润\", \"下单人\", \"接单人\", \"车牌号\"]\n",
    "# Rows are matched to the keys from the right-hand side, so both are reversed.\n",
    "reversed_keys = list(reversed(keys))\n",
    "\n",
    "# The headers do not change between pages, so they are built once.\n",
    "header = {\n",
    "    \"Referer\": \"http://rp.chezizhu.com/ReportServer?reportlet=czz/storage/prod_sale_dtl_jm.cpt&servicer_id=CJRuYehLSqLqy4snS5bLoo&hideexport=true&database=chezizhu_14\",\n",
    "    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0\",\n",
    "    \"X-Requested-With\": \"XMLHttpRequest\"\n",
    "}\n",
    "\n",
    "for i in tqdm(range(1, 1390)):\n",
    "    url = f\"http://rp.chezizhu.com/ReportServer?_=1752541860351&__boxModel__=true&op=page_content&sessionID=91877&pn={i}&__fit__=false\"\n",
    "\n",
    "    # Keep retrying this page until the request succeeds. The timeout turns a\n",
    "    # stalled connection into a RequestException, so it is retried as well.\n",
    "    while True:\n",
    "        try:\n",
    "            res = requests.post(url, headers=header, timeout=30)\n",
    "            break\n",
    "        except RequestException as e:\n",
    "            print(f\"请求失败，正在重试... 错误信息：{str(e)}\")\n",
    "            time.sleep(1)\n",
    "    soup = BeautifulSoup(res.text, \"html.parser\")\n",
    "\n",
    "    # Data rows only: rows that contain a <th> are excluded by the selector\n",
    "    for tr in soup.select('tr:not(:has(th))'):\n",
    "        tds = [td.get_text(strip=True) for td in tr.find_all(\"td\")]\n",
    "        if not tds:\n",
    "            # A row without any <td> would only add an all-empty record\n",
    "            continue\n",
    "        reversed_tds = list(reversed(tds))\n",
    "\n",
    "        # Pad short rows with '' and cut long rows to the number of keys\n",
    "        if len(reversed_tds) < len(reversed_keys):\n",
    "            reversed_tds += [\"\"] * (len(reversed_keys) - len(reversed_tds))\n",
    "        elif len(reversed_tds) > len(reversed_keys):\n",
    "            reversed_tds = reversed_tds[:len(reversed_keys)]\n",
    "\n",
    "        # Build the record from the reversed keys and cells\n",
    "        row_data = dict(zip(reversed_keys, reversed_tds))\n",
    "        all_data_list.append(row_data)\n",
    "\n",
    "df = pd.DataFrame(all_data_list)\n",
    "df.to_excel(r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\大唛云销售明细.xlsx', index=False)"
   ],
   "id": "b3100f77cc1061b1",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1389/1389 [02:09<00:00, 10.70it/s]\n"
     ]
    }
   ],
   "execution_count": 14
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-29T07:16:14.900349Z",
     "start_time": "2025-04-29T07:16:07.255065Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Write the rows currently held in `all_data_list` to a second workbook with a\n",
    "# different file name. This cell imports and defines nothing itself: it relies\n",
    "# on `pd` and `all_data_list` already existing in the kernel.\n",
    "df = pd.DataFrame(all_data_list)\n",
    "df.to_excel(r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\大唛云销售明细-古交腾飞路店13593189858.xlsx', index=False)"
   ],
   "id": "7671bfd65cc413c7",
   "outputs": [],
   "execution_count": 9
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}