{ "cells": [ { "metadata": {}, "cell_type": "markdown", "source": "## 单据", "id": "4eaaaa788a0f298d" }, { "metadata": { "ExecuteTime": { "end_time": "2025-12-29T03:20:05.123503800Z", "start_time": "2025-12-29T03:20:03.975961200Z" } }, "cell_type": "code", "source": [ "import requests\n", "\n", "cookies = {\n", " 'ECSCP[admin_id]': '108',\n", " 'ECSCP[admin_pass]': 'a7a7436dc3cdb9a0cf46db404553e361',\n", " 'ECSCP[page_size]': '15',\n", " 'ECS_LastCheckOrder': 'Mon%2C%2029%20Dec%202025%2002%3A01%3A30%20GMT',\n", " 'real_ipd': '221.226.144.180',\n", " 'ECSCP_ID': 'b038b6f65745ee9036bb43d1d1b48393c0e41cbe',\n", "}\n", "\n", "headers = {\n", " 'accept': '*/*',\n", " 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n", " 'cache-control': 'no-cache',\n", " 'content-type': 'application/x-www-form-urlencoded',\n", " 'origin': 'https://tejialuntai.com',\n", " 'pragma': 'no-cache',\n", " 'priority': 'u=1, i',\n", " 'referer': 'https://tejialuntai.com/admin2/car_repair.php?act=list',\n", " 'sec-ch-ua': '\"Microsoft Edge\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"',\n", " 'sec-ch-ua-mobile': '?0',\n", " 'sec-ch-ua-platform': '\"Windows\"',\n", " 'sec-fetch-dest': 'empty',\n", " 'sec-fetch-mode': 'cors',\n", " 'sec-fetch-site': 'same-origin',\n", " 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0',\n", " # 'cookie': 'ECSCP[admin_id]=108; ECSCP[admin_pass]=a7a7436dc3cdb9a0cf46db404553e361; ECSCP[page_size]=15; ECS_LastCheckOrder=Mon%2C%2029%20Dec%202025%2002%3A01%3A30%20GMT; real_ipd=221.226.144.180; ECSCP_ID=b038b6f65745ee9036bb43d1d1b48393c0e41cbe',\n", "}\n", "\n", "params = {\n", " 'is_ajax': '1',\n", "}\n", "\n", "data = {\n", " 'act': 'query',\n", " 'repair_id': '',\n", " 'user_name': '',\n", " 'note_name': '',\n", " 'carnumber': '',\n", " 'into_fa': '',\n", " 'price_offer': '0',\n", " 'clear': '0',\n", " 'is_invalid': '0',\n", " 'insurance_id': '0',\n", " 'suer_id': '0',\n", " 'choice_time': '0',\n", " 'inyear': '0',\n", " 'intime': '0',\n", " 'inday': '0',\n", " 'is_completed': '0',\n", " 'car_id': '0',\n", " 'wanmsg': '',\n", " 'record_count': '5774',\n", " 'page_size': '15',\n", " 'page': '1',\n", " 'page_count': '385',\n", " 'start': '15',\n", "}\n", "\n", "response = requests.post('https://tejialuntai.com/admin2/car_repair.php', params=params, cookies=cookies,\n", " headers=headers, data=data)\n" ], "id": "c04273d6cb063db0", "outputs": [], "execution_count": 1 }, { "metadata": { "ExecuteTime": { "end_time": "2025-12-29T02:05:11.789892400Z", "start_time": "2025-12-29T02:05:11.770112600Z" } }, "cell_type": "code", "source": "response.json().get(\"content\")", "id": "b21d2f6cb6cc5846", "outputs": [ { "data": { "text/plain": [ "'\\r\\n\\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n \\r\\n
车主名称车牌号往来描述车型保险公司接待员进厂公里数进厂时间交车时间预检状态报价状态车主状态维修状态质检状态结算状态报价员结算员结算时间预收款预收方式单据总额实收金额优惠金额操作
12099313655076051梅小青闽AH5L11宝马纽斯特洪格玲112025-12-28 17:03:592025-12-28 12:01预检中未报价\\r\\n 未维修 挂账中00\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 0.000.000.00\\r\\n \\t 查看\\r\\n \\t \\t删除\\r\\n
12097613694889302李亚峰赣AMQ691宝马纽斯特洪格玲884432025-12-28 13:12:272025-12-28 12:01已预检已报价同意维修\\r\\n 已维修\\r\\n 挂账中纽斯特王君0\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 200.000.000.00\\r\\n \\t 查看\\r\\n \\t \\r\\n
12095618970093030洪达赣M60606洪达广汽传祺纽斯特洪格玲112025-12-28 10:04:022025-12-28 12:01预检中未报价\\r\\n 未维修 挂账中00\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 0.000.000.00\\r\\n \\t 查看\\r\\n \\t \\t删除\\r\\n
12095415070852061邹女士赣AP13M8奇瑞纽斯特王君55152025-12-28 09:56:522025-12-28 12:01已预检已报价同意维修\\r\\n 未维修 挂账中纽斯特王君0\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 260.000.000.00\\r\\n \\t 查看\\r\\n \\t \\t删除\\r\\n
12095215912345678骏驰赣AT90M7骏驰纽斯特洪格玲112025-12-28 09:33:102025-12-28 12:01已预检已报价同意维修\\r\\n 未维修 挂账中纽斯特洪格玲0\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 500.000.000.00\\r\\n \\t 查看\\r\\n \\t \\t删除\\r\\n
12095118970093030洪达赣A09PS0洪达纽斯特洪格玲112025-12-28 09:24:092025-12-28 12:01已预检已报价同意维修\\r\\n 已维修\\r\\n 已质检挂账中纽斯特王君0\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 960.000.000.00\\r\\n \\t 查看\\r\\n \\t \\r\\n 回访
12094815632985669米其林赣AR95N2米其林纽斯特王君112025-12-28 08:53:372025-12-28 12:01已预检已报价同意维修\\r\\n 维修中\\r\\n 挂账中纽斯特洪格玲0\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 320.000.000.00\\r\\n \\t \\r\\n 完成\\r\\n \\r\\n 查看\\r\\n \\t \\r\\n
12093313677910473张女士赣A9261U利凡现代纽斯特王君857792025-12-27 17:10:382025-12-27 12:01已预检已报价同意维修\\r\\n 已维修\\r\\n 已质检已结算纽斯特洪格玲纽斯特洪格玲2025-12-28 17:47:11\\r\\n \\r\\n \\r\\n 30 \\r\\n \\r\\n 30.000.000.00\\r\\n \\t 退结算\\r\\n 查看\\r\\n \\t \\r\\n 回访
12092813725690859先生赣AFJ919Jeep纽斯特王君458792025-12-27 16:28:222025-12-27 12:01已预检已报价同意维修\\r\\n 未维修 挂账中纽斯特洪格玲0\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 380.000.000.00\\r\\n \\t 查看\\r\\n \\t \\t删除\\r\\n
12092713725690859先生赣AFJ919Jeep纽斯特王君458792025-12-27 16:20:552025-12-27 12:01已预检已报价同意维修\\r\\n 未维修 挂账中纽斯特洪格玲0\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 200.000.000.00\\r\\n \\t 查看\\r\\n \\t \\t删除\\r\\n
12091915579161130曾女士赣AG21B3纽斯特万莉1291542025-12-27 15:56:372025-12-27 15:56:37已预检已报价同意维修\\r\\n 已维修\\r\\n 已质检已结算纽斯特洪格玲纽斯特洪格玲2025-12-28 17:47:56\\r\\n \\r\\n \\r\\n 80 \\r\\n \\r\\n 80.000.000.00\\r\\n \\t 退结算\\r\\n 查看\\r\\n \\t \\r\\n 回访
12091013870862988V7赣A11VG1领豪纽斯特王君112025-12-27 14:43:042025-12-27 12:01已预检已报价同意维修\\r\\n 已维修\\r\\n 挂账中纽斯特洪格玲0\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 650.000.000.00\\r\\n \\t 查看\\r\\n \\t \\r\\n
12090713970809872杨于翔赣MY3015雪铁龙纽斯特王君1121182025-12-27 14:26:562025-12-27 14:26:56已预检已报价同意维修\\r\\n 已维修\\r\\n 已质检已结算纽斯特王君纽斯特洪格玲2025-12-28 17:47:35\\r\\n \\r\\n \\r\\n 1020 \\r\\n \\r\\n 1030.000.000.00\\r\\n \\t 退结算\\r\\n 查看\\r\\n \\t \\r\\n 回访
12090515900669098涂先生赣A9CQ96别克纽斯特王君395352025-12-27 14:13:542025-12-27 14:13:54已预检已报价同意维修\\r\\n 已维修\\r\\n 挂账中纽斯特洪格玲0\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 140.000.000.00\\r\\n \\t 查看\\r\\n \\t \\r\\n
12089813767963367舒先生赣A3J503人保大众纽斯特王君619662025-12-27 13:42:102025-12-27 12:01已预检已报价同意维修\\r\\n 已维修\\r\\n 挂账中纽斯特洪格玲0\\r\\n \\r\\n \\t\\r\\n \\r\\n \\r\\n \\r\\n 200.000.000.00\\r\\n \\t 查看\\r\\n \\t \\r\\n
\\r\\n \\r\\n 总预收款:1212848  总单据金额3064263.32  总实收金额841906.69  总优惠金额19269.01  \\r\\n \\r\\n
\\r\\n 总计 5774\\r\\n 个记录分为 385\\r\\n 页当前第 1\\r\\n 页,每页 \\r\\n \\r\\n 第一页\\r\\n 上一页\\r\\n 下一页\\r\\n 最末页\\r\\n \\r\\n \\r\\n
\\r\\n\\r\\n'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 3 }, { "metadata": { "ExecuteTime": { "end_time": "2025-12-29T03:30:04.324729500Z", "start_time": "2025-12-29T03:20:53.780286400Z" } }, "cell_type": "code", "source": [ "import requests\n", "from bs4 import BeautifulSoup\n", "import pandas as pd\n", "import time\n", "from tqdm.notebook import tqdm\n", "\n", "# -------------------------- 1. 全局配置 --------------------------\n", "cookies = {\n", " 'ECSCP[admin_id]': '108',\n", " 'ECSCP[admin_pass]': 'a7a7436dc3cdb9a0cf46db404553e361',\n", " 'ECSCP[page_size]': '15',\n", " 'ECS_LastCheckOrder': 'Mon%2C%2029%20Dec%2025%2002%3A01%3A30%20GMT',\n", " 'real_ipd': '221.226.144.180',\n", " 'ECSCP_ID': 'b038b6f65745ee9036bb43d1d1b48393c0e41cbe',\n", "}\n", "\n", "headers = {\n", " 'accept': '*/*',\n", " 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n", " 'cache-control': 'no-cache',\n", " 'content-type': 'application/x-www-form-urlencoded',\n", " 'origin': 'https://tejialuntai.com',\n", " 'pragma': 'no-cache',\n", " 'priority': 'u=1, i',\n", " 'referer': 'https://tejialuntai.com/admin2/car_repair.php?act=list',\n", " 'sec-ch-ua': '\"Microsoft Edge\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"',\n", " 'sec-ch-ua-mobile': '?0',\n", " 'sec-ch-ua-platform': '\"Windows\"',\n", " 'sec-fetch-dest': 'empty',\n", " 'sec-fetch-mode': 'cors',\n", " 'sec-fetch-site': 'same-origin',\n", " 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0',\n", "}\n", "\n", "params = {'is_ajax': '1'}\n", "\n", "base_data = {\n", " 'act': 'query', 'repair_id': '', 'user_name': '', 'note_name': '', 'carnumber': '', 'into_fa': '',\n", " 'price_offer': '0', 'clear': '0', 'is_invalid': '0', 'insurance_id': '0', 'suer_id': '0',\n", " 'choice_time': '0', 'inyear': '0', 'intime': '0', 'inday': '0', 'is_completed': '0',\n", " 'car_id': '0', 'wanmsg': '', 'record_count': '5774', 'page_size': '15', 'page': '1',\n", " 'page_count': '385', 'start': '0',\n", "}\n", "\n", "# -------------------------- 2. 核心功能函数 --------------------------\n", "\n", "def get_total_pages():\n", " \"\"\"获取总页数\"\"\"\n", " try:\n", " response = requests.post(\n", " url='https://tejialuntai.com/admin2/car_repair.php',\n", " params=params, cookies=cookies, headers=headers, data=base_data, timeout=15\n", " )\n", " response.raise_for_status()\n", " json_result = response.json()\n", " html_content = json_result.get('content', '')\n", "\n", " if not html_content: return None\n", " soup = BeautifulSoup(html_content, 'html.parser')\n", " total_pages_span = soup.find('span', id='totalPages')\n", " return int(total_pages_span.text) if total_pages_span else 1\n", " except Exception as e:\n", " print(f\"获取总页数失败: {e}\")\n", " return None\n", "\n", "def scrape_first_page():\n", " \"\"\"\n", " 【新函数】专门用于抓取第一页,同时返回表头和数据。\n", " \"\"\"\n", " print(\"正在抓取第 1 页以确定表头和初始数据...\")\n", " try:\n", " response = requests.post(\n", " url='https://tejialuntai.com/admin2/car_repair.php',\n", " params=params, cookies=cookies, headers=headers, data=base_data, timeout=15\n", " )\n", " response.raise_for_status()\n", " json_result = response.json()\n", " html_content = json_result.get('content', '')\n", "\n", " if not html_content: return None, []\n", "\n", " soup = BeautifulSoup(html_content, 'html.parser')\n", " target_table = soup.find('table')\n", " if not target_table: return None, []\n", "\n", " # 1. 提取表头\n", " headers_list = []\n", " header_cells = target_table.find('tr').find_all(['th', 'td'])\n", " for cell in header_cells:\n", " cell_text = cell.get_text(strip=True)\n", " if cell_text and cell_text != '全选':\n", " headers_list.append(cell_text)\n", " if headers_list and headers_list[0] == '车主':\n", " headers_list.insert(0, '维修单号')\n", "\n", " # 2. 提取第一页数据\n", " page_data = []\n", " for tr in target_table.find_all('tr')[1:]:\n", " if not tr.get('id') or not tr.get('id').startswith('rp_'): continue\n", " cells = tr.find_all('td')\n", " row_data = {}\n", " for idx, cell in enumerate(cells):\n", " if idx < len(headers_list):\n", " input_tag = cell.find('input', {'type': 'text'})\n", " select_tag = cell.find('select')\n", " cell_text = \"\"\n", " if input_tag and input_tag.get('value'):\n", " cell_text = input_tag['value'].strip()\n", " elif select_tag:\n", " selected_opt = select_tag.find('option', selected=True)\n", " cell_text = selected_opt.get_text(strip=True) if selected_opt else \"\"\n", " else:\n", " cell_text = cell.get_text(strip=True)\n", " row_data[headers_list[idx]] = cell_text\n", " page_data.append(row_data)\n", "\n", " print(f\"第 1 页抓取完成,获取 {len(page_data)} 条记录。\")\n", " return headers_list, page_data\n", "\n", " except Exception as e:\n", " print(f\"抓取第一页失败: {e}\")\n", " return None, []\n", "\n", "def scrape_page(page_num, headers_list, page_size=15):\n", " \"\"\"\n", " 【修正后】用于抓取除第一页外的其他页面,只返回数据。\n", " \"\"\"\n", " start_record = (page_num - 1) * page_size\n", " current_data = base_data.copy()\n", " current_data['page'] = str(page_num)\n", " current_data['start'] = str(start_record)\n", "\n", " try:\n", " response = requests.post(\n", " url='https://tejialuntai.com/admin2/car_repair.php',\n", " params=params, cookies=cookies, headers=headers, data=current_data, timeout=15\n", " )\n", " response.raise_for_status()\n", " json_result = response.json()\n", " html_content = json_result.get('content', '')\n", "\n", " if not html_content: return []\n", "\n", " soup = BeautifulSoup(html_content, 'html.parser')\n", " target_table = soup.find('table')\n", " if not target_table: return []\n", "\n", " page_data = []\n", " for tr in target_table.find_all('tr')[1:]:\n", " if not tr.get('id') or not tr.get('id').startswith('rp_'): continue\n", " cells = tr.find_all('td')\n", " row_data = {}\n", " for idx, cell in enumerate(cells):\n", " if idx < len(headers_list):\n", " input_tag = cell.find('input', {'type': 'text'})\n", " select_tag = cell.find('select')\n", " cell_text = \"\"\n", " if input_tag and input_tag.get('value'):\n", " cell_text = input_tag['value'].strip()\n", " elif select_tag:\n", " selected_opt = select_tag.find('option', selected=True)\n", " cell_text = selected_opt.get_text(strip=True) if selected_opt else \"\"\n", " else:\n", " cell_text = cell.get_text(strip=True)\n", " row_data[headers_list[idx]] = cell_text\n", " page_data.append(row_data)\n", " return page_data\n", " except Exception as e:\n", " print(f\"\\n第 {page_num} 页处理失败: {e}\")\n", " return []\n", "\n", "# -------------------------- 3. Jupyter主执行流程 --------------------------\n", "\n", "# 1. 获取总页数\n", "print(\"正在获取总页数...\")\n", "total_pages = get_total_pages()\n", "\n", "all_data = []\n", "all_headers = []\n", "\n", "if total_pages:\n", " print(f\"检测到总共有 {total_pages} 页。准备开始抓取...\")\n", "\n", " # 2. 【修正点】调用新函数抓取第一页,获取表头和数据\n", " all_headers, first_page_data = scrape_first_page()\n", "\n", " if not all_headers:\n", " print(\"错误:无法从第一页获取表头,程序终止。\")\n", " else:\n", " all_data.extend(first_page_data)\n", "\n", " # 3. 循环抓取剩余页面\n", " if total_pages > 1:\n", " for page in tqdm(range(2, total_pages + 1), desc=\"抓取进度\"):\n", " page_data = scrape_page(page, all_headers)\n", " if page_data:\n", " all_data.extend(page_data)\n", " time.sleep(0.5) # 适当降低请求频率\n", "\n", " # 4. 转换为DataFrame并显示\n", " print(\"\\n===== 所有数据抓取完成!=====\")\n", " df = pd.DataFrame(all_data, columns=all_headers)\n", " print(f\"共抓取 {len(df)} 条记录。\")\n", "\n", " # 在Jupyter中直接显示DataFrame\n", " display(df.head()) # 只显示前5行预览\n", "\n", " # 5. 导出到Excel\n", " file_path = \"汽车维修记录_全部数据.xlsx\"\n", " try:\n", " df.to_excel(file_path, index=False, engine='openpyxl')\n", " print(f\"\\n🎉 所有数据已成功导出至:{file_path}\")\n", " except Exception as e:\n", " print(f\"导出Excel失败: {e}\")\n", "else:\n", " print(\"无法获取总页数,程序终止。\")" ], "id": "7439dfbc38475085", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "正在获取总页数...\n", "检测到总共有 385 页。准备开始抓取...\n", "正在抓取第 1 页以确定表头和初始数据...\n", "第 1 页抓取完成,获取 15 条记录。\n" ] }, { "data": { "text/plain": [ "抓取进度: 0%| | 0/384 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
维修单号车主名称车牌号往来描述车型保险公司接待员进厂公里数进厂时间...结算状态报价员结算员结算时间预收款预收方式单据总额实收金额优惠金额操作
012099313655076051梅小青闽AH5L11宝马纽斯特洪格玲112025-12-28 17:03:59...挂账中0000.000.000.00查看删除
112097613694889302李亚峰赣AMQ691宝马纽斯特洪格玲884432025-12-28 13:12:27...挂账中纽斯特王君00200.000.000.00查看
212095618970093030洪达赣M60606洪达广汽传祺纽斯特洪格玲112025-12-28 10:04:02...挂账中0000.000.000.00查看删除
312095415070852061邹女士赣AP13M8奇瑞纽斯特王君55152025-12-28 09:56:52...挂账中纽斯特王君00260.000.000.00查看删除
412095215912345678骏驰赣AT90M7骏驰纽斯特洪格玲112025-12-28 09:33:10...挂账中纽斯特洪格玲00500.000.000.00查看删除
\n", "

5 rows × 26 columns

\n", "" ] }, "metadata": {}, "output_type": "display_data", "jetTransient": { "display_id": null } }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "🎉 所有数据已成功导出至:汽车维修记录_全部数据.xlsx\n" ] } ], "execution_count": 2 }, { "metadata": {}, "cell_type": "markdown", "source": "## 历史维修记录", "id": "a1f320fe80c616af" }, { "metadata": { "ExecuteTime": { "end_time": "2025-12-29T05:38:25.310242300Z", "start_time": "2025-12-29T03:36:00.499070500Z" } }, "cell_type": "code", "source": [ "import requests\n", "import pandas as pd\n", "from bs4 import BeautifulSoup\n", "import time\n", "\n", "# 关键步骤 1: 从 tqdm.notebook 导入 tqdm\n", "from tqdm.notebook import tqdm\n", "\n", "# 1. 读取Excel中的id\n", "try:\n", " df = pd.read_excel(\"汽车维修记录_全部数据.xlsx\", sheet_name=0)\n", "except FileNotFoundError:\n", " print(\"错误:找不到 '汽车维修记录_全部数据.xlsx' 文件。请检查文件名和路径。\")\n", " exit()\n", "\n", "# 2. 定义请求参数\n", "cookies = {\n", " 'ECSCP[admin_id]': '108',\n", " 'ECSCP[admin_pass]': 'a7a7436dc3cdb9a0cf46db404553e361',\n", " 'ECSCP[page_size]': '15',\n", " 'ECS_LastCheckOrder': 'Mon%2C%2029%20Dec%202025%2002%3A49%3A33%20GMT',\n", " 'real_ipd': '221.226.144.180',\n", " 'ECSCP_ID': 'b038b6f65745ee9036bb43d1d1b48393c0e41cbe',\n", "}\n", "\n", "request_headers = {\n", " 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',\n", " 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n", " 'cache-control': 'no-cache',\n", " 'pragma': 'no-cache',\n", " 'priority': 'u=0, i',\n", " 'referer': 'https://tejialuntai.com/admin2/car_repair.php?act=list',\n", " 'sec-ch-ua': '\"Microsoft Edge\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"',\n", " 'sec-ch-ua-mobile': '?0',\n", " 'sec-ch-ua-platform': '\"Windows\"',\n", " 'sec-fetch-dest': 'frame',\n", " 'sec-fetch-mode': 'navigate',\n", " 'sec-fetch-site': 'same-origin',\n", " 'sec-fetch-user': '?1',\n", " 'upgrade-insecure-requests': '1',\n", " 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0',\n", "}\n", "\n", "# 3. 遍历df中的id,提取数据\n", "all_data = []\n", "\n", "# 关键步骤 2: 使用 tqdm 包装你的 for 循环\n", "# tqdm() 需要知道总共有多少个迭代项,所以我们传入 df.itertuples()\n", "# desc 参数可以为进度条添加描述文字\n", "for row in tqdm(df.itertuples(index=False), total=len(df), desc=\"正在爬取数据\"):\n", " repair_id = row.维修单号\n", "\n", " params = {'act': 'edit', 'id': repair_id}\n", "\n", " try:\n", " response = requests.get(\n", " 'https://tejialuntai.com/admin2/car_repair.php',\n", " params=params,\n", " cookies=cookies,\n", " headers=request_headers,\n", " timeout=10\n", " )\n", " response.raise_for_status()\n", "\n", " html_content = response.text\n", " soup = BeautifulSoup(html_content, 'html.parser')\n", "\n", " car_plate_elem = soup.find('td', string=lambda text: text and '车牌号:' in text)\n", " if not car_plate_elem:\n", " # tqdm.write() 可以在不破坏进度条显示的情况下打印信息\n", " tqdm.write(f\"警告:在维修单 {repair_id} 的页面中未找到车牌号,已跳过。\")\n", " continue\n", "\n", " car_plate_td = car_plate_elem.find_next_sibling('td')\n", " if not car_plate_td:\n", " tqdm.write(f\"警告:在维修单 {repair_id} 的页面中找到车牌号标签,但未找到其后的兄弟元素,已跳过。\")\n", " continue\n", "\n", " car_plate = car_plate_td.text.strip().split('(')[0]\n", "\n", " quote_table = soup.find('table', style='width:85%')\n", " if not quote_table:\n", " tqdm.write(f\"警告:在维修单 {repair_id} 的页面中未找到报价表格,已跳过。\")\n", " continue\n", "\n", " table_headers = [th.text.strip() for th in quote_table.find_all('th')]\n", " if not table_headers:\n", " tqdm.write(f\"警告:在维修单 {repair_id} 的页面中未找到表头,已跳过。\")\n", " continue\n", "\n", " quote_data = []\n", " for tr in quote_table.find_all('tr')[1:]:\n", " row_data = [td.text.strip() for td in tr.find_all('td')]\n", " if len(row_data) == len(table_headers):\n", " quote_data.append(row_data)\n", "\n", " if not quote_data:\n", " tqdm.write(f\"警告:在维修单 {repair_id} 的页面中未找到有效的行数据,已跳过。\")\n", " continue\n", "\n", " quote_df = pd.DataFrame(quote_data, columns=table_headers)\n", " quote_df['车牌号'] = car_plate\n", " all_data.append(quote_df)\n", "\n", " time.sleep(0.2)\n", "\n", " except requests.exceptions.RequestException as e:\n", " tqdm.write(f\"错误:处理维修单 {repair_id} 时发生网络请求错误: {e}\")\n", " continue\n", " except Exception as e:\n", " tqdm.write(f\"错误:处理维修单 {repair_id} 时发生未知错误: {e}\")\n", " continue\n", "\n", "# 4. 合并所有数据并保存\n", "if all_data:\n", " final_df = pd.concat(all_data, ignore_index=True)\n", " try:\n", " final_df.to_excel(\"汽车维修报价项目_汇总.xlsx\", index=False)\n", " print(\"\\n数据提取完成,已保存到 '汽车维修报价项目_汇总.xlsx'!\")\n", " except Exception as e:\n", " print(f\"错误:保存Excel文件失败: {e}\")\n", "else:\n", " print(\"\\n未能提取到任何有效数据,未生成Excel文件。\")" ], "id": "1eccb5f27833bc95", "outputs": [ { "data": { "text/plain": [ "正在爬取数据: 0%| | 0/5774 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
分类报修内容项目名称数量单位单价金额是否采购采购留言...说明预检时间预检员预检图片维修工开始时间维修情况完成时间操作车牌号
0机电更换水泵总成请输入产品名称单位\\n套\\n桶\\n根\\n张\\n只\\nL\\n个\\n次\\n包0.00采购方式\\n配件商自己留言...随便说明0()未维修清空价格闽AH5L11
1漆面前杠左半部进口漆前杠左半部进口漆(校修)请输入产品名称单位\\n套\\n桶\\n根\\n张\\n只\\nL\\n个\\n次\\n包200.00采购方式\\n配件商自己留言...随便说明02025-12-28 13:12:27纽斯特倪兵纽斯特倪兵(note)2025-12-29 09:35维修完成2025-12-29 09:35赣AMQ691
2钣金前杠前保(校修)请输入产品名称单位\\n套\\n桶\\n根\\n张\\n只\\nL\\n个\\n次\\n包0.00采购方式\\n配件商自己留言...随便说明02025-12-28 14:29:07纽斯特吴杨州纽斯特吴杨州(note)2025-12-29 09:19维修完成2025-12-29 09:19赣AMQ691
3机电更换水箱请输入产品名称单位\\n套\\n桶\\n根\\n张\\n只\\nL\\n个\\n次\\n包0.00采购方式\\n配件商自己留言...随便说明0()未维修清空价格赣M60606
4机电保养壹达530N(换件)请输入产品名称单位\\n套\\n桶\\n根\\n张\\n只\\nL\\n个\\n次\\n包260.00采购方式\\n配件商自己留言...随便说明02025-12-28 16:25:28纽斯特易林平()0未维修0清空价格赣AP13M8
..................................................................
16984机电空调不制冷冷凝器(换件)冷凝器1单位\\n套\\n桶\\n根\\n张\\n只\\nL\\n个\\n次\\n包280.00280.00采购方式\\n配件商自己留言...随便说明02023-07-29 11:06:36纽斯特胡神统纽斯特胡神统(note)2023-07-31 11:31维修完成2023-08-05 11:14赣A50N85
16985机电空调不制冷冷媒(换件)冷媒3单位\\n套\\n桶\\n根\\n张\\n只\\nL\\n个\\n次\\n包35.00105.00采购方式\\n配件商自己留言...随便说明02023-07-29 11:06:36纽斯特胡神统纽斯特胡神统(note)2023-07-31 11:31维修完成2023-08-05 11:14赣A50N85
16986机电动平衡四轮动平衡(校修)四轮动平衡4单位\\n套\\n桶\\n根\\n张\\n只\\nL\\n个\\n次\\n包20.0080.00采购方式\\n配件商自己留言...随便说明02023-07-28 10:50:58纽斯特胡神统纽斯特胡神统(note)2023-07-28 10:58维修完成2023-07-28 11:14粤SAG866
16987漆面后盖喷漆后盖喷漆(校修)后盖喷漆单位\\n套\\n桶\\n根\\n张\\n只\\nL\\n个\\n次\\n包0.00采购方式\\n配件商自己留言...随便说明02023-07-28 10:27:19纽斯特倪兵纽斯特倪兵(note)2023-07-28 10:41维修完成2023-08-07 14:52赣ATS267
16988钣金漏报钣金(校修)钣金单位\\n套\\n桶\\n根\\n张\\n只\\nL\\n个\\n次\\n包0.00采购方式\\n配件商自己留言...随便说明02023-07-28 10:44:32纽斯特邓建明纽斯特邓建明(note)2023-07-28 10:44:32维修完成2023-07-28 10:44:32赣ATS267
\n", "

16989 rows × 25 columns

\n", "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 2 }, { "metadata": { "ExecuteTime": { "end_time": "2025-12-29T05:43:46.270524600Z", "start_time": "2025-12-29T05:43:29.356879400Z" } }, "cell_type": "code", "source": [ "import requests\n", "import pandas as pd\n", "from bs4 import BeautifulSoup\n", "import time\n", "from tqdm.notebook import tqdm\n", "\n", "# 1. 读取Excel中的id\n", "try:\n", " df = pd.read_excel(\"汽车维修记录_全部数据.xlsx\", sheet_name=0)\n", "except FileNotFoundError:\n", " print(\"错误:找不到 '汽车维修记录_全部数据.xlsx' 文件。请检查文件名和路径。\")\n", " exit()\n", "\n", "# 2. 定义请求参数\n", "cookies = {\n", " 'ECSCP[admin_id]': '108',\n", " 'ECSCP[admin_pass]': 'a7a7436dc3cdb9a0cf46db404553e361',\n", " 'ECSCP[page_size]': '15',\n", " 'ECS_LastCheckOrder': 'Mon%2C%2029%20Dec%202025%2002%3A49%3A33%20GMT',\n", " 'real_ipd': '221.226.144.180',\n", " 'ECSCP_ID': 'b038b6f65745ee9036bb43d1d1b48393c0e41cbe',\n", "}\n", "\n", "request_headers = {\n", " 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',\n", " 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n", " 'cache-control': 'no-cache',\n", " 'pragma': 'no-cache',\n", " 'priority': 'u=0, i',\n", " 'referer': 'https://tejialuntai.com/admin2/car_repair.php?act=list',\n", " 'sec-ch-ua': '\"Microsoft Edge\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"',\n", " 'sec-ch-ua-mobile': '?0',\n", " 'sec-ch-ua-platform': '\"Windows\"',\n", " 'sec-fetch-dest': 'frame',\n", " 'sec-fetch-mode': 'navigate',\n", " 'sec-fetch-site': 'same-origin',\n", " 'sec-fetch-user': '?1',\n", " 'upgrade-insecure-requests': '1',\n", " 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0',\n", "}\n", "\n", "# 3. 遍历df中的id,提取数据\n", "all_data = []\n", "\n", "for row in tqdm(df.itertuples(index=False), total=len(df), desc=\"正在爬取数据\"):\n", " repair_id = row.维修单号\n", "\n", " params = {'act': 'edit', 'id': repair_id}\n", "\n", " try:\n", " response = requests.get(\n", " 'https://tejialuntai.com/admin2/car_repair.php',\n", " params=params,\n", " cookies=cookies,\n", " headers=request_headers,\n", " timeout=10\n", " )\n", " response.raise_for_status()\n", "\n", " html_content = response.text\n", " soup = BeautifulSoup(html_content, 'html.parser')\n", "\n", " car_plate_elem = soup.find('td', string=lambda text: text and '车牌号:' in text)\n", " if not car_plate_elem:\n", " tqdm.write(f\"警告:在维修单 {repair_id} 的页面中未找到车牌号,已跳过。\")\n", " continue\n", "\n", " car_plate_td = car_plate_elem.find_next_sibling('td')\n", " if not car_plate_td:\n", " tqdm.write(f\"警告:在维修单 {repair_id} 的页面中找到车牌号标签,但未找到其后的兄弟元素,已跳过。\")\n", " continue\n", "\n", " car_plate = car_plate_td.text.strip().split('(')[0]\n", "\n", " quote_table = soup.find('table', style='width:85%')\n", " if not quote_table:\n", " tqdm.write(f\"警告:在维修单 {repair_id} 的页面中未找到报价表格,已跳过。\")\n", " continue\n", "\n", " table_headers = [th.text.strip() for th in quote_table.find_all('th')]\n", " if not table_headers:\n", " tqdm.write(f\"警告:在维修单 {repair_id} 的页面中未找到表头,已跳过。\")\n", " continue\n", "\n", " quote_data = []\n", " for tr in quote_table.find_all('tr')[1:]:\n", " row_data = [td.text.strip() for td in tr.find_all('td')]\n", " if len(row_data) == len(table_headers):\n", " quote_data.append(row_data)\n", "\n", " if not quote_data:\n", " tqdm.write(f\"警告:在维修单 {repair_id} 的页面中未找到有效的行数据,已跳过。\")\n", " continue\n", "\n", " quote_df = pd.DataFrame(quote_data, columns=table_headers)\n", "\n", " # --- 核心修改点 ---\n", " # 在将DataFrame添加到列表前,为其添加'维修单号'和'车牌号'列\n", " quote_df['维修单号'] = repair_id\n", " quote_df['车牌号'] = car_plate\n", " # -------------------\n", "\n", " all_data.append(quote_df)\n", "\n", " time.sleep(0.5)\n", "\n", " except requests.exceptions.RequestException as e:\n", " tqdm.write(f\"错误:处理维修单 {repair_id} 时发生网络请求错误: {e}\")\n", " continue\n", " except Exception as e:\n", " tqdm.write(f\"错误:处理维修单 {repair_id} 时发生未知错误: {e}\")\n", " continue\n", "\n", "# 4. 合并所有数据并保存\n", "if all_data:\n", " final_df = pd.concat(all_data, ignore_index=True)\n", " try:\n", " final_df.to_excel(\"汽车维修报价项目_汇总.xlsx\", index=False)\n", " print(\"\\n数据提取完成,已保存到 '汽车维修报价项目_汇总.xlsx'!\")\n", " except Exception as e:\n", " print(f\"错误:保存Excel文件失败: {e}\")\n", "else:\n", " print(\"\\n未能提取到任何有效数据,未生成Excel文件。\")" ], "id": "807a4c625b434fcd", "outputs": [ { "data": { "text/plain": [ "正在爬取数据: 0%| | 0/5774 [00:00