Files
F6--/张阳脚本/竞品系统数据导出/爱车店新.ipynb
T
2026-01-30 11:28:35 +08:00

979 lines
46 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-06-23T05:28:31.780431Z",
"start_time": "2025-06-23T05:26:25.738579Z"
}
},
"source": [
"from selenium import webdriver\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"import time\n",
"from urllib.parse import urljoin\n",
"import pandas as pd\n",
"from selenium.webdriver import Chrome\n",
"from selenium.webdriver.chrome.service import Service\n",
"from datetime import datetime\n",
"from selenium.webdriver.chrome.options import Options\n",
"from datetime import datetime, timedelta\n",
"from selenium.common.exceptions import NoSuchElementException\n",
"from tqdm import tqdm\n",
"from selenium.common.exceptions import TimeoutException\n",
"\n",
"import os\n",
"from getpass import getpass\n",
"\n",
"# Chrome options. Headless mode is left off so the browser window stays visible\n",
"# during the login pause below.\n",
"chrome_options = Options()\n",
"# chrome_options.add_argument('--headless')\n",
"chrome_options.add_argument('--disable-gpu')\n",
"chrome_options.add_argument('--no-sandbox')\n",
"\n",
"# ChromeDriver location. Written as a raw string: the plain literal made Python emit\n",
"# SyntaxWarning: invalid escape sequence '\\P'. Set CHROMEDRIVER_PATH to override it.\n",
"service = Service(executable_path=os.environ.get(\n",
"    'CHROMEDRIVER_PATH', r'D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe'))\n",
"\n",
"# Start the browser session used by the rest of the notebook.\n",
"driver = webdriver.Chrome(service=service, options=chrome_options)\n",
"\n",
"# Page requested first; the login form below is filled in after this request.\n",
"# Other stores are served from a different sub-domain (e.g. xlsf.aichedian.com).\n",
"url = 'http://best.aichedian.com/order/order-detail/1115935207959/'\n",
"\n",
"# Credentials come from the environment (or an interactive prompt), never from the notebook.\n",
"# NOTE(review): the password that used to be hard-coded here was committed with the file\n",
"# and should be rotated.\n",
"username = os.environ.get('AICHEDIAN_USERNAME') or input('aichedian username: ')\n",
"password = os.environ.get('AICHEDIAN_PASSWORD') or getpass('aichedian password: ')\n",
"\n",
"driver.get(url)\n",
"\n",
"login_wait = WebDriverWait(driver, 10)\n",
"submit_xpath = '//*[@id=\"login_form\"]/div[4]/div/input'\n",
"password_xpath = '//*[@id=\"login_form\"]/div[3]/div[2]/div/input'\n",
"\n",
"login_wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id=\"username\"]'))).send_keys(username)\n",
"login_wait.until(EC.presence_of_element_located((By.XPATH, submit_xpath))).click()\n",
"time.sleep(5)  # pause so the store can be selected before the password step\n",
"login_wait.until(EC.presence_of_element_located((By.XPATH, password_xpath))).send_keys(password)\n",
"login_wait.until(EC.presence_of_element_located((By.XPATH, submit_xpath))).click()\n",
"\n",
"time.sleep(2)\n",
"\n",
"# Result rows (one per exported order) and the detail-page URLs gathered by the crawl.\n",
"all_data = []\n",
"all_url = []\n",
"\n",
"# Look-back range that ends now; change the day count as needed\n",
"# (the author also noted 786 as a value).\n",
"end_date = datetime.now()\n",
"start_date = end_date - timedelta(days=1552)\n",
"current_date = start_date\n",
"\n",
"# Column order of the exported sheet, grouped the way the values are collected.\n",
"headers = (\n",
"    ['订单号', '开单时间', '入账时间', '车辆', '车主', '订单详情']\n",
"    + ['车牌号码', '车辆品牌', '会员卡号', '车主姓名', '联系方式']\n",
"    + ['服务名称', '销售人员', '施工人员', '应付金额', '施工时间', '完工时间', '服务评分']\n",
"    + ['产品名称', '型号', '单价', '数量', '总价', '销售人员', '销售时间']\n",
"    + ['支付方式', '账号', '金额', '时间', '备注']\n",
")\n",
"\n",
"# First day to crawl and today's date, both as YYYY-MM-DD text.\n",
"first_date = '2021-04-03'\n",
"now_date = datetime.now().strftime('%Y-%m-%d')\n",
"\n",
"# pandas Timestamps bounding the crawl; `current` is the moving window start.\n",
"start, end = pd.to_datetime(first_date), pd.to_datetime(now_date)\n",
"current = start\n",
"\n",
"# Number of year-sized windows between start and end, used as the progress-bar total.\n",
"total_years = (end - start).days // 365 + 1\n",
"\n",
"# Walk the date range in windows of at most one year. For each window, open the order\n",
"# report, follow every '查看详细' link, and record the '明细' links found there in all_url.\n",
"with tqdm(total=total_years, desc='处理时间段') as pbar:\n",
"    no_data_count = 0\n",
"    details_id = ''  # id segment of the most recent '明细' link seen by this loop\n",
"    while current <= end:\n",
"        # Window end: one year after `current` minus a day, clipped to `end`.\n",
"        year_end = min(current + pd.DateOffset(years=1) - pd.Timedelta(days=1), end)\n",
"\n",
"        # The report page takes its dates as YYYY/MM/DD.\n",
"        url_start = current.strftime('%Y/%m/%d')\n",
"        url_end = year_end.strftime('%Y/%m/%d')\n",
"        url = f'http://best.aichedian.com/report/order-stat/?start_date={url_start}&end_date={url_end}&tab=1'\n",
"\n",
"        # Move the pointer to the next window right away. The loop used to leave\n",
"        # `current` untouched, so it requested the same window forever.\n",
"        current = year_end + pd.Timedelta(days=1)\n",
"\n",
"        driver.get(url)\n",
"        xpath = '//*[@id=\"order-profit-count\"]/table[2]'\n",
"        try:\n",
"            # until() only returns once the locator matches something, so `rows`\n",
"            # is never empty here; no extra emptiness check is needed.\n",
"            rows = WebDriverWait(driver, 5).until(\n",
"                EC.presence_of_all_elements_located((By.XPATH, xpath))\n",
"            )\n",
"        except TimeoutException:\n",
"            pbar.update(1)\n",
"            no_data_count += 1\n",
"            if no_data_count >= 30:\n",
"                print('连续30个时间段没有数据,退出循环')\n",
"                break\n",
"            continue\n",
"        no_data_count = 0\n",
"\n",
"        # Read everything needed from the report page BEFORE navigating away, so no\n",
"        # element handle of this page is touched after the browser has left it.\n",
"        detail_page_urls = []\n",
"        for row in rows:\n",
"            row_data = [cell.text for cell in row.find_elements(By.TAG_NAME, 'td')]\n",
"            # NOTE(review): these four names hold the first four cell texts of the last\n",
"            # table processed; the detail loop reads them unchanged for every order.\n",
"            # Padded so a table with fewer than four cells does not raise IndexError.\n",
"            start_dj_data, get_money_data, car_number, customer = (row_data + [''] * 4)[:4]\n",
"            order_details = '\\n'.join(row_data)\n",
"            for link in row.find_elements(By.XPATH, './/td//a'):\n",
"                if link.text == '查看详细':\n",
"                    detail_page_urls.append(\n",
"                        urljoin('http://best.aichedian.com/', link.get_attribute('href')))\n",
"\n",
"        for absolute_url in detail_page_urls:\n",
"            try:\n",
"                driver.get(absolute_url)\n",
"                n_rows = WebDriverWait(driver, 5).until(\n",
"                    EC.presence_of_all_elements_located((By.XPATH, '//*[@id=\"x-single-content\"]'))\n",
"                )\n",
"                for n_link in n_rows[0].find_elements(By.XPATH, './/td//a'):\n",
"                    if n_link.text == '明细':\n",
"                        n_absolute_url = urljoin('http://best.aichedian.com/', n_link.get_attribute('href'))\n",
"                        details_id = n_absolute_url.split('/')[-2]\n",
"                        all_url.append(n_absolute_url)\n",
"                time.sleep(0.1)\n",
"            except Exception as exc:\n",
"                # Best effort: report the page and carry on. Unlike a bare except,\n",
"                # this still lets Ctrl-C / kernel interrupt stop the crawl.\n",
"                print(f'页面读取失败,已跳过: {absolute_url} ({exc!r})')\n",
"\n",
"        # One tick per window, inside the loop so the bar actually advances.\n",
"        pbar.update(1)\n",
" \n",
"# Table sections of an order detail page: <h3> title -> column names, in <td> order.\n",
"SECTION_COLUMNS = {\n",
"    '服务项目': ['服务名称', '销售人员', '施工人员', '应付金额', '施工时间', '完工时间', '服务评分'],\n",
"    '销售产品': ['产品名称', '型号', '单价', '数量', '总价', '销售人员', '销售时间'],\n",
"    '支付记录': ['支付方式', '账号', '金额', '时间', '备注'],\n",
"}\n",
"\n",
"\n",
"def _scrape_section(title, columns):\n",
"    '''Read the table that follows the <h3> whose text is `title` on the current page.\n",
"\n",
"    Returns a list with one dict per <tr> except the first, mapping columns[i] to the\n",
"    text of that row's i-th <td>. The list is empty when the heading does not show up\n",
"    within 0.2 s; rows read before a lookup failure are kept.\n",
"    '''\n",
"    records = []\n",
"    try:\n",
"        heading = WebDriverWait(driver, 0.2).until(\n",
"            EC.presence_of_element_located((By.XPATH, f'//h3[text()=\"{title}\"]'))\n",
"        )\n",
"        if heading.text == title:\n",
"            table = heading.find_element(By.XPATH, './following-sibling::table')\n",
"            for tr in table.find_elements(By.TAG_NAME, 'tr')[1:]:\n",
"                tds = tr.find_elements(By.TAG_NAME, 'td')\n",
"                records.append({name: tds[i].text for i, name in enumerate(columns)})\n",
"    except (TimeoutException, NoSuchElementException, IndexError):\n",
"        pass  # section missing or laid out differently: keep what was read so far\n",
"    return records\n",
"\n",
"\n",
"# Visit every collected detail URL and turn the page into one export row.\n",
"for url in tqdm(all_url):\n",
"    try:\n",
"        driver.get(url)\n",
"        # Taken from the URL being visited (second-to-last path segment), not from a\n",
"        # variable left over from the crawl loop.\n",
"        details_id = url.split('/')[-2]\n",
"\n",
"        h2 = WebDriverWait(driver, 5).until(\n",
"            EC.presence_of_element_located((By.XPATH, '//*[@id=\"x-single-content\"]/h2'))\n",
"        )\n",
"        # Only pages whose heading says '订单详情' are exported; '车辆检测单' pages are skipped.\n",
"        if '车辆检测单' in h2.text or '订单详情' not in h2.text:\n",
"            driver.back()\n",
"            print('跳过')\n",
"            time.sleep(5)\n",
"            continue\n",
"\n",
"        # Basic information: every <li> is read as 'label:value'.\n",
"        base_details = WebDriverWait(driver, 10).until(\n",
"            EC.presence_of_element_located((By.XPATH, '//*[@id=\"x-single-content\"]/ul'))\n",
"        )\n",
"        base_info = {}\n",
"        for li in base_details.find_elements(By.TAG_NAME, 'li'):\n",
"            # partition() keeps everything after the FIRST colon, so values that contain\n",
"            # ':' themselves are no longer cut short; no colon gives an empty value.\n",
"            label, _, value = li.text.partition(':')\n",
"            base_info[label.strip()] = value.strip()\n",
"\n",
"        sections = {title: _scrape_section(title, columns)\n",
"                    for title, columns in SECTION_COLUMNS.items()}\n",
"\n",
"        order_info = {\n",
"            '订单号': details_id,\n",
"            # NOTE(review): the next four values are not looked up per order; they are\n",
"            # whatever the crawl loop last assigned, so every exported row carries the\n",
"            # same values. Confirm where they should really come from.\n",
"            '开单时间': start_dj_data,\n",
"            '入账时间': get_money_data,\n",
"            '车辆': car_number,\n",
"            '车主': customer,\n",
"            '车牌号码': base_info.get('车牌号码', ''),\n",
"            '车辆品牌': base_info.get('车辆品牌', ''),\n",
"            '会员卡号': base_info.get('会员卡号', ''),\n",
"            '车主姓名': base_info.get('车主姓名', ''),\n",
"            '联系方式': base_info.get('联系方式', ''),\n",
"        }\n",
"\n",
"        # Multi-row sections are flattened to one newline-joined string per column.\n",
"        # '销售人员' exists in two sections, so its values are kept apart in section order.\n",
"        sales_people = []\n",
"        for title, columns in SECTION_COLUMNS.items():\n",
"            for name in columns:\n",
"                joined = '\\n'.join(record[name] for record in sections[title])\n",
"                if name == '销售人员':\n",
"                    sales_people.append(joined)\n",
"                else:\n",
"                    order_info[name] = joined\n",
"\n",
"        # `headers` lists '销售人员' twice (service first, product second). A dict can hold\n",
"        # only one of them, which used to put the product value in both columns, so the\n",
"        # row is emitted as a list laid out in `headers` order instead.\n",
"        remaining_sales = iter(sales_people)\n",
"        all_data.append([\n",
"            next(remaining_sales, '') if name == '销售人员' else order_info.get(name)\n",
"            for name in headers\n",
"        ])\n",
"    except Exception as exc:\n",
"        # Best effort: report the page and carry on. Unlike a bare except, this still\n",
"        # lets Ctrl-C / kernel interrupt stop the run.\n",
"        print(f'明细页处理失败,已跳过: {url} ({exc!r})')\n",
"\n",
"# Build the export table in `headers` column order and write it to an Excel workbook.\n",
"df = pd.DataFrame(data=all_data, columns=headers)\n",
"df.to_excel(excel_writer='爱车店数据导出.xlsx', index=False)\n",
"\n",
"# Shut the browser session down.\n",
"driver.quit()"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<>:25: SyntaxWarning: invalid escape sequence '\\P'\n",
"<>:25: SyntaxWarning: invalid escape sequence '\\P'\n",
"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_20808\\1843733566.py:25: SyntaxWarning: invalid escape sequence '\\P'\n",
" service = Service(executable_path='D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe')\n",
"处理时间段: 0%| | 0/5 [00:00<?, ?it/s]\n",
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"100%|██████████| 1/1 [00:58<00:00, 58.80s/it]\u001B[A\n",
"\n",
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"100%|██████████| 1/1 [00:56<00:00, 56.86s/it]\u001B[A\n",
"处理时间段: 0%| | 0/5 [01:56<?, ?it/s]\n",
"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_20808\\1843733566.py:25: SyntaxWarning: invalid escape sequence '\\P'\n",
" service = Service(executable_path='D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe')\n"
]
},
{
"ename": "InvalidSessionIdException",
"evalue": "Message: invalid session id\nStacktrace:\n\tGetHandleVerifier [0x0x7ff728a4cda5+78885]\n\tGetHandleVerifier [0x0x7ff728a4ce00+78976]\n\t(No symbol) [0x0x7ff7288099fc]\n\t(No symbol) [0x0x7ff7288507df]\n\t(No symbol) [0x0x7ff728888a52]\n\t(No symbol) [0x0x7ff728883413]\n\t(No symbol) [0x0x7ff7288824d9]\n\t(No symbol) [0x0x7ff7287d5d55]\n\tGetHandleVerifier [0x0x7ff728d24eed+3061101]\n\tGetHandleVerifier [0x0x7ff728d1f33d+3037629]\n\tGetHandleVerifier [0x0x7ff728d3e592+3165202]\n\tGetHandleVerifier [0x0x7ff728a6730e+186766]\n\tGetHandleVerifier [0x0x7ff728a6eb3f+217535]\n\t(No symbol) [0x0x7ff7287d4dca]\n\tGetHandleVerifier [0x0x7ff728e445e8+4238440]\n\tBaseThreadInitThunk [0x0x7fff2f1de8d7+23]\n\tRtlUserThreadStart [0x0x7fff307bc34c+44]\n",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mInvalidSessionIdException\u001B[0m Traceback (most recent call last)",
"Cell \u001B[1;32mIn[49], line 95\u001B[0m\n\u001B[0;32m 92\u001B[0m \u001B[38;5;66;03m# 构造 URL\u001B[39;00m\n\u001B[0;32m 93\u001B[0m url \u001B[38;5;241m=\u001B[39m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mhttp://best.aichedian.com/report/order-stat/?start_date=\u001B[39m\u001B[38;5;132;01m{\u001B[39;00murl_start\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m&end_date=\u001B[39m\u001B[38;5;132;01m{\u001B[39;00murl_end\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m&tab=1\u001B[39m\u001B[38;5;124m'\u001B[39m\n\u001B[1;32m---> 95\u001B[0m driver\u001B[38;5;241m.\u001B[39mget(url) \u001B[38;5;66;03m# 获取每日订单\u001B[39;00m\n\u001B[0;32m 96\u001B[0m \u001B[38;5;66;03m# 获取指定 XPath 下的所有 <tr> 元素\u001B[39;00m\n\u001B[0;32m 97\u001B[0m xpath \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m//*[@id=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124morder-profit-count\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m]/table[2]\u001B[39m\u001B[38;5;124m'\u001B[39m\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:363\u001B[0m, in \u001B[0;36mWebDriver.get\u001B[1;34m(self, url)\u001B[0m\n\u001B[0;32m 361\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mget\u001B[39m(\u001B[38;5;28mself\u001B[39m, url: \u001B[38;5;28mstr\u001B[39m) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 362\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"Loads a web page in the current browser session.\"\"\"\u001B[39;00m\n\u001B[1;32m--> 363\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mexecute(Command\u001B[38;5;241m.\u001B[39mGET, {\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124murl\u001B[39m\u001B[38;5;124m\"\u001B[39m: url})\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:354\u001B[0m, in \u001B[0;36mWebDriver.execute\u001B[1;34m(self, driver_command, params)\u001B[0m\n\u001B[0;32m 352\u001B[0m response \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcommand_executor\u001B[38;5;241m.\u001B[39mexecute(driver_command, params)\n\u001B[0;32m 353\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m response:\n\u001B[1;32m--> 354\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39merror_handler\u001B[38;5;241m.\u001B[39mcheck_response(response)\n\u001B[0;32m 355\u001B[0m response[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mvalue\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_unwrap_value(response\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mvalue\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28;01mNone\u001B[39;00m))\n\u001B[0;32m 356\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m response\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py:229\u001B[0m, in \u001B[0;36mErrorHandler.check_response\u001B[1;34m(self, response)\u001B[0m\n\u001B[0;32m 227\u001B[0m alert_text \u001B[38;5;241m=\u001B[39m value[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124malert\u001B[39m\u001B[38;5;124m\"\u001B[39m]\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtext\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 228\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m exception_class(message, screen, stacktrace, alert_text) \u001B[38;5;66;03m# type: ignore[call-arg] # mypy is not smart enough here\u001B[39;00m\n\u001B[1;32m--> 229\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m exception_class(message, screen, stacktrace)\n",
"\u001B[1;31mInvalidSessionIdException\u001B[0m: Message: invalid session id\nStacktrace:\n\tGetHandleVerifier [0x0x7ff728a4cda5+78885]\n\tGetHandleVerifier [0x0x7ff728a4ce00+78976]\n\t(No symbol) [0x0x7ff7288099fc]\n\t(No symbol) [0x0x7ff7288507df]\n\t(No symbol) [0x0x7ff728888a52]\n\t(No symbol) [0x0x7ff728883413]\n\t(No symbol) [0x0x7ff7288824d9]\n\t(No symbol) [0x0x7ff7287d5d55]\n\tGetHandleVerifier [0x0x7ff728d24eed+3061101]\n\tGetHandleVerifier [0x0x7ff728d1f33d+3037629]\n\tGetHandleVerifier [0x0x7ff728d3e592+3165202]\n\tGetHandleVerifier [0x0x7ff728a6730e+186766]\n\tGetHandleVerifier [0x0x7ff728a6eb3f+217535]\n\t(No symbol) [0x0x7ff7287d4dca]\n\tGetHandleVerifier [0x0x7ff728e445e8+4238440]\n\tBaseThreadInitThunk [0x0x7fff2f1de8d7+23]\n\tRtlUserThreadStart [0x0x7fff307bc34c+44]\n"
]
}
],
"execution_count": 49
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-23T05:15:55.507074Z",
"start_time": "2025-06-23T05:15:55.444172Z"
}
},
"cell_type": "code",
"source": [
"# Build the export table in `headers` column order and write it to an Excel workbook.\n",
"df = pd.DataFrame(data=all_data, columns=headers)\n",
"df.to_excel(excel_writer='爱车店数据导出.xlsx', index=False)"
],
"id": "911d116f7d50ffd2",
"outputs": [],
"execution_count": 44
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-23T11:53:26.818502Z",
"start_time": "2025-06-23T06:29:37.293543Z"
}
},
"cell_type": "code",
"source": [
"from selenium import webdriver\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"import time\n",
"from urllib.parse import urljoin\n",
"import pandas as pd\n",
"from selenium.webdriver import Chrome\n",
"from selenium.webdriver.chrome.service import Service\n",
"from datetime import datetime\n",
"from selenium.webdriver.chrome.options import Options\n",
"from datetime import datetime, timedelta\n",
"from selenium.common.exceptions import NoSuchElementException\n",
"from tqdm import tqdm\n",
"from selenium.common.exceptions import TimeoutException\n",
"\n",
"import os\n",
"from getpass import getpass\n",
"\n",
"# Chrome options. Headless mode is left off so the browser window stays visible\n",
"# during the login pause below.\n",
"chrome_options = Options()\n",
"# chrome_options.add_argument('--headless')\n",
"chrome_options.add_argument('--disable-gpu')\n",
"chrome_options.add_argument('--no-sandbox')\n",
"\n",
"# ChromeDriver location. Written as a raw string: the plain literal made Python emit\n",
"# SyntaxWarning: invalid escape sequence '\\P'. Set CHROMEDRIVER_PATH to override it.\n",
"service = Service(executable_path=os.environ.get(\n",
"    'CHROMEDRIVER_PATH', r'D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe'))\n",
"\n",
"# Start the browser session used by the rest of the notebook.\n",
"driver = webdriver.Chrome(service=service, options=chrome_options)\n",
"\n",
"# Page requested first; the login form below is filled in after this request.\n",
"# Other stores are served from a different sub-domain (e.g. xlsf.aichedian.com).\n",
"url = 'http://best.aichedian.com/order/order-detail/1115935207959/'\n",
"\n",
"# Credentials come from the environment (or an interactive prompt), never from the notebook.\n",
"# NOTE(review): the password that used to be hard-coded here was committed with the file\n",
"# and should be rotated.\n",
"username = os.environ.get('AICHEDIAN_USERNAME') or input('aichedian username: ')\n",
"password = os.environ.get('AICHEDIAN_PASSWORD') or getpass('aichedian password: ')\n",
"\n",
"driver.get(url)\n",
"\n",
"login_wait = WebDriverWait(driver, 10)\n",
"submit_xpath = '//*[@id=\"login_form\"]/div[4]/div/input'\n",
"password_xpath = '//*[@id=\"login_form\"]/div[3]/div[2]/div/input'\n",
"\n",
"login_wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id=\"username\"]'))).send_keys(username)\n",
"login_wait.until(EC.presence_of_element_located((By.XPATH, submit_xpath))).click()\n",
"time.sleep(5)  # pause so the store can be selected before the password step\n",
"login_wait.until(EC.presence_of_element_located((By.XPATH, password_xpath))).send_keys(password)\n",
"login_wait.until(EC.presence_of_element_located((By.XPATH, submit_xpath))).click()\n",
"\n",
"time.sleep(2)\n",
"\n",
"# Result rows (one per exported order) and the detail-page URLs gathered by the crawl.\n",
"all_data = []\n",
"all_url = []\n",
"\n",
"# Look-back range that ends now; change the day count as needed\n",
"# (the author also noted 786 as a value).\n",
"end_date = datetime.now()\n",
"start_date = end_date - timedelta(days=1552)\n",
"current_date = start_date\n",
"\n",
"# Column order of the exported sheet, grouped the way the values are collected.\n",
"headers = (\n",
"    ['订单号', '开单时间', '入账时间', '车辆', '车主', '订单详情']\n",
"    + ['车牌号码', '车辆品牌', '会员卡号', '车主姓名', '联系方式']\n",
"    + ['服务名称', '销售人员', '施工人员', '应付金额', '施工时间', '完工时间', '服务评分']\n",
"    + ['产品名称', '型号', '单价', '数量', '总价', '销售人员', '销售时间']\n",
"    + ['支付方式', '账号', '金额', '时间', '备注']\n",
")\n",
"\n",
"# First day to crawl and today's date, both as YYYY-MM-DD text.\n",
"first_date = '2021-04-03'\n",
"now_date = datetime.now().strftime('%Y-%m-%d')\n",
"\n",
"# pandas Timestamps bounding the crawl; `current` is the moving window start.\n",
"start, end = pd.to_datetime(first_date), pd.to_datetime(now_date)\n",
"current = start\n",
"\n",
"# Number of year-sized windows between start and end, used as the progress-bar total.\n",
"total_years = (end - start).days // 365 + 1\n",
"\n",
"# Walk the date range in windows of at most one year. For each window, open the order\n",
"# report, follow every '查看详细' link, and record the '明细' links found there in all_url.\n",
"with tqdm(total=total_years, desc='处理时间段') as pbar:\n",
"    no_data_count = 0\n",
"    while current <= end:\n",
"        # Window end: one year after `current` minus a day, clipped to `end`.\n",
"        year_end = min(current + pd.DateOffset(years=1) - pd.Timedelta(days=1), end)\n",
"\n",
"        # The report page takes its dates as YYYY/MM/DD.\n",
"        url_start = current.strftime('%Y/%m/%d')\n",
"        url_end = year_end.strftime('%Y/%m/%d')\n",
"        url = f'http://best.aichedian.com/report/order-stat/?start_date={url_start}&end_date={url_end}&tab=1'\n",
"\n",
"        # Move the pointer to the next window before anything below can `continue`.\n",
"        current = year_end + pd.Timedelta(days=1)\n",
"\n",
"        driver.get(url)\n",
"        xpath = '//*[@id=\"order-profit-count\"]/table[2]'\n",
"        try:\n",
"            # until() only returns once the locator matches something, so `rows`\n",
"            # is never empty here; no extra emptiness check is needed.\n",
"            rows = WebDriverWait(driver, 5).until(\n",
"                EC.presence_of_all_elements_located((By.XPATH, xpath))\n",
"            )\n",
"        except TimeoutException:\n",
"            pbar.update(1)\n",
"            no_data_count += 1\n",
"            if no_data_count >= 30:\n",
"                print('连续30个时间段没有数据,退出循环')\n",
"                break\n",
"            continue\n",
"        no_data_count = 0\n",
"\n",
"        # Read everything needed from the report page BEFORE navigating away, so no\n",
"        # element handle of this page is touched after the browser has left it.\n",
"        detail_page_urls = []\n",
"        for row in rows:\n",
"            row_data = [cell.text for cell in row.find_elements(By.TAG_NAME, 'td')]\n",
"            # NOTE(review): these four names hold the first four cell texts of the last\n",
"            # table processed; the detail loop reads them unchanged for every order.\n",
"            # Padded so a table with fewer than four cells does not raise IndexError.\n",
"            start_dj_data, get_money_data, car_number, customer = (row_data + [''] * 4)[:4]\n",
"            order_details = '\\n'.join(row_data)\n",
"            for link in row.find_elements(By.XPATH, './/td//a'):\n",
"                if link.text == '查看详细':\n",
"                    detail_page_urls.append(\n",
"                        urljoin('http://best.aichedian.com/', link.get_attribute('href')))\n",
"\n",
"        for absolute_url in detail_page_urls:\n",
"            try:\n",
"                driver.get(absolute_url)\n",
"                n_rows = WebDriverWait(driver, 5).until(\n",
"                    EC.presence_of_all_elements_located((By.XPATH, '//*[@id=\"x-single-content\"]'))\n",
"                )\n",
"                # Own loop variable: the old inner loop reused the name `row`.\n",
"                for n_row in n_rows:\n",
"                    for n_link in n_row.find_elements(By.XPATH, './/td//a'):\n",
"                        if n_link.text == '明细':\n",
"                            all_url.append(\n",
"                                urljoin('http://best.aichedian.com/', n_link.get_attribute('href')))\n",
"                time.sleep(0.1)\n",
"            except Exception as exc:\n",
"                # Best effort: report the page and carry on. Unlike a bare except,\n",
"                # this still lets Ctrl-C / kernel interrupt stop the crawl.\n",
"                print(f'页面读取失败,已跳过: {absolute_url} ({exc!r})')\n",
"\n",
"        # One tick per window, inside the loop so the bar actually advances.\n",
"        pbar.update(1)\n",
"\n",
"\n",
"# Save the collected URLs first so they survive a failure further down.\n",
"df1 = pd.DataFrame(all_url)\n",
"df1.to_csv('all_url.csv', index=False)\n",
"\n",
"# Table sections of an order detail page: <h3> title -> column names, in <td> order.\n",
"SECTION_COLUMNS = {\n",
"    '服务项目': ['服务名称', '销售人员', '施工人员', '应付金额', '施工时间', '完工时间', '服务评分'],\n",
"    '销售产品': ['产品名称', '型号', '单价', '数量', '总价', '销售人员', '销售时间'],\n",
"    '支付记录': ['支付方式', '账号', '金额', '时间', '备注'],\n",
"}\n",
"\n",
"\n",
"def _scrape_section(title, columns):\n",
"    '''Read the table that follows the <h3> whose text is `title` on the current page.\n",
"\n",
"    Returns a list with one dict per <tr> except the first, mapping columns[i] to the\n",
"    text of that row's i-th <td>. The list is empty when the heading does not show up\n",
"    within 0.2 s; rows read before a lookup failure are kept.\n",
"    '''\n",
"    records = []\n",
"    try:\n",
"        heading = WebDriverWait(driver, 0.2).until(\n",
"            EC.presence_of_element_located((By.XPATH, f'//h3[text()=\"{title}\"]'))\n",
"        )\n",
"        if heading.text == title:\n",
"            table = heading.find_element(By.XPATH, './following-sibling::table')\n",
"            for tr in table.find_elements(By.TAG_NAME, 'tr')[1:]:\n",
"                tds = tr.find_elements(By.TAG_NAME, 'td')\n",
"                records.append({name: tds[i].text for i, name in enumerate(columns)})\n",
"    except (TimeoutException, NoSuchElementException, IndexError):\n",
"        pass  # section missing or laid out differently: keep what was read so far\n",
"    return records\n",
"\n",
"\n",
"# Visit every collected detail URL and turn the page into one export row.\n",
"for url in tqdm(all_url):\n",
"    try:\n",
"        driver.get(url)\n",
"        # Second-to-last path segment of the detail URL; exported as '订单号'.\n",
"        details_id = url.split('/')[-2]\n",
"\n",
"        h2 = WebDriverWait(driver, 5).until(\n",
"            EC.presence_of_element_located((By.XPATH, '//*[@id=\"x-single-content\"]/h2'))\n",
"        )\n",
"        # Only pages whose heading says '订单详情' are exported; '车辆检测单' pages are skipped.\n",
"        if '车辆检测单' in h2.text or '订单详情' not in h2.text:\n",
"            driver.back()\n",
"            print('跳过')\n",
"            time.sleep(5)\n",
"            continue\n",
"\n",
"        # Basic information: every <li> is read as 'label:value'.\n",
"        base_details = WebDriverWait(driver, 10).until(\n",
"            EC.presence_of_element_located((By.XPATH, '//*[@id=\"x-single-content\"]/ul'))\n",
"        )\n",
"        base_info = {}\n",
"        for li in base_details.find_elements(By.TAG_NAME, 'li'):\n",
"            # partition() keeps everything after the FIRST colon, so values that contain\n",
"            # ':' themselves are no longer cut short; no colon gives an empty value.\n",
"            label, _, value = li.text.partition(':')\n",
"            base_info[label.strip()] = value.strip()\n",
"\n",
"        sections = {title: _scrape_section(title, columns)\n",
"                    for title, columns in SECTION_COLUMNS.items()}\n",
"\n",
"        order_info = {\n",
"            '订单号': details_id,\n",
"            # NOTE(review): the next four values are not looked up per order; they are\n",
"            # whatever the crawl loop last assigned, so every exported row carries the\n",
"            # same values. Confirm where they should really come from.\n",
"            '开单时间': start_dj_data,\n",
"            '入账时间': get_money_data,\n",
"            '车辆': car_number,\n",
"            '车主': customer,\n",
"            '车牌号码': base_info.get('车牌号码', ''),\n",
"            '车辆品牌': base_info.get('车辆品牌', ''),\n",
"            '会员卡号': base_info.get('会员卡号', ''),\n",
"            '车主姓名': base_info.get('车主姓名', ''),\n",
"            '联系方式': base_info.get('联系方式', ''),\n",
"        }\n",
"\n",
"        # Multi-row sections are flattened to one newline-joined string per column.\n",
"        # '销售人员' exists in two sections, so its values are kept apart in section order.\n",
"        sales_people = []\n",
"        for title, columns in SECTION_COLUMNS.items():\n",
"            for name in columns:\n",
"                joined = '\\n'.join(record[name] for record in sections[title])\n",
"                if name == '销售人员':\n",
"                    sales_people.append(joined)\n",
"                else:\n",
"                    order_info[name] = joined\n",
"\n",
"        # `headers` lists '销售人员' twice (service first, product second). A dict can hold\n",
"        # only one of them, which used to put the product value in both columns, so the\n",
"        # row is emitted as a list laid out in `headers` order instead.\n",
"        remaining_sales = iter(sales_people)\n",
"        all_data.append([\n",
"            next(remaining_sales, '') if name == '销售人员' else order_info.get(name)\n",
"            for name in headers\n",
"        ])\n",
"    except Exception as exc:\n",
"        # Best effort: report the page and carry on. Unlike the bare `except: pass` this\n",
"        # replaces, Ctrl-C / kernel interrupt still stops the multi-hour run.\n",
"        print(f'明细页处理失败,已跳过: {url} ({exc!r})')\n",
"\n",
"# Build the export table in `headers` column order and write it to an Excel workbook.\n",
"df = pd.DataFrame(data=all_data, columns=headers)\n",
"df.to_excel(excel_writer='爱车店数据导出.xlsx', index=False)\n",
"\n",
"# Shut the browser session down.\n",
"driver.quit()"
],
"id": "3bdcde801e98b6bd",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<>:25: SyntaxWarning: invalid escape sequence '\\P'\n",
"<>:25: SyntaxWarning: invalid escape sequence '\\P'\n",
"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_27060\\653923207.py:25: SyntaxWarning: invalid escape sequence '\\P'\n",
" service = Service(executable_path='D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe')\n",
"处理时间段: 0%| | 0/5 [00:00<?, ?it/s]\n",
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A\n",
"100%|██████████| 1/1 [04:13<00:00, 253.29s/it]\u001B[A\n",
"\n",
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A\n",
"100%|██████████| 1/1 [04:49<00:00, 289.93s/it]\u001B[A\n",
"\n",
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A\n",
"100%|██████████| 1/1 [05:09<00:00, 309.01s/it]\u001B[A\n",
"\n",
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A\n",
"100%|██████████| 1/1 [04:25<00:00, 265.08s/it]\u001B[A\n",
"\n",
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A\n",
"100%|██████████| 1/1 [00:02<00:00, 2.59s/it]\u001B[A\n",
"处理时间段: 20%|██ | 1/5 [18:44<1:14:57, 1124.42s/it]\n",
" 0%| | 39/14439 [00:37<3:54:19, 1.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 1%|▏ | 183/14439 [02:55<4:05:12, 1.03s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 1%|▏ | 184/14439 [03:00<9:07:41, 2.31s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 1%|▏ | 198/14439 [03:15<2:58:07, 1.33it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 1%|▏ | 199/14439 [03:20<8:21:44, 2.11s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 1%|▏ | 200/14439 [03:26<12:27:17, 3.15s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 2%|▏ | 219/14439 [03:48<3:40:41, 1.07it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 2%|▏ | 220/14439 [03:53<8:49:45, 2.24s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 2%|▏ | 281/14439 [04:54<3:38:17, 1.08it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 6%|▌ | 813/14439 [14:18<4:40:52, 1.24s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 36%|███▌ | 5177/14439 [1:30:02<2:24:55, 1.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"跳过\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 14439/14439 [5:04:48<00:00, 1.27s/it] \n"
]
}
],
"execution_count": 5
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-23T06:06:18.139653Z",
"start_time": "2025-06-23T06:06:18.131895Z"
}
},
"cell_type": "code",
"source": [
"# Save the collected detail-page URLs, one per row, to a CSV file.\n",
"df1 = pd.DataFrame(data=all_url)\n",
"df1.to_csv(path_or_buf='all_url.csv', index=False)"
],
"id": "a68ff75a6184bae",
"outputs": [],
"execution_count": 3
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}