979 lines
46 KiB
Plaintext
979 lines
46 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"id": "initial_id",
|
|
"metadata": {
|
|
"collapsed": true,
|
|
"ExecuteTime": {
|
|
"end_time": "2025-06-23T05:28:31.780431Z",
|
|
"start_time": "2025-06-23T05:26:25.738579Z"
|
|
}
|
|
},
|
|
"source": [
|
|
"from selenium import webdriver\n",
|
|
"from selenium.webdriver.common.by import By\n",
|
|
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
|
"from selenium.webdriver.support import expected_conditions as EC\n",
|
|
"import time\n",
|
|
"from urllib.parse import urljoin\n",
|
|
"import pandas as pd\n",
|
|
"from selenium.webdriver import Chrome\n",
|
|
"from selenium.webdriver.chrome.service import Service\n",
|
|
"from datetime import datetime\n",
|
|
"from selenium.webdriver.chrome.options import Options\n",
|
|
"from datetime import datetime, timedelta\n",
|
|
"from selenium.common.exceptions import NoSuchElementException\n",
|
|
"from tqdm import tqdm\n",
|
|
"from selenium.common.exceptions import TimeoutException\n",
|
|
"\n",
|
|
"# 设置Chrome选项\n",
|
|
"chrome_options = Options()\n",
|
|
"# 设置为无头模式(不打开浏览器窗口)\n",
|
|
"# chrome_options.add_argument('--headless')\n",
|
|
"chrome_options.add_argument('--disable-gpu')\n",
|
|
"chrome_options.add_argument('--no-sandbox')\n",
|
|
"\n",
|
|
"# 指定ChromeDriver路径\n",
|
|
"service = Service(executable_path='D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe')\n",
|
|
"\n",
|
|
"# 创建WebDriver对象\n",
|
|
"driver = webdriver.Chrome(service=service, options=chrome_options)\n",
|
|
"\n",
|
|
"# 目标网址\n",
|
|
"# url = 'http://xlsf.aichedian.com/order/order-detail/1115935207959/ # 爱车店有不同的网址\n",
|
|
"url = 'http://best.aichedian.com/order/order-detail/1115935207959/'\n",
|
|
"username = '15307259977'\n",
|
|
"password = 'juanzi810119'\n",
|
|
"\n",
|
|
"# 访问网页\n",
|
|
"driver.get(url)\n",
|
|
"\n",
|
|
"WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, f'//*[@id=\"username\"]'))).send_keys(username)\n",
|
|
"WebDriverWait(driver, 10).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, f'//*[@id=\"login_form\"]/div[4]/div/input'))).click()\n",
|
|
"time.sleep(5) # 提供时间选择门店\n",
|
|
"WebDriverWait(driver, 10).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, f'//*[@id=\"login_form\"]/div[3]/div[2]/div/input'))).send_keys(password)\n",
|
|
"WebDriverWait(driver, 10).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, f'//*[@id=\"login_form\"]/div[4]/div/input'))).click()\n",
|
|
"\n",
|
|
"time.sleep(2)\n",
|
|
"\n",
|
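|
"# end_date is now and start_date is 1552 days earlier (adjust the day count as needed). NOTE(review): these variables are not referenced again in this cell; the crawl window comes from first_date / now_date below.\n",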
|
|
"end_date = datetime.now()\n",
|
|
"start_date = end_date - timedelta(days=1552) #786\n",
|
|
"current_date = start_date\n",
|
|
"all_data = []\n",
|
|
"\n",
|
|
"# 定义表头\n",
|
|
"headers = [\n",
|
|
" '订单号',\n",
|
|
" '开单时间', '入账时间', '车辆', '车主', '订单详情',\n",
|
|
" '车牌号码', '车辆品牌', '会员卡号', '车主姓名', '联系方式',\n",
|
|
" '服务名称', '销售人员', '施工人员', '应付金额', '施工时间', '完工时间', '服务评分',\n",
|
|
" '产品名称', '型号', '单价', '数量', '总价', '销售人员', '销售时间',\n",
|
|
" '支付方式', '账号', '金额', '时间', '备注'\n",
|
|
"]\n",
|
|
"\n",
|
|
"# 起始日期和当前日期\n",
|
|
"first_date = \"2021-04-03\"\n",
|
|
"now_date = datetime.now().strftime('%Y-%m-%d')\n",
|
|
"\n",
|
|
"# 转换为 pandas 的 Timestamp 类型\n",
|
|
"start = pd.to_datetime(first_date)\n",
|
|
"end = pd.to_datetime(now_date)\n",
|
|
"\n",
|
|
"# 初始化当前时间指针\n",
|
|
"current = start\n",
|
|
"\n",
|
|
"# 计算总共有多少年,用于进度条\n",
|
|
"total_years = (end - start).days // 365 + 1\n",
|
|
"\n",
|
|
"all_url = []\n",
|
|
"\n",
|
|
"with tqdm(total=total_years, desc=\"处理时间段\") as pbar:\n",
|
|
" no_data_count=0\n",
|
|
" while current <= end:\n",
|
|
" # 计算这一年的结束日期\n",
|
|
" year_end = current + pd.DateOffset(years=1) - pd.Timedelta(days=1)\n",
|
|
" if year_end > end:\n",
|
|
" year_end = end\n",
|
|
"\n",
|
|
" # 格式化成 URL 需要的格式:YYYY/MM/DD\n",
|
|
" url_start = current.strftime('%Y/%m/%d')\n",
|
|
" url_end = year_end.strftime('%Y/%m/%d')\n",
|
|
"\n",
|
|
" # 构造 URL\n",
|
|
" url = f'http://best.aichedian.com/report/order-stat/?start_date={url_start}&end_date={url_end}&tab=1'\n",
|
|
"\n",
|
|
" driver.get(url) # 获取每日订单\n",
|
|
" # 获取指定 XPath 下的所有 <tr> 元素\n",
|
|
" xpath = '//*[@id=\"order-profit-count\"]/table[2]'\n",
|
|
" try:\n",
|
|
" rows = WebDriverWait(driver, 5).until(\n",
|
|
" EC.presence_of_all_elements_located((By.XPATH, xpath))\n",
|
|
" )\n",
|
|
" if not rows:\n",
|
|
" raise NoSuchElementException # 如果没有找到任何行,抛出异常\n",
|
|
" no_data_count = 0 # 重置计数器\n",
|
|
" except TimeoutException:\n",
|
|
" no_data_count += 1\n",
|
|
" if no_data_count >= 30:\n",
|
|
" print(\"连续30天没有数据,退出循环\")\n",
|
|
" break\n",
|
|
" continue\n",
|
|
"\n",
|
|
" for row in tqdm(rows):\n",
|
|
" # 获取每一行中的所有 <td> 元素\n",
|
|
" cells = row.find_elements(By.TAG_NAME, 'td')\n",
|
|
" row_data = [cell.text for cell in cells] # 将一行中的所有单元格文本存入列表\n",
|
|
" start_dj_data = row_data[0] # 开始时间\n",
|
|
" get_money_data = row_data[1] # 入账时间\n",
|
|
" car_number = row_data[2] # 车牌名称\n",
|
|
" customer = row_data[3] # 假设第二列是客户名称\n",
|
|
" order_details = '\\n'.join(row_data) # 将行数据合并为一个字符串,每列之间用换行符分隔\n",
|
|
"\n",
|
|
" for cell in cells:\n",
|
|
" try:\n",
|
|
" links = cell.find_elements(By.TAG_NAME, 'a') # 获取当前单元格内的所有 <a> 标签\n",
|
|
" absolute_url = None\n",
|
|
" all_absolute_urls = []\n",
|
|
" for link in links:\n",
|
|
" if link.text == \"查看详细\":\n",
|
|
" relative_url = link.get_attribute('href')\n",
|
|
" absolute_url = urljoin('http://best.aichedian.com/', relative_url)\n",
|
|
" all_absolute_urls.append(absolute_url)\n",
|
|
" else:\n",
|
|
" continue\n",
|
|
" \n",
|
|
" if not all_absolute_urls:\n",
|
|
" continue\n",
|
|
" \n",
|
|
" for absolute_url in all_absolute_urls:\n",
|
|
" \n",
|
|
" driver.get(absolute_url) # 获取查看详情\n",
|
|
" \n",
|
|
" n_xpath = '//*[@id=\"x-single-content\"]'\n",
|
|
" try:\n",
|
|
" n_rows = WebDriverWait(driver, 5).until(\n",
|
|
" EC.presence_of_all_elements_located((By.XPATH, n_xpath))\n",
|
|
" )\n",
|
|
" except:\n",
|
|
" continue\n",
|
|
" first_row = n_rows[0] # 此时为一个列表\n",
|
|
" n_cells = first_row.find_elements(By.TAG_NAME, 'td')\n",
|
|
" \n",
|
|
" for n_cell in n_cells:\n",
|
|
" n_links = n_cell.find_elements(By.TAG_NAME, 'a') # 获取当前单元格内的所有 <a> 标签\n",
|
|
" n_absolute_url = None\n",
|
|
" details_id= \"\"\n",
|
|
" for n_link in n_links:\n",
|
|
" # print(n_link.text)\n",
|
|
" if n_link.text == \"明细\":\n",
|
|
" n_relative_url = n_link.get_attribute('href')\n",
|
|
" n_absolute_url = urljoin('http://best.aichedian.com/', n_relative_url)\n",
|
|
" details_id = n_absolute_url.split('/')[-2]\n",
|
|
" all_url.append(n_absolute_url)\n",
|
|
" else:\n",
|
|
" continue\n",
|
|
" \n",
|
|
" if not n_absolute_url:\n",
|
|
" continue\n",
|
|
" \n",
|
|
" \n",
|
|
" time.sleep(0.1)\n",
|
|
" driver.back()\n",
|
|
"\n",
|
|
" except :\n",
|
|
" continue # 如果没有找到 <a> 标签,继续下一个 <td\n",
|
|
"\n",
|
|
"\n",
|
|
" # 返回页面\n",
|
|
" driver.back()\n",
|
|
" # 获取每日订单\n",
|
|
" pbar.update(1)\n",
|
|
" \n",
|
|
"for url in all_url:\n",
|
|
" driver.get(url) # 获取明细数据\n",
|
|
"\n",
|
|
" xpath1 = '//*[@id=\"x-single-content\"]/h2'\n",
|
|
" h2 = WebDriverWait(driver, 5).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, xpath1))\n",
|
|
" )\n",
|
|
"\n",
|
|
" if \"车辆检测单\" in h2.text or \"订单详情\" not in h2.text:\n",
|
|
" driver.back()\n",
|
|
" print(\"跳过\")\n",
|
|
" time.sleep(\n",
|
|
" 5\n",
|
|
" )\n",
|
|
" continue\n",
|
|
"\n",
|
|
"\n",
|
|
" # 获取基本信息\n",
|
|
" try:\n",
|
|
" base_details = WebDriverWait(driver, 10).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, f'//*[@id=\"x-single-content\"]/ul'))\n",
|
|
" )\n",
|
|
" li_items = base_details.find_elements(By.TAG_NAME, 'li')\n",
|
|
" base_info = {li.text.split(':')[0].strip(): li.text.split(':')[1].strip() if ':' in li.text else ''\n",
|
|
" for li in li_items}\n",
|
|
" # 获取具体信息,如果不存在则返回空字符串\n",
|
|
" license_plate = base_info.get('车牌号', '')\n",
|
|
" customer_name = base_info.get('客户名称', '')\n",
|
|
" order_id = base_info.get('订单编号', '')\n",
|
|
" except TimeoutException:\n",
|
|
" continue # 获取不到基本信息\n",
|
|
"\n",
|
|
"\n",
|
|
" # 获取服务项目\n",
|
|
" service_info = []\n",
|
|
" try:\n",
|
|
" h3_element = WebDriverWait(driver, 0.2).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, '//h3[text()=\"服务项目\"]'))\n",
|
|
" )\n",
|
|
" if h3_element.text == \"服务项目\":\n",
|
|
" table = h3_element.find_element(By.XPATH, './following-sibling::table')\n",
|
|
" rows = table.find_elements(By.TAG_NAME, 'tr')\n",
|
|
" for row1 in rows[1:]:\n",
|
|
" li_items = row1.find_elements(By.TAG_NAME, 'td')\n",
|
|
" service_info.append({\n",
|
|
" '服务名称': li_items[0].text,\n",
|
|
" '销售人员': li_items[1].text,\n",
|
|
" '施工人员': li_items[2].text,\n",
|
|
" '应付金额': li_items[3].text,\n",
|
|
" '施工时间': li_items[4].text,\n",
|
|
" '完工时间': li_items[5].text,\n",
|
|
" '服务评分': li_items[6].text\n",
|
|
" })\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
"\n",
|
|
" # 获取销售产品\n",
|
|
" product_info = []\n",
|
|
" try:\n",
|
|
" h3_element = WebDriverWait(driver, 0.2).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, '//h3[text()=\"销售产品\"]'))\n",
|
|
" )\n",
|
|
" if h3_element.text == \"销售产品\":\n",
|
|
" table = h3_element.find_element(By.XPATH, './following-sibling::table')\n",
|
|
" rows = table.find_elements(By.TAG_NAME, 'tr')\n",
|
|
" for row2 in rows[1:]:\n",
|
|
" li_items = row2.find_elements(By.TAG_NAME, 'td')\n",
|
|
" product_info.append({\n",
|
|
" '产品名称': li_items[0].text,\n",
|
|
" '型号': li_items[1].text,\n",
|
|
" '单价': li_items[2].text,\n",
|
|
" '数量': li_items[3].text,\n",
|
|
" '总价': li_items[4].text,\n",
|
|
" '销售人员': li_items[5].text,\n",
|
|
" '销售时间': li_items[6].text\n",
|
|
" })\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
"\n",
|
|
" # 获取支付记录\n",
|
|
" payment_info = []\n",
|
|
" try:\n",
|
|
" h3_element = WebDriverWait(driver, 0.2).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, '//h3[text()=\"支付记录\"]'))\n",
|
|
" )\n",
|
|
" if h3_element.text == \"支付记录\":\n",
|
|
" table = h3_element.find_element(By.XPATH, './following-sibling::table')\n",
|
|
" rows = table.find_elements(By.TAG_NAME, 'tr')\n",
|
|
" for row3 in rows[1:]:\n",
|
|
" li_items = row3.find_elements(By.TAG_NAME, 'td')\n",
|
|
" payment_info.append({\n",
|
|
" '支付方式': li_items[0].text,\n",
|
|
" '账号': li_items[1].text,\n",
|
|
" '金额': li_items[2].text,\n",
|
|
" '时间': li_items[3].text,\n",
|
|
" '备注': li_items[4].text\n",
|
|
" })\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
"\n",
|
|
" # 将所有信息组合成一个字典\n",
|
|
" order_info = {\n",
|
|
" '订单号': details_id,\n",
|
|
" '开单时间': start_dj_data,\n",
|
|
" '入账时间': get_money_data,\n",
|
|
" '车辆': car_number,\n",
|
|
" '车主': customer,\n",
|
|
" '车牌号码': base_info.get('车牌号码', ''),\n",
|
|
" '车辆品牌': base_info.get('车辆品牌', ''),\n",
|
|
" '会员卡号': base_info.get('会员卡号', ''),\n",
|
|
" '车主姓名': base_info.get('车主姓名', ''),\n",
|
|
" '联系方式': base_info.get('联系方式', ''),\n",
|
|
" # '订单详情': order_details,\n",
|
|
" '服务名称': '\\n'.join([item['服务名称'] for item in service_info]),\n",
|
|
" '销售人员': '\\n'.join([item['销售人员'] for item in service_info]),\n",
|
|
" '施工人员': '\\n'.join([item['施工人员'] for item in service_info]),\n",
|
|
" '应付金额': '\\n'.join([item['应付金额'] for item in service_info]),\n",
|
|
" '施工时间': '\\n'.join([item['施工时间'] for item in service_info]),\n",
|
|
" '完工时间': '\\n'.join([item['完工时间'] for item in service_info]),\n",
|
|
" '服务评分': '\\n'.join([item['服务评分'] for item in service_info]),\n",
|
|
" '产品名称': '\\n'.join([item['产品名称'] for item in product_info]),\n",
|
|
" '型号': '\\n'.join([item['型号'] for item in product_info]),\n",
|
|
" '单价': '\\n'.join([item['单价'] for item in product_info]),\n",
|
|
" '数量': '\\n'.join([item['数量'] for item in product_info]),\n",
|
|
" '总价': '\\n'.join([item['总价'] for item in product_info]),\n",
|
|
" '销售人员': '\\n'.join([item['销售人员'] for item in product_info]),\n",
|
|
" '销售时间': '\\n'.join([item['销售时间'] for item in product_info]),\n",
|
|
" '支付方式': '\\n'.join([item['支付方式'] for item in payment_info]),\n",
|
|
" '账号': '\\n'.join([item['账号'] for item in payment_info]),\n",
|
|
" '金额': '\\n'.join([item['金额'] for item in payment_info]),\n",
|
|
" '时间': '\\n'.join([item['时间'] for item in payment_info]),\n",
|
|
" '备注': '\\n'.join([item['备注'] for item in payment_info])\n",
|
|
" }\n",
|
|
"\n",
|
|
" all_data.append(order_info) # 将完整订单信息添加到总数据列表中\n",
|
|
"\n",
|
|
"# 使用pandas将数据保存为Excel文件\n",
|
|
"df = pd.DataFrame(all_data, columns=headers)\n",
|
|
"df.to_excel('爱车店数据导出.xlsx', index=False) # 保存为Excel文件\n",
|
|
"\n",
|
|
"# 关闭浏览器\n",
|
|
"driver.quit()"
|
|
],
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<>:25: SyntaxWarning: invalid escape sequence '\\P'\n",
|
|
"<>:25: SyntaxWarning: invalid escape sequence '\\P'\n",
|
|
"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_20808\\1843733566.py:25: SyntaxWarning: invalid escape sequence '\\P'\n",
|
|
" service = Service(executable_path='D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe')\n",
|
|
"处理时间段: 0%| | 0/5 [00:00<?, ?it/s]\n",
|
|
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"100%|██████████| 1/1 [00:58<00:00, 58.80s/it]\u001B[A\n",
|
|
"\n",
|
|
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"100%|██████████| 1/1 [00:56<00:00, 56.86s/it]\u001B[A\n",
|
|
"处理时间段: 0%| | 0/5 [01:56<?, ?it/s]\n",
|
|
"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_20808\\1843733566.py:25: SyntaxWarning: invalid escape sequence '\\P'\n",
|
|
" service = Service(executable_path='D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe')\n"
|
|
]
|
|
},
|
|
{
|
|
"ename": "InvalidSessionIdException",
|
|
"evalue": "Message: invalid session id\nStacktrace:\n\tGetHandleVerifier [0x0x7ff728a4cda5+78885]\n\tGetHandleVerifier [0x0x7ff728a4ce00+78976]\n\t(No symbol) [0x0x7ff7288099fc]\n\t(No symbol) [0x0x7ff7288507df]\n\t(No symbol) [0x0x7ff728888a52]\n\t(No symbol) [0x0x7ff728883413]\n\t(No symbol) [0x0x7ff7288824d9]\n\t(No symbol) [0x0x7ff7287d5d55]\n\tGetHandleVerifier [0x0x7ff728d24eed+3061101]\n\tGetHandleVerifier [0x0x7ff728d1f33d+3037629]\n\tGetHandleVerifier [0x0x7ff728d3e592+3165202]\n\tGetHandleVerifier [0x0x7ff728a6730e+186766]\n\tGetHandleVerifier [0x0x7ff728a6eb3f+217535]\n\t(No symbol) [0x0x7ff7287d4dca]\n\tGetHandleVerifier [0x0x7ff728e445e8+4238440]\n\tBaseThreadInitThunk [0x0x7fff2f1de8d7+23]\n\tRtlUserThreadStart [0x0x7fff307bc34c+44]\n",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
|
|
"\u001B[1;31mInvalidSessionIdException\u001B[0m Traceback (most recent call last)",
|
|
"Cell \u001B[1;32mIn[49], line 95\u001B[0m\n\u001B[0;32m 92\u001B[0m \u001B[38;5;66;03m# 构造 URL\u001B[39;00m\n\u001B[0;32m 93\u001B[0m url \u001B[38;5;241m=\u001B[39m \u001B[38;5;124mf\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mhttp://best.aichedian.com/report/order-stat/?start_date=\u001B[39m\u001B[38;5;132;01m{\u001B[39;00murl_start\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m&end_date=\u001B[39m\u001B[38;5;132;01m{\u001B[39;00murl_end\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m&tab=1\u001B[39m\u001B[38;5;124m'\u001B[39m\n\u001B[1;32m---> 95\u001B[0m driver\u001B[38;5;241m.\u001B[39mget(url) \u001B[38;5;66;03m# 获取每日订单\u001B[39;00m\n\u001B[0;32m 96\u001B[0m \u001B[38;5;66;03m# 获取指定 XPath 下的所有 <tr> 元素\u001B[39;00m\n\u001B[0;32m 97\u001B[0m xpath \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m//*[@id=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124morder-profit-count\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m]/table[2]\u001B[39m\u001B[38;5;124m'\u001B[39m\n",
|
|
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:363\u001B[0m, in \u001B[0;36mWebDriver.get\u001B[1;34m(self, url)\u001B[0m\n\u001B[0;32m 361\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mget\u001B[39m(\u001B[38;5;28mself\u001B[39m, url: \u001B[38;5;28mstr\u001B[39m) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 362\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"Loads a web page in the current browser session.\"\"\"\u001B[39;00m\n\u001B[1;32m--> 363\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mexecute(Command\u001B[38;5;241m.\u001B[39mGET, {\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124murl\u001B[39m\u001B[38;5;124m\"\u001B[39m: url})\n",
|
|
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\selenium\\webdriver\\remote\\webdriver.py:354\u001B[0m, in \u001B[0;36mWebDriver.execute\u001B[1;34m(self, driver_command, params)\u001B[0m\n\u001B[0;32m 352\u001B[0m response \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcommand_executor\u001B[38;5;241m.\u001B[39mexecute(driver_command, params)\n\u001B[0;32m 353\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m response:\n\u001B[1;32m--> 354\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39merror_handler\u001B[38;5;241m.\u001B[39mcheck_response(response)\n\u001B[0;32m 355\u001B[0m response[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mvalue\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_unwrap_value(response\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mvalue\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28;01mNone\u001B[39;00m))\n\u001B[0;32m 356\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m response\n",
|
|
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\selenium\\webdriver\\remote\\errorhandler.py:229\u001B[0m, in \u001B[0;36mErrorHandler.check_response\u001B[1;34m(self, response)\u001B[0m\n\u001B[0;32m 227\u001B[0m alert_text \u001B[38;5;241m=\u001B[39m value[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124malert\u001B[39m\u001B[38;5;124m\"\u001B[39m]\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mtext\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 228\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m exception_class(message, screen, stacktrace, alert_text) \u001B[38;5;66;03m# type: ignore[call-arg] # mypy is not smart enough here\u001B[39;00m\n\u001B[1;32m--> 229\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m exception_class(message, screen, stacktrace)\n",
|
|
"\u001B[1;31mInvalidSessionIdException\u001B[0m: Message: invalid session id\nStacktrace:\n\tGetHandleVerifier [0x0x7ff728a4cda5+78885]\n\tGetHandleVerifier [0x0x7ff728a4ce00+78976]\n\t(No symbol) [0x0x7ff7288099fc]\n\t(No symbol) [0x0x7ff7288507df]\n\t(No symbol) [0x0x7ff728888a52]\n\t(No symbol) [0x0x7ff728883413]\n\t(No symbol) [0x0x7ff7288824d9]\n\t(No symbol) [0x0x7ff7287d5d55]\n\tGetHandleVerifier [0x0x7ff728d24eed+3061101]\n\tGetHandleVerifier [0x0x7ff728d1f33d+3037629]\n\tGetHandleVerifier [0x0x7ff728d3e592+3165202]\n\tGetHandleVerifier [0x0x7ff728a6730e+186766]\n\tGetHandleVerifier [0x0x7ff728a6eb3f+217535]\n\t(No symbol) [0x0x7ff7287d4dca]\n\tGetHandleVerifier [0x0x7ff728e445e8+4238440]\n\tBaseThreadInitThunk [0x0x7fff2f1de8d7+23]\n\tRtlUserThreadStart [0x0x7fff307bc34c+44]\n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 49
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-06-23T05:15:55.507074Z",
|
|
"start_time": "2025-06-23T05:15:55.444172Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# 使用pandas将数据保存为Excel文件\n",
|
|
"df = pd.DataFrame(all_data, columns=headers)\n",
|
|
"df.to_excel('爱车店数据导出.xlsx', index=False) # 保存为Excel文件"
|
|
],
|
|
"id": "911d116f7d50ffd2",
|
|
"outputs": [],
|
|
"execution_count": 44
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-06-23T11:53:26.818502Z",
|
|
"start_time": "2025-06-23T06:29:37.293543Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
|
"from selenium import webdriver\n",
|
|
"from selenium.webdriver.common.by import By\n",
|
|
"from selenium.webdriver.support.ui import WebDriverWait\n",
|
|
"from selenium.webdriver.support import expected_conditions as EC\n",
|
|
"import time\n",
|
|
"from urllib.parse import urljoin\n",
|
|
"import pandas as pd\n",
|
|
"from selenium.webdriver import Chrome\n",
|
|
"from selenium.webdriver.chrome.service import Service\n",
|
|
"from datetime import datetime\n",
|
|
"from selenium.webdriver.chrome.options import Options\n",
|
|
"from datetime import datetime, timedelta\n",
|
|
"from selenium.common.exceptions import NoSuchElementException\n",
|
|
"from tqdm import tqdm\n",
|
|
"from selenium.common.exceptions import TimeoutException\n",
|
|
"\n",
|
|
"# 设置Chrome选项\n",
|
|
"chrome_options = Options()\n",
|
|
"# 设置为无头模式(不打开浏览器窗口)\n",
|
|
"# chrome_options.add_argument('--headless')\n",
|
|
"chrome_options.add_argument('--disable-gpu')\n",
|
|
"chrome_options.add_argument('--no-sandbox')\n",
|
|
"\n",
|
|
"# 指定ChromeDriver路径\n",
|
|
"service = Service(executable_path='D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe')\n",
|
|
"\n",
|
|
"# 创建WebDriver对象\n",
|
|
"driver = webdriver.Chrome(service=service, options=chrome_options)\n",
|
|
"\n",
|
|
"# 目标网址\n",
|
|
"# url = 'http://xlsf.aichedian.com/order/order-detail/1115935207959/ # 爱车店有不同的网址\n",
|
|
"url = 'http://best.aichedian.com/order/order-detail/1115935207959/'\n",
|
|
"username = '15307259977'\n",
|
|
"password = 'juanzi810119'\n",
|
|
"\n",
|
|
"# 访问网页\n",
|
|
"driver.get(url)\n",
|
|
"\n",
|
|
"WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, f'//*[@id=\"username\"]'))).send_keys(username)\n",
|
|
"WebDriverWait(driver, 10).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, f'//*[@id=\"login_form\"]/div[4]/div/input'))).click()\n",
|
|
"time.sleep(5) # 提供时间选择门店\n",
|
|
"WebDriverWait(driver, 10).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, f'//*[@id=\"login_form\"]/div[3]/div[2]/div/input'))).send_keys(password)\n",
|
|
"WebDriverWait(driver, 10).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, f'//*[@id=\"login_form\"]/div[4]/div/input'))).click()\n",
|
|
"\n",
|
|
"time.sleep(2)\n",
|
|
"\n",
|
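|
"# end_date is now and start_date is 1552 days earlier (adjust the day count as needed). NOTE(review): these variables are not referenced again in this cell; the crawl window comes from first_date / now_date below.\n",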
|
|
"end_date = datetime.now()\n",
|
|
"start_date = end_date - timedelta(days=1552) #786\n",
|
|
"current_date = start_date\n",
|
|
"all_data = []\n",
|
|
"\n",
|
|
"# 定义表头\n",
|
|
"headers = [\n",
|
|
" '订单号',\n",
|
|
" '开单时间', '入账时间', '车辆', '车主', '订单详情',\n",
|
|
" '车牌号码', '车辆品牌', '会员卡号', '车主姓名', '联系方式',\n",
|
|
" '服务名称', '销售人员', '施工人员', '应付金额', '施工时间', '完工时间', '服务评分',\n",
|
|
" '产品名称', '型号', '单价', '数量', '总价', '销售人员', '销售时间',\n",
|
|
" '支付方式', '账号', '金额', '时间', '备注'\n",
|
|
"]\n",
|
|
"\n",
|
|
"# 起始日期和当前日期\n",
|
|
"first_date = \"2021-04-03\"\n",
|
|
"now_date = datetime.now().strftime('%Y-%m-%d')\n",
|
|
"\n",
|
|
"# 转换为 pandas 的 Timestamp 类型\n",
|
|
"start = pd.to_datetime(first_date)\n",
|
|
"end = pd.to_datetime(now_date)\n",
|
|
"\n",
|
|
"# 初始化当前时间指针\n",
|
|
"current = start\n",
|
|
"\n",
|
|
"# 计算总共有多少年,用于进度条\n",
|
|
"total_years = (end - start).days // 365 + 1\n",
|
|
"\n",
|
|
"all_url = []\n",
|
|
"\n",
|
|
"with tqdm(total=total_years, desc=\"处理时间段\") as pbar:\n",
|
|
" no_data_count=0\n",
|
|
" while current <= end:\n",
|
|
" # 计算这一年的结束日期\n",
|
|
" year_end = current + pd.DateOffset(years=1) - pd.Timedelta(days=1)\n",
|
|
" if year_end > end:\n",
|
|
" year_end = end\n",
|
|
"\n",
|
|
" # 格式化成 URL 需要的格式:YYYY/MM/DD\n",
|
|
" url_start = current.strftime('%Y/%m/%d')\n",
|
|
" url_end = year_end.strftime('%Y/%m/%d')\n",
|
|
"\n",
|
|
" # 构造 URL\n",
|
|
" url = f'http://best.aichedian.com/report/order-stat/?start_date={url_start}&end_date={url_end}&tab=1'\n",
|
|
"\n",
|
|
" driver.get(url) # 获取每日订单\n",
|
|
"\n",
|
|
" # 更新当前日期指针到下一年的开始\n",
|
|
" current = year_end + pd.Timedelta(days=1)\n",
|
|
" # 获取指定 XPath 下的所有 <tr> 元素\n",
|
|
" xpath = '//*[@id=\"order-profit-count\"]/table[2]'\n",
|
|
" try:\n",
|
|
" rows = WebDriverWait(driver, 5).until(\n",
|
|
" EC.presence_of_all_elements_located((By.XPATH, xpath))\n",
|
|
" )\n",
|
|
" if not rows:\n",
|
|
" raise NoSuchElementException # 如果没有找到任何行,抛出异常\n",
|
|
" no_data_count = 0 # 重置计数器\n",
|
|
" except TimeoutException:\n",
|
|
" no_data_count += 1\n",
|
|
" if no_data_count >= 30:\n",
|
|
" print(\"连续30天没有数据,退出循环\")\n",
|
|
" break\n",
|
|
" continue\n",
|
|
"\n",
|
|
" for row in tqdm(rows):\n",
|
|
" # 获取每一行中的所有 <td> 元素\n",
|
|
" cells = row.find_elements(By.TAG_NAME, 'td')\n",
|
|
" row_data = [cell.text for cell in cells] # 将一行中的所有单元格文本存入列表\n",
|
|
" start_dj_data = row_data[0] # 开始时间\n",
|
|
" get_money_data = row_data[1] # 入账时间\n",
|
|
" car_number = row_data[2] # 车牌名称\n",
|
|
" customer = row_data[3] # 假设第二列是客户名称\n",
|
|
" order_details = '\\n'.join(row_data) # 将行数据合并为一个字符串,每列之间用换行符分隔\n",
|
|
"\n",
|
|
" for cell in cells:\n",
|
|
" try:\n",
|
|
" links = cell.find_elements(By.TAG_NAME, 'a') # 获取当前单元格内的所有 <a> 标签\n",
|
|
" absolute_url = None\n",
|
|
" all_absolute_urls = []\n",
|
|
" for link in links:\n",
|
|
" if link.text == \"查看详细\":\n",
|
|
" relative_url = link.get_attribute('href')\n",
|
|
" absolute_url = urljoin('http://best.aichedian.com/', relative_url)\n",
|
|
" all_absolute_urls.append(absolute_url)\n",
|
|
" else:\n",
|
|
" continue\n",
|
|
"\n",
|
|
" if not all_absolute_urls:\n",
|
|
" continue\n",
|
|
"\n",
|
|
" for absolute_url in all_absolute_urls:\n",
|
|
"\n",
|
|
" driver.get(absolute_url) # 获取查看详情\n",
|
|
"\n",
|
|
" n_xpath = '//*[@id=\"x-single-content\"]'\n",
|
|
" try:\n",
|
|
" n_rows = WebDriverWait(driver, 5).until(\n",
|
|
" EC.presence_of_all_elements_located((By.XPATH, n_xpath))\n",
|
|
" )\n",
|
|
" except:\n",
|
|
" continue\n",
|
|
"\n",
|
|
" for row in n_rows:\n",
|
|
" n_cells = row.find_elements(By.TAG_NAME, 'td')\n",
|
|
" for n_cell in n_cells:\n",
|
|
" n_links = n_cell.find_elements(By.TAG_NAME, 'a')\n",
|
|
" for n_link in n_links:\n",
|
|
" if n_link.text == \"明细\":\n",
|
|
" n_relative_url = n_link.get_attribute('href')\n",
|
|
" n_absolute_url = urljoin('http://best.aichedian.com/', n_relative_url)\n",
|
|
" all_url.append(n_absolute_url)\n",
|
|
"\n",
|
|
" time.sleep(0.1)\n",
|
|
" driver.back()\n",
|
|
" except :\n",
|
|
" continue # 如果没有找到 <a> 标签,继续下一个 <td\n",
|
|
" # 返回页面\n",
|
|
" driver.back()\n",
|
|
" # 获取每日订单\n",
|
|
" pbar.update(1)\n",
|
|
"\n",
|
|
"\n",
|
|
"df1 = pd.DataFrame(all_url)\n",
|
|
"df1.to_csv('all_url.csv', index=False)\n",
|
|
"for url in tqdm(all_url):\n",
|
|
" try:\n",
|
|
" driver.get(url) # 获取明细数据\n",
|
|
" details_id = url.split('/')[-2]\n",
|
|
" \n",
|
|
" xpath1 = '//*[@id=\"x-single-content\"]/h2'\n",
|
|
" h2 = WebDriverWait(driver, 5).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, xpath1))\n",
|
|
" )\n",
|
|
" \n",
|
|
" if \"车辆检测单\" in h2.text or \"订单详情\" not in h2.text:\n",
|
|
" driver.back()\n",
|
|
" print(\"跳过\")\n",
|
|
" time.sleep(\n",
|
|
" 5\n",
|
|
" )\n",
|
|
" continue\n",
|
|
" \n",
|
|
" \n",
|
|
" # 获取基本信息\n",
|
|
" try:\n",
|
|
" base_details = WebDriverWait(driver, 10).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, f'//*[@id=\"x-single-content\"]/ul'))\n",
|
|
" )\n",
|
|
" li_items = base_details.find_elements(By.TAG_NAME, 'li')\n",
|
|
" base_info = {li.text.split(':')[0].strip(): li.text.split(':')[1].strip() if ':' in li.text else ''\n",
|
|
" for li in li_items}\n",
|
|
" # 获取具体信息,如果不存在则返回空字符串\n",
|
|
" license_plate = base_info.get('车牌号', '')\n",
|
|
" customer_name = base_info.get('客户名称', '')\n",
|
|
" order_id = base_info.get('订单编号', '')\n",
|
|
" except TimeoutException:\n",
|
|
" continue # 获取不到基本信息\n",
|
|
" \n",
|
|
" \n",
|
|
" # 获取服务项目\n",
|
|
" service_info = []\n",
|
|
" try:\n",
|
|
" h3_element = WebDriverWait(driver, 0.2).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, '//h3[text()=\"服务项目\"]'))\n",
|
|
" )\n",
|
|
" if h3_element.text == \"服务项目\":\n",
|
|
" table = h3_element.find_element(By.XPATH, './following-sibling::table')\n",
|
|
" rows = table.find_elements(By.TAG_NAME, 'tr')\n",
|
|
" for row1 in rows[1:]:\n",
|
|
" li_items = row1.find_elements(By.TAG_NAME, 'td')\n",
|
|
" service_info.append({\n",
|
|
" '服务名称': li_items[0].text,\n",
|
|
" '销售人员': li_items[1].text,\n",
|
|
" '施工人员': li_items[2].text,\n",
|
|
" '应付金额': li_items[3].text,\n",
|
|
" '施工时间': li_items[4].text,\n",
|
|
" '完工时间': li_items[5].text,\n",
|
|
" '服务评分': li_items[6].text\n",
|
|
" })\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
" \n",
|
|
" # 获取销售产品\n",
|
|
" product_info = []\n",
|
|
" try:\n",
|
|
" h3_element = WebDriverWait(driver, 0.2).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, '//h3[text()=\"销售产品\"]'))\n",
|
|
" )\n",
|
|
" if h3_element.text == \"销售产品\":\n",
|
|
" table = h3_element.find_element(By.XPATH, './following-sibling::table')\n",
|
|
" rows = table.find_elements(By.TAG_NAME, 'tr')\n",
|
|
" for row2 in rows[1:]:\n",
|
|
" li_items = row2.find_elements(By.TAG_NAME, 'td')\n",
|
|
" product_info.append({\n",
|
|
" '产品名称': li_items[0].text,\n",
|
|
" '型号': li_items[1].text,\n",
|
|
" '单价': li_items[2].text,\n",
|
|
" '数量': li_items[3].text,\n",
|
|
" '总价': li_items[4].text,\n",
|
|
" '销售人员': li_items[5].text,\n",
|
|
" '销售时间': li_items[6].text\n",
|
|
" })\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
" \n",
|
|
" # 获取支付记录\n",
|
|
" payment_info = []\n",
|
|
" try:\n",
|
|
" h3_element = WebDriverWait(driver, 0.2).until(\n",
|
|
" EC.presence_of_element_located((By.XPATH, '//h3[text()=\"支付记录\"]'))\n",
|
|
" )\n",
|
|
" if h3_element.text == \"支付记录\":\n",
|
|
" table = h3_element.find_element(By.XPATH, './following-sibling::table')\n",
|
|
" rows = table.find_elements(By.TAG_NAME, 'tr')\n",
|
|
" for row3 in rows[1:]:\n",
|
|
" li_items = row3.find_elements(By.TAG_NAME, 'td')\n",
|
|
" payment_info.append({\n",
|
|
" '支付方式': li_items[0].text,\n",
|
|
" '账号': li_items[1].text,\n",
|
|
" '金额': li_items[2].text,\n",
|
|
" '时间': li_items[3].text,\n",
|
|
" '备注': li_items[4].text\n",
|
|
" })\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
" \n",
|
|
" # 将所有信息组合成一个字典\n",
|
|
" order_info = {\n",
|
|
" '订单号': details_id,\n",
|
|
"        # NOTE(review): the next four values are plain variables last assigned in the URL-collection loop above,\n",
|
|
"        # not looked up per URL, so every exported row receives the same values - confirm and carry them per order.\n",
|
|
"        '开单时间': start_dj_data,\n",
|
|
"        '入账时间': get_money_data,\n",
|
|
"        '车辆': car_number,\n",
|
|
"        '车主': customer,\n",
|
|
" '车牌号码': base_info.get('车牌号码', ''),\n",
|
|
" '车辆品牌': base_info.get('车辆品牌', ''),\n",
|
|
" '会员卡号': base_info.get('会员卡号', ''),\n",
|
|
" '车主姓名': base_info.get('车主姓名', ''),\n",
|
|
" '联系方式': base_info.get('联系方式', ''),\n",
|
|
" # '订单详情': order_details,\n",
|
|
" '服务名称': '\\n'.join([item['服务名称'] for item in service_info]),\n",
|
|
" '销售人员': '\\n'.join([item['销售人员'] for item in service_info]),\n",
|
|
" '施工人员': '\\n'.join([item['施工人员'] for item in service_info]),\n",
|
|
" '应付金额': '\\n'.join([item['应付金额'] for item in service_info]),\n",
|
|
" '施工时间': '\\n'.join([item['施工时间'] for item in service_info]),\n",
|
|
" '完工时间': '\\n'.join([item['完工时间'] for item in service_info]),\n",
|
|
" '服务评分': '\\n'.join([item['服务评分'] for item in service_info]),\n",
|
|
" '产品名称': '\\n'.join([item['产品名称'] for item in product_info]),\n",
|
|
" '型号': '\\n'.join([item['型号'] for item in product_info]),\n",
|
|
" '单价': '\\n'.join([item['单价'] for item in product_info]),\n",
|
|
" '数量': '\\n'.join([item['数量'] for item in product_info]),\n",
|
|
" '总价': '\\n'.join([item['总价'] for item in product_info]),\n",
|
|
" '销售人员': '\\n'.join([item['销售人员'] for item in product_info]),\n",
|
|
" '销售时间': '\\n'.join([item['销售时间'] for item in product_info]),\n",
|
|
" '支付方式': '\\n'.join([item['支付方式'] for item in payment_info]),\n",
|
|
" '账号': '\\n'.join([item['账号'] for item in payment_info]),\n",
|
|
" '金额': '\\n'.join([item['金额'] for item in payment_info]),\n",
|
|
" '时间': '\\n'.join([item['时间'] for item in payment_info]),\n",
|
|
" '备注': '\\n'.join([item['备注'] for item in payment_info])\n",
|
|
" }\n",
|
|
" \n",
|
|
" all_data.append(order_info) # 将完整订单信息添加到总数据列表中\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
"\n",
|
|
"# 使用pandas将数据保存为Excel文件\n",
|
|
"df = pd.DataFrame(all_data, columns=headers)\n",
|
|
"df.to_excel('爱车店数据导出.xlsx', index=False) # 保存为Excel文件\n",
|
|
"\n",
|
|
"# 关闭浏览器\n",
|
|
"driver.quit()"
|
|
],
|
|
"id": "3bdcde801e98b6bd",
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<>:25: SyntaxWarning: invalid escape sequence '\\P'\n",
|
|
"<>:25: SyntaxWarning: invalid escape sequence '\\P'\n",
|
|
"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_27060\\653923207.py:25: SyntaxWarning: invalid escape sequence '\\P'\n",
|
|
" service = Service(executable_path='D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe')\n",
|
|
"处理时间段: 0%| | 0/5 [00:00<?, ?it/s]\n",
|
|
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A\n",
|
|
"100%|██████████| 1/1 [04:13<00:00, 253.29s/it]\u001B[A\n",
|
|
"\n",
|
|
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A\n",
|
|
"100%|██████████| 1/1 [04:49<00:00, 289.93s/it]\u001B[A\n",
|
|
"\n",
|
|
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A\n",
|
|
"100%|██████████| 1/1 [05:09<00:00, 309.01s/it]\u001B[A\n",
|
|
"\n",
|
|
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A\n",
|
|
"100%|██████████| 1/1 [04:25<00:00, 265.08s/it]\u001B[A\n",
|
|
"\n",
|
|
" 0%| | 0/1 [00:00<?, ?it/s]\u001B[A\n",
|
|
"100%|██████████| 1/1 [00:02<00:00, 2.59s/it]\u001B[A\n",
|
|
"处理时间段: 20%|██ | 1/5 [18:44<1:14:57, 1124.42s/it]\n",
|
|
" 0%| | 39/14439 [00:37<3:54:19, 1.02it/s]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 1%|▏ | 183/14439 [02:55<4:05:12, 1.03s/it]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 1%|▏ | 184/14439 [03:00<9:07:41, 2.31s/it]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 1%|▏ | 198/14439 [03:15<2:58:07, 1.33it/s] "
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 1%|▏ | 199/14439 [03:20<8:21:44, 2.11s/it]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 1%|▏ | 200/14439 [03:26<12:27:17, 3.15s/it]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 2%|▏ | 219/14439 [03:48<3:40:41, 1.07it/s] "
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 2%|▏ | 220/14439 [03:53<8:49:45, 2.24s/it]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 2%|▏ | 281/14439 [04:54<3:38:17, 1.08it/s] "
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 6%|▌ | 813/14439 [14:18<4:40:52, 1.24s/it]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 36%|███▌ | 5177/14439 [1:30:02<2:24:55, 1.07it/s]"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"跳过\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"100%|██████████| 14439/14439 [5:04:48<00:00, 1.27s/it] \n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 5
|
|
},
|
|
{
|
|
"metadata": {
|
|
"ExecuteTime": {
|
|
"end_time": "2025-06-23T06:06:18.139653Z",
|
|
"start_time": "2025-06-23T06:06:18.131895Z"
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
|
"df1 = pd.DataFrame(all_url)\n",
|
|
"df1.to_csv('all_url.csv', index=False)"
|
|
],
|
|
"id": "a68ff75a6184bae",
|
|
"outputs": [],
|
|
"execution_count": 3
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 2
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython2",
|
|
"version": "2.7.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|