# Files
# F6--/张阳脚本/竞品系统数据导出/爱车店.py
# T
# 2026-01-30 11:28:35 +08:00
#
# 252 lines
# 13 KiB
# Python

import os
import time
from datetime import datetime
from datetime import datetime, timedelta
from urllib.parse import urljoin

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm
# Configure Chrome.
chrome_options = Options()
# Headless mode (no visible browser window) is left disabled; uncomment to enable.
# chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')
# Path to the ChromeDriver binary. A raw string keeps the Windows backslashes
# literal instead of relying on "\P" and "\c" happening not to be escapes.
service = Service(executable_path=r'D:\ProgramTools\chromedriver-win64\chromedriver.exe')
# Create the WebDriver used by the rest of the script.
driver = webdriver.Chrome(service=service, options=chrome_options)
# Target site.
url = 'http://best.aichedian.com/login/'
url1 = ''
# SECURITY: credentials should not be kept in source. They can be supplied via
# the AICHEDIAN_USERNAME / AICHEDIAN_PASSWORD environment variables; the
# literals below remain only as backward-compatible fallbacks and should be
# removed once the environment is configured.
username = os.environ.get('AICHEDIAN_USERNAME', '15307259977')
password = os.environ.get('AICHEDIAN_PASSWORD', 'juanzi810119')
title = 'HGYH -- HGYH'


def _wait_for(xpath, timeout=10):
    """Wait up to *timeout* seconds for *xpath* to be present and return the element.

    Raises selenium's TimeoutException when the element never appears.
    """
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.XPATH, xpath))
    )


# Open the login page and walk through the login form.
driver.get(url)
_wait_for('//*[@id="username"]').send_keys(username)
# NOTE(review): div[4] input is presumably the form's "next"/submit button - confirm.
_wait_for('//*[@id="login_form"]/div[4]/div/input').click()
time.sleep(2)
# Open the selection box and pick its first entry.
# NOTE(review): presumably the shop/account selector - confirm against the page.
_wait_for('//*[@id="select-dom"]').click()
_wait_for('//*[@id="select_box"]/div/div[1]/div').click()
_wait_for('//*[@id="select_box"]/div/div[1]/div/span[1]').click()
time.sleep(5)
# Enter the password and click the same form button again.
_wait_for('//*[@id="login_form"]/div[3]/div[2]/div/input').send_keys(password)
_wait_for('//*[@id="login_form"]/div[4]/div/input').click()
time.sleep(2)
# Export window: the 1465 days leading up to (and including) today.
# Change the day count to adjust how far back the export reaches.
end_date = datetime.now()
start_date = end_date - timedelta(days=1465)  # 1358
date_range = pd.date_range(start=start_date, end=end_date)

# Excel header layout, grouped by where the values come from. The header text
# '销售人员' intentionally appears twice (once for services, once for products),
# so rows are built as lists aligned with `headers` rather than as dicts: a
# dict can hold only one value per key, which previously made the product
# salesperson overwrite the service salesperson in both columns.
ORDER_FIELDS = [
    '订单号',
    '开单时间', '入账时间', '车辆', '车主', '订单详情',
    '车牌号码', '车辆品牌', '会员卡号', '车主姓名', '联系方式',
]
SERVICE_FIELDS = ['服务名称', '销售人员', '施工人员', '应付金额', '施工时间', '完工时间', '服务评分']
PRODUCT_FIELDS = ['产品名称', '型号', '单价', '数量', '总价', '销售人员', '销售时间']
PAYMENT_FIELDS = ['支付方式', '账号', '金额', '时间', '备注']
headers = ORDER_FIELDS + SERVICE_FIELDS + PRODUCT_FIELDS + PAYMENT_FIELDS

OUTPUT_FILE = '爱车店数据导出.xlsx'
# Example: http://best.aichedian.com/report/order-count-detail/?dt=2024/12/01
LIST_URL = 'http://best.aichedian.com/report/order-count-detail/?dt={}'
# NOTE(review): this host differs from the one used for login and listing;
# it only matters if a link's href comes back relative - confirm it is intended.
DETAIL_BASE_URL = 'http://xlsf.aichedian.com/'
ROW_XPATH = '//*[@id="x-single-content"]/table[2]/tbody/tr'
MAX_EMPTY_DAYS = 1000  # stop after this many consecutive days without rows

all_data = []


def _collect_order_links():
    """Return ``(summary, detail_url)`` for every order link on the current listing page.

    ``summary`` is the text of the row's first four cells (open time, booking
    time, vehicle, owner). Everything is read up front so that no listing
    element is touched after the driver has navigated to a detail page.

    Raises TimeoutException when the listing table has no rows within 5 seconds.
    """
    rows = WebDriverWait(driver, 5).until(
        EC.presence_of_all_elements_located((By.XPATH, ROW_XPATH))
    )
    orders = []
    for row in rows:
        cells = row.find_elements(By.TAG_NAME, 'td')
        if len(cells) < 4:
            # Not an order row (too few <td> cells to hold the summary fields).
            continue
        summary = [cell.text for cell in cells[:4]]
        for cell in cells:
            try:
                link = cell.find_element(By.XPATH, './/a[@class="underline-styled"]')
            except NoSuchElementException:
                continue  # this cell carries no detail link
            orders.append((summary, urljoin(DETAIL_BASE_URL, link.get_attribute('href'))))
    return orders


def _scrape_base_info():
    """Return the detail page's ``key:value`` list items as a dict.

    Each item is split on its first colon only, so values that themselves
    contain colons (for example timestamps) are kept whole. Items without a
    colon map to an empty string.
    """
    info_list = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="x-single-content"]/ul'))
    )
    base_info = {}
    for item in info_list.find_elements(By.TAG_NAME, 'li'):
        key, _, value = item.text.partition(':')
        base_info[key.strip()] = value.strip()
    return base_info


def _scrape_section(heading, fields):
    """Return one dict per data row of the table that follows the <h3> titled *heading*.

    *fields* names the table's columns in order. Sections are optional: when
    the heading or its table is missing, or a row has fewer cells than
    *fields*, the rows gathered so far are returned instead of failing.
    """
    records = []
    try:
        # Short wait: the section simply may not exist for this order.
        h3 = WebDriverWait(driver, 0.2).until(
            EC.presence_of_element_located((By.XPATH, f'//h3[text()="{heading}"]'))
        )
        if h3.text != heading:
            return records
        table = h3.find_element(By.XPATH, './following-sibling::table')
        for tr in table.find_elements(By.TAG_NAME, 'tr')[1:]:  # skip the header row
            tds = tr.find_elements(By.TAG_NAME, 'td')
            records.append({name: tds[i].text for i, name in enumerate(fields)})
    except (WebDriverException, IndexError):
        # Best effort by design; unlike a bare except this still lets
        # KeyboardInterrupt and programming errors surface.
        pass
    return records


def _join_column(records, field):
    """Join the *field* value of every record into one newline-separated cell."""
    return '\n'.join(record[field] for record in records)


def _scrape_order(summary, detail_url):
    """Open *detail_url* and return one export row aligned with ``headers``."""
    opened_at, booked_at, car, owner = summary
    driver.get(detail_url)
    base_info = _scrape_base_info()
    service_info = _scrape_section('服务项目', SERVICE_FIELDS)
    product_info = _scrape_section('销售产品', PRODUCT_FIELDS)
    payment_info = _scrape_section('支付记录', PAYMENT_FIELDS)

    order_values = {
        '订单号': detail_url.split('/')[-2],  # second-to-last URL path segment
        '开单时间': opened_at,
        '入账时间': booked_at,
        '车辆': car,
        '车主': owner,
        '订单详情': None,  # column is kept in the sheet but intentionally left empty
        '车牌号码': base_info.get('车牌号码', ''),
        '车辆品牌': base_info.get('车辆品牌', ''),
        '会员卡号': base_info.get('会员卡号', ''),
        '车主姓名': base_info.get('车主姓名', ''),
        '联系方式': base_info.get('联系方式', ''),
    }
    record = [order_values[name] for name in ORDER_FIELDS]
    record += [_join_column(service_info, name) for name in SERVICE_FIELDS]
    record += [_join_column(product_info, name) for name in PRODUCT_FIELDS]
    record += [_join_column(payment_info, name) for name in PAYMENT_FIELDS]
    return record


def _crawl_date_range():
    """Visit the listing of every day in ``date_range`` and append its orders to ``all_data``."""
    no_data_count = 0  # consecutive days without any listing rows
    with tqdm(total=len(date_range), desc="处理日期") as pbar:
        for current_date in date_range:
            driver.get(LIST_URL.format(current_date.strftime('%Y/%m/%d')))
            try:
                orders = _collect_order_links()
            except TimeoutException:
                no_data_count += 1
                if no_data_count >= MAX_EMPTY_DAYS:
                    print(f"连续{MAX_EMPTY_DAYS}天没有数据,退出循环")
                    break
                pbar.update(1)
                continue
            no_data_count = 0
            for summary, detail_url in orders:
                all_data.append(_scrape_order(summary, detail_url))
                time.sleep(0.1)  # small pause between detail pages
            pbar.update(1)


def _save_excel(records):
    """Write *records* (lists aligned with ``headers``) to the output workbook."""
    pd.DataFrame(records, columns=headers).to_excel(OUTPUT_FILE, index=False)


try:
    _crawl_date_range()
except BaseException:
    # Keep what was scraped before the failure or interrupt. Nothing is written
    # when no order was collected, so a failed run cannot replace an earlier
    # export with an empty sheet.
    if all_data:
        _save_excel(all_data)
    raise
else:
    _save_excel(all_data)
finally:
    # Always close the browser, including on errors.
    driver.quit()