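# Selenium script: logs in to best.aichedian.com, walks the per-day order
# report pages, opens each linked order-detail page and exports the collected
# rows to an Excel workbook.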
import os
import time
from datetime import datetime
from datetime import datetime, timedelta
from urllib.parse import urljoin

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm

# Configure Chrome.
chrome_options = Options()
# Uncomment to run headless (no visible browser window).
# chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# Location of the ChromeDriver binary. A raw string is required here: in a
# normal literal "\P" and "\c" are invalid escape sequences, which newer
# Python versions warn about and will eventually reject.
service = Service(executable_path=r'D:\ProgramTools\chromedriver-win64\chromedriver.exe')

# Start the browser session used by the rest of the script.
driver = webdriver.Chrome(service=service, options=chrome_options)

# Login page of the target site.
url = 'http://best.aichedian.com/login/'
# Placeholder; overwritten with the per-day report URL inside the scrape loop.
url1 = ''

# SECURITY(review): these credentials are committed in plain text. They can
# now be supplied through the AICHEDIAN_USERNAME / AICHEDIAN_PASSWORD
# environment variables; the literals remain only as a fallback so existing
# runs keep working. Rotate the password and delete the fallbacks.
username = os.environ.get('AICHEDIAN_USERNAME', '15307259977')
password = os.environ.get('AICHEDIAN_PASSWORD', 'juanzi810119')

title = 'HGYH -- HGYH'

# Open the login page.
driver.get(url)


def _interactable(xpath, timeout=10):
    """Return the element at *xpath* once it is visible and enabled.

    Waiting for mere presence in the DOM is not enough before ``click()`` or
    ``send_keys()``: an element can be present but not yet interactable.

    Raises:
        TimeoutException: if the element is not interactable within
            *timeout* seconds.
    """
    return WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((By.XPATH, xpath))
    )


# The same form control is clicked twice: once after the username has been
# typed and once more after the password has been typed.
LOGIN_BUTTON_XPATH = '//*[@id="login_form"]/div[4]/div/input'

_interactable('//*[@id="username"]').send_keys(username)
_interactable(LOGIN_BUTTON_XPATH).click()
time.sleep(2)

# Open the "select-dom" control and click the first entry of "select_box"
# (presumably a shop/account picker -- TODO confirm against the live page).
_interactable('//*[@id="select-dom"]').click()
_interactable('//*[@id="select_box"]/div/div[1]/div').click()
_interactable('//*[@id="select_box"]/div/div[1]/div/span[1]').click()
time.sleep(5)

_interactable('//*[@id="login_form"]/div[3]/div[2]/div/input').send_keys(password)
_interactable(LOGIN_BUTTON_XPATH).click()

# Give the post-login page time to settle before scraping starts.
time.sleep(2)

# Scrape window: it ends now and starts LOOKBACK_DAYS days earlier.
# Change LOOKBACK_DAYS to widen or narrow the range of report days visited.
LOOKBACK_DAYS = 1465
end_date = datetime.now()
start_date = end_date - timedelta(days=LOOKBACK_DAYS)
# Rebound by the scrape loop on every iteration; initialised to the first day.
current_date = start_date
# Accumulates one entry per scraped order.
all_data = []

# Column headers of the exported sheet, in output order. Note that
# '销售人员' (salesperson) appears twice: once in the service group and once
# in the product group.
headers = [
    # Order id.
    '订单号',
    # Summary columns taken from the daily list page.
    '开单时间', '入账时间', '车辆', '车主', '订单详情',
    # Vehicle / owner details from the order-detail page.
    '车牌号码', '车辆品牌', '会员卡号', '车主姓名', '联系方式',
    # Service items.
    '服务名称', '销售人员', '施工人员', '应付金额', '施工时间', '完工时间', '服务评分',
    # Sold products.
    '产品名称', '型号', '单价', '数量', '总价', '销售人员', '销售时间',
    # Payment records.
    '支付方式', '账号', '金额', '时间', '备注',
]

# One entry per day from start_date to end_date inclusive; also sizes the
# progress bar.
date_range = pd.date_range(start=start_date, end=end_date)

# Table layouts on an order-detail page: cell i of every data row is stored
# under field i of the matching tuple.
SERVICE_FIELDS = ('服务名称', '销售人员', '施工人员', '应付金额', '施工时间', '完工时间', '服务评分')
PRODUCT_FIELDS = ('产品名称', '型号', '单价', '数量', '总价', '销售人员', '销售时间')
PAYMENT_FIELDS = ('支付方式', '账号', '金额', '时间', '备注')
# Labels looked up in the detail page's "label:value" list.
BASE_FIELDS = ('车牌号码', '车辆品牌', '会员卡号', '车主姓名', '联系方式')

# Base used to absolutise detail-page links.
base_url = 'http://xlsf.aichedian.com/'
# Rows of the order table on a daily report page.
xpath = '//*[@id="x-single-content"]/table[2]/tbody/tr'


def _collect_orders(rows):
    """Snapshot (summary, detail_url) pairs from the daily list page.

    Everything needed from the list page is read *before* any navigation:
    WebElements obtained from a page may become stale once the browser has
    left it, so they must not be touched after ``driver.get``.

    Rows with fewer than four <td> cells are skipped. For each remaining row
    the first four cell texts form the summary, and the first matching link
    in each cell yields one entry.
    """
    orders = []
    for row in rows:
        cells = row.find_elements(By.TAG_NAME, 'td')
        summary = [cell.text for cell in cells]
        if len(summary) < 4:
            continue
        for cell in cells:
            links = cell.find_elements(By.XPATH, './/a[@class="underline-styled"]')
            if links:
                href = links[0].get_attribute('href')
                orders.append((summary[:4], urljoin(base_url, href)))
    return orders


def _parse_base_info(texts):
    """Turn "label:value" strings into a dict.

    Only the first colon separates label from value, so values that contain
    colons themselves are kept whole. Text without a colon maps to ''.
    """
    info = {}
    for text in texts:
        label, _, value = text.partition(':')
        info[label.strip()] = value.strip()
    return info


def _scrape_section(heading, fields):
    """Return one dict per data row of the table following <h3>heading</h3>.

    Returns an empty list when the heading or its table is absent. The first
    table row is treated as the header and skipped; rows with fewer cells
    than *fields* are skipped as well.
    """
    try:
        h3_element = WebDriverWait(driver, 0.2).until(
            EC.presence_of_element_located((By.XPATH, f'//h3[text()="{heading}"]'))
        )
        if h3_element.text != heading:
            return []
        table = h3_element.find_element(By.XPATH, './following-sibling::table')
    except (TimeoutException, NoSuchElementException):
        return []

    records = []
    for table_row in table.find_elements(By.TAG_NAME, 'tr')[1:]:
        tds = table_row.find_elements(By.TAG_NAME, 'td')
        if len(tds) < len(fields):
            continue
        records.append({field: td.text for field, td in zip(fields, tds)})
    return records


def _joined(records, field):
    """Join *field* of every record with newlines (one line per table row)."""
    return '\n'.join(record[field] for record in records)


def _scrape_order(summary, detail_url):
    """Open *detail_url* and return one export row, ordered like ``headers``.

    The row is positional rather than a dict because ``headers`` contains
    '销售人员' twice; a dict can hold only one value for that key, which made
    the product salesperson overwrite the service salesperson.

    Raises:
        TimeoutException: if the detail page's info list does not appear
            within 10 seconds.
    """
    driver.get(detail_url)
    base_details = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="x-single-content"]/ul'))
    )
    base_info = _parse_base_info(
        li.text for li in base_details.find_elements(By.TAG_NAME, 'li')
    )
    service_info = _scrape_section('服务项目', SERVICE_FIELDS)
    product_info = _scrape_section('销售产品', PRODUCT_FIELDS)
    payment_info = _scrape_section('支付记录', PAYMENT_FIELDS)

    return [
        detail_url.split('/')[-2],  # order id: second-to-last URL segment
        *summary,                   # opened-at, booked-at, vehicle, owner
        None,                       # order-details column is left empty
        *(base_info.get(label, '') for label in BASE_FIELDS),
        *(_joined(service_info, field) for field in SERVICE_FIELDS),
        *(_joined(product_info, field) for field in PRODUCT_FIELDS),
        *(_joined(payment_info, field) for field in PAYMENT_FIELDS),
    ]


# Visit every day's report page and scrape each order linked from it.
with tqdm(total=len(date_range), desc="处理日期") as pbar:
    no_data_count = 0  # consecutive days whose order table never appeared
    for current_date in date_range:
        date_str = current_date.strftime('%Y/%m/%d')
        # e.g. http://best.aichedian.com/report/order-count-detail/?dt=2024/12/01
        url1 = f'http://best.aichedian.com/report/order-count-detail/?dt={date_str}'
        driver.get(url1)

        try:
            # The wait only returns a non-empty list; no rows means timeout.
            rows = WebDriverWait(driver, 5).until(
                EC.presence_of_all_elements_located((By.XPATH, xpath))
            )
        except TimeoutException:
            no_data_count += 1
            if no_data_count >= 1000:
                print("连续1000天没有数据,退出循环")
                break
            pbar.update(1)
            continue
        no_data_count = 0

        for summary, detail_url in _collect_orders(rows):
            try:
                all_data.append(_scrape_order(summary, detail_url))
            except TimeoutException:
                # One broken detail page must not discard the whole run.
                tqdm.write(f'skipped {detail_url}: detail page did not load')
            time.sleep(0.1)  # brief pause between detail-page requests

        pbar.update(1)

# Write everything that was collected to an Excel workbook. The browser is
# closed in a ``finally`` so that a failed export (for example the workbook
# being open in another program) does not leave a Chrome process behind.
try:
    df = pd.DataFrame(all_data, columns=headers)
    df.to_excel('爱车店数据导出.xlsx', index=False)
finally:
    driver.quit()