200 lines
8.5 KiB
Python
200 lines
8.5 KiB
Python
from selenium import webdriver
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
import time
|
|
from urllib.parse import urljoin
|
|
import pandas as pd
|
|
from selenium.webdriver import Chrome
|
|
from selenium.webdriver.chrome.service import Service
|
|
from selenium.webdriver.chrome.options import Options
|
|
from datetime import datetime, timedelta
|
|
from selenium.common.exceptions import NoSuchElementException
|
|
from tqdm import tqdm
|
|
import re
|
|
from selenium.common.exceptions import TimeoutException
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Configure Chrome options.
chrome_options = Options()
# Headless mode (no visible browser window) is currently disabled;
# uncomment the next line to enable it.
# chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')

# Path to the ChromeDriver executable. A raw string keeps the Windows
# backslashes literal: in a normal string '\P' and '\c' are invalid escape
# sequences (deprecated, and a SyntaxWarning on Python 3.12+).
service = Service(executable_path=r'D:\ProgramTools\chromedriver-win64\chromedriver.exe')

# Create the WebDriver instance.
driver = webdriver.Chrome(service=service, options=chrome_options)

# Target site and login data.
url = 'http://best.aichedian.com/login/'
url1 = ''  # placeholder; reassigned with each list-page URL further down
# NOTE(review): credentials are hard-coded in source control; consider loading
# them from environment variables or a local, untracked config file.
username = '15307259977'
password = 'juanzi810119'
title = 'HGYH -- HGYH'  # not referenced in the visible part of this script

# Open the login page.
driver.get(url)
|
|
|
|
# Drive the login form step by step. Each step waits up to 10 seconds for the
# element to be present in the DOM, then either types text into it or clicks
# it, and finally pauses for the given number of seconds (0 = no pause).
# NOTE(review): the clicks on "select-dom" / "select_box" presumably pick an
# entry from an account or shop selector shown after the first submit -
# confirm against the live page.
login_steps = (
    # (xpath, text to type or None for a click, pause afterwards in seconds)
    ('//*[@id="username"]', username, 0),
    ('//*[@id="login_form"]/div[4]/div/input', None, 2),
    ('//*[@id="select-dom"]', None, 0),
    ('//*[@id="select_box"]/div/div[1]/div', None, 0),
    ('//*[@id="select_box"]/div/div[1]/div/span[1]', None, 5),
    ('//*[@id="login_form"]/div[3]/div[2]/div/input', password, 0),
    ('//*[@id="login_form"]/div[4]/div/input', None, 2),
)

for step_xpath, typed_text, pause_seconds in login_steps:
    step_element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, step_xpath))
    )
    if typed_text is None:
        step_element.click()
    else:
        step_element.send_keys(typed_text)
    if pause_seconds:
        time.sleep(pause_seconds)
|
|
|
|
# Field names of one row in the "服务项目" (service items) table, in cell order.
_SERVICE_FIELDS = (
    '服务名称', '服务销售人员', '施工人员', '应付金额', '施工时间', '完工时间', '服务评分',
)
# Field names of one row in the "销售产品" (products sold) table, in cell order.
_PRODUCT_FIELDS = (
    '产品名称', '型号', '单价', '数量', '总价', '产品销售人员', '销售时间',
)
# Labels read from the basic-information list on the order detail page.
_BASE_FIELDS = ('车牌号码', '车辆品牌', '车主姓名', '联系方式', '进站公里数', '业务类型')

# Amount shown in the list page's price cell, e.g. "¥123.45".
_PRICE_RE = re.compile(r'¥(\d+\.\d{2})')

# Column order of the exported spreadsheet.
# The salesperson columns of the service and product sections used to share
# the single name '销售人员'; as duplicate dict keys the product value silently
# overwrote the service value, so they now carry distinct names.
headers = [
    '订单号',
    '挂账时间', '挂账金额',  # debt-order list page
    *_BASE_FIELDS,  # basic information
    *_SERVICE_FIELDS,  # service items
    *_PRODUCT_FIELDS,  # products sold
]


def _parse_base_info(li_texts):
    """Turn 'label: value' strings into a dict; entries without ':' map to ''.

    Only the first colon separates label from value, so values that contain
    colons themselves (for example timestamps) are kept intact.
    """
    info = {}
    for text in li_texts:
        label, _, value = text.partition(':')
        info[label.strip()] = value.strip()
    return info


def _collect_debt_rows(driver, list_url):
    """Load one debt-list page and return (time, price, detail_url) per row.

    Everything needed from the list page is read before any navigation, because
    WebElement references can go stale once the driver leaves the page.

    Raises:
        NoSuchElementException: if the table body contains no rows.
    """
    driver.get(list_url)
    tbodies = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.XPATH, '//*[@id="data-table-1"]/tbody'))
    )
    table_rows = tbodies[0].find_elements(By.TAG_NAME, 'tr')
    if not table_rows:
        raise NoSuchElementException(f'no rows found on {list_url}')

    entries = []
    for table_row in table_rows:
        cells = table_row.find_elements(By.TAG_NAME, 'td')
        price_match = _PRICE_RE.search(cells[3].text)
        # Keep the original sentinel text when the amount cannot be parsed.
        price = float(price_match.group(1)) if price_match else "未匹配到金额"
        detail_url = cells[4].find_element(By.TAG_NAME, 'a').get_attribute('href')
        entries.append((cells[0].text, price, detail_url))
    return entries


def _scrape_section(driver, heading, fields):
    """Read the table following the <h3> titled *heading* on the current page.

    Returns one dict per data row (the first <tr> is skipped as the header),
    mapping fields[i] to the text of the i-th cell. The section is optional:
    if the heading or table is missing, or a row has fewer cells than
    *fields*, the rows gathered so far are returned.
    """
    records = []
    try:
        h3_element = WebDriverWait(driver, 0.2).until(
            EC.presence_of_element_located((By.XPATH, f'//h3[text()="{heading}"]'))
        )
        if h3_element.text == heading:
            table = h3_element.find_element(By.XPATH, './following-sibling::table')
            for table_row in table.find_elements(By.TAG_NAME, 'tr')[1:]:
                cells = table_row.find_elements(By.TAG_NAME, 'td')
                records.append({name: cells[i].text for i, name in enumerate(fields)})
    except (TimeoutException, NoSuchElementException, IndexError):
        # Best effort by design: not every order has every section.
        pass
    return records


def _scrape_order(driver, transaction_time, price, detail_url):
    """Open one order detail page and return its flattened spreadsheet row."""
    driver.get(detail_url)
    base_list = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="x-single-content"]/ul'))
    )
    base_info = _parse_base_info(
        li.text for li in base_list.find_elements(By.TAG_NAME, 'li')
    )
    service_info = _scrape_section(driver, '服务项目', _SERVICE_FIELDS)
    product_info = _scrape_section(driver, '销售产品', _PRODUCT_FIELDS)

    order_info = {
        # The order id is the second-to-last path segment of the detail URL
        # (assumes the URL ends with a trailing slash, as in the original).
        '订单号': detail_url.split('/')[-2],
        '挂账时间': transaction_time,
        '挂账金额': price,
    }
    for label in _BASE_FIELDS:
        order_info[label] = base_info.get(label, '')
    # Multiple service/product rows are folded into one cell, one per line.
    for name in _SERVICE_FIELDS:
        order_info[name] = '\n'.join(item[name] for item in service_info)
    for name in _PRODUCT_FIELDS:
        order_info[name] = '\n'.join(item[name] for item in product_info)
    return order_info


all_data = []

try:
    for page in tqdm(range(1, 18)):  # list pages 1..17
        url1 = f'http://best.aichedian.com/order/debt/?q=&page={page}'
        for transaction_time, price, detail_url in _collect_debt_rows(driver, url1):
            all_data.append(_scrape_order(driver, transaction_time, price, detail_url))
finally:
    # Always release the browser and the chromedriver process.
    driver.quit()

df = pd.DataFrame(all_data, columns=headers)
df.to_excel('爱车店应收账款数据导出.xlsx', index=False)  # save as an Excel file
|