# Python script (file-viewer header: 203 lines, 7.6 KiB).
import requests
|
|
from bs4 import BeautifulSoup
|
|
import re
|
|
import time
|
|
import os
|
|
import urllib3
|
|
from openpyxl import Workbook
|
|
|
|
# Silence urllib3's InsecureRequestWarning: the requests in this script are
# sent with verify=False, i.e. without TLS certificate verification.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
|
|
# NOTE(review): these cookies are a captured, logged-in browser session and
# contain account identifiers and phone numbers. They are kept verbatim so the
# script keeps working, but they should not live in source control; consider
# loading them from an environment variable or an untracked file.
cookies = {
    'ASP.NET_SessionId': 'vorf0ihfffhays4ojjgmolqu',
    'Hm_lvt_ab3baaa579f771d051a6b0baad5a8cfe': '1780017347',
    'HMACCOUNT': 'A6A0585E8C70051D',
    'iswatchme': '0',
    'setaddat': '0',
    'hksdms': 'username2=admin&truename2=%e6%9d%a8%e7%9b%bc%e8%8f%b2&id=24574&wxusername2=18739061579&zb=false&qx=111-11111111111-11111111111111-0-1111-11111111111111-111111111-1111111111-1111111111111-1111-0-0-0-0-0-0-0-0-0-0&login=1&actname=%e7%ae%a1%e7%90%86%e5%91%98&act=%e7%ae%a1%e7%90%86%e5%91%98&username=admin&truename=%e6%9d%a8%e7%9b%bc%e8%8f%b2&userid=31100&valid=True&wxusername=18739061579&uniqueKey=9c680418-f250-4bbb-9b5c-c88b3bbb04ab&timeunitprice=0.00&allowquickout=False&telqx=1&tel=18739061579&StoreName=%e6%b4%9b%e9%98%b3%e5%be%b7%e5%a8%81Jeep%e4%b8%93%e4%bf%ae%e5%ba%97&attestationTel=18739061579&StoreName2=%e5%be%b7%e5%a8%81%e7%bb%b4%e4%bf%ae%e4%bf%9d%e5%85%bb&vipid=SAAS24574&zonecode=1003&zone=%e5%b1%b1%e4%b8%9c&CustomerID=164902&IsInitialized=1&ScrmModuleValidTime=&isScrmModule=False&isBasicModule=True&isTechnologyModule=True&isPartsManageModule=True&isBusinessImprovementModule=False',
    # Paging/sort state cookie; the crawl loop overwrites it for every page.
    'Morder': 'TextTime1=2016-04-01&TextTime2=2026-05-29&sortstring= order by indate desc ,tid desc &CurrentPage=1',
    'SERVERID': '000e421eb0ab0efb9790874bd5c8f758|1780019746|1780017343',
    'Hm_lpvt_ab3baaa579f771d051a6b0baad5a8cfe': '1780019753',
}

# Browser-like request headers sent with every POST to MaintenanceOrder.aspx.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'max-age=0',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'http://www.kuaixiuge.com',
    'Proxy-Connection': 'keep-alive',
    'Referer': 'http://www.kuaixiuge.com/MaintenanceOrder.aspx?clientWidth=1647',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36 Edg/148.0.0.0',
}

# Query-string parameters appended to the page URL.
params = {
    'clientWidth': '1647',
}

# Directory the resulting workbook is written to (created on demand).
OUTPUT_DIR = r'D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\文件输出'

# Header row of the exported sheet; its length also caps the cells kept per row.
COLUMNS = ['工单号', '车牌号', 'VIN码', '客户名', '电话', '车型', '进厂时间', '出厂时间', '类别', '状态', '金额', '工位', '接车人']
|
|
|
|
# One shared HTTP session so that every request carries the captured login
# cookies and any cookies the server sets along the way.
session = requests.Session()
session.cookies.update(cookies)
|
|
|
|
def get_viewstate_and_pager(soup, page_size=15):
    """Extract the ASP.NET postback fields and the paging totals from a page.

    Args:
        soup: Parsed page (the callers in this file pass a BeautifulSoup
            document). Only ``soup.find`` and ``soup.get_text`` are used.
        page_size: Number of records per result page, used to convert the
            record count into a page count. Defaults to 15.

    Returns:
        A 5-tuple ``(viewstate, viewstate_generator, event_validation,
        total_pages, total_records)``. A hidden field that is missing from
        the page, or that has no ``value`` attribute, is returned as ``''``.
        When the page text contains no "共N条记录" counter the totals are
        ``total_pages == 1`` and ``total_records == 0``; a counter of 0
        yields ``total_pages == 0``.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    if page_size < 1:
        raise ValueError(f"page_size must be >= 1, got {page_size!r}")

    def hidden_value(name):
        # Look the hidden <input> up by name; .get() avoids a KeyError when
        # the element exists but carries no value attribute.
        field = soup.find('input', {'name': name})
        if field is None:
            return ''
        return field.get('value') or ''

    viewstate_val = hidden_value('__VIEWSTATE')
    viewstate_gen_val = hidden_value('__VIEWSTATEGENERATOR')
    event_val = hidden_value('__EVENTVALIDATION')

    total_pages = 1
    total_records = 0
    match = re.search(r'共(\d+)条记录', soup.get_text())
    if match:
        total_records = int(match.group(1))
        # Ceiling division: a partially filled last page still counts.
        total_pages = (total_records + page_size - 1) // page_size

    return viewstate_val, viewstate_gen_val, event_val, total_pages, total_records
|
|
|
|
def extract_gch_from_row(row, order_prefix='24574'):
    """Return the work-order number found in a table row and the row's texts.

    Args:
        row: Table-row element. Only ``row.find_all('td')`` and each cell's
            ``get_text(strip=True)`` are used.
        order_prefix: Leading digits an order number must start with.
            Defaults to ``'24574'``.
            NOTE(review): this equals the ``id=24574`` value in the session
            cookie - presumably the store/account id; confirm.

    Returns:
        ``(gch, row_data)``. ``row_data`` is the list of stripped cell texts
        (``[]`` when the row has no ``<td>`` cells). ``gch`` is the first
        substring, scanning cells left to right, made of ``order_prefix``
        followed by at least nine digits, or ``None`` when no cell has one.
    """
    cells = row.find_all('td')
    if not cells:
        return None, []

    row_data = [cell.get_text(strip=True) for cell in cells]

    # Compile once per row; escape the prefix so it is always matched literally.
    order_pattern = re.compile(re.escape(order_prefix) + r'\d{9,}')
    hits = (order_pattern.search(text) for text in row_data)
    gch = next((hit.group(0) for hit in hits if hit), None)

    return gch, row_data
|
|
|
|
def init_first_page():
    """Run the initial search POST and return the first result page's state.

    The ``__VIEWSTATE`` and ``__EVENTVALIDATION`` values needed for the first
    postback are not fetched from the site; they are read out of the dict
    literal in the sibling script ``快修哥历史维修记录.py`` located next to
    this file.

    Returns:
        ``(soup, viewstate, viewstate_generator, event_validation,
        total_pages, total_records)`` where ``soup`` is the parsed response
        and the remaining values come from ``get_viewstate_and_pager``.

    Raises:
        OSError: If the sibling script cannot be opened.
        ValueError: If a required value cannot be located in that script.
        requests.RequestException: If the POST fails, times out, or returns
            an HTTP error status.
    """
    src_path = os.path.join(os.path.dirname(__file__), '快修哥历史维修记录.py')
    with open(src_path, 'r', encoding='utf-8') as f:
        src = f.read()

    def get_str(key, next_key):
        # Grab the quoted value of `key` that is directly followed by the
        # `next_key` entry. Whitespace between the entries is matched loosely
        # so the lookup does not depend on the other file's indentation.
        pattern = (
            r"'" + re.escape(key) + r"':\s*'(.*?)',\s*'" + re.escape(next_key) + r"'"
        )
        match = re.search(pattern, src, re.DOTALL)
        if match is None:
            # The previous str.find() arithmetic silently returned a garbage
            # slice here; fail loudly instead.
            raise ValueError(
                f"could not find {key!r} followed by {next_key!r} in {src_path}"
            )
        return match.group(1)

    vs = get_str('__VIEWSTATE', '__VIEWSTATEGENERATOR')
    ev = get_str('__EVENTVALIDATION', 'TextTime1')

    post_resp = session.post(
        'http://www.kuaixiuge.com/MaintenanceOrder.aspx',
        params=params,
        headers=headers,
        data={
            '__EVENTTARGET': '',
            '__EVENTARGUMENT': '',
            '__LASTFOCUS': '',
            '__VIEWSTATE': vs,
            '__VIEWSTATEGENERATOR': 'DECE3335',
            '__EVENTVALIDATION': ev,
            'TextTime1': '2016-04-01',
            'TextTime2': '2026-05-29',
            'DropDownUser': '0',
            'DropDownStatus': '全部',
            'TextGch': '',
            'TextCname': '',
            'txtSenpeopleOrPhone': '',
            'DropDownXlxz': '全部',
            'Button3': '搜索',
            'AspNetPager1_input': '1',
        },
        verify=False,
        # Without a timeout requests waits indefinitely on a stalled server.
        timeout=60,
    )
    # An error page would otherwise be parsed as an empty result set.
    post_resp.raise_for_status()

    soup = BeautifulSoup(post_resp.text, 'html.parser')
    vs, vsg, ev, total_pages, total_records = get_viewstate_and_pager(soup)
    return soup, vs, vsg, ev, total_pages, total_records
|
|
|
|
# ---------------------------------------------------------------------------
# Crawl every result page, collect the work-order rows, export them to Excel.
# ---------------------------------------------------------------------------
soup, viewstate, viewstate_gen, event_validation, total_pages, total_records = init_first_page()

print(f"总页数: {total_pages} (共 {total_records} 条记录)")

# Search window, used by both the form fields and the 'Morder' cookie below.
date_from = '2016-04-01'
date_to = '2026-05-29'

all_rows = []

for page in range(1, total_pages + 1):
    print(f"正在获取第 {page} 页...")
    if page > 1:
        # Pause between consecutive page requests.
        time.sleep(1)

    # Pager postback: the hidden fields must be the ones returned by the
    # previous response, so they are refreshed after every request.
    data = {
        '__EVENTTARGET': 'AspNetPager1',
        '__EVENTARGUMENT': str(page),
        '__LASTFOCUS': '',
        '__VIEWSTATE': viewstate,
        '__VIEWSTATEGENERATOR': viewstate_gen,
        '__EVENTVALIDATION': event_validation,
        'TextTime1': date_from,
        'TextTime2': date_to,
        'DropDownUser': '0',
        'DropDownStatus': '全部',
        'TextGch': '',
        'TextCname': '',
        'txtSenpeopleOrPhone': '',
        'DropDownXlxz': '全部',
        'AspNetPager1_input': str(page),
    }

    # Keep the paging cookie in step with the page being requested.
    morder = f'TextTime1={date_from}&TextTime2={date_to}&sortstring= order by indate desc ,tid desc &CurrentPage={page}'
    session.cookies.set('Morder', morder)

    response = session.post(
        'http://www.kuaixiuge.com/MaintenanceOrder.aspx',
        params=params,
        headers=headers,
        data=data,
        verify=False,
        # Without a timeout requests waits indefinitely on a stalled server.
        timeout=60,
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    viewstate, viewstate_gen, event_validation, _, _ = get_viewstate_and_pager(soup)

    gridview = soup.find('table', {'id': 'GridView1'})
    if gridview is None:
        print(f" ⚠ 第 {page} 页未找到 GridView1 表格!")
        continue

    # The first <tr> is skipped (treated as the header row).
    rows = gridview.find_all('tr')[1:]
    print(f" 第 {page} 页提取到 {len(rows)} 行数据")
    for row in rows:
        gch, row_data = extract_gch_from_row(row)
        # Rows without an order number are dropped.
        if gch:
            # Keep exactly as many cells as there are exported columns.
            all_rows.append(row_data[:len(COLUMNS)])

print(f"\n共找到 {len(all_rows)} 条工单")

os.makedirs(OUTPUT_DIR, exist_ok=True)
output_path = os.path.join(OUTPUT_DIR, '快修哥历史维修记录_主列表.xlsx')

wb = Workbook()
ws = wb.active
ws.title = '历史维修记录'
ws.append(COLUMNS)

for row in all_rows:
    ws.append(row)

ws.auto_filter.ref = ws.dimensions

# Size each column to its longest non-empty value plus padding, capped at 40.
for col in ws.columns:
    col_letter = col[0].column_letter
    max_len = max((len(str(cell.value)) for cell in col if cell.value), default=0)
    ws.column_dimensions[col_letter].width = min(max_len + 4, 40)

# Format column A (order numbers) as text.
for cell in ws['A']:
    cell.number_format = '@'

wb.save(output_path)
print(f"数据已保存到 {output_path}")
|