Files
F6--/张阳脚本/竞品系统数据导出/快修哥历史维修记录_分页获取.py
T
2026-06-02 15:08:26 +08:00

203 lines
7.6 KiB
Python

import requests
from bs4 import BeautifulSoup
import re
import time
import os
import urllib3
from openpyxl import Workbook
# The POST calls in this file pass verify=False; this suppresses urllib3's
# InsecureRequestWarning for such requests.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Cookies that are loaded into the shared requests session further down.
# NOTE(review): these look like values captured from a logged-in browser
# session (ASP.NET session id, account name, phone numbers). They are
# hard-coded credentials/PII and presumably expire - consider loading them
# from an environment variable or an untracked config file. TODO confirm.
cookies = {
'ASP.NET_SessionId': 'vorf0ihfffhays4ojjgmolqu',
'Hm_lvt_ab3baaa579f771d051a6b0baad5a8cfe': '1780017347',
'HMACCOUNT': 'A6A0585E8C70051D',
'iswatchme': '0',
'setaddat': '0',
# Login/profile cookie; note it carries id=24574, the same digits the
# work-order regex in extract_gch_from_row() uses as its prefix.
'hksdms': 'username2=admin&truename2=%e6%9d%a8%e7%9b%bc%e8%8f%b2&id=24574&wxusername2=18739061579&zb=false&qx=111-11111111111-11111111111111-0-1111-11111111111111-111111111-1111111111-1111111111111-1111-0-0-0-0-0-0-0-0-0-0&login=1&actname=%e7%ae%a1%e7%90%86%e5%91%98&act=%e7%ae%a1%e7%90%86%e5%91%98&username=admin&truename=%e6%9d%a8%e7%9b%bc%e8%8f%b2&userid=31100&valid=True&wxusername=18739061579&uniqueKey=9c680418-f250-4bbb-9b5c-c88b3bbb04ab&timeunitprice=0.00&allowquickout=False&telqx=1&tel=18739061579&StoreName=%e6%b4%9b%e9%98%b3%e5%be%b7%e5%a8%81Jeep%e4%b8%93%e4%bf%ae%e5%ba%97&attestationTel=18739061579&StoreName2=%e5%be%b7%e5%a8%81%e7%bb%b4%e4%bf%ae%e4%bf%9d%e5%85%bb&vipid=SAAS24574&zonecode=1003&zone=%e5%b1%b1%e4%b8%9c&CustomerID=164902&IsInitialized=1&ScrmModuleValidTime=&isScrmModule=False&isBasicModule=True&isTechnologyModule=True&isPartsManageModule=True&isBusinessImprovementModule=False',
# Date range, sort order and current page of the order list; the paging
# loop at the bottom of this file overwrites this cookie for every page.
'Morder': 'TextTime1=2016-04-01&TextTime2=2026-05-29&sortstring= order by indate desc ,tid desc &CurrentPage=1',
'SERVERID': '000e421eb0ab0efb9790874bd5c8f758|1780019746|1780017343',
'Hm_lpvt_ab3baaa579f771d051a6b0baad5a8cfe': '1780019753',
}
# HTTP headers passed to every session.post() call in this file.
# NOTE(review): the values look copied from a desktop Edge/Chromium browser
# request, presumably so the server treats the script like that browser -
# confirm which of them the site actually requires.
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
'Cache-Control': 'max-age=0',
# The form fields are sent as a URL-encoded body (requests encodes `data=`).
'Content-Type': 'application/x-www-form-urlencoded',
'Origin': 'http://www.kuaixiuge.com',
'Proxy-Connection': 'keep-alive',
'Referer': 'http://www.kuaixiuge.com/MaintenanceOrder.aspx?clientWidth=1647',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36 Edg/148.0.0.0',
}
# Query-string parameters appended to the MaintenanceOrder.aspx URL on every
# POST; the same clientWidth value appears in the Referer header above.
params = {
'clientWidth': '1647',
}
# Directory the exported workbook is written to; it is created at the end of
# the script if it does not exist yet.
# NOTE(review): absolute, machine-specific Windows path.
OUTPUT_DIR = r'D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\文件输出'
# Header row of the exported sheet. There are 13 names, matching the first
# 13 cells that the paging loop keeps from each table row.
COLUMNS = ['工单号', '车牌号', 'VIN码', '客户名', '电话', '车型', '进厂时间', '出厂时间', '类别', '状态', '金额', '工位', '接车人']
# One shared HTTP session, seeded with the cookies above, used for all requests.
session = requests.Session()
session.cookies.update(cookies)
def get_viewstate_and_pager(soup, page_size=15):
    """Extract the ASP.NET postback tokens and the pager totals from a page.

    Args:
        soup: Parsed page exposing ``find(name, attrs)`` and ``get_text()``
            (a BeautifulSoup document in this script).
        page_size: Number of records shown per result page, used to derive
            the page count. Defaults to 15, the value this script has
            always assumed.

    Returns:
        tuple: ``(viewstate, viewstate_generator, event_validation,
        total_pages, total_records)``. Each token is ``''`` when its hidden
        input is absent or has no ``value`` attribute. When the page text
        contains no "共N条记录" counter, ``total_pages`` is 1 and
        ``total_records`` is 0.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    if page_size < 1:
        raise ValueError(f"page_size must be >= 1, got {page_size!r}")

    def hidden_value(field_name):
        # Read one hidden <input>; a missing tag or a tag without a
        # ``value`` attribute both yield '' instead of raising KeyError.
        field = soup.find('input', {'name': field_name})
        if field is None:
            return ''
        return field.get('value', '') or ''

    viewstate_val = hidden_value('__VIEWSTATE')
    viewstate_gen_val = hidden_value('__VIEWSTATEGENERATOR')
    event_val = hidden_value('__EVENTVALIDATION')

    total_pages = 1
    total_records = 0
    # The pager renders the record count as text, e.g. "共123条记录".
    counter = re.search(r'共(\d+)条记录', soup.get_text())
    if counter:
        total_records = int(counter.group(1))
        # Ceiling division: a partially filled last page still counts.
        total_pages = (total_records + page_size - 1) // page_size
    return viewstate_val, viewstate_gen_val, event_val, total_pages, total_records
def extract_gch_from_row(row, store_prefix='24574'):
    """Return the work-order number found in a table row, plus the row's cell texts.

    A work-order number is recognised as ``store_prefix`` followed by at
    least nine digits, anywhere inside a cell's text.

    Args:
        row: Table-row element exposing ``find_all('td')`` whose items
            expose ``get_text(strip=True)`` (a BeautifulSoup ``<tr>`` tag in
            this script).
        store_prefix: Literal digits every work-order number starts with.
            Defaults to ``'24574'``; presumably the store/account id, since
            the session cookie carries ``id=24574`` - TODO confirm.

    Returns:
        tuple: ``(gch, row_data)``. ``gch`` is the first matching number, or
        ``None`` when no cell contains one. ``row_data`` is the list of
        stripped cell texts, or ``[]`` when the row has no ``<td>`` cells
        (for example a header row).
    """
    cells = row.find_all('td')
    if not cells:
        return None, []
    row_data = [cell.get_text(strip=True) for cell in cells]
    # Escape the prefix so it is always matched literally.
    order_pattern = re.compile(re.escape(store_prefix) + r'\d{9,}')
    hits = (order_pattern.search(text) for text in row_data)
    # First cell that contains an order number wins.
    gch = next((hit.group(0) for hit in hits if hit), None)
    return gch, row_data
def init_first_page():
    """Submit the initial search form and return the first result page's state.

    The ``__VIEWSTATE`` and ``__EVENTVALIDATION`` tokens needed for this
    first POST are not fetched from the site. They are read from the source
    text of the sibling script ``快修哥历史维修记录.py`` in the same
    directory as this file, where they appear as ``'KEY': 'value'`` dict
    entries.

    Returns:
        tuple: ``(soup, viewstate, viewstate_generator, event_validation,
        total_pages, total_records)`` where ``soup`` is the parsed response
        and the rest comes from ``get_viewstate_and_pager(soup)``.

    Raises:
        OSError: If the sibling script cannot be opened.
        ValueError: If a token entry cannot be located in the sibling script.
        requests.RequestException: If the POST times out or the server
            answers with an HTTP error status.
    """
    src_path = os.path.join(os.path.dirname(__file__), '快修哥历史维修记录.py')
    with open(src_path, 'r', encoding='utf-8') as f:
        src = f.read()

    def get_str(key, next_key):
        # Capture the literal between "'key': '" and the "'," that is
        # directly followed by the next dict key. Whitespace between the
        # pieces is matched loosely so re-indenting the sibling script does
        # not break extraction.
        pattern = (
            "'" + re.escape(key) + r"':\s*'(.*?)',\s*'" + re.escape(next_key) + "'"
        )
        found = re.search(pattern, src, re.DOTALL)
        if found is None:
            # Fail loudly rather than posting an arbitrary slice of the file.
            raise ValueError(
                f"could not find {key!r} followed by {next_key!r} in {src_path}"
            )
        return found.group(1)

    vs = get_str('__VIEWSTATE', '__VIEWSTATEGENERATOR')
    ev = get_str('__EVENTVALIDATION', 'TextTime1')
    post_resp = session.post(
        'http://www.kuaixiuge.com/MaintenanceOrder.aspx',
        params=params,
        headers=headers,
        data={
            '__EVENTTARGET': '',
            '__EVENTARGUMENT': '',
            '__LASTFOCUS': '',
            '__VIEWSTATE': vs,
            # NOTE(review): hard-coded generator id; presumably it has to
            # match the page the tokens above were captured from.
            '__VIEWSTATEGENERATOR': 'DECE3335',
            '__EVENTVALIDATION': ev,
            'TextTime1': '2016-04-01',
            'TextTime2': '2026-05-29',
            'DropDownUser': '0',
            'DropDownStatus': '全部',
            'TextGch': '',
            'TextCname': '',
            'txtSenpeopleOrPhone': '',
            'DropDownXlxz': '全部',
            # Submitting the search button triggers the query.
            'Button3': '搜索',
            'AspNetPager1_input': '1',
        },
        verify=False,
        # Without a timeout, requests would wait indefinitely on a stalled server.
        timeout=60,
    )
    # An error page carries no pager, which would otherwise be read as
    # "1 page, 0 records" and produce an empty export without any error.
    post_resp.raise_for_status()
    soup = BeautifulSoup(post_resp.text, 'html.parser')
    vs, vsg, ev, total_pages, total_records = get_viewstate_and_pager(soup)
    return soup, vs, vsg, ev, total_pages, total_records
# --- Script body: page through the order list, then export it to Excel. ---

# Run the initial search to obtain fresh postback tokens and the page count.
soup, viewstate, viewstate_gen, event_validation, total_pages, total_records = init_first_page()
print(f"总页数: {total_pages} (共 {total_records} 条记录)")

all_rows = []
for page in range(1, total_pages + 1):
    print(f"正在获取第 {page} 页...")
    if page > 1:
        # Pause between consecutive page requests.
        time.sleep(1)

    # Pager postback: the target control is AspNetPager1 and the argument is
    # the page number; the search filters repeat the initial query.
    data = {
        '__EVENTTARGET': 'AspNetPager1',
        '__EVENTARGUMENT': str(page),
        '__LASTFOCUS': '',
        '__VIEWSTATE': viewstate,
        '__VIEWSTATEGENERATOR': viewstate_gen,
        '__EVENTVALIDATION': event_validation,
        'TextTime1': '2016-04-01',
        'TextTime2': '2026-05-29',
        'DropDownUser': '0',
        'DropDownStatus': '全部',
        'TextGch': '',
        'TextCname': '',
        'txtSenpeopleOrPhone': '',
        'DropDownXlxz': '全部',
        'AspNetPager1_input': str(page),
    }
    # Keep the 'Morder' cookie's CurrentPage in step with the requested page.
    morder = f'TextTime1=2016-04-01&TextTime2=2026-05-29&sortstring= order by indate desc ,tid desc &CurrentPage={page}'
    session.cookies.set('Morder', morder)
    response = session.post(
        'http://www.kuaixiuge.com/MaintenanceOrder.aspx',
        params=params,
        headers=headers,
        data=data,
        verify=False,
        # Without a timeout, requests would wait indefinitely on a stalled server.
        timeout=60,
    )
    soup = BeautifulSoup(response.text, 'html.parser')

    # Carry the new tokens into the next request only if this response
    # actually contained them. A failed page (error/login HTML) yields empty
    # tokens, and adopting those would make every later page fail as well.
    new_viewstate, new_viewstate_gen, new_event_validation, _, _ = get_viewstate_and_pager(soup)
    if new_viewstate:
        viewstate = new_viewstate
        viewstate_gen = new_viewstate_gen
        event_validation = new_event_validation

    gridview = soup.find('table', {'id': 'GridView1'})
    if gridview:
        # Skip the first <tr> (header row).
        rows = gridview.find_all('tr')[1:]
        print(f"{page} 页提取到 {len(rows)} 行数据")
        for row in rows:
            gch, row_data = extract_gch_from_row(row)
            # Rows without a work-order number are dropped.
            if gch:
                # Keep exactly as many cells as there are export columns.
                all_rows.append(row_data[:len(COLUMNS)])
    else:
        print(f" ⚠ 第 {page} 页未找到 GridView1 表格!")

print(f"\n共找到 {len(all_rows)} 条工单")

# --- Excel export ---
os.makedirs(OUTPUT_DIR, exist_ok=True)
output_path = os.path.join(OUTPUT_DIR, '快修哥历史维修记录_主列表.xlsx')
wb = Workbook()
ws = wb.active
ws.title = '历史维修记录'
ws.append(COLUMNS)
for row in all_rows:
    ws.append(row)

# Enable the filter drop-downs over the whole used range.
ws.auto_filter.ref = ws.dimensions

# Size each column to its longest value plus padding, capped at 40.
for col in ws.columns:
    col_letter = col[0].column_letter
    max_len = max((len(str(cell.value)) for cell in col if cell.value), default=0)
    ws.column_dimensions[col_letter].width = min(max_len + 4, 40)

# Format column A (work-order numbers) as text ('@').
for cell in ws['A']:
    cell.number_format = '@'

wb.save(output_path)
print(f"数据已保存到 {output_path}")