Files
F6--/张阳脚本/竞品系统数据导出/debug_paging20.py
T
2026-06-02 15:08:26 +08:00

115 lines
4.4 KiB
Python

"""检查翻页过程中VIEWSTATE是否断链"""
import requests, urllib3
from bs4 import BeautifulSoup
# Every request below passes verify=False, so silence urllib3's warnings.
urllib3.disable_warnings()

BASE = "http://139.129.162.9"

# A single Session is reused for all requests so that cookies returned by the
# login response are sent along with the later page requests.
session = requests.Session()
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Content-Type": "application/x-www-form-urlencoded",
})

# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a config file kept out of version control.
r = session.post(f"{BASE}/login.aspx", data={
    "ucode": "9864", "uname": "admin", "upwd": "@19790825%ZFq",
    "windowSize": "1614", "DeviceVersion": "", "ipAdress": "", "Location": "",
}, timeout=20, verify=False)

# Surface an HTTP-level login failure right away; otherwise it only shows up
# later as confusing paging errors. The check is deliberately non-fatal, and a
# 2xx response does not by itself prove that the credentials were accepted.
if not r.ok:
    print(f"Login request failed: HTTP {r.status_code}")

# List page that the search and paging postbacks are sent to.
page_url = f"{BASE}/carinfo.aspx?clientWidth=1614"
def get_val(soup, name):
    """Return the ``value`` attribute of the ``<input>`` identified by *name*.

    The element is looked up by ``id`` first and by its ``name`` attribute
    second. An empty string is returned when no such element exists or when
    the element has no ``value`` attribute.
    """
    for attr in ("id", "name"):
        element = soup.find("input", {attr: name})
        if element:
            return element.get("value", "")
    return ""
# Hidden form inputs that are included in every postback below; each one is
# submitted as an empty string.
hidden_fields = dict.fromkeys(
    (
        "HiddenCountyear", "HiddenCount120", "HiddenCount240",
        "HiddenTtime", "HiddenOpenId", "HiddenBdtime", "HiddenVipId",
        "HiddenLasttime", "HiddenHfstate", "HiddenLastgch", "HiddenJyId",
        "HiddenHkscode", "HiddenBrand", "HiddenBrandname", "HiddenChassisnumber",
        "HiddenEngineDesc", "HiddenEngineStyle", "HiddenFamilyname",
        "HiddenGearbox", "HiddenGearboxName", "HiddenLyid",
        "HiddenProductyear", "HiddenVehiclename", "HiddenVehiclesale",
        "HiddenVin", "HiddenYearpattern", "HiddenDrivetype",
        "HiddenModelbrandlogourl", "HiddenModelbrandmfr", "HiddenModelid",
        "HiddenFueltype", "HiddenKilowattpower", "HiddenListedyear",
        "HiddenListedmonth", "HiddenStopyear", "HiddenBodynumdoors",
        "HiddenTransmissiondescription", "HiddenMakename",
        "HiddenModelbrandid", "HiddenMakeid", "HiddenIschoosevehicletype",
    ),
    "",
)
# Load the list page once and read the ASP.NET state fields from it; they
# have to be echoed back in the search postback.
r0 = session.get(page_url, timeout=20, verify=False)
soup0 = BeautifulSoup(r0.text, "html.parser")
vs, vs_gen, ev = (
    get_val(soup0, field)
    for field in ("__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION")
)
# Use a fixed generator id when the page does not supply one.
vs_gen = vs_gen or "B80C0CC7"

# Search: post the form via Button3 with TextTime1 set to 2015-01-01 and all
# other filters blank. The dict is assembled in stages but keeps the same
# field order: state fields, filters, then the hidden inputs.
data = {
    "__EVENTTARGET": "",
    "__EVENTARGUMENT": "",
    "__VIEWSTATE": vs,
    "__VIEWSTATEGENERATOR": vs_gen,
    "__VIEWSTATEENCRYPTED": "",
    "__EVENTVALIDATION": ev,
}
data.update({
    "TextTime1": "2015-01-01",
    "TextTime2": "",
    "TextCname": "",
    "txtVin": "",
    "Txtcartype": "",
    "txtEngineno": "",
    "Button3": "搜索",
    "AspNetPager1_input": "1",
})
data.update(hidden_fields)

r1 = session.post(
    page_url,
    data=data,
    headers={"Referer": page_url, "Origin": BASE},
    timeout=30,
    verify=False,
)
# `soup` holds the most recently parsed page; the paging loop reads its state
# fields and replaces it after each successful postback.
soup = BeautifulSoup(r1.text, "html.parser")
# Walk pages 1-20 and, for each page, print the first row's ID and plate plus
# the length of the page's __VIEWSTATE.
errors = 0      # pages that could not be fetched or contained no data rows
prev_id = None  # first-row ID of the previous page that had rows
dups = 0        # pages whose first-row ID equals the previous page's

# The headers are identical for every paging postback, so build them once.
paging_headers = {
    "Referer": page_url,
    "Origin": BASE,
    "Content-Type": "application/x-www-form-urlencoded",
}

for pg in range(1, 21):
    # Page 1 is the search response already parsed into `soup`. Every later
    # page is requested by posting back the state fields of the page that is
    # currently held in `soup`.
    if pg > 1:
        cur_vs = get_val(soup, "__VIEWSTATE")
        cur_ev = get_val(soup, "__EVENTVALIDATION")
        cur_vs_gen = get_val(soup, "__VIEWSTATEGENERATOR") or vs_gen
        if not cur_vs:
            # Without a VIEWSTATE there is nothing to post back; `soup` is
            # left unchanged, so later pages will hit this branch as well.
            print(f"Page {pg}: VIEWSTATE empty!")
            errors += 1
            continue
        pdata = {
            "__EVENTTARGET": "AspNetPager1",
            "__EVENTARGUMENT": str(pg),
            "__VIEWSTATE": cur_vs,
            "__VIEWSTATEGENERATOR": cur_vs_gen,
            "__VIEWSTATEENCRYPTED": "",
            "__EVENTVALIDATION": cur_ev,
            "TextTime1": "2015-01-01",
            "TextTime2": "", "TextCname": "", "txtVin": "",
            "Txtcartype": "", "txtEngineno": "",
            "AspNetPager1_input": str(pg - 1),
            **hidden_fields,
        }
        try:
            html = session.post(
                page_url,
                data=pdata,
                headers=paging_headers,
                timeout=30,
                verify=False,
            ).text
        except requests.RequestException as e:
            # Only transport-level failures are treated as a failed page;
            # anything else is a programming error and should propagate.
            print(f"Page {pg}: Request failed: {e}")
            errors += 1
            continue
        soup = BeautifulSoup(html, "html.parser")

    table = soup.find("table", class_="table-theme1")
    # Keep only rows that contain at least one <td> cell.
    rows = [tr for tr in table.find_all("tr") if tr.find("td")] if table else []
    if not rows:
        print(f"Page {pg:3d}: No rows found!")
        errors += 1
        continue

    cells = [c.text.strip() for c in rows[0].find_all("td")]
    cid = cells[0] if cells else "?"
    plate = cells[1] if len(cells) > 1 else "?"
    vs_len = len(get_val(soup, "__VIEWSTATE") or "")
    # The same first-row ID on two consecutive pages means paging did not advance.
    is_dup = cid == prev_id
    dup_mark = " <-- DUP!" if is_dup else ""
    if is_dup:
        dups += 1
    print(f"Page {pg:3d}: ID={cid:10s} plate={plate:12s} VS_len={vs_len:6d}{dup_mark}")
    prev_id = cid

print(f"\nTotal errors: {errors}, duplicates: {dups}")