115 lines
4.4 KiB
Python
115 lines
4.4 KiB
Python
"""检查翻页过程中VIEWSTATE是否断链"""
|
|
import os

import requests
import urllib3
from bs4 import BeautifulSoup
|
|
# Suppress urllib3's warnings; the requests in this script pass verify=False.
urllib3.disable_warnings()

# Root URL of the target site; every request path below is appended to it.
BASE = "http://139.129.162.9"

# One shared session, so state such as cookies carries over between requests.
session = requests.Session()
session.headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
session.headers["Content-Type"] = "application/x-www-form-urlencoded"
|
|
|
|
# Log in once through the shared session before touching the listing page.
# NOTE(review): the credentials used to be hard-coded only. They can now be
# overridden through environment variables; the literals remain solely as
# backward-compatible defaults and should be rotated and removed from source.
r = session.post(
    f"{BASE}/login.aspx",
    data={
        "ucode": os.environ.get("CARINFO_UCODE", "9864"),
        "uname": os.environ.get("CARINFO_UNAME", "admin"),
        "upwd": os.environ.get("CARINFO_UPWD", "@19790825%ZFq"),
        "windowSize": "1614",
        "DeviceVersion": "",
        "ipAdress": "",
        "Location": "",
    },
    timeout=20,
    verify=False,
)
# Stop on an HTTP-level failure (4xx/5xx) instead of continuing with a session
# that may not be authenticated; the response was previously ignored.
r.raise_for_status()

# Listing page that the GET and every form POST below are sent to.
page_url = f"{BASE}/carinfo.aspx?clientWidth=1614"
|
|
|
|
def get_val(soup, name):
    """Return the ``value`` of the ``<input>`` identified by *name*.

    The input is looked up by its ``id`` attribute first and by its ``name``
    attribute second. An empty string is returned when no such input is found
    or when the matched input has no ``value`` attribute.
    """
    for attr in ("id", "name"):
        field = soup.find("input", {attr: name})
        if field:
            return field.get("value", "")
    return ""
|
|
|
|
# Extra form fields that are merged, each with an empty value, into every POST
# body built later in this script (presumably the page's hidden inputs; verify
# against the page markup).
hidden_fields = dict.fromkeys(
    (
        "HiddenCountyear",
        "HiddenCount120",
        "HiddenCount240",
        "HiddenTtime",
        "HiddenOpenId",
        "HiddenBdtime",
        "HiddenVipId",
        "HiddenLasttime",
        "HiddenHfstate",
        "HiddenLastgch",
        "HiddenJyId",
        "HiddenHkscode",
        "HiddenBrand",
        "HiddenBrandname",
        "HiddenChassisnumber",
        "HiddenEngineDesc",
        "HiddenEngineStyle",
        "HiddenFamilyname",
        "HiddenGearbox",
        "HiddenGearboxName",
        "HiddenLyid",
        "HiddenProductyear",
        "HiddenVehiclename",
        "HiddenVehiclesale",
        "HiddenVin",
        "HiddenYearpattern",
        "HiddenDrivetype",
        "HiddenModelbrandlogourl",
        "HiddenModelbrandmfr",
        "HiddenModelid",
        "HiddenFueltype",
        "HiddenKilowattpower",
        "HiddenListedyear",
        "HiddenListedmonth",
        "HiddenStopyear",
        "HiddenBodynumdoors",
        "HiddenTransmissiondescription",
        "HiddenMakename",
        "HiddenModelbrandid",
        "HiddenMakeid",
        "HiddenIschoosevehicletype",
    ),
    "",
)
|
|
|
|
# Load the listing page once to read the initial ASP.NET state fields from it.
r0 = session.get(page_url, timeout=20, verify=False)
soup0 = BeautifulSoup(r0.text, "html.parser")
vs = get_val(soup0, "__VIEWSTATE")
ev = get_val(soup0, "__EVENTVALIDATION")
# Fall back to a fixed generator value when the page does not supply one.
vs_gen = get_val(soup0, "__VIEWSTATEGENERATOR")
if not vs_gen:
    vs_gen = "B80C0CC7"

# Search: submit the form via Button3 with a start date and every other filter blank.
data = {
    "__EVENTTARGET": "",
    "__EVENTARGUMENT": "",
    "__VIEWSTATE": vs,
    "__VIEWSTATEGENERATOR": vs_gen,
    "__VIEWSTATEENCRYPTED": "",
    "__EVENTVALIDATION": ev,
    "TextTime1": "2015-01-01",
    "TextTime2": "",
    "TextCname": "",
    "txtVin": "",
    "Txtcartype": "",
    "txtEngineno": "",
    "Button3": "搜索",
    "AspNetPager1_input": "1",
}
data.update(hidden_fields)

r1 = session.post(
    page_url,
    data=data,
    headers={"Referer": page_url, "Origin": BASE},
    timeout=30,
    verify=False,
)
# `soup` always holds the most recently parsed result page; the paging loop
# below reads its state fields and replaces it after each successful request.
soup = BeautifulSoup(r1.text, "html.parser")
|
|
|
|
# Walk 20 result pages. For each page, print the first row's ID, its second
# cell and the length of the page's __VIEWSTATE, counting failed pages and
# consecutive pages whose first row carries the same ID.
errors = 0
prev_id = None
dups = 0
for pg in range(1, 21):
    if pg > 1:
        # Page 1 is the search result already held in `soup`; later pages are
        # requested through the pager using the state of the last parsed page.
        cur_vs = get_val(soup, "__VIEWSTATE")
        cur_ev = get_val(soup, "__EVENTVALIDATION")
        cur_vs_gen = get_val(soup, "__VIEWSTATEGENERATOR") or vs_gen
        if not cur_vs:
            print(f"Page {pg}: VIEWSTATE empty!")
            errors += 1
            continue
        pdata = {
            "__EVENTTARGET": "AspNetPager1",
            "__EVENTARGUMENT": str(pg),
            "__VIEWSTATE": cur_vs,
            "__VIEWSTATEGENERATOR": cur_vs_gen,
            "__VIEWSTATEENCRYPTED": "",
            "__EVENTVALIDATION": cur_ev,
            "TextTime1": "2015-01-01",
            "TextTime2": "",
            "TextCname": "",
            "txtVin": "",
            "Txtcartype": "",
            "txtEngineno": "",
            "AspNetPager1_input": str(pg - 1),
            **hidden_fields,
        }
        try:
            # Content-Type is already set on the session headers, so it is not
            # repeated per request.
            html = session.post(
                page_url,
                data=pdata,
                headers={"Referer": page_url, "Origin": BASE},
                timeout=30,
                verify=False,
            ).text
        except requests.RequestException as e:
            # Only transport-level failures are counted as page errors;
            # anything else is a programming error and should surface.
            print(f"Page {pg}: Request failed: {e}")
            errors += 1
            continue
        soup = BeautifulSoup(html, "html.parser")

    table = soup.find("table", class_="table-theme1")
    # Keep only rows that contain at least one <td>.
    rows = [tr for tr in table.find_all("tr") if tr.find("td")] if table else []
    if not rows:
        print(f"Page {pg:3d}: No rows found!")
        errors += 1
        continue

    cells = [c.text.strip() for c in rows[0].find_all("td")]
    cid = cells[0] if cells else "?"
    plate = cells[1] if len(cells) > 1 else "?"
    vs_len = len(get_val(soup, "__VIEWSTATE") or "")
    # The same first ID as on the previously reported page is flagged as a duplicate.
    is_dup = cid == prev_id
    if is_dup:
        dups += 1
    dup_mark = " <-- DUP!" if is_dup else ""
    print(f"Page {pg:3d}: ID={cid:10s} plate={plate:12s} VS_len={vs_len:6d}{dup_mark}")
    prev_id = cid

print(f"\nTotal errors: {errors}, duplicates: {dups}")
|