"""Scrape the customer package-card list from the CRM and export it to Excel."""

import time

import pandas as pd

# Output column names for table cells 2..11 of a data row, in cell order.
_CELL_COLUMNS = (
    "车牌",
    "姓名",
    "卡号",
    "卡类型",
    "套餐项目",
    "总次数",
    "消费",
    "剩余",
    "剩余成本",
    "到期日期",
)

# A data row must have at least this many <td> cells (indices 0..11 are read).
_MIN_CELLS = 12


def _cell_text(element):
    """Return the stripped text of *element*, or "" when it has no text."""
    # text_content() is Optional in the Playwright API; treat None as empty.
    return (element.text_content() or "").strip()


def extract_table_data(page):
    """Extract the data rows of the ``table.dg`` table on the current page.

    Args:
        page: Object exposing ``query_selector_all`` (a Playwright page).

    Returns:
        A list of dicts, one per data row, keyed by "ID" followed by the
        names in ``_CELL_COLUMNS``. Header rows, the "合计" (totals) row and
        rows with fewer than 12 cells are skipped.
    """
    records = []
    for row in page.query_selector_all("table.dg tbody tr"):
        # Skip the header row and the totals row.
        if row.query_selector("th") or "合计" in (row.text_content() or ""):
            continue

        cells = row.query_selector_all("td")
        if len(cells) < _MIN_CELLS:
            continue  # not a data row

        # The record id is carried in the title attribute of an <input>;
        # fall back to "" when the row has no such input instead of crashing.
        id_input = row.query_selector("input[title]")
        record_id = ""
        if id_input is not None:
            record_id = id_input.get_attribute("title") or ""

        record = {"ID": record_id}
        for column, cell in zip(_CELL_COLUMNS, cells[2:_MIN_CELLS]):
            record[column] = _cell_text(cell)
        records.append(record)
    return records


def main(start_page=1, end_page=5):
    """Scrape pages ``start_page``..``end_page`` (inclusive) and save to Excel.

    Opens the package list in a visible Chromium window, pages through it via
    the ``__doPostBack('AspNetPager', n)`` call, collects every page's rows
    with ``extract_table_data`` and writes them to an .xlsx file.

    Args:
        start_page: First page to extract (inclusive).
        end_page: Last page to extract (inclusive).
    """
    # Imported here rather than at module level so the parsing helper above
    # can be imported (e.g. for testing) without Playwright installed.
    from playwright.sync_api import sync_playwright

    all_data = []
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False, slow_mo=300)
        try:
            context = browser.new_context()
            # NOTE(security): live session cookies are hard-coded below.
            # Update them to match your own session, and consider loading
            # them from an untracked file or environment variables instead
            # of keeping them in source control.
            context.add_cookies([
                {
                    "name": "ASP.NET_SessionId",
                    "value": "knhs0hxsbmolk20gidmlis3j",
                    "domain": "crm.zhongtukj.com",
                    "path": "/",
                },
                {
                    "name": "ztrjnew@4db97b96-12af-45b0-b232-fd1e9b7a672e@",
                    "value": "PassWord=wZn2IuvdWeE=&RememberPwd=RXv90LpPskw=&UserId=nzK31b3ZYVQ=&CSID=VjfeyHPOjnU=&UserName=fDfTOArNJXHmbGEeaShOsw==&SID=nkNRF6dD83c=&RoleId=1X5bqQAfxQY=&GroupId=KUxCDdt69t4=",
                    "domain": "crm.zhongtukj.com",
                    "path": "/",
                },
            ])

            page = context.new_page()
            print(f"正在加载第 {start_page} 页(初始页)...")
            page.goto("http://crm.zhongtukj.com/Boss/Customer/CustomerPackageList.aspx")
            page.wait_for_load_state("networkidle")
            time.sleep(2)

            for current_page in range(start_page, end_page + 1):
                # Page 1 is what the initial load shows; any other page is
                # reached through the pager's postback.
                if current_page != 1:
                    print(f"正在跳转到第 {current_page} 页...")
                    page.evaluate(f"() => __doPostBack('AspNetPager', '{current_page}')")
                    page.wait_for_load_state("networkidle")
                    time.sleep(2)  # extra settle time after the postback

                data = extract_table_data(page)
                all_data.extend(data)
                print(f" 第 {current_page} 页提取 {len(data)} 条记录")
        finally:
            # Close the browser even if scraping fails part-way through.
            browser.close()

    # Save the result.
    if all_data:
        df = pd.DataFrame(all_data)
        # Raw f-string: the Windows path contains backslashes that would
        # otherwise be parsed as (invalid) escape sequences.
        filename = rf"D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\文件输出\套餐卡_第{start_page}至{end_page}页.xlsx"
        df.to_excel(filename, index=False)
        print(f"\n✅ 共提取 {len(all_data)} 条记录,已保存到 '{filename}'")
    else:
        print("⚠️ 未提取到任何数据")


if __name__ == "__main__":
    main()