众途脚本爬取
会员卡不限制车辆使用
This commit is contained in:
@@ -12,7 +12,7 @@ from tqdm import tqdm
|
|||||||
|
|
||||||
# 配置 WebDriver
|
# 配置 WebDriver
|
||||||
|
|
||||||
chrom_dirverpath = "D:\ProgramTools\chromedriver-win64\chromedriver.exe"
|
chrom_dirverpath = r"D:\Program Files\chromedriver-win64\chromedriver.exe"
|
||||||
# chrome_options = Options()
|
# chrome_options = Options()
|
||||||
# chrome_options.add_argument("--headless")
|
# chrome_options.add_argument("--headless")
|
||||||
service = Service(executable_path=f'{chrom_dirverpath}')
|
service = Service(executable_path=f'{chrom_dirverpath}')
|
||||||
@@ -47,11 +47,12 @@ def open_page(driver, carId):
|
|||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
|
|
||||||
name = "18919515707"
|
name = "15726209669"
|
||||||
password = 'Gtyc123456'
|
password = 'Zhou123'
|
||||||
path = r"C:\Users\Administrator.DESKTOP-7IC2USJ\Downloads\会员卡信息 (5).xlsx"
|
path = r"C:\Users\hp_z66\Desktop\钉钉文件\一号车库需修改不限车辆使用套餐卡(宗川涵).xlsx"
|
||||||
df = pd.read_excel(path, engine='openpyxl', sheet_name='会员卡详情', dtype='string')
|
df = pd.read_excel(path, engine='openpyxl', sheet_name=1, dtype='string')
|
||||||
print(df)
|
print(df)
|
||||||
|
|
||||||
if not df.empty:
|
if not df.empty:
|
||||||
carIds = df["卡实体id"]
|
carIds = df["卡实体id"]
|
||||||
first_time = datetime.now()
|
first_time = datetime.now()
|
||||||
@@ -71,13 +72,9 @@ if not df.empty:
|
|||||||
WebDriverWait(driver, 3).until(
|
WebDriverWait(driver, 3).until(
|
||||||
EC.presence_of_element_located((By.XPATH, '//*[@id="subMain"]/div/div[3]/div[1]/div[1]/div/span[2]'))).click()
|
EC.presence_of_element_located((By.XPATH, '//*[@id="subMain"]/div/div[3]/div[1]/div[1]/div/span[2]'))).click()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
for carId in tqdm(carIds):
|
for carId in tqdm(carIds):
|
||||||
try:
|
try:
|
||||||
open_page(driver, carId)
|
open_page(driver, carId)
|
||||||
time.sleep(1)
|
|
||||||
except:
|
except:
|
||||||
print(f"{carId},无法打印")
|
print(f"{carId},无法打印")
|
||||||
driver.close()
|
driver.close()
|
||||||
|
|||||||
@@ -0,0 +1,126 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"id": "initial_id",
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": true,
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2026-01-30T09:27:59.557746200Z",
|
||||||
|
"start_time": "2026-01-30T09:27:59.437881100Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"# 假设你的 DataFrame 名为 df,包含以下列:\n",
|
||||||
|
"# 'material_code': 材料编码\n",
|
||||||
|
"# 'in_qty': 入库数量\n",
|
||||||
|
"# 'in_cost': 入库成本(总金额,不是单价)\n",
|
||||||
|
"df = pd.read_excel(fr\"C:\\Users\\hp_z66\\OneDrive\\Desktop\\材料成本明细表核对.xlsx\",sheet_name='Sheet1')\n",
|
||||||
|
"# 1. 按材料编码分组,计算总入库数量和总入库成本\n",
|
||||||
|
"summary = df.groupby('材料编码').agg(\n",
|
||||||
|
" total_in_qty=('数量', 'sum'),\n",
|
||||||
|
" total_in_cost=(' 除税成本', 'sum')\n",
|
||||||
|
").reset_index()\n",
|
||||||
|
"\n",
|
||||||
|
"print(summary)\n",
|
||||||
|
"summary.to_csv(fr\"C:\\Users\\hp_z66\\OneDrive\\Desktop\\材料成本明细表核对sheet1.csv\")"
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" 材料编码 total_in_qty total_in_cost\n",
|
||||||
|
"0 CL0003899 8 184.07\n",
|
||||||
|
"1 CL0004029 300 92.92\n",
|
||||||
|
"2 CL0004193 100 176.99\n",
|
||||||
|
"3 CL0005552 -1 -4250.04\n",
|
||||||
|
"4 CL0005554 2 2268.78\n",
|
||||||
|
".. ... ... ...\n",
|
||||||
|
"461 CL0007466 1 800.00\n",
|
||||||
|
"462 CL0007467 1 800.00\n",
|
||||||
|
"463 CL0007468 1 500.00\n",
|
||||||
|
"464 CL0007469 1 500.00\n",
|
||||||
|
"465 CL0007470 1 500.00\n",
|
||||||
|
"\n",
|
||||||
|
"[466 rows x 3 columns]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"execution_count": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2026-01-30T09:28:27.291489100Z",
|
||||||
|
"start_time": "2026-01-30T09:28:27.188359600Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"# 假设你的 DataFrame 名为 df,包含以下列:\n",
|
||||||
|
"# 'material_code': 材料编码\n",
|
||||||
|
"# 'in_qty': 入库数量\n",
|
||||||
|
"# 'in_cost': 入库成本(总金额,不是单价)\n",
|
||||||
|
"df = pd.read_excel(fr\"C:\\Users\\hp_z66\\OneDrive\\Desktop\\材料成本明细表核对.xlsx\",sheet_name='Sheet2')\n",
|
||||||
|
"# 1. 按材料编码分组,计算总入库数量和总入库成本\n",
|
||||||
|
"summary = df.groupby('材料编码').agg(\n",
|
||||||
|
" total_in_qty=('采购入库数量', 'sum'),\n",
|
||||||
|
" total_in_cost=('采购入库成本(除税)', 'sum')\n",
|
||||||
|
").reset_index()\n",
|
||||||
|
"\n",
|
||||||
|
"print(summary)\n",
|
||||||
|
"summary.to_csv(fr\"C:\\Users\\hp_z66\\OneDrive\\Desktop\\材料成本明细表核对sheet2.csv\")"
|
||||||
|
],
|
||||||
|
"id": "fcb775d7ed25bd85",
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
" 材料编码 total_in_qty total_in_cost\n",
|
||||||
|
"0 CL0003899 8 184.070800\n",
|
||||||
|
"1 CL0004029 300 92.920500\n",
|
||||||
|
"2 CL0004193 100 176.991200\n",
|
||||||
|
"3 CL0005552 -1 -4250.044248\n",
|
||||||
|
"4 CL0005554 2 2268.778762\n",
|
||||||
|
".. ... ... ...\n",
|
||||||
|
"459 CL0007466 1 800.000000\n",
|
||||||
|
"460 CL0007467 1 800.000000\n",
|
||||||
|
"461 CL0007468 1 500.000000\n",
|
||||||
|
"462 CL0007469 1 500.000000\n",
|
||||||
|
"463 CL0007470 1 500.000000\n",
|
||||||
|
"\n",
|
||||||
|
"[464 rows x 3 columns]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"execution_count": 5
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 2
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython2",
|
||||||
|
"version": "2.7.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
@@ -0,0 +1,165 @@
|
|||||||
|
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
|
||||||
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
from tqdm import tqdm
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# Logging setup: pages that are skipped or whose extraction fails are recorded
# in this file. logging.basicConfig() opens the log file immediately, so the
# output directory is created first; otherwise a missing directory aborts the
# script before any scraping starts.
import os

_LOG_FILE = r"D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\文件输出\skipped_pages.log"
_log_dir = os.path.dirname(_LOG_FILE)
if _log_dir:
    os.makedirs(_log_dir, exist_ok=True)

logging.basicConfig(
    filename=_LOG_FILE,
    level=logging.WARNING,
    format="%(asctime)s - %(levelname)s - %(message)s",
    encoding='utf-8'
)
|
||||||
|
|
||||||
|
def extract_table_data(page):
    """Extract the data rows of the ``table.dg`` table on the current page.

    Rows are skipped when they contain a ``th`` element, when their text
    contains "合计" (the totals row), or when they have fewer than 12 ``td``
    cells.

    Args:
        page: Object exposing ``query_selector_all``; in this script it is the
            Playwright page that currently shows the list.

    Returns:
        A list with one dict per data row. ``"ID"`` holds the ``title``
        attribute of the row's ``input[title]`` element (empty string when the
        element or the attribute is missing); the remaining keys hold the
        stripped text of cells 2..11, in column order.
    """
    # Column names for cells[2] .. cells[11], in table order.
    columns = (
        "车牌", "姓名", "卡号", "卡类型", "套餐项目",
        "总次数", "消费", "剩余", "剩余成本", "到期日期",
    )
    first_cell = 2

    records = []
    for row in page.query_selector_all("table.dg tbody tr"):
        # text_content() may return None; treat that as empty text.
        if row.query_selector("th") or "合计" in (row.text_content() or ""):
            continue
        cells = row.query_selector_all("td")
        if len(cells) < 12:
            continue

        # A row without an input[title] element simply has no ID; check for
        # None explicitly instead of hiding every possible error behind a
        # broad ``except Exception``.
        id_input = row.query_selector("input[title]")
        record_id = ""
        if id_input is not None:
            record_id = id_input.get_attribute("title") or ""

        record = {"ID": record_id}
        for offset, column in enumerate(columns):
            record[column] = (cells[first_cell + offset].text_content() or "").strip()
        records.append(record)
    return records
|
||||||
|
|
||||||
|
|
||||||
|
# ====== Cookie configuration ======
# SECURITY NOTE(review): this is a session cookie (including what looks like
# encrypted login fields) hard-coded in source control. It should be rotated
# and loaded from a local, untracked location instead; the value is kept
# unchanged here so the script keeps working.
cookie_str = "td_cookie=628629794; td_cookie=627897944; ASP.NET_SessionId=54barjh2gsquceps2flqvlwy; ztrjnew@4db97b96-12af-45b0-b232-fd1e9b7a672e@=PassWord=wZn2IuvdWeE=&RememberPwd=RXv90LpPskw=&UserId=nzK31b3ZYVQ=&CSID=VjfeyHPOjnU=&UserName=fDfTOArNJXHmbGEeaShOsw==&SID=nkNRF6dD83c=&RoleId=1X5bqQAfxQY=&GroupId=KUxCDdt69t4="


def _parse_cookie_header(header):
    """Split a ``name=value; name=value`` cookie string into a dict.

    Only the first ``=`` of each fragment separates name from value, so values
    may themselves contain ``=``. Fragments without ``=`` or with an empty
    name are ignored. When a name occurs more than once the last value wins.
    """
    parsed = {}
    for fragment in header.split(";"):
        cookie_name, separator, cookie_value = fragment.strip().partition("=")
        if separator and cookie_name:
            parsed[cookie_name] = cookie_value
    return parsed


cookies_dict = _parse_cookie_header(cookie_str)

domain = "crm.zhongtukj.com"
path = "/"
# Cookie dicts in the shape passed to ``context.add_cookies`` in ``main``.
new_cookies = [
    {"name": cookie_name, "value": cookie_value, "domain": domain, "path": path}
    for cookie_name, cookie_value in cookies_dict.items()
]
|
||||||
|
|
||||||
|
|
||||||
|
def navigate_to_page(page, target_page: int, max_retries: int = 3,
                     timeout_ms: int = 50000, retry_delay: float = 2):
    """Bring ``page`` to list page ``target_page``, retrying on failure.

    Page 1 is assumed to be already loaded by the caller, so only the table
    rows are awaited. Any other page is requested through the pager's
    ``__doPostBack('AspNetPager', <n>)`` call, after which the function waits
    for network idle and for the table rows.

    Args:
        page: Playwright page showing the list.
        target_page: 1-based page number to display.
        max_retries: Maximum number of attempts; values below 1 make the
            function return ``False`` without trying.
        timeout_ms: Timeout in milliseconds applied to each wait.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        ``True`` once the table rows are present, ``False`` when every attempt
        failed (the page is then written to the log as skipped).
    """
    for attempt in range(1, max_retries + 1):
        try:
            if target_page != 1:
                # NOTE(review): if the postback navigation has not started
                # when evaluate() returns, the waits below could be satisfied
                # by the previous page's content - confirm against the site.
                page.evaluate(f"() => __doPostBack('AspNetPager', '{target_page}')")
                page.wait_for_load_state("networkidle", timeout=timeout_ms)
            page.wait_for_selector("table.dg tbody tr", timeout=timeout_ms)
            return True
        except Exception as e:
            timed_out = isinstance(e, PlaywrightTimeoutError)
            if timed_out:
                print(f" ⚠️ 第 {target_page} 页加载超时(第 {attempt}/{max_retries} 次尝试): {str(e)[:100]}...")
            else:
                print(f" ❌ 第 {target_page} 页发生未知错误(第 {attempt}/{max_retries} 次): {e}")

            if attempt < max_retries:
                time.sleep(retry_delay)
                continue

            # Last attempt failed: record the page so it can be re-run later.
            if timed_out:
                logging.warning("跳过页面 %s: 加载超时", target_page)
            else:
                logging.warning("跳过页面 %s: 未知错误 - %s", target_page, e)
            return False
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def main(start_page: int = 1, end_page: int = 1532,
         output_dir: str = r"D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\文件输出"):
    """Scrape the package-card list page by page and save it to Excel.

    Opens the list in a visible Chromium window using the module-level
    ``new_cookies``, selects option ``"0"`` of ``#Drop_Group`` (all stores),
    then visits pages ``start_page``..``end_page`` (both inclusive). Pages that
    cannot be loaded or parsed are skipped and logged. Pressing Ctrl-C during
    the page loop stops scraping but still saves the rows collected so far.

    Args:
        start_page: First page number to scrape.
        end_page: Last page number to scrape.
        output_dir: Directory the resulting ``.xlsx`` file is written to.
    """
    all_data = []

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False, slow_mo=300)
        try:
            context = browser.new_context()
            context.add_cookies(new_cookies)
            page = context.new_page()
            page.set_default_timeout(50000)  # default timeout: 50 s (value is in ms)

            # Initial load of the first page
            print("正在加载初始页面...")
            try:
                page.goto("http://crm.zhongtukj.com/Boss/Customer/CustomerPackageList.aspx", timeout=50000)
                page.wait_for_load_state("networkidle")
                # Select "all stores"
                page.select_option("#Drop_Group", value="0")
                page.wait_for_load_state("networkidle")
            except Exception as e:
                print(f"❌ 初始页面加载失败: {e}")
                return

            # Main loop: one list page per iteration
            try:
                for current_page in tqdm(range(start_page, end_page + 1), desc="处理页面"):
                    if not navigate_to_page(page, current_page, max_retries=3):
                        continue  # page was logged as skipped

                    try:
                        all_data.extend(extract_table_data(page))
                    except Exception as e:
                        print(f" ❌ 第 {current_page} 页数据提取失败: {e}")
                        logging.warning(f"第 {current_page} 页数据提取异常: {e}")
            except KeyboardInterrupt:
                # A full run takes a long time; keep what was already scraped.
                print("\n⏹️ 已手动中断,保存已提取的数据...")
        finally:
            # Close the browser on every exit path, including errors.
            browser.close()

    # Save the result
    if all_data:
        df = pd.DataFrame(all_data)
        filename = rf"{output_dir}\套餐卡_第{start_page}至{end_page}页.xlsx"
        df.to_excel(filename, index=False)
        print(f"\n✅ 共提取 {len(all_data)} 条记录,已保存到 '{filename}'")
        print(f"⚠️ 跳过的页面已记录到 skipped_pages.log")
    else:
        print("⚠️ 未提取到任何数据")


if __name__ == "__main__":
    main()
|
||||||
@@ -12,8 +12,8 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true,
|
"collapsed": true,
|
||||||
"ExecuteTime": {
|
"ExecuteTime": {
|
||||||
"end_time": "2026-01-27T02:00:26.466693100Z",
|
"end_time": "2026-01-31T11:05:57.587138200Z",
|
||||||
"start_time": "2026-01-27T02:00:16.549976600Z"
|
"start_time": "2026-01-31T11:04:17.031226100Z"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -21,28 +21,38 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"import requests\n",
|
"import requests\n",
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
"cookies = {\n",
|
"cookies = {\n",
|
||||||
" 'td_cookie': '628168942',\n",
|
" 'td_cookie': '628629794',\n",
|
||||||
" 'td_cookie': '627897944',\n",
|
" 'td_cookie': '627897944',\n",
|
||||||
" 'ASP.NET_SessionId': 'q5qzer2z51b4uzsxrhterzop',\n",
|
" 'ASP.NET_SessionId': '54barjh2gsquceps2flqvlwy',\n",
|
||||||
" 'ztrjnew@4db97b96-12af-45b0-b232-fd1e9b7a672e@': 'PassWord=wZn2IuvdWeE=&RememberPwd=RXv90LpPskw=&UserId=nzK31b3ZYVQ=&CSID=VjfeyHPOjnU=&UserName=fDfTOArNJXHmbGEeaShOsw==&SID=nkNRF6dD83c=&RoleId=1X5bqQAfxQY=&GroupId=KUxCDdt69t4=',\n",
|
" 'ztrjnew@4db97b96-12af-45b0-b232-fd1e9b7a672e@': 'PassWord=wZn2IuvdWeE=&RememberPwd=RXv90LpPskw=&UserId=nzK31b3ZYVQ=&CSID=VjfeyHPOjnU=&UserName=fDfTOArNJXHmbGEeaShOsw==&SID=nkNRF6dD83c=&RoleId=1X5bqQAfxQY=&GroupId=KUxCDdt69t4=',\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"headers = {\n",
|
"headers = {\n",
|
||||||
" 'Accept': 'application/json, text/javascript, */*; q=0.01',\n",
|
" 'Accept': 'application/json, text/javascript, */*; q=0.01',\n",
|
||||||
" 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
|
" 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
|
||||||
" 'Connection': 'keep-alive',\n",
|
" 'Proxy-Connection': 'keep-alive',\n",
|
||||||
" 'Referer': 'http://crm.zhongtukj.com/Boss/Customer/CustomerCardListMem.aspx',\n",
|
" 'Referer': 'http://crm.zhongtukj.com/Boss/Customer/CustomerCardListMem.aspx',\n",
|
||||||
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0',\n",
|
" 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0',\n",
|
||||||
" 'X-Requested-With': 'XMLHttpRequest',\n",
|
" 'X-Requested-With': 'XMLHttpRequest',\n",
|
||||||
" # 'Cookie': 'td_cookie=628168942; td_cookie=627897944; ASP.NET_SessionId=q5qzer2z51b4uzsxrhterzop; ztrjnew@4db97b96-12af-45b0-b232-fd1e9b7a672e@=PassWord=wZn2IuvdWeE=&RememberPwd=RXv90LpPskw=&UserId=nzK31b3ZYVQ=&CSID=VjfeyHPOjnU=&UserName=fDfTOArNJXHmbGEeaShOsw==&SID=nkNRF6dD83c=&RoleId=1X5bqQAfxQY=&GroupId=KUxCDdt69t4=',\n",
|
" # 'Cookie': 'td_cookie=628629794; td_cookie=627897944; ASP.NET_SessionId=54barjh2gsquceps2flqvlwy; ztrjnew@4db97b96-12af-45b0-b232-fd1e9b7a672e@=PassWord=wZn2IuvdWeE=&RememberPwd=RXv90LpPskw=&UserId=nzK31b3ZYVQ=&CSID=VjfeyHPOjnU=&UserName=fDfTOArNJXHmbGEeaShOsw==&SID=nkNRF6dD83c=&RoleId=1X5bqQAfxQY=&GroupId=KUxCDdt69t4=',\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"all_data= []\n",
|
"\n",
|
||||||
"for i in range(1,4):\n",
|
"all_data = []\n",
|
||||||
|
"for i in range(1, 78):\n",
|
||||||
" params = {\n",
|
" params = {\n",
|
||||||
" 'action': 'GetList',\n",
|
" 'action': 'GetList',\n",
|
||||||
" 'groupId': '9',\n",
|
" 'groupId': '0',\n",
|
||||||
" 'keyword': '',\n",
|
" 'keyword': '',\n",
|
||||||
|
" 'managerid': '-1',\n",
|
||||||
|
" 'isweixin': '',\n",
|
||||||
|
" 'cardid': '',\n",
|
||||||
|
" 'saleid': '',\n",
|
||||||
|
" 'openCardTime1': '',\n",
|
||||||
|
" 'openCardTime2': '',\n",
|
||||||
|
" 'deadline1': '',\n",
|
||||||
|
" 'deadline2': '',\n",
|
||||||
" 'page': i,\n",
|
" 'page': i,\n",
|
||||||
" 'rows': '20',\n",
|
" 'rows': '20',\n",
|
||||||
" 'sort': 'ID',\n",
|
" 'sort': 'ID',\n",
|
||||||
@@ -61,17 +71,17 @@
|
|||||||
" all_data.extend(rows)\n",
|
" all_data.extend(rows)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"df = pd.DataFrame(all_data)\n",
|
"df = pd.DataFrame(all_data)\n",
|
||||||
"df.to_excel(\"D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\谷途有效会员卡.xlsx\",index=False)"
|
"df.to_excel(\"D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\谷途有效会员卡.xlsx\", index=False)"
|
||||||
],
|
],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stderr",
|
"name": "stderr",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"<>:42: SyntaxWarning: invalid escape sequence '\\I'\n",
|
"<>:55: SyntaxWarning: invalid escape sequence '\\I'\n",
|
||||||
"<>:42: SyntaxWarning: invalid escape sequence '\\I'\n",
|
"<>:55: SyntaxWarning: invalid escape sequence '\\I'\n",
|
||||||
"C:\\Users\\hp_z66\\AppData\\Local\\Temp\\ipykernel_27516\\3060641384.py:42: SyntaxWarning: invalid escape sequence '\\I'\n",
|
"C:\\Users\\hp_z66\\AppData\\Local\\Temp\\ipykernel_16640\\1148863384.py:55: SyntaxWarning: invalid escape sequence '\\I'\n",
|
||||||
" df.to_excel(\"D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\谷途有效会员卡.xlsx\",index=False)\n"
|
" df.to_excel(\"D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\谷途有效会员卡.xlsx\", index=False)\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -1,103 +0,0 @@
|
|||||||
from playwright.sync_api import sync_playwright
|
|
||||||
import time
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
|
|
||||||
def extract_table_data(page):
    """Extract the data rows of the ``table.dg`` table on the current page.

    Header rows (containing ``th``), the totals row (text containing "合计")
    and rows with fewer than 12 ``td`` cells are skipped.

    Args:
        page: Object exposing ``query_selector_all``; in this script it is the
            Playwright page that currently shows the list.

    Returns:
        A list with one dict per data row. ``"ID"`` is the ``title`` attribute
        of the row's ``input[title]`` element, or an empty string when the
        element or attribute is missing; the other keys hold the stripped text
        of cells 2..11.
    """
    # Column names for cells[2] .. cells[11], in table order.
    columns = (
        "车牌", "姓名", "卡号", "卡类型", "套餐项目",
        "总次数", "消费", "剩余", "剩余成本", "到期日期",
    )

    data = []
    for row in page.query_selector_all("table.dg tbody tr"):
        # Skip header and totals rows
        if row.query_selector("th") or "合计" in (row.text_content() or ""):
            continue

        cells = row.query_selector_all("td")
        if len(cells) < 12:
            continue  # not a data row

        # query_selector() returns None when nothing matches; calling
        # get_attribute() on that result would raise AttributeError.
        id_input = row.query_selector("input[title]")
        record_id = ""
        if id_input is not None:
            record_id = id_input.get_attribute("title") or ""

        record = {"ID": record_id}
        for index, column in enumerate(columns, start=2):
            record[column] = (cells[index].text_content() or "").strip()
        data.append(record)
    return data
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Scrape a hand-set page range of the package-card list into Excel.

    Opens the list in a visible Chromium window with hard-coded session
    cookies, visits pages ``start_page``..``end_page`` (both inclusive) via the
    pager's ``__doPostBack`` call, and writes all extracted rows to one
    ``.xlsx`` file.
    """
    # ====== Page range, set by hand ======
    start_page = 1  # first page (inclusive)
    end_page = 5  # last page (inclusive)
    # =====================================

    all_data = []

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False, slow_mo=300)
        try:
            context = browser.new_context()

            # Session cookies; update them to match the current login.
            # SECURITY NOTE(review): credentials are hard-coded in source.
            context.add_cookies([
                {"name": "ASP.NET_SessionId", "value": "knhs0hxsbmolk20gidmlis3j", "domain": "crm.zhongtukj.com",
                 "path": "/"},
                {"name": "ztrjnew@4db97b96-12af-45b0-b232-fd1e9b7a672e@",
                 "value": "PassWord=wZn2IuvdWeE=&RememberPwd=RXv90LpPskw=&UserId=nzK31b3ZYVQ=&CSID=VjfeyHPOjnU=&UserName=fDfTOArNJXHmbGEeaShOsw==&SID=nkNRF6dD83c=&RoleId=1X5bqQAfxQY=&GroupId=KUxCDdt69t4=",
                 "domain": "crm.zhongtukj.com", "path": "/"}
            ])

            page = context.new_page()
            print(f"正在加载第 {start_page} 页(初始页)...")
            page.goto("http://crm.zhongtukj.com/Boss/Customer/CustomerPackageList.aspx")
            page.wait_for_load_state("networkidle")
            time.sleep(2)

            for current_page in range(start_page, end_page + 1):
                # Page 1 is already loaded; every other page needs a postback.
                if current_page != 1:
                    print(f"正在跳转到第 {current_page} 页...")
                    page.evaluate(f"() => __doPostBack('AspNetPager', '{current_page}')")
                    page.wait_for_load_state("networkidle")
                    time.sleep(2)  # settle time after the postback

                # Extract the rows of the page now shown
                data = extract_table_data(page)
                all_data.extend(data)
                print(f" 第 {current_page} 页提取 {len(data)} 条记录")
        finally:
            # Close the browser even when a page load or extraction fails.
            browser.close()

    # Save the result
    if all_data:
        df = pd.DataFrame(all_data)
        # Raw f-string: the Windows path contains backslashes that are not
        # valid escape sequences (\I, \F, ...), which triggers SyntaxWarning
        # in a normal string literal.
        filename = rf"D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\文件输出\套餐卡_第{start_page}至{end_page}页.xlsx"
        df.to_excel(filename, index=False)
        print(f"\n✅ 共提取 {len(all_data)} 条记录,已保存到 '{filename}'")
    else:
        print("⚠️ 未提取到任何数据")


if __name__ == "__main__":
    main()
|
|
||||||
Reference in New Issue
Block a user