217 lines
9.2 KiB
Plaintext
217 lines
9.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"id": "initial_id",
|
|
"metadata": {
|
|
"collapsed": true,
|
|
"ExecuteTime": {
|
|
"end_time": "2025-12-29T03:33:47.044410Z",
|
|
"start_time": "2025-12-29T03:31:04.703879800Z"
|
|
}
|
|
},
|
|
"source": [
|
|
"import os\n",
"import time\n",
"\n",
"import pandas as pd\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from tqdm.notebook import tqdm # Jupyter 专属进度条\n",
|
|
"\n",
|
|
"# ---------------------- 配置请求参数 ----------------------\n",
|
|
"# Session cookies for the admin back end. The password hash and the session id\n",
"# are read from the environment so that live credentials are not stored in the\n",
"# notebook: set ECSCP_ADMIN_PASS and ECSCP_SESSION_ID before running this cell\n",
"# (os.environ[...] raises KeyError naming the variable when one is missing).\n",
"# Values that were previously committed here should be treated as leaked and\n",
"# rotated on the server.\n",
"cookies = {\n",
"    'ECSCP[admin_id]': '108',\n",
"    'ECSCP[admin_pass]': os.environ['ECSCP_ADMIN_PASS'],\n",
"    'ECS_LastCheckOrder': 'Mon%2C%2029%20Dec%202025%2003%3A26%3A30%20GMT',\n",
"    'ECSCP[lastfilterfile]': '9028E76F',\n",
"    # NOTE(review): looks like URL-encoded, serialised list-filter state copied\n",
"    # from a browser session - confirm whether the server actually requires it.\n",
"    'ECSCP[lastfilter]': 'a%253A12%253A%257Bs%253A9%253A%2522user_name%2522%253Bs%253A0%253A%2522%2522%253Bs%253A9%253A%2522note_name%2522%253Bs%253A0%253A%2522%2522%253Bs%253A9%253A%2522carnumber%2522%253Bs%253A0%253A%2522%2522%253Bs%253A5%253A%2522cates%2522%253Bs%253A0%253A%2522%2522%253Bs%253A12%253A%2522insurance_id%2522%253Bi%253A0%253Bs%253A6%253A%2522inyear%2522%253Bi%253A0%253Bs%253A6%253A%2522intime%2522%253Bi%253A0%253Bs%253A12%253A%2522record_count%2522%253Bs%253A4%253A%25222309%2522%253Bs%253A9%253A%2522page_size%2522%253Bi%253A15%253Bs%253A4%253A%2522page%2522%253Bi%253A1%253Bs%253A10%253A%2522page_count%2522%253Bd%253A154%253Bs%253A5%253A%2522start%2522%253Bi%253A0%253B%257D',\n",
"    # NOTE(review): appears to be a base64-encoded copy of the last list query\n",
"    # issued by the browser session - confirm whether it can be dropped.\n",
"    'ECSCP[lastfiltersql]': 'U0VMRUNUIHUuKiwgcC51c2VyX25hbWUgYXMgcGFyZW50X25hbWUsIGEuY2F0X25hbWUsIGIuY2F0X25hbWUgYXMgY2FyX25hbWUsIHUuc2VjdXJpdHlkYXRlLCBpLmluc3VyYW5jZV9uYW1lIEZST00gYHRlamlhbHVudGFpX2NvbWAuYGVjc191c2Vyc2AgYXMgdSBsZWZ0IGpvaW4gYHRlamlhbHVudGFpX2NvbWAuYGVjc191c2Vyc2AgYXMgcCBvbiBwLnVzZXJfaWQgPSB1LnBhcmVudF9pZCAgbGVmdCBqb2luIGB0ZWppYWx1bnRhaV9jb21gLmBlY3NfaW5zdXJhbmNlYCBhcyBpIG9uIGkuaW5zdXJhbmNlX2lkID0gdS5pbnN1cmFuY2VfaWQgIGxlZnQgam9pbiBgdGVqaWFsdW50YWlfY29tYC5gZWNzX2NhdGVnb3J5YCBhcyBhIG9uIGEuY2F0X2lkID0gdS5jc3J0X2lkICBsZWZ0IGpvaW4gYHRlamlhbHVudGFpX2NvbWAuYGVjc19jYXRlZ29yeWAgYXMgeSBvbiBhLnBhcmVudF9pZCA9IHkuY2F0X2lkICBsZWZ0IGpvaW4gYHRlamlhbHVudGFpX2NvbWAuYGVjc19jYXRlZ29yeWAgYXMgYyBvbiB5LnBhcmVudF9pZCA9IGMuY2F0X2lkICBsZWZ0IGpvaW4gYHRlamlhbHVudGFpX2NvbWAuYGVjc19jYXRlZ29yeWAgYXMgYiBvbiBjLnBhcmVudF9pZCA9IGIuY2F0X2lkICB3aGVyZSAxIEFORCB1LnN1cHBsaWVyc19pZCA9ICcnIE9SREVSIEJZIHVzZXJfaWQgREVTQyBMSU1JVCAwLDE1',\n",
"    'real_ipd': '221.226.144.180',\n",
"    'ECSCP_ID': os.environ['ECSCP_SESSION_ID'],\n",
"}\n",
"\n",
"# Browser-like request headers sent with every page request.\n",
"headers = {\n",
"    'accept': '*/*',\n",
"    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
"    'cache-control': 'no-cache',\n",
"    'content-type': 'application/x-www-form-urlencoded',\n",
"    'origin': 'https://tejialuntai.com',\n",
"    'pragma': 'no-cache',\n",
"    'priority': 'u=1, i',\n",
"    'referer': 'https://tejialuntai.com/admin2/suppliers_users.php?act=list',\n",
"    'sec-ch-ua': '\"Microsoft Edge\";v=\"143\", \"Chromium\";v=\"143\", \"Not A(Brand\";v=\"24\"',\n",
"    'sec-ch-ua-mobile': '?0',\n",
"    'sec-ch-ua-platform': '\"Windows\"',\n",
"    'sec-fetch-dest': 'empty',\n",
"    'sec-fetch-mode': 'cors',\n",
"    'sec-fetch-site': 'same-origin',\n",
"    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0',\n",
"}\n",
"\n",
"# Query-string parameters of the request URL.\n",
"params = {'is_ajax': '1'}\n",
"\n",
"# Form fields posted with every page request; get_page_data() copies this dict\n",
"# and adds the per-page 'page' and 'start' fields.\n",
"base_data = {\n",
"    'act': 'query',\n",
"    'user_name': '',\n",
"    'note_name': '',\n",
"    'carnumber': '',\n",
"    'cates': '0',\n",
"    'insurance_id': '0',\n",
"    'inyear': '0',\n",
"    'intime': '0',\n",
"    'record_count': '2309',\n",
"    'page_size': '15',\n",
"    'page_count': '154',\n",
"}\n",
|
|
"\n",
|
|
"# ---------------------- 定义数据提取函数 ----------------------\n",
|
|
"# Column names of the member table, in the order the cells appear in each row.\n",
"_USER_COLUMNS = (\n",
"    '用户id', '用户名称', '备注名称', '手机号码', '注册时间', '推荐人',\n",
"    '车型', '变速箱', '机油信息', '车牌号', '微信号', '余额',\n",
"    '保险公司', '保险到期', '旗下车辆', '车架号', '操作',\n",
")\n",
"\n",
"\n",
"def _parse_user_rows(html):\n",
"    \"\"\"Parse the first <table> of an HTML fragment into a list of row dicts.\n",
"\n",
"    :param html: HTML text containing the member table.\n",
"    :return: list of dicts keyed by ``_USER_COLUMNS``, or None when the\n",
"        fragment contains no <table> at all.\n",
"    \"\"\"\n",
"    table = BeautifulSoup(html, 'html.parser').find('table')\n",
"    if not table:\n",
"        return None\n",
"\n",
"    # NOTE(review): the saved run of this notebook reports 16 rows for page 1\n",
"    # with page_size 15, and 2463 rows in total against record_count 2309, so\n",
"    # one non-data row per page probably passes the cell-count filter below.\n",
"    # Confirm against the real HTML before tightening the filter.\n",
"    records = []\n",
"    for row in table.find_all('tr')[1:]:  # the first <tr> is the header row\n",
"        cells = row.find_all('td')\n",
"        if len(cells) < len(_USER_COLUMNS):\n",
"            continue\n",
"        # zip() stops at the shorter input, so surplus cells are ignored.\n",
"        records.append(\n",
"            {name: cell.text.strip() for name, cell in zip(_USER_COLUMNS, cells)}\n",
"        )\n",
"    return records\n",
"\n",
"\n",
"def get_page_data(page, max_retries=3, retry_delay=1.0):\n",
"    \"\"\"Request one page of the member list and parse it into row dicts.\n",
"\n",
"    :param page: 1-based page number to request.\n",
"    :param max_retries: total number of request attempts before giving up\n",
"        (values below 1 are treated as 1).\n",
"    :param retry_delay: seconds to wait between two failed attempts.\n",
"    :return: ``(rows, page_count)``. ``([], None)`` is returned when every\n",
"        attempt fails, when the response carries no HTML ``content`` or when\n",
"        the HTML contains no table.\n",
"    \"\"\"\n",
"    # Derive the offset from the configured page size instead of repeating it.\n",
"    page_size = int(base_data['page_size'])\n",
"    data = base_data.copy()\n",
"    data.update({'page': str(page), 'start': str((page - 1) * page_size)})\n",
"\n",
"    attempts = max(1, int(max_retries))\n",
"    last_error = None\n",
"    for attempt in range(attempts):\n",
"        try:\n",
"            response = requests.post(\n",
"                'https://tejialuntai.com/admin2/suppliers_users.php',\n",
"                params=params,\n",
"                cookies=cookies,\n",
"                headers=headers,\n",
"                data=data,\n",
"                timeout=15\n",
"            )\n",
"            response.raise_for_status()  # turn HTTP error statuses into exceptions\n",
"            res_json = response.json()\n",
"            break\n",
"        except (requests.exceptions.RequestException, ValueError) as e:\n",
"            # ValueError covers a non-JSON body on requests releases where the\n",
"            # decode error is not a RequestException subclass.\n",
"            last_error = e\n",
"            if attempt + 1 < attempts:\n",
"                time.sleep(retry_delay)\n",
"    else:\n",
"        # Every attempt failed; report once, with the last error seen.\n",
"        print(f\"\\n请求第 {page} 页失败: {str(last_error)}\")\n",
"        return [], None\n",
"\n",
"    # The table HTML is expected under the 'content' key of a JSON object.\n",
"    html = res_json.get('content') if isinstance(res_json, dict) else None\n",
"    if not isinstance(html, str):\n",
"        print(f\"\\n第 {page} 页响应中缺少 content 字段\")\n",
"        return [], None\n",
"\n",
"    page_data = _parse_user_rows(html)\n",
"    if page_data is None:\n",
"        print(f\"\\n第 {page} 页未找到数据表格\")\n",
"        return [], None\n",
"\n",
"    # Fall back to the configured page count when the response omits it.\n",
"    return page_data, res_json.get('page_count', int(base_data['page_count']))\n",
|
|
"\n",
|
|
"# ---------------------- 主执行逻辑 ----------------------\n",
|
|
"if __name__ == '__main__':\n",
"    output_path = '会员数据全量.xlsx'  # written to the current working directory\n",
"    all_data = []\n",
"    failed_pages = []  # pages that returned no rows, reported at the end\n",
"\n",
"    # 1. Fetch page 1 first; it also tells us the total number of pages.\n",
"    first_page_data, total_pages = get_page_data(1)\n",
"    if not first_page_data:\n",
"        print(\"无法获取第1页数据,程序终止\")\n",
"    else:\n",
"        all_data.extend(first_page_data)\n",
"        print(f\"第1页数据提取完成,共{len(first_page_data)}条\")\n",
"\n",
"        # 2. Walk the remaining pages behind a progress bar.\n",
"        for page in tqdm(range(2, int(total_pages) + 1), desc=\"提取所有页面数据\"):\n",
"            page_data, _ = get_page_data(page)\n",
"            if page_data:\n",
"                all_data.extend(page_data)\n",
"            else:\n",
"                # Remember the page so an incomplete export is not reported\n",
"                # as a full one.\n",
"                failed_pages.append(page)\n",
"            time.sleep(0.5)  # throttle requests to the server\n",
"\n",
"    # 3. Convert the collected rows to a DataFrame and save them as xlsx.\n",
"    if all_data:\n",
"        df = pd.DataFrame(all_data)\n",
"        df.to_excel(output_path, index=False, engine='openpyxl')\n",
"        print(f\"\\n所有数据提取完成!共{len(all_data)}条\")\n",
"        print(f\"文件已保存为:{output_path}\")\n",
"        if failed_pages:\n",
"            print(f\"警告:以下页码未获取到数据,导出结果不完整:{failed_pages}\")\n",
"    else:\n",
"        print(\"未提取到任何数据\")"
|
|
],
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"第1页数据提取完成,共16条\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"提取所有页面数据: 0%| | 0/153 [00:00<?, ?it/s]"
|
|
],
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"version_major": 2,
|
|
"version_minor": 0,
|
|
"model_id": "5c7aad4f6153456fa5d7347d3733eb5a"
|
|
}
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data",
|
|
"jetTransient": {
|
|
"display_id": null
|
|
}
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"所有数据提取完成!共2463条\n",
|
|
"文件已保存为:会员数据全量.xlsx\n"
|
|
]
|
|
}
|
|
],
|
|
"execution_count": 3
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 2
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython2",
|
|
"version": "2.7.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|