import json
import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm.auto import tqdm  # renders as a widget in Jupyter, plain text elsewhere

# ---------------------- Request configuration ----------------------
LIST_URL = 'https://tejialuntai.com/admin2/suppliers_users.php'
OUTPUT_FILE = '会员数据全量.xlsx'
REQUEST_TIMEOUT = 15        # seconds, per HTTP request
REQUEST_DELAY = 0.5         # seconds to wait between pages
MAX_ATTEMPTS = 3            # total tries per page before giving up
DEFAULT_PAGE_COUNT = 154    # used when the response carries no usable page_count

# SECURITY: the admin session cookies (admin id, password hash, session id)
# must never be committed inside the notebook. Export them as a JSON object
# in this environment variable before running, e.g.
#   TEJIALUNTAI_ADMIN_COOKIES='{"ECSCP[admin_id]": "...", "ECSCP_ID": "..."}'
COOKIES_ENV_VAR = 'TEJIALUNTAI_ADMIN_COOKIES'

# Output column names, in the same order as the <td> cells of a table row.
COLUMN_NAMES = (
    '用户id',
    '用户名称',
    '备注名称',
    '手机号码',
    '注册时间',
    '推荐人',
    '车型',
    '变速箱',
    '机油信息',
    '车牌号',
    '微信号',
    '余额',
    '保险公司',
    '保险到期',
    '旗下车辆',
    '车架号',
    '操作',
)


def _load_cookies(env_var=COOKIES_ENV_VAR):
    """Read the cookie jar from a JSON object stored in an environment variable.

    :param env_var: name of the environment variable to read
    :return: dict of cookie name -> value; empty dict when the variable is
             unset, is not valid JSON, or does not hold a JSON object
    """
    raw = os.environ.get(env_var, '').strip()
    if not raw:
        return {}
    try:
        jar = json.loads(raw)
    except ValueError:
        print(f"环境变量 {env_var} 不是合法的 JSON")
        return {}
    if not isinstance(jar, dict):
        print(f"环境变量 {env_var} 必须是 JSON 对象")
        return {}
    return {str(name): str(value) for name, value in jar.items()}


cookies = _load_cookies()

headers = {
    'accept': '*/*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'cache-control': 'no-cache',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://tejialuntai.com',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://tejialuntai.com/admin2/suppliers_users.php?act=list',
    'sec-ch-ua': '"Microsoft Edge";v="143", "Chromium";v="143", "Not A(Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36 Edg/143.0.0.0',
}

params = {'is_ajax': '1'}
base_data = {
    'act': 'query',
    'user_name': '',
    'note_name': '',
    'carnumber': '',
    'cates': '0',
    'insurance_id': '0',
    'inyear': '0',
    'intime': '0',
    'record_count': '2309',
    'page_size': '15',
    'page_count': '154',
}


# ---------------------- Data extraction ----------------------
def _parse_table(html):
    """Turn the first <table> of an HTML fragment into a list of row dicts.

    :param html: HTML fragment taken from the JSON response's 'content' field
    :return: None when the fragment has no <table>; otherwise a list with one
             dict (keyed by COLUMN_NAMES) per row that has enough <td> cells
    """
    table = BeautifulSoup(html, 'html.parser').find('table')
    if table is None:
        return None

    records = []
    # The first <tr> is skipped as the header row.
    # NOTE(review): the recorded run reported 16 rows on page 1 although
    # page_size is 15 - check whether a non-data row slips through this filter.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) < len(COLUMN_NAMES):
            continue
        # zip() stops after the last column name, so extra cells are ignored.
        records.append(
            {name: cell.text.strip() for name, cell in zip(COLUMN_NAMES, cells)}
        )
    return records


def _to_page_count(value, default=DEFAULT_PAGE_COUNT):
    """Coerce a page count such as 154, 154.0 or '154' to int, else `default`."""
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return default


def get_page_data(page, max_attempts=MAX_ATTEMPTS):
    """Request one page of the user list and parse it.

    :param page: 1-based page number
    :param max_attempts: total number of tries before the page is given up
    :return: (rows, page_count) - rows is a list of dicts keyed by
             COLUMN_NAMES and page_count is the response's 'page_count'
             (DEFAULT_PAGE_COUNT when absent). Returns ([], None) when the
             request keeps failing or the response holds no data table.
    """
    # Derive the offset from the configured page size instead of repeating it.
    page_size = int(base_data['page_size'])
    data = base_data.copy()
    data.update({'page': str(page), 'start': str((page - 1) * page_size)})

    res_json = None
    attempts = max(1, max_attempts)
    for attempt in range(1, attempts + 1):
        try:
            response = requests.post(
                LIST_URL,
                params=params,
                cookies=cookies,
                headers=headers,
                data=data,
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()  # surface HTTP errors as exceptions
            res_json = response.json()
            break
        # ValueError covers JSON decode failures on requests versions where
        # that error is not a RequestException subclass.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"\n请求第 {page} 页失败: {str(e)}")
            if attempt < attempts:
                time.sleep(attempt)  # simple linear back-off before retrying

    if res_json is None:
        return [], None

    # Guard against payloads that are not an object or lack 'content'.
    content = res_json.get('content') if isinstance(res_json, dict) else None
    page_data = _parse_table(content) if content else None
    if page_data is None:
        print(f"\n第 {page} 页未找到数据表格")
        return [], None

    return page_data, res_json.get('page_count', DEFAULT_PAGE_COUNT)


def save_to_excel(records, path=OUTPUT_FILE):
    """Write the collected rows to an .xlsx file; print a notice when empty."""
    if not records:
        print("未提取到任何数据")
        return
    df = pd.DataFrame(records)
    df.to_excel(path, index=False, engine='openpyxl')
    print(f"\n所有数据提取完成!共{len(records)}条")
    print(f"文件已保存为:{path}")


# ---------------------- Main flow ----------------------
def main():
    """Fetch every page, export the rows to Excel and return them as a list."""
    all_rows = []
    if not cookies:
        print(f"未设置环境变量 {COOKIES_ENV_VAR},无法登录后台,程序终止")
        return all_rows

    # 1. Fetch page 1 to learn the total number of pages.
    first_page_data, total_pages = get_page_data(1)
    if not first_page_data:
        print("无法获取第1页数据,程序终止")
        return all_rows
    all_rows.extend(first_page_data)
    print(f"第1页数据提取完成,共{len(first_page_data)}条")

    # 2. Walk the remaining pages, remembering the ones that could not be read.
    failed_pages = []
    last_page = _to_page_count(total_pages)
    for page in tqdm(range(2, last_page + 1), desc="提取所有页面数据"):
        page_data, page_count = get_page_data(page)
        if page_count is None:
            failed_pages.append(page)
        all_rows.extend(page_data)
        time.sleep(REQUEST_DELAY)  # throttle requests

    if failed_pages:
        print(f"\n以下页码提取失败,数据不完整: {failed_pages}")

    # 3. Convert to a DataFrame and save as xlsx.
    save_to_excel(all_rows)
    return all_rows


if __name__ == '__main__':
    # Kept at module level so later notebook cells can still inspect the rows.
    all_data = main()