{ "cells": [ { "metadata": {}, "cell_type": "markdown", "source": "获取材料", "id": "be89024828eeb339" }, { "cell_type": "code", "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2025-06-23T03:52:55.446076Z", "start_time": "2025-06-23T03:52:49.281182Z" } }, "source": [ "import requests\n", "import pandas as pd\n", "\n", "all_data = []\n", "cookies = {\n", " 'Hm_lvt_a856165bfa0ce84d1fb04dc98d558ead': '1750648887',\n", " 'Hm_lpvt_a856165bfa0ce84d1fb04dc98d558ead': '1750648887',\n", " 'HMACCOUNT': 'ABFCA62083E00432',\n", " 'JSESSIONID': 'C719B94B2515D8D6582B9D4AE91903FF',\n", " 'td_cookie': '3361803543',\n", "}\n", "\n", "headers = {\n", " 'Accept': 'application/json, text/javascript, */*; q=0.01',\n", " 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n", " 'Connection': 'keep-alive',\n", " 'Referer': 'http://www.idsz.xin:7070/setting_commodity_information_new',\n", " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0',\n", " 'X-Requested-With': 'XMLHttpRequest',\n", " # 'Cookie': 'Hm_lvt_a856165bfa0ce84d1fb04dc98d558ead=1750648887; Hm_lpvt_a856165bfa0ce84d1fb04dc98d558ead=1750648887; HMACCOUNT=ABFCA62083E00432; JSESSIONID=C719B94B2515D8D6582B9D4AE91903FF; td_cookie=3361803543',\n", "}\n", "\n", "topGroupId_list = [\n", " '974e032e-0052-42b0-9d98-2d83f9ee7d0a',\n", " '81f51c1d-f33b-4ec7-ab79-34af0cb4a006',\n", " 'b17589f3-ad29-429a-9f15-6077d1cbafbb',\n", " '25eb6487-0bf9-4fa2-8fcf-5c7e1fedf700',\n", " 'be723274-157e-4da5-a7cd-dae8874a3bf5',\n", " '29ae4eb1-cd7d-4521-a4cf-6feaae449f2f',\n", " '96a1f351-ae12-46dd-b6b3-57dd96a316a1',\n", " \"72278a29-7c9b-4c54-9eec-809542a2327b\"\n", "] # 手动获取\n", "\n", "for topGroupId in topGroupId_list:\n", " total = 0\n", " params = {\n", " 'apiname': 'goodscommon_list',\n", " 'page': '0',\n", " 'rows': '50',\n", " 'topGroupId': topGroupId, # 不同类目需要手动变更\n", " 'subGroupId': '',\n", " 'queryStr': '',\n", " 'goodsType': '',\n", " }\n", "\n", " response = requests.get('http://www.idsz.xin:7070/posapi_invoke', params=params, cookies=cookies, headers=headers,\n", " verify=False)\n", " total = response.json().get('total')\n", "\n", " rows_list = []\n", " for i in range(total + 1):\n", " new_params = {\n", " 'apiname': 'goodscommon_list',\n", " 'page': f'{i}',\n", " 'rows': '50',\n", " 'topGroupId': topGroupId, # 不同类目需要手动变更\n", " 'subGroupId': '',\n", " 'queryStr': '',\n", " 'goodsType': '',\n", " }\n", " rows_list += requests.get('http://www.idsz.xin:7070/posapi_invoke', params=new_params, cookies=cookies,\n", " headers=headers, verify=False).json().get('rows')\n", " df = pd.DataFrame(rows_list)\n", " df.to_excel(f'{topGroupId}data.xlsx', index=False)" ], "outputs": [], "execution_count": 16 }, { "metadata": {}, "cell_type": "markdown", "source": "## 客户信息", "id": "bcb243b547895096" }, { "metadata": { "ExecuteTime": { "end_time": "2025-12-05T03:39:42.773538Z", "start_time": "2025-12-05T03:38:31.538614Z" } }, "cell_type": "code", "source": [ "import requests\n", "from tqdm.notebook import tqdm\n", "import pandas as pd\n", "USER_AGENT = \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36\"\n", "\n", "cookies = {\n", " 'JSESSIONID': '621467AB3BF49A8C806B9AA859C05C81',\n", " 'td_cookie': '438646777',\n", "}\n", "\n", "headers = {\n", " 'Accept': 'application/json, text/javascript, */*; q=0.01',\n", " 'Accept-Language': 'zh-CN,zh;q=0.9',\n", " 'Cache-Control': 'no-cache',\n", " 'Connection': 'keep-alive',\n", " 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',\n", " 'Origin': 'http://www.idsz.xin:7070',\n", " 'Pragma': 'no-cache',\n", " 'Referer': 'http://www.idsz.xin:7070/report/memberservicelist?detailtype=3&key=4&startTime=2025-01-01&endTime=2025-12-05&sshopId=&type=1&typeFlag=0',\n", " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36 Edg/142.0.0.0',\n", " 'X-Requested-With': 'XMLHttpRequest',\n", " # 'Cookie': 'JSESSIONID=621467AB3BF49A8C806B9AA859C05C81; td_cookie=438646777',\n", "}\n", "\n", "all_data = []\n", "\n", "params = {\n", " 'apiname': 'kpi_customerDetailQuery_new',\n", " 'detailtype': '4',\n", " 'startTime': '2015-01-01',\n", " 'endTime': '2025-12-31',\n", " 'key': 'cus',\n", " 'sshopId': '',\n", " 'option': '',\n", " 'page': 1,\n", " 'pageSize': '50',\n", "}\n", "\n", "response = requests.get('http://www.idsz.xin:7070/posapi_invoke', params=params, cookies=cookies, headers=headers,\n", " verify=False)\n", "\n", "# print(response.json())\n", "total = response.json().get('total')\n", "total = int(total)\n", "print( total)\n", "total_pages = total // 50 + 1\n", "for i in tqdm(range(total_pages)):\n", " params['page'] = i + 1\n", " response = requests.get('http://www.idsz.xin:7070/posapi_invoke', params=params, cookies=cookies, headers=headers,\n", " verify=False)\n", " cus_list = response.json().get('rows')\n", " for cus in cus_list:\n", " all_data.append(cus)\n", "\n", "df = pd.DataFrame(all_data)\n", "df.to_excel('customer_data.xlsx', index=False)" ], "id": "2c30955bd49819cc", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7701\n" ] }, { "data": { "text/plain": [ " 0%| | 0/155 [00:00 \u001B[39m\u001B[32m51\u001B[39m cus_card_list = cus.get(\u001B[33m'\u001B[39m\u001B[33mtiemsItems\u001B[39m\u001B[33m'\u001B[39m)\n\u001B[32m 52\u001B[39m \u001B[38;5;28mprint\u001B[39m(cus)\n", "\u001B[31mAttributeError\u001B[39m: 'NoneType' object has no attribute 'get'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001B[31mKeyboardInterrupt\u001B[39m Traceback (most recent call last)", "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[32]\u001B[39m\u001B[32m, line 59\u001B[39m\n\u001B[32m 57\u001B[39m \u001B[38;5;28;01mexcept\u001B[39;00m:\n\u001B[32m 58\u001B[39m retry += \u001B[32m1\u001B[39m\n\u001B[32m---> \u001B[39m\u001B[32m59\u001B[39m time.sleep(\u001B[32m1\u001B[39m)\n\u001B[32m 60\u001B[39m \u001B[38;5;28;01mcontinue\u001B[39;00m\n\u001B[32m 62\u001B[39m df1 = pd.DataFrame(all_data)\n", "\u001B[31mKeyboardInterrupt\u001B[39m: " ] } ], "execution_count": 32 }, { "metadata": { "ExecuteTime": { "end_time": "2025-12-05T02:02:22.985849Z", "start_time": "2025-12-05T01:54:44.634161Z" } }, "cell_type": "code", "source": [ "import asyncio\n", "import httpx\n", "import pandas as pd\n", "import random\n", "from tqdm.notebook import tqdm\n", "\n", "# ===== 配置 =====\n", "CONCURRENCY = 5\n", "RETRY_LIMIT = 3\n", "TIMEOUT = 10\n", "MIN_DELAY = 0.2\n", "MAX_DELAY = 0.8\n", "\n", "cookies = {\n", " 'JSESSIONID': '621467AB3BF49A8C806B9AA859C05C81',\n", " 'td_cookie': '438646777',\n", "}\n", "\n", "headers = {\n", " 'Accept': 'application/json, text/javascript, */*; q=0.01',\n", " 'Accept-Language': 'zh-CN,zh;q=0.9',\n", " 'Cache-Control': 'no-cache',\n", " 'Connection': 'keep-alive',\n", " 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',\n", " 'Origin': 'http://www.idsz.xin:7070',\n", " 'Pragma': 'no-cache',\n", " 'Referer': 'http://www.idsz.xin:7070/report/memberservicelist?detailtype=3&key=4&startTime=2025-01-01&endTime=2025-12-05&sshopId=&type=1&typeFlag=0',\n", " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36 Edg/142.0.0.0',\n", " 'X-Requested-With': 'XMLHttpRequest',\n", " # 'Cookie': 'JSESSIONID=621467AB3BF49A8C806B9AA859C05C81; td_cookie=438646777',\n", "}\n", "\n", "params = {'apiname': 'member_person_query_by_id_v1'}\n", "\n", "async def fetch_card_data(client, fid, semaphore):\n", " async with semaphore:\n", " await asyncio.sleep(random.uniform(MIN_DELAY, MAX_DELAY))\n", " data = {'id': fid, 'licensePlate': '', 'idType': '1'}\n", " for attempt in range(RETRY_LIMIT):\n", " try:\n", " response = await client.post(\n", " 'http://www.idsz.xin:7070/posapi_invoke',\n", " params=params,\n", " cookies=cookies,\n", " headers=base_headers,\n", " data=data,\n", " timeout=TIMEOUT\n", " )\n", " response.raise_for_status()\n", " json_data = response.json()\n", " if not json_data.get(\"success\"):\n", " # 即使 success=False,也视为“成功响应”,不再重试\n", " return []\n", " user_obj = json_data.get('userObject', {})\n", " card_list = user_obj.get('tiemsItems', [])\n", " return [{'fid': fid, **card} for card in card_list]\n", " except Exception as e:\n", " if attempt == RETRY_LIMIT - 1:\n", " print(f\"❌ 最终失败: fid={fid}, error={str(e)}\")\n", " return []\n", " await asyncio.sleep(random.uniform(0.5, 1.5))\n", " return []\n", "\n", "# ===== 改进版:精准进度条 =====\n", "async def run_main():\n", " df = pd.read_excel('车牌查询结果.xlsx', sheet_name='Sheet1')\n", " fids = df['fid'].tolist()\n", " semaphore = asyncio.Semaphore(CONCURRENCY)\n", "\n", " async with httpx.AsyncClient(verify=False) as client:\n", " # 创建所有任务\n", " tasks = [fetch_card_data(client, fid, semaphore) for fid in fids]\n", "\n", " # 使用 tqdm 手动管理进度\n", " all_results = []\n", " pbar = tqdm(total=len(tasks), desc=\"正在抓取会员卡信息\")\n", "\n", " # 使用 as_completed 逐个获取完成的任务\n", " for coro in asyncio.as_completed(tasks):\n", " result = await coro\n", " all_results.append(result)\n", " pbar.update(1) # 每完成一个,进度+1\n", "\n", " pbar.close()\n", "\n", " # 展平结果\n", " all_data = [item for sublist in all_results for item in sublist]\n", " df1 = pd.DataFrame(all_data)\n", " df1.to_excel('会员卡信息1.xlsx', index=False)\n", " print(f\"✅ 共获取 {len(all_data)} 条记录,已保存到 '会员卡信息1.xlsx'\")\n", "\n", "# 在 Jupyter 中运行\n", "await run_main()" ], "id": "ad8733e5b29be5fc", "outputs": [ { "data": { "text/plain": [ "正在抓取会员卡信息: 0%| | 0/7658 [00:00 \u001B[39m\u001B[32m52\u001B[39m res_df = pd.DataFrame(all_data)\n\u001B[32m 53\u001B[39m res_df.to_excel(\u001B[33mr\u001B[39m\u001B[33m\"\u001B[39m\u001B[33mD:\u001B[39m\u001B[33m\\\u001B[39m\u001B[33mIdea Project\u001B[39m\u001B[33m\\\u001B[39m\u001B[33mF6+宜搭+其它(1)\u001B[39m\u001B[33m\\\u001B[39m\u001B[33m张阳脚本\u001B[39m\u001B[33m\\\u001B[39m\u001B[33m文件输出\u001B[39m\u001B[33m\\\u001B[39m\u001B[33m有卡客户信息.xlsx\u001B[39m\u001B[33m\"\u001B[39m,index=\u001B[38;5;28;01mFalse\u001B[39;00m)\n", "\u001B[36mFile \u001B[39m\u001B[32mD:\\ProgramTools\\anaconda3\\envs\\f6\\Lib\\site-packages\\pandas\\core\\frame.py:851\u001B[39m, in \u001B[36mDataFrame.__init__\u001B[39m\u001B[34m(self, data, index, columns, dtype, copy)\u001B[39m\n\u001B[32m 849\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m columns \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[32m 850\u001B[39m columns = ensure_index(columns)\n\u001B[32m--> \u001B[39m\u001B[32m851\u001B[39m arrays, columns, index = nested_data_to_arrays(\n\u001B[32m 852\u001B[39m \u001B[38;5;66;03m# error: Argument 3 to \"nested_data_to_arrays\" has incompatible\u001B[39;00m\n\u001B[32m 853\u001B[39m \u001B[38;5;66;03m# type \"Optional[Collection[Any]]\"; expected \"Optional[Index]\"\u001B[39;00m\n\u001B[32m 854\u001B[39m data,\n\u001B[32m 855\u001B[39m columns,\n\u001B[32m 856\u001B[39m index, \u001B[38;5;66;03m# type: ignore[arg-type]\u001B[39;00m\n\u001B[32m 857\u001B[39m dtype,\n\u001B[32m 858\u001B[39m )\n\u001B[32m 859\u001B[39m mgr = arrays_to_mgr(\n\u001B[32m 860\u001B[39m arrays,\n\u001B[32m 861\u001B[39m columns,\n\u001B[32m (...)\u001B[39m\u001B[32m 864\u001B[39m typ=manager,\n\u001B[32m 865\u001B[39m )\n\u001B[32m 866\u001B[39m \u001B[38;5;28;01melse\u001B[39;00m:\n", "\u001B[36mFile \u001B[39m\u001B[32mD:\\ProgramTools\\anaconda3\\envs\\f6\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:520\u001B[39m, in \u001B[36mnested_data_to_arrays\u001B[39m\u001B[34m(data, columns, index, dtype)\u001B[39m\n\u001B[32m 517\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m is_named_tuple(data[\u001B[32m0\u001B[39m]) \u001B[38;5;129;01mand\u001B[39;00m columns \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[32m 518\u001B[39m columns = ensure_index(data[\u001B[32m0\u001B[39m]._fields)\n\u001B[32m--> \u001B[39m\u001B[32m520\u001B[39m arrays, columns = to_arrays(data, columns, dtype=dtype)\n\u001B[32m 521\u001B[39m columns = ensure_index(columns)\n\u001B[32m 523\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m index \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n", "\u001B[36mFile \u001B[39m\u001B[32mD:\\ProgramTools\\anaconda3\\envs\\f6\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:837\u001B[39m, in \u001B[36mto_arrays\u001B[39m\u001B[34m(data, columns, dtype)\u001B[39m\n\u001B[32m 835\u001B[39m arr = _list_to_arrays(data)\n\u001B[32m 836\u001B[39m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(data[\u001B[32m0\u001B[39m], abc.Mapping):\n\u001B[32m--> \u001B[39m\u001B[32m837\u001B[39m arr, columns = _list_of_dict_to_arrays(data, columns)\n\u001B[32m 838\u001B[39m \u001B[38;5;28;01melif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(data[\u001B[32m0\u001B[39m], ABCSeries):\n\u001B[32m 839\u001B[39m arr, columns = _list_of_series_to_arrays(data, columns)\n", "\u001B[36mFile \u001B[39m\u001B[32mD:\\ProgramTools\\anaconda3\\envs\\f6\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:917\u001B[39m, in \u001B[36m_list_of_dict_to_arrays\u001B[39m\u001B[34m(data, columns)\u001B[39m\n\u001B[32m 915\u001B[39m gen = (\u001B[38;5;28mlist\u001B[39m(x.keys()) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m data)\n\u001B[32m 916\u001B[39m sort = \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(d, \u001B[38;5;28mdict\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m d \u001B[38;5;129;01min\u001B[39;00m data)\n\u001B[32m--> \u001B[39m\u001B[32m917\u001B[39m pre_cols = lib.fast_unique_multiple_list_gen(gen, sort=sort)\n\u001B[32m 918\u001B[39m columns = ensure_index(pre_cols)\n\u001B[32m 920\u001B[39m \u001B[38;5;66;03m# assure that they are of the base dict class and not of derived\u001B[39;00m\n\u001B[32m 921\u001B[39m \u001B[38;5;66;03m# classes\u001B[39;00m\n", "\u001B[36mFile \u001B[39m\u001B[32mpandas/_libs/lib.pyx:367\u001B[39m, in \u001B[36mpandas._libs.lib.fast_unique_multiple_list_gen\u001B[39m\u001B[34m()\u001B[39m\n", "\u001B[36mFile \u001B[39m\u001B[32mD:\\ProgramTools\\anaconda3\\envs\\f6\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:915\u001B[39m, in \u001B[36m\u001B[39m\u001B[34m(.0)\u001B[39m\n\u001B[32m 895\u001B[39m \u001B[38;5;250m\u001B[39m\u001B[33;03m\"\"\"\u001B[39;00m\n\u001B[32m 896\u001B[39m \u001B[33;03mConvert list of dicts to numpy arrays\u001B[39;00m\n\u001B[32m 897\u001B[39m \n\u001B[32m (...)\u001B[39m\u001B[32m 912\u001B[39m \u001B[33;03mcolumns : Index\u001B[39;00m\n\u001B[32m 913\u001B[39m \u001B[33;03m\"\"\"\u001B[39;00m\n\u001B[32m 914\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m columns \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[32m--> \u001B[39m\u001B[32m915\u001B[39m gen = (\u001B[38;5;28mlist\u001B[39m(x.keys()) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m data)\n\u001B[32m 916\u001B[39m sort = \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(d, \u001B[38;5;28mdict\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m d \u001B[38;5;129;01min\u001B[39;00m data)\n\u001B[32m 917\u001B[39m pre_cols = lib.fast_unique_multiple_list_gen(gen, sort=sort)\n", "\u001B[31mAttributeError\u001B[39m: 'NoneType' object has no attribute 'keys'" ] } ], "execution_count": 21 }, { "metadata": { "ExecuteTime": { "end_time": "2025-12-05T01:23:06.186557Z", "start_time": "2025-12-05T01:23:05.740048Z" } }, "cell_type": "code", "source": [ "import requests\n", "\n", "cookies = {\n", " 'JSESSIONID': '32C141F5A7A289491D34C50325D2D5A8',\n", " 'td_cookie': '430509658',\n", "}\n", "\n", "headers = {\n", " 'Accept': 'application/json, text/javascript, */*; q=0.01',\n", " 'Accept-Language': 'zh-CN,zh;q=0.9',\n", " 'Cache-Control': 'no-cache',\n", " 'Connection': 'keep-alive',\n", " 'Pragma': 'no-cache',\n", " 'Referer': 'http://www.idsz.xin:7070/report_member_verifi_list?detailtype=4&key=totalBalance&datafrom=2025-01-01&datato=2025-12-04&sshopId=&type=1',\n", " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36 Edg/142.0.0.0',\n", " 'X-Requested-With': 'XMLHttpRequest',\n", " # 'Cookie': 'JSESSIONID=32C141F5A7A289491D34C50325D2D5A8; td_cookie=430509658',\n", "}\n", "\n", "params = {\n", " 'apiname': 'kpi_memberVerifiAndSurplusQuery',\n", " 'detailtype': '4',\n", " 'startTime': '2025-01-01',\n", " 'endTime': '2025-12-04',\n", " 'key': 'totalBalance',\n", " 'sshopId': '',\n", " 'timesCardId': '',\n", " 'option': '',\n", " 'page': '2',\n", " 'pageSize': '50',\n", "}\n", "\n", "response = requests.get('http://www.idsz.xin:7070/posapi_invoke', params=params, cookies=cookies, headers=headers,\n", " verify=False)\n" ], "id": "63713fc278b99d27", "outputs": [], "execution_count": 9 } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }