diff --git a/张阳脚本/minimax调用.ipynb b/张阳脚本/minimax调用.ipynb new file mode 100644 index 0000000..54f657b --- /dev/null +++ b/张阳脚本/minimax调用.ipynb @@ -0,0 +1,37 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/张阳脚本/minmax.py b/张阳脚本/minmax.py new file mode 100644 index 0000000..7c68785 --- /dev/null +++ b/张阳脚本/minmax.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- \ No newline at end of file diff --git a/张阳脚本/竞品系统数据导出/H1车店数据导出.ipynb b/张阳脚本/竞品系统数据导出/H1车店数据导出.ipynb new file mode 100644 index 0000000..82b8c9a --- /dev/null +++ b/张阳脚本/竞品系统数据导出/H1车店数据导出.ipynb @@ -0,0 +1,805 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2be2e0c2", + "metadata": {}, + "source": [ + "# 车辆信息" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "70a8b0da", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "正在获取总页数...\n", + "总页数:88 页\n", + "正在爬取第 1/88 页...\n", + "正在爬取第 2/88 页...\n", + "正在爬取第 3/88 页...\n", + "正在爬取第 4/88 页...\n", + "正在爬取第 5/88 页...\n", + "正在爬取第 6/88 页...\n", + "正在爬取第 7/88 页...\n", + "正在爬取第 8/88 页...\n", + "正在爬取第 9/88 页...\n", + "正在爬取第 10/88 页...\n", + "正在爬取第 11/88 页...\n", + "正在爬取第 12/88 页...\n", + "正在爬取第 13/88 页...\n", + "正在爬取第 14/88 页...\n", + "正在爬取第 15/88 页...\n", + "正在爬取第 16/88 页...\n", + "正在爬取第 17/88 页...\n", + "正在爬取第 18/88 页...\n", + "正在爬取第 19/88 页...\n", + "正在爬取第 20/88 页...\n", + "正在爬取第 21/88 页...\n", + "正在爬取第 22/88 页...\n", + "正在爬取第 23/88 页...\n", + "正在爬取第 24/88 页...\n", + "正在爬取第 25/88 页...\n", + "正在爬取第 26/88 页...\n", + "正在爬取第 27/88 页...\n", + "正在爬取第 28/88 页...\n", + "正在爬取第 29/88 页...\n", + "正在爬取第 30/88 页...\n", + "正在爬取第 31/88 页...\n", + "正在爬取第 32/88 页...\n", + "正在爬取第 33/88 页...\n", + "正在爬取第 34/88 页...\n", + "正在爬取第 35/88 页...\n", + "正在爬取第 36/88 页...\n", + "正在爬取第 37/88 页...\n", + "正在爬取第 38/88 页...\n", + "正在爬取第 39/88 页...\n", + "正在爬取第 40/88 页...\n", + "正在爬取第 41/88 页...\n", + "正在爬取第 42/88 页...\n", + "正在爬取第 43/88 页...\n", + "正在爬取第 44/88 页...\n", + "正在爬取第 45/88 页...\n", + "正在爬取第 46/88 页...\n", + "正在爬取第 47/88 页...\n", + "正在爬取第 48/88 页...\n", + "正在爬取第 49/88 页...\n", + "正在爬取第 50/88 页...\n", + "正在爬取第 51/88 页...\n", + "正在爬取第 52/88 页...\n", + "正在爬取第 53/88 页...\n", + "正在爬取第 54/88 页...\n", + "正在爬取第 55/88 页...\n", + "正在爬取第 56/88 页...\n", + "正在爬取第 57/88 页...\n", + "正在爬取第 58/88 页...\n", + "正在爬取第 59/88 页...\n", + "正在爬取第 60/88 页...\n", + "正在爬取第 61/88 页...\n", + "正在爬取第 62/88 页...\n", + "正在爬取第 63/88 页...\n", + "正在爬取第 64/88 页...\n", + "正在爬取第 65/88 页...\n", + "正在爬取第 66/88 页...\n", + "正在爬取第 67/88 页...\n", + "正在爬取第 68/88 页...\n", + "正在爬取第 69/88 页...\n", + "正在爬取第 70/88 页...\n", + "正在爬取第 71/88 页...\n", + "正在爬取第 72/88 页...\n", + "正在爬取第 73/88 页...\n", + "正在爬取第 74/88 页...\n", + "正在爬取第 75/88 页...\n", + "正在爬取第 76/88 页...\n", + "正在爬取第 77/88 页...\n", + "正在爬取第 78/88 页...\n", + "正在爬取第 79/88 页...\n", + "正在爬取第 80/88 页...\n", + "正在爬取第 81/88 页...\n", + "正在爬取第 82/88 页...\n", + "正在爬取第 83/88 页...\n", + "正在爬取第 84/88 页...\n", + "正在爬取第 85/88 页...\n", + "正在爬取第 86/88 页...\n", + "正在爬取第 87/88 页...\n", + "正在爬取第 88/88 页...\n", + "\n", + "========== 爬取完成 ==========\n", + "总计数据:1745 行\n", + "\n", + "✅ 文件已保存到桌面:\n", + "📊 Excel文件:C:\\Users\\CW\\Desktop\\车辆数据_已拆分_20260316_151600.csv\n", + "📄 文本文件:C:\\Users\\CW\\Desktop\\车辆数据_已拆分_20260316_151600.txt\n", + "\n", + "前5行数据预览:\n", + "1 ['1', '陕KF39335', '杜源', '', '', '', '/', '', '', '', '', '消费记录 编辑 迁移 删除']\n", + "2 ['2', '陕KB772U', '张子龙', '15686644333', '', '', '1F1FW1R68KFB41218', '', '', '', '', '消费记录 编辑 迁移 删除']\n", + "3 ['3', '陕KUY513', '高', '13474443858', '', '', 'LMGAT1L85P1320223', '', '24048km', '', '', '消费记录 编辑 迁移 删除']\n", + "4 ['4', '陕K55489', '刘永强', '15229629888', '', '', 'JTEJU9FJ5DK058155', '丰田普拉多(进口) > 2013款 4.0 自动 中东版', '', '', '', '消费记录 编辑 迁移 删除']\n", + "5 ['5', '陕K139B8', '郝波', '15929825811', '', '', 'LSVAA4182A2355722', '', '', '', '', '消费记录 编辑 迁移 删除']\n" + ] + } + ], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import re\n", + "import os\n", + "import csv\n", + "from datetime import datetime\n", + "\n", + "# ===================== 配置区 =====================\n", + "COOKIES = (\n", + " 'carOwnerSelects=11847,13014; LOGIN_URL=https%3A%2F%2Fscrm.h1cd.com%2Flogin.html; '\n", + " 'showSmsActivity=1; showEasyMoney=1; PHPSESSID=f5sjp1p18hvgke5oqvfr6q456o; '\n", + " 'adminpd=IMAkQA9qfl0V0bY6hPCcbYdz3rcy0MG2%2FbHz34%2BOWy0%3D; adminun=15529803908; uid=10042'\n", + ")\n", + "\n", + "BASE_URL = \"https://scrm.h1cd.com/admin/members/carlist\"\n", + "HEADERS = {\n", + " \"Accept\": \"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7\",\n", + " \"Accept-Language\": \"zh-CN,zh;q=0.9\",\n", + " \"Connection\": \"keep-alive\",\n", + " \"Referer\": \"https://scrm.h1cd.com/admin/members/carlist.html\",\n", + " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36\",\n", + " \"sec-ch-ua\": '\"Not(A:Brand\";v=\"99\", \"Google Chrome\";v=\"133\", \"Chromium\";v=\"133\"',\n", + " \"sec-ch-ua-mobile\": \"?0\",\n", + " \"sec-ch-ua-platform\": '\"Windows\"',\n", + " \"Sec-Fetch-Dest\": \"iframe\",\n", + " \"Sec-Fetch-Mode\": \"navigate\",\n", + " \"Sec-Fetch-Site\": \"same-origin\",\n", + " \"Sec-Fetch-User\": \"?1\",\n", + " \"Upgrade-Insecure-Requests\": \"1\"\n", + "}\n", + "\n", + "# 解析Cookie\n", + "def parse_cookies(cookie_str):\n", + " cookie_dict = {}\n", + " for item in cookie_str.split(\"; \"):\n", + " if \"=\" in item:\n", + " key, value = item.split(\"=\", 1)\n", + " cookie_dict[key] = value\n", + " return cookie_dict\n", + "\n", + "# ===================== 核心:数据拆分处理(已修复 br 换行) =====================\n", + "def process_row(row):\n", + " if len(row) < 5:\n", + " return row\n", + " \n", + " new_row = row.copy()\n", + "\n", + " # ========== 1. 处理 C列:客户名称 + 手机号(按
拆分,已修复) ==========\n", + " c_text = new_row.pop(2)\n", + " \n", + " # 处理换行:统一空格/换行/空白符,提取 名称 + 手机号\n", + " # 先把所有空白(包括HTML换行产生的空格)替换成统一分隔符\n", + " c_text = re.sub(r'\\s+', ' ', c_text).strip()\n", + " \n", + " name, phone = \"\", \"\"\n", + " # 匹配手机号(11位数字),自动分割\n", + " phone_match = re.search(r'1[3-9]\\d{9}', c_text)\n", + " if phone_match:\n", + " phone = phone_match.group()\n", + " name = c_text.replace(phone, '').strip()\n", + " else:\n", + " name = c_text.strip()\n", + "\n", + " # ========== 2. 处理 E列:颜色/发动机/车架号(已去除前缀) ==========\n", + " e_text = new_row.pop(3)\n", + " color, engine, vin = \"\", \"\", \"\"\n", + "\n", + " # 去掉所有中文前缀\n", + " e_text = re.sub(r'颜\\s*色\\s*:', '', e_text)\n", + " e_text = re.sub(r'发动机\\s*:', '', e_text)\n", + " e_text = re.sub(r'车架号\\s*:', '', e_text)\n", + " \n", + " # 按 / 拆分\n", + " if \"/\" in e_text:\n", + " parts = e_text.split(\"/\", 2)\n", + " color = parts[0].strip()\n", + " engine = parts[1].strip() if len(parts) > 1 else \"\"\n", + " vin = parts[2].strip() if len(parts) > 2 else \"\"\n", + " else:\n", + " color = e_text.strip()\n", + "\n", + " # 插入拆分后字段\n", + " new_row.insert(2, name)\n", + " new_row.insert(3, phone)\n", + " new_row.insert(4, color)\n", + " new_row.insert(5, engine)\n", + " new_row.insert(6, vin)\n", + "\n", + " return new_row\n", + "\n", + "# 获取单页表格数据 + 自动拆分(保留HTML内换行,解决
问题)\n", + "def get_page_data(page_num):\n", + " if page_num == 1:\n", + " url = f\"{BASE_URL}.html\"\n", + " else:\n", + " url = f\"{BASE_URL}_{page_num}.html\"\n", + "\n", + " try:\n", + " resp = requests.get(url, headers=HEADERS, cookies=parse_cookies(COOKIES), timeout=15)\n", + " resp.raise_for_status()\n", + " soup = BeautifulSoup(resp.text, \"html.parser\")\n", + " table = soup.find(\"table\")\n", + " if not table:\n", + " return []\n", + "\n", + " rows = table.find_all(\"tr\")\n", + " data = []\n", + " for tr in rows:\n", + " tds = tr.find_all(\"td\")\n", + " cols = []\n", + " for td in tds:\n", + " # 关键:保留
产生的换行,不直接压缩\n", + " text = td.get_text(separator=\" \", strip=True)\n", + " cols.append(text)\n", + " \n", + " if cols:\n", + " processed_cols = process_row(cols)\n", + " data.append(processed_cols)\n", + " return data\n", + "\n", + " except Exception as e:\n", + " print(f\"第{page_num}页请求失败:{e}\")\n", + " return []\n", + "\n", + "# 获取总页数\n", + "def get_total_pages():\n", + " try:\n", + " resp = requests.get(f\"{BASE_URL}.html\", headers=HEADERS, cookies=parse_cookies(COOKIES), timeout=10)\n", + " soup = BeautifulSoup(resp.text, \"html.parser\")\n", + " page_text = soup.get_text()\n", + " match = re.search(r\"共\\s*(\\d+)\\s*页\", page_text)\n", + " if match:\n", + " return int(match.group(1))\n", + "\n", + " page_links = soup.find_all(\"a\", href=re.compile(r\"carlist_\\d+\\.html\"))\n", + " max_page = 1\n", + " for a in page_links:\n", + " num_match = re.search(r\"carlist_(\\d+)\\.html\", a[\"href\"])\n", + " if num_match:\n", + " max_page = max(max_page, int(num_match.group(1)))\n", + " return max_page\n", + " except:\n", + " return 1\n", + "\n", + "# ===================== 保存到桌面(自动加表头) =====================\n", + "def save_to_desktop(all_data):\n", + " desktop_path = os.path.join(os.path.expanduser(\"~\"), \"Desktop\")\n", + " time_str = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", + " \n", + " # 表头(自动添加)\n", + " header = [\n", + " \"序号\", \"车牌号\", \"客户名称\", \"客户手机号\",\n", + " \"颜色\", \"发动机号\", \"车架号\", \"其他列1\", \"其他列2\", \"其他列3\"\n", + " ]\n", + " \n", + " csv_file = os.path.join(desktop_path, f\"车辆数据_已拆分_{time_str}.csv\")\n", + " with open(csv_file, \"w\", newline=\"\", encoding=\"utf-8-sig\") as f:\n", + " writer = csv.writer(f)\n", + " writer.writerow(header) # 写入标题\n", + " writer.writerows(all_data)\n", + " \n", + " txt_file = os.path.join(desktop_path, f\"车辆数据_已拆分_{time_str}.txt\")\n", + " with open(txt_file, \"w\", encoding=\"utf-8\") as f:\n", + " f.write(\" | \".join(header) + \"\\n\")\n", + " for row in all_data:\n", + " f.write(\" | \".join(row) + \"\\n\")\n", + " \n", + " print(f\"\\n✅ 文件已保存到桌面:\")\n", + " print(f\"📊 Excel文件:{csv_file}\")\n", + " print(f\"📄 文本文件:{txt_file}\")\n", + "\n", + "# ===================== 主程序 =====================\n", + "if __name__ == \"__main__\":\n", + " print(\"正在获取总页数...\")\n", + " total_pages = get_total_pages()\n", + " # total_pages = 1\n", + " print(f\"总页数:{total_pages} 页\")\n", + "\n", + " all_data = []\n", + " for page in range(1, total_pages + 1):\n", + " print(f\"正在爬取第 {page}/{total_pages} 页...\")\n", + " page_data = get_page_data(page)\n", + " if page_data:\n", + " all_data.extend(page_data)\n", + "\n", + " print(f\"\\n========== 爬取完成 ==========\")\n", + " print(f\"总计数据:{len(all_data)} 行\")\n", + "\n", + " save_to_desktop(all_data)\n", + "\n", + " print(\"\\n前5行数据预览:\")\n", + " for i, row in enumerate(all_data[:5]):\n", + " print(i+1, row)" + ] + }, + { + "cell_type": "markdown", + "id": "6c370235", + "metadata": {}, + "source": [ + "# 库存信息" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5392bfc0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "==================================================\n", + "开始爬取库存数据...\n", + "==================================================\n", + "✅ 成功获取最大页数:93\n", + "正在爬取第 1/93 页...\n", + "正在爬取第 2/93 页...\n", + "正在爬取第 3/93 页...\n", + "正在爬取第 4/93 页...\n", + "正在爬取第 5/93 页...\n", + "正在爬取第 6/93 页...\n", + "正在爬取第 7/93 页...\n", + "正在爬取第 8/93 页...\n", + "正在爬取第 9/93 页...\n", + "正在爬取第 10/93 页...\n", + "正在爬取第 11/93 页...\n", + "正在爬取第 12/93 页...\n", + "正在爬取第 13/93 页...\n", + "正在爬取第 14/93 页...\n", + "正在爬取第 15/93 页...\n", + "正在爬取第 16/93 页...\n", + "正在爬取第 17/93 页...\n", + "正在爬取第 18/93 页...\n", + "正在爬取第 19/93 页...\n", + "正在爬取第 20/93 页...\n", + "正在爬取第 21/93 页...\n", + "正在爬取第 22/93 页...\n", + "正在爬取第 23/93 页...\n", + "正在爬取第 24/93 页...\n", + "正在爬取第 25/93 页...\n", + "正在爬取第 26/93 页...\n", + "正在爬取第 27/93 页...\n", + "正在爬取第 28/93 页...\n", + "正在爬取第 29/93 页...\n", + "正在爬取第 30/93 页...\n", + "正在爬取第 31/93 页...\n", + "正在爬取第 32/93 页...\n", + "正在爬取第 33/93 页...\n", + "正在爬取第 34/93 页...\n", + "正在爬取第 35/93 页...\n", + "正在爬取第 36/93 页...\n", + "正在爬取第 37/93 页...\n", + "正在爬取第 38/93 页...\n", + "正在爬取第 39/93 页...\n", + "正在爬取第 40/93 页...\n", + "正在爬取第 41/93 页...\n", + "正在爬取第 42/93 页...\n", + "正在爬取第 43/93 页...\n", + "正在爬取第 44/93 页...\n", + "正在爬取第 45/93 页...\n", + "正在爬取第 46/93 页...\n", + "正在爬取第 47/93 页...\n", + "正在爬取第 48/93 页...\n", + "正在爬取第 49/93 页...\n", + "正在爬取第 50/93 页...\n", + "正在爬取第 51/93 页...\n", + "正在爬取第 52/93 页...\n", + "正在爬取第 53/93 页...\n", + "正在爬取第 54/93 页...\n", + "正在爬取第 55/93 页...\n", + "正在爬取第 56/93 页...\n", + "正在爬取第 57/93 页...\n", + "正在爬取第 58/93 页...\n", + "正在爬取第 59/93 页...\n", + "正在爬取第 60/93 页...\n", + "正在爬取第 61/93 页...\n", + "正在爬取第 62/93 页...\n", + "正在爬取第 63/93 页...\n", + "正在爬取第 64/93 页...\n", + "正在爬取第 65/93 页...\n", + "正在爬取第 66/93 页...\n", + "正在爬取第 67/93 页...\n", + "正在爬取第 68/93 页...\n", + "正在爬取第 69/93 页...\n", + "正在爬取第 70/93 页...\n", + "正在爬取第 71/93 页...\n", + "正在爬取第 72/93 页...\n", + "正在爬取第 73/93 页...\n", + "正在爬取第 74/93 页...\n", + "正在爬取第 75/93 页...\n", + "正在爬取第 76/93 页...\n", + "正在爬取第 77/93 页...\n", + "正在爬取第 78/93 页...\n", + "正在爬取第 79/93 页...\n", + "正在爬取第 80/93 页...\n", + "正在爬取第 81/93 页...\n", + "正在爬取第 82/93 页...\n", + "正在爬取第 83/93 页...\n", + "正在爬取第 84/93 页...\n", + "正在爬取第 85/93 页...\n", + "正在爬取第 86/93 页...\n", + "正在爬取第 87/93 页...\n", + "正在爬取第 88/93 页...\n", + "正在爬取第 89/93 页...\n", + "正在爬取第 90/93 页...\n", + "正在爬取第 91/93 页...\n", + "正在爬取第 92/93 页...\n", + "正在爬取第 93/93 页...\n", + "\n", + "🔍 去重完成:原始 1846 条 → 去重后 1823 条\n", + "==================================================\n", + "✅ 爬取 + 去重 完成!\n", + "📊 最终有效条数:1823\n", + "📁 已保存到桌面:库存数据_去重版.xlsx\n", + "==================================================\n" + ] + } + ], + "source": [ + "import requests\n", + "import pandas as pd\n", + "from bs4 import BeautifulSoup\n", + "import os\n", + "from urllib.parse import urlencode\n", + "import re\n", + "\n", + "# ===================== 【配置区】 =====================\n", + "COOKIE = ('LOGIN_URL=https%3A%2F%2Fscrm.h1cd.com%2Flogin.html; showSmsActivity=1; showEasyMoney=1; '\n", + " 'adminpd=IMAkQA9qfl0V0bY6hPCcbYdz3rcy0MG2%2FbHz34%2BOWy0%3D; adminun=15529803908; uid=10042; '\n", + " 'PHPSESSID=t1fg29l2b29j3nebq4o52tf0o7')\n", + "\n", + "BASE_PARAMS = {\n", + " 'storeId': '12521',\n", + " 'house_id': '8484',\n", + " 'repositoryName': '',\n", + " 'first_type': '',\n", + " 'product_type': '',\n", + " 'status': '',\n", + " 'searchType': '1',\n", + " 'product_name': ''\n", + "}\n", + "\n", + "# 关键修复:分页格式是 stores-search__.html\n", + "BASE_URL = \"https://scrm.h1cd.com/admin/billings/stores-search__{}.html\"\n", + "# ======================================================\n", + "\n", + "HEADERS = {\n", + " 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',\n", + " 'Accept-Language': 'zh-CN,zh;q=0.9',\n", + " 'Connection': 'keep-alive',\n", + " 'Cookie': COOKIE,\n", + " 'Referer': 'https://scrm.h1cd.com/admin/billings/stores-search.html',\n", + " 'Sec-Fetch-Dest': 'iframe',\n", + " 'Sec-Fetch-Mode': 'navigate',\n", + " 'Sec-Fetch-Site': 'same-origin',\n", + " 'Sec-Fetch-User': '?1',\n", + " 'Upgrade-Insecure-Requests': '1',\n", + " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',\n", + " 'sec-ch-ua': '\"Not(A:Brand\";v=\"99\", \"Google Chrome\";v=\"133\", \"Chromium\";v=\"133\"',\n", + " 'sec-ch-ua-mobile': '?0',\n", + " 'sec-ch-ua-platform': '\"Windows\"',\n", + "}\n", + "\n", + "\n", + "def get_page_html(page_num):\n", + " \"\"\"获取单页HTML\"\"\"\n", + " try:\n", + " if page_num == 1:\n", + " url = \"https://scrm.h1cd.com/admin/billings/stores-search.html\"\n", + " else:\n", + " url = BASE_URL.format(page_num)\n", + " \n", + " res = requests.get(url, headers=HEADERS, params=BASE_PARAMS, timeout=10)\n", + " res.raise_for_status()\n", + " return res.text\n", + " except Exception as e:\n", + " print(f\"第{page_num}页请求失败:{e}\")\n", + " return None\n", + "\n", + "\n", + "def parse_table(html):\n", + " \"\"\"解析表格数据\"\"\"\n", + " soup = BeautifulSoup(html, 'html.parser')\n", + " table = soup.find('table', class_='table-hover')\n", + " if not table:\n", + " return []\n", + "\n", + " rows = []\n", + " # 表头\n", + " headers = [th.get_text(strip=True) for th in table.select('thead th')]\n", + " rows.append(headers)\n", + "\n", + " # 表体\n", + " for tr in table.select('tbody tr'):\n", + " tds = tr.find_all('td')\n", + " row = [td.get_text(strip=True) for td in tds]\n", + " if any(row):\n", + " rows.append(row)\n", + " return rows\n", + "\n", + "\n", + "def get_max_page():\n", + " \"\"\"【已修复】从页面提取最大页数:共 1846 条记录,页 1/93\"\"\"\n", + " html = get_page_html(1)\n", + " if not html:\n", + " return 1\n", + "\n", + " soup = BeautifulSoup(html, 'html.parser')\n", + " page_info = soup.find('div', class_='dataTables_paginate')\n", + " if not page_info:\n", + " return 1\n", + "\n", + " text = page_info.get_text()\n", + " match = re.search(r'页\\s*1/(\\d+)', text) # 匹配 页 1/93\n", + " if match:\n", + " return int(match.group(1))\n", + " return 1\n", + "\n", + "\n", + "def main():\n", + " print(\"=\" * 50)\n", + " print(\"开始爬取库存数据...\")\n", + " print(\"=\" * 50)\n", + "\n", + " max_page = get_max_page() * 2\n", + " print(f\"✅ 成功获取最大页数:{max_page}\")\n", + "\n", + " all_data = []\n", + " for page in range(1, max_page + 1):\n", + " print(f\"正在爬取第 {page}/{max_page} 页...\")\n", + " html = get_page_html(page)\n", + " if not html:\n", + " continue\n", + "\n", + " rows = parse_table(html)\n", + " if page == 1:\n", + " all_data.extend(rows)\n", + " else:\n", + " all_data.extend(rows[1:])\n", + "\n", + " # 保存到桌面\n", + " desktop = os.path.join(os.path.expanduser(\"~\"), \"Desktop\")\n", + " \n", + " # ===================== 核心修改:按配件编码去重 =====================\n", + " df = pd.DataFrame(all_data[1:], columns=all_data[0])\n", + " \n", + " # 按【配件编码】列去重,保留第一条数据\n", + " if '配件编码' in df.columns:\n", + " total_before = len(df)\n", + " df = df.drop_duplicates(subset=['配件编码'], keep='first')\n", + " total_after = len(df)\n", + " print(f\"\\n🔍 去重完成:原始 {total_before} 条 → 去重后 {total_after} 条\")\n", + " else:\n", + " print(\"\\n⚠️ 未找到【配件编码】列,跳过去重\")\n", + " # ==================================================================\n", + "\n", + " path = os.path.join(desktop, \"库存数据_去重版.xlsx\")\n", + " df.to_excel(path, index=False)\n", + "\n", + " print(\"=\" * 50)\n", + " print(f\"✅ 爬取 + 去重 完成!\")\n", + " print(f\"📊 最终有效条数:{len(df)}\")\n", + " print(f\"📁 已保存到桌面:库存数据_去重版.xlsx\")\n", + " print(\"=\" * 50)\n", + "\n", + "\n", + "if __name__ == '__main__':\n", + " main()" + ] + }, + { + "cell_type": "markdown", + "id": "4b11e6fa", + "metadata": {}, + "source": [ + "# 历史维修记录" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "667edbdc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🔧 开始导出(100%匹配页面结构)...\n", + "正在读取第 1/1 页\n", + "\n", + "✅ 导出完成!桌面文件:维修记录_完美导出版.xlsx\n" + ] + } + ], + "source": [ + "import requests\n", + "import pandas as pd\n", + "from bs4 import BeautifulSoup\n", + "import os\n", + "import re\n", + "\n", + "# ===================== 固定配置 =====================\n", + "BASE_URL_1 = \"https://scrm.h1cd.com/admin/reports/orderCostDetail.html\"\n", + "BASE_URL_N = \"https://scrm.h1cd.com/admin/reports/orderCostDetail_{}.html\"\n", + "DESKTOP = os.path.join(os.path.expanduser(\"~\"), \"Desktop\")\n", + "OUTPUT_FILE = os.path.join(DESKTOP, \"维修记录_完美导出版.xlsx\")\n", + "\n", + "HEADERS = {\n", + " 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',\n", + " 'Accept-Language': 'zh-CN,zh;q=0.9',\n", + " 'Connection': 'keep-alive',\n", + " 'Referer': 'https://scrm.h1cd.com/admin/reports/orderCostDetail.html?storeId=0&cost_time_type=1&timeStart=2021-03-01&timeEnd=&type=&search=&_action=',\n", + " 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',\n", + "}\n", + "\n", + "COOKIES = {\n", + " 'LOGIN_URL': 'https%3A%2F%2Fscrm.h1cd.com%2Flogin.html',\n", + " 'showSmsActivity': '1',\n", + " 'showEasyMoney': '1',\n", + " 'adminpd': 'IMAkQA9qfl0V0bY6hPCcbYdz3rcy0MG2%2FbHz34%2BOWy0%3D',\n", + " 'adminun': '15529803908',\n", + " 'uid': '10042',\n", + " 'PHPSESSID': 't1fg29l2b29j3nebq4o52tf0o7'\n", + "}\n", + "\n", + "PARAMS = {\n", + " 'storeId': '0',\n", + " 'cost_time_type': '1',\n", + " 'timeStart': '2021-03-01',\n", + " 'timeEnd': '',\n", + " 'type': '',\n", + " 'search': '',\n", + " '_action': ''\n", + "}\n", + "\n", + "# =====================================================\n", + "\n", + "def get_html(page):\n", + " try:\n", + " if page == 1:\n", + " r = requests.get(BASE_URL_1, headers=HEADERS, cookies=COOKIES, params=PARAMS, timeout=10)\n", + " else:\n", + " r = requests.get(BASE_URL_N.format(page), headers=HEADERS, cookies=COOKIES, params=PARAMS, timeout=10)\n", + " r.raise_for_status()\n", + " return r.text\n", + " except:\n", + " return None\n", + "\n", + "def parse_real_table(html):\n", + " \"\"\"\n", + " 终极解析:严格按照你系统的合并单元格结构逐行还原\n", + " 主行:18列完整\n", + " 子行:从第4列开始,前面自动继承\n", + " \"\"\"\n", + " soup = BeautifulSoup(html, 'html.parser')\n", + " tables = soup.find_all('table', class_='table-hover')\n", + " if not tables:\n", + " return []\n", + " table = tables[0]\n", + " trs = table.find_all('tr')\n", + "\n", + " # 固定表头(和网页100%一致)\n", + " headers = [\n", + " '序号', '工单号', '车牌', '车主信息', '所属分类', '工单内容',\n", + " '金额', '折扣', '折后金额', '成本', '利润', '工单金额',\n", + " '优惠金额', '成本总计', '工单毛利', '毛利率', '结算时间', '接车人'\n", + " ]\n", + "\n", + " result = []\n", + " current_no = ''\n", + " current_order = ''\n", + " current_car = ''\n", + " current_user = ''\n", + "\n", + " for tr in trs:\n", + " tds = tr.find_all(['td', 'th'])\n", + " if not tds:\n", + " continue\n", + "\n", + " row = [td.get_text(strip=True) for td in tds]\n", + "\n", + " # ===================== 核心逻辑 =====================\n", + " # 这一行是【主行】:有 序号、工单号、车牌、车主\n", + " if len(tds) >= 15:\n", + " current_no = row[0]\n", + " current_order = row[1]\n", + " current_car = row[2]\n", + " current_user = row[3]\n", + " # 完整行\n", + " new_row = row[:18]\n", + "\n", + " # 这一行是【子行】:只有 分类~利润 共7列\n", + " else:\n", + " new_row = [\n", + " current_no, current_order, current_car, current_user,\n", + " row[0], row[1], row[2], row[3], row[4], row[5], row[6],\n", + " '', '', '', '', '', '', ''\n", + " ]\n", + " # ====================================================\n", + "\n", + " # 补齐18列\n", + " while len(new_row) < 18:\n", + " new_row.append('')\n", + "\n", + " result.append(new_row[:18])\n", + "\n", + " return [headers] + result\n", + "\n", + "def get_total_pages():\n", + " html = get_html(1)\n", + " if not html:\n", + " return 1\n", + " match = re.search(r'页\\s*1/(\\d+)', html)\n", + " if match:\n", + " return int(match.group(1))\n", + " return 1\n", + "\n", + "def main():\n", + " print(\"🔧 开始导出(100%匹配页面结构)...\")\n", + " total = get_total_pages()\n", + " all_data = []\n", + " header_added = False\n", + "\n", + " for p in range(1, total + 1):\n", + " print(f\"正在读取第 {p}/{total} 页\")\n", + " html = get_html(p)\n", + " if not html:\n", + " continue\n", + "\n", + " rows = parse_real_table(html)\n", + " if not rows:\n", + " continue\n", + "\n", + " if not header_added:\n", + " all_data.extend(rows)\n", + " header_added = True\n", + " else:\n", + " all_data.extend(rows[1:])\n", + "\n", + " if not all_data:\n", + " print(\"❌ 未获取到数据,请检查Cookie是否过期\")\n", + " return\n", + "\n", + " df = pd.DataFrame(all_data[1:], columns=all_data[0])\n", + " df.to_excel(OUTPUT_FILE, index=False)\n", + " print(f\"\\n✅ 导出完成!桌面文件:维修记录_完美导出版.xlsx\")\n", + "\n", + "if __name__ == '__main__':\n", + " main()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/张阳脚本/简道云/智能助手修复.ipynb b/张阳脚本/简道云/智能助手修复.ipynb new file mode 100644 index 0000000..54f657b --- /dev/null +++ b/张阳脚本/简道云/智能助手修复.ipynb @@ -0,0 +1,37 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/张阳脚本/简道云/智能助手修复.py b/张阳脚本/简道云/智能助手修复.py new file mode 100644 index 0000000..7c68785 --- /dev/null +++ b/张阳脚本/简道云/智能助手修复.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- \ No newline at end of file diff --git a/张阳脚本/钉钉群消息获取.ipynb b/张阳脚本/钉钉群消息获取.ipynb new file mode 100644 index 0000000..54f657b --- /dev/null +++ b/张阳脚本/钉钉群消息获取.ipynb @@ -0,0 +1,37 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}