4.18备份

2026-04-18 09:22:23 +08:00
parent cc445c405a
commit cc4148b67d
20 changed files with 13949 additions and 543 deletions
@@ -0,0 +1,281 @@
+import argparse
+import csv
+import json
+import os
+import re
+import time
+from html import unescape
+from typing import Dict, Iterable, List, Optional, Sequence, Tuple
+from urllib.parse import urlencode
+
+import requests
+
+BASE_URL = "https://scrm.h1cd.com"  # scheme + host that every cards URL is built from
+
+DEFAULT_COOKIES = {  # SECURITY(review): session credentials are hard-coded in source; prefer H1_COOKIES_JSON and rotate these
+    "showSmsActivity": "1",
+    "showEasyMoney": "1",
+    "LOGIN_URL": "https%3A%2F%2Fscrm.h1cd.com%2Flogin-h1cd.html",
+    "adminpd": "jVISiRrtcJplFhLoCuUIxK9XG5ekdfwzq%2B0y482ZKxE%3D",
+    "adminun": "15224781773",
+    "uid": "10291",
+    "PHPSESSID": "nbn58laakng0rv5iqln82a6qpu",
+}
+
+DEFAULT_HEADERS = {  # base request headers; _fetch_html adds a per-page Referer on top
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
+    "Connection": "keep-alive",
+    "Sec-Fetch-Dest": "iframe",
+    "Sec-Fetch-Mode": "navigate",
+    "Sec-Fetch-Site": "same-origin",
+    "Sec-Fetch-User": "?1",
+    "Upgrade-Insecure-Requests": "1",
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36 Edg/146.0.0.0",
+    "sec-ch-ua": '"Chromium";v="146", "Not-A.Brand";v="24", "Microsoft Edge";v="146"',
+    "sec-ch-ua-mobile": "?0",
+    "sec-ch-ua-platform": '"Windows"',
+}
+
+DEFAULT_PARAMS = {  # default query-string filters; main() overrides each one from the CLI
+    "type": "",
+    "expired": "",
+    "storeId": "0",
+    "search": "",
+}
+
+_TABLE_RE = re.compile(r"<table\b[^>]*>.*?</table>", re.IGNORECASE | re.DOTALL)  # one whole <table> element, shortest match
+_TR_RE = re.compile(r"<tr\b[^>]*>.*?</tr>", re.IGNORECASE | re.DOTALL)  # one whole <tr> row
+_CELL_RE = re.compile(r"<t[hd]\b[^>]*>(.*?)</t[hd]>", re.IGNORECASE | re.DOTALL)  # group 1 = inner HTML of a <th>/<td> cell
+_TAG_RE = re.compile(r"<[^>]+>", re.DOTALL)  # any markup tag
+_BR_RE = re.compile(r"<\s*br\s*/?\s*>", re.IGNORECASE)  # <br>, <br/>, <br /> variants
+_NBSP_RE = re.compile(r"(\xa0|&nbsp;)+", re.IGNORECASE)  # runs of U+00A0 or literal "&nbsp;"
+
+
+def _clean_html_text(raw: str) -> str:
+    raw = _BR_RE.sub("\n", raw)
+    raw = _TAG_RE.sub("", raw)
+    raw = unescape(raw)
+    raw = _NBSP_RE.sub(" ", raw)
+    raw = raw.replace("\r", "").strip()
+    raw = re.sub(r"[ \t]+\n", "\n", raw)
+    raw = re.sub(r"\n{3,}", "\n\n", raw)
+    raw = re.sub(r"[ \t]{2,}", " ", raw)
+    return raw.strip()
+
+
+def build_cards_url(page: int) -> str:
+    if page <= 1:
+        return f"{BASE_URL}/admin/members/cards.html"
+    return f"{BASE_URL}/admin/members/cards_{page}.html"
+
+
+def _load_cookies() -> Dict[str, str]:
+    env_json = os.environ.get("H1_COOKIES_JSON")
+    if env_json:
+        loaded = json.loads(env_json)
+        if not isinstance(loaded, dict):
+            raise ValueError("H1_COOKIES_JSON must be a JSON object")
+        return {str(k): str(v) for k, v in loaded.items()}
+    return dict(DEFAULT_COOKIES)
+
+
+def _fetch_html(
+    session: requests.Session,
+    page: int,
+    params: Dict[str, str],
+    base_headers: Dict[str, str],
+    timeout_seconds: int = 30,
+) -> str:
+    url = build_cards_url(page)
+    headers = dict(base_headers)
+
+    if page >= 2:
+        referer_params = dict(params)
+        headers["Referer"] = f"{build_cards_url(page - 1)}?{urlencode(referer_params, doseq=True)}"
+    else:
+        referer_params = dict(params)
+        headers["Referer"] = f"{build_cards_url(1)}?{urlencode(referer_params, doseq=True)}"
+
+    resp = session.get(url, params=params, headers=headers, timeout=timeout_seconds)
+    resp.raise_for_status()
+    if not resp.encoding:
+        resp.encoding = "utf-8"
+    return resp.text
+
+
+def _parse_table(table_html: str) -> Tuple[List[str], List[List[str]]]:
+    header: List[str] = []
+    data_rows: List[List[str]] = []
+
+    for tr_match in _TR_RE.finditer(table_html):
+        tr_html = tr_match.group(0)
+        cell_html_list = _CELL_RE.findall(tr_html)
+        if not cell_html_list:
+            continue
+
+        cells = [_clean_html_text(c) for c in cell_html_list]
+        if not any(cells):
+            continue
+
+        is_header = bool(re.search(r"<th\b", tr_html, re.IGNORECASE))
+        if is_header and not header:
+            header = cells
+        else:
+            data_rows.append(cells)
+
+    if not data_rows:
+        return header, []
+
+    if not header:
+        max_cols = max(len(r) for r in data_rows)
+        header = [f"col_{i + 1}" for i in range(max_cols)]
+
+    width = len(header)
+    normalized_rows: List[List[str]] = []
+    for row in data_rows:
+        if len(row) < width:
+            row = row + [""] * (width - len(row))
+        elif len(row) > width:
+            row = row[:width]
+        normalized_rows.append(row)
+
+    return header, normalized_rows
+
+
+def parse_cards_page(html_text: str) -> Tuple[List[str], List[Dict[str, str]]]:
+    tables = _TABLE_RE.findall(html_text)
+    best_header: List[str] = []
+    best_rows: List[List[str]] = []
+
+    for table_html in tables:
+        header, rows = _parse_table(table_html)
+        if len(header) <= 1:
+            continue
+        if len(rows) > len(best_rows):
+            best_header, best_rows = header, rows
+
+    if not best_rows:
+        return best_header, []
+
+    records = [dict(zip(best_header, row)) for row in best_rows]
+    return best_header, records
+
+
+def _merge_headers(existing: List[str], incoming: Sequence[str]) -> List[str]:
+    seen = set(existing)
+    merged = list(existing)
+    for col in incoming:
+        if col not in seen:
+            merged.append(col)
+            seen.add(col)
+    return merged
+
+
+def export_all_cards(
+    output_csv_path: str,
+    params: Optional[Dict[str, str]] = None,
+    headers: Optional[Dict[str, str]] = None,
+    max_pages: int = 200,
+    sleep_seconds: float = 0.3,
+) -> Tuple[int, int]:
+    cookies = _load_cookies()
+    params = dict(DEFAULT_PARAMS if params is None else params)
+    headers = dict(DEFAULT_HEADERS if headers is None else headers)
+
+    session = requests.Session()
+    session.cookies.update(cookies)
+
+    all_records: List[Dict[str, str]] = []
+    merged_header: List[str] = []
+    seen_keys: set[Tuple[str, ...]] = set()
+
+    pages_fetched = 0
+    for page in range(1, max_pages + 1):
+        html_text = _fetch_html(session=session, page=page, params=params, base_headers=headers)
+        page_header, page_records = parse_cards_page(html_text)
+        pages_fetched += 1
+
+        if not page_records:
+            break
+
+        merged_header = _merge_headers(merged_header, page_header)
+
+        for rec in page_records:
+            key = tuple(rec.get(col, "") for col in page_header)
+            if key in seen_keys:
+                continue
+            seen_keys.add(key)
+            all_records.append(rec)
+
+        if sleep_seconds > 0:
+            time.sleep(sleep_seconds)
+
+    if not all_records:
+        raise RuntimeError("未解析到任何表格数据（可能是登录失效/页面结构变化/被重定向到登录页）")
+
+    if not merged_header:
+        merged_header = sorted({k for r in all_records for k in r.keys()})
+
+    os.makedirs(os.path.dirname(os.path.abspath(output_csv_path)) or ".", exist_ok=True)
+    with open(output_csv_path, "w", encoding="utf-8-sig", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=merged_header, extrasaction="ignore")
+        writer.writeheader()
+        for rec in all_records:
+            writer.writerow({k: rec.get(k, "") for k in merged_header})
+
+    return pages_fetched, len(all_records)
+
+
+def _self_test() -> None:
+    html_text = """
+    <html><body>
+      <table>
+        <tr><th>会员名</th><th>卡号</th><th>余额</th></tr>
+        <tr><td>张三</td><td>NO001</td><td>100</td></tr>
+        <tr><td>李四</td><td>NO002</td><td>200</td></tr>
+      </table>
+    </body></html>
+    """
+    header, records = parse_cards_page(html_text)
+    assert header == ["会员名", "卡号", "余额"]
+    assert records[0]["卡号"] == "NO001"
+    assert records[1]["余额"] == "200"
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--output", default="H1会员卡.csv")
+    parser.add_argument("--storeId", default=DEFAULT_PARAMS["storeId"])
+    parser.add_argument("--search", default=DEFAULT_PARAMS["search"])
+    parser.add_argument("--type", default=DEFAULT_PARAMS["type"])
+    parser.add_argument("--expired", default=DEFAULT_PARAMS["expired"])
+    parser.add_argument("--max-pages", type=int, default=200)
+    parser.add_argument("--sleep", type=float, default=0.3)
+    parser.add_argument("--self-test", action="store_true")
+    args = parser.parse_args(argv)
+
+    if args.self_test:
+        _self_test()
+        print("self-test ok")
+        return 0
+
+    params = {
+        "type": str(args.type),
+        "expired": str(args.expired),
+        "storeId": str(args.storeId),
+        "search": str(args.search),
+    }
+
+    pages_fetched, rows = export_all_cards(
+        output_csv_path=args.output,
+        params=params,
+        max_pages=args.max_pages,
+        sleep_seconds=args.sleep,
+    )
+    print(f"导出完成: pages={pages_fetched}, rows={rows}, output={os.path.abspath(args.output)}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,424 @@
+"""
+H1车店系统 - 会员卡信息导出
+从 https://scrm.h1cd.com/admin/members/cards.html 导出会员卡信息
+注意：脚本解析HTML表格，导出的原始数据格式不规范，需要清洗处理
+"""
+
+import requests
+import pandas as pd
+from bs4 import BeautifulSoup
+import os
+import re
+import time
+import json
+from datetime import datetime
+
+# ===================== [Configuration] =====================
+# Cookies (update these to match your own logged-in session)
+COOKIES = {  # SECURITY(review): session credentials are hard-coded in source; load them from outside the file and rotate these
+    'showSmsActivity': '1',
+    'showEasyMoney': '1',
+    'LOGIN_URL': 'https%3A%2F%2Fscrm.h1cd.com%2Flogin-h1cd.html',
+    'adminpd': 'jVISiRrtcJplFhLoCuUIxK9XG5ekdfwzq%2B0y482ZKxE%3D',
+    'adminun': '15224781773',
+    'uid': '10291',
+    'PHPSESSID': 'nbn58laakng0rv5iqln82a6qpu',
+}
+
+HEADERS = {  # request headers sent with every page fetch (fixed Referer = first listing page)
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+    'Connection': 'keep-alive',
+    'Referer': 'https://scrm.h1cd.com/admin/members/cards.html',
+    'Sec-Fetch-Dest': 'iframe',
+    'Sec-Fetch-Mode': 'navigate',
+    'Sec-Fetch-Site': 'same-origin',
+    'Sec-Fetch-User': '?1',
+    'Upgrade-Insecure-Requests': '1',
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36 Edg/146.0.0.0',
+    'sec-ch-ua': '"Chromium";v="146", "Not-A.Brand";v="24", "Microsoft Edge";v="146"',
+    'sec-ch-ua-mobile': '?0',
+    'sec-ch-ua-platform': '"Windows"',
+}
+
+# Query-string parameters sent with every listing request
+PARAMS = {
+    'type': '',
+    'expired': '',
+    'storeId': '0',
+    'search': '',
+}
+
+# Output directory (created on demand by main())
+OUTPUT_DIR = r"D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\文件输出"  # NOTE(review): machine-specific absolute Windows path
+
+# =====================================================
+
+
+def get_page_html(page_num, cookies, params):
+    """获取指定页面的HTML内容"""
+    try:
+        if page_num == 1:
+            url = "https://scrm.h1cd.com/admin/members/cards.html"
+        else:
+            url = f"https://scrm.h1cd.com/admin/members/cards_{page_num}.html"
+
+        r = requests.get(url, headers=HEADERS, cookies=cookies, params=params, timeout=30)
+
+        # 检查是否被重定向到登录页
+        if 'login' in r.url.lower() or '登录' in r.text[:2000]:
+            print(f"   ⚠️ 第{page_num}页检测到跳转登录，Cookie可能已失效。")
+            return None
+
+        r.raise_for_status()
+        r.encoding = 'utf-8'
+        return r.text
+    except Exception as e:
+        print(f"   ❌ 第{page_num}页请求失败: {str(e)}")
+        return None
+
+
+def parse_cards_table(html):
+    """
+    解析会员卡HTML表格，提取数据并做规范化处理。
+    
+    H1系统会员卡页面特点：
+    - 部分单元格包含多行信息（用<br>分隔），如姓名和手机号在同一格
+    - 状态信息可能包含多余文本
+    - 数值字段可能包含非数字字符
+    - 操作列包含按钮文本需要过滤
+    """
+    soup = BeautifulSoup(html, 'html.parser')
+    table = soup.find('table', class_='table')
+    if not table:
+        table = soup.find('table')
+    if not table:
+        return [], []
+
+    # 提取表头
+    header = []
+    thead = table.find('thead')
+    if thead:
+        ths = thead.find_all('th')
+        header = [th.get_text(strip=True) for th in ths]
+
+    # 如果没有 thead，尝试从第一行 tr 中获取
+    if not header:
+        first_tr = table.find('tr')
+        if first_tr:
+            ths = first_tr.find_all('th')
+            if ths:
+                header = [th.get_text(strip=True) for th in ths]
+
+    # 提取数据行
+    tbody = table.find('tbody')
+    rows = tbody.find_all('tr') if tbody else table.find_all('tr')
+
+    data_rows = []
+    for tr in rows:
+        # 跳过表头行
+        if tr.find('th'):
+            continue
+        tds = tr.find_all('td')
+        if not tds or len(tds) < 3:
+            continue
+
+        row_data = []
+        for td in tds:
+            # 保留<br>产生的换行，使用separator分隔
+            text = td.get_text(separator='|', strip=True)
+            # 清理多余空格
+            text = re.sub(r'\s+', ' ', text)
+            row_data.append(text.strip())
+
+        if any(row_data):
+            data_rows.append(row_data)
+
+    return header, data_rows
+
+
+def clean_card_record(row_dict, header):
+    """
+    清洗单条会员卡记录，处理不规范的数据格式。
+    
+    主要处理：
+    1. 姓名+手机号合并在一个字段中 → 拆分为独立的"客户名称"和"手机号"
+    2. 状态字段中的多余文本
+    3. 数值字段中的非数字字符
+    4. 操作列中的按钮文本
+    """
+    cleaned = {}
+
+    # 定义可能的列名映射（H1系统表头可能包含的关键字）
+    col_mappings = {
+        'name_col': ['会员名', '姓名', '会员名称', '客户', '车主'],
+        'phone_col': ['手机', '电话', '联系电话'],
+        'card_no_col': ['卡号', '会员卡号', '卡编号'],
+        'card_type_col': ['卡类型', '卡名称', '类型'],
+        'balance_col': ['余额', '储值余额', '可用余额'],
+        'total_recharge_col': ['充值', '累计充值', '总充值', '充值金额'],
+        'total_consume_col': ['消费', '累计消费', '总消费', '消费金额'],
+        'status_col': ['状态', '卡状态'],
+        'create_time_col': ['开卡时间', '创建时间', '注册时间'],
+        'expire_time_col': ['到期时间', '有效期', '过期时间'],
+        'store_col': ['门店', '所属门店', '门店名称'],
+        'level_col': ['等级', '会员等级', '会员级别'],
+    }
+
+    # 查找列索引
+    col_index = {}
+    for key, keywords in col_mappings.items():
+        for kw in keywords:
+            for i, h in enumerate(header):
+                if kw in h:
+                    col_index[key] = i
+                    break
+            if key in col_index:
+                break
+
+    # 逐列清洗
+    for i, h in enumerate(header):
+        value = row_dict.get(h, '') if isinstance(row_dict, dict) else (row_dict[i] if i < len(row_dict) else '')
+
+        # 处理操作列（通常在最后一列，包含"充值记录"、"消费记录"等按钮文本）
+        if '操作' in h:
+            cleaned[h] = ''
+            continue
+
+        # 处理复选框列
+        if '选择' in h or '勾选' in h:
+            cleaned[h] = ''
+            continue
+
+        # 处理姓名+手机号合并的情况
+        if i == col_index.get('name_col'):
+            name, phone = '', ''
+            if '|' in value:
+                parts = [p.strip() for p in value.split('|')]
+                for part in parts:
+                    phone_match = re.search(r'1[3-9]\d{9}', part)
+                    if phone_match:
+                        phone = phone_match.group()
+                    elif part and not re.match(r'^\d{11}$', part):
+                        name = part if not name else name + part
+                    elif re.match(r'^\d{11}$', part):
+                        phone = part
+            else:
+                phone_match = re.search(r'1[3-9]\d{9}', value)
+                if phone_match:
+                    phone = phone_match.group()
+                    name = value.replace(phone, '').strip()
+                else:
+                    name = value.strip()
+
+            cleaned['客户名称'] = name
+            cleaned['手机号'] = phone
+            continue
+
+        # 处理手机号列（独立列）
+        if i == col_index.get('phone_col'):
+            phone_match = re.search(r'1[3-9]\d{9}', value)
+            cleaned['手机号'] = phone_match.group() if phone_match else value
+            continue
+
+        # 处理数值列（去掉非数字字符，保留小数点）
+        if i == col_index.get('balance_col') or i == col_index.get('total_recharge_col') or i == col_index.get('total_consume_col'):
+            num_match = re.search(r'[\d.]+', value.replace(',', ''))
+            cleaned[h] = num_match.group() if num_match else value
+            continue
+
+        # 清理其他字段中的多余空白和分隔符
+        clean_val = value.replace('|', ' ').strip()
+        clean_val = re.sub(r'\s+', ' ', clean_val)
+        # 去除 "查看详情"、"编辑" 等按钮文本
+        clean_val = re.sub(r'(查看详情|编辑|删除|充值记录|消费记录|详情)', '', clean_val).strip()
+        cleaned[h] = clean_val
+
+    return cleaned
+
+
+def normalize_dataframe(df):
+    """
+    对整个DataFrame进行规范化处理。
+    处理各种数据不规范的情况。
+    """
+    # 去除完全重复的行
+    before_count = len(df)
+    df = df.drop_duplicates()
+    after_count = len(df)
+    if before_count != after_count:
+        print(f"   🔍 去重：{before_count} 条 → {after_count} 条（去除 {before_count - after_count} 条重复）")
+
+    # 尝试拆分合并列（如"姓名|手机号"）
+    for col in df.columns:
+        # 检测该列是否包含手机号（超过30%的值匹配手机号模式）
+        phone_ratio = df[col].astype(str).apply(lambda x: bool(re.search(r'1[3-9]\d{9}', x))).mean()
+        name_ratio = df[col].astype(str).apply(lambda x: bool(re.search(r'[\u4e00-\u9fa5]{2,4}', x))).mean()
+
+        if phone_ratio > 0.3 and name_ratio > 0.3 and '名称' in col:
+            # 该列同时包含姓名和手机号，需要拆分
+            if '客户名称' not in df.columns:
+                df['客户名称'] = df[col].apply(
+                    lambda x: re.sub(r'1[3-9]\d{9}', '', str(x)).replace('|', '').strip()
+                )
+            if '手机号' not in df.columns:
+                df['手机号'] = df[col].apply(
+                    lambda x: (re.search(r'1[3-9]\d{9}', str(x)) or type('', (), {'group': lambda s: ''})()).group()
+                )
+
+    # 清理数值列
+    for col in df.columns:
+        if any(kw in col for kw in ['余额', '充值', '消费', '金额']):
+            df[col] = df[col].astype(str).apply(
+                lambda x: re.search(r'[\d.]+', x.replace(',', '')).group() if re.search(r'[\d.]+', x.replace(',', '')) else x
+            )
+
+    # 清理操作列
+    for col in df.columns:
+        if '操作' in col or '选择' in col or '勾选' in col:
+            df = df.drop(columns=[col])
+
+    # 清理所有列中的按钮文本残留
+    for col in df.columns:
+        df[col] = df[col].astype(str).apply(
+            lambda x: re.sub(r'(查看详情|编辑|删除|充值记录|消费记录|详情|迁移)', '', str(x)).strip()
+        )
+        # 替换 'nan' 为空字符串
+        df[col] = df[col].replace('nan', '')
+        df[col] = df[col].replace('None', '')
+
+    return df
+
+
+def get_max_page(html):
+    """从页面中提取最大页数"""
+    if not html:
+        return 1
+
+    soup = BeautifulSoup(html, 'html.parser')
+    text = soup.get_text()
+
+    # 尝试匹配 "共X页" 格式
+    match = re.search(r'共\s*(\d+)\s*页', text)
+    if match:
+        return int(match.group(1))
+
+    # 尝试匹配 "页 1/X" 格式
+    match = re.search(r'页\s*1/(\d+)', text)
+    if match:
+        return int(match.group(1))
+
+    # 尝试匹配分页链接
+    page_links = soup.find_all('a', href=re.compile(r'cards_\d+\.html'))
+    if page_links:
+        max_page = 1
+        for a in page_links:
+            num_match = re.search(r'cards_(\d+)\.html', a.get('href', ''))
+            if num_match:
+                max_page = max(max_page, int(num_match.group(1)))
+        return max_page
+
+    return 1
+
+
+def main():
+    print("=" * 50)
+    print("开始爬取 H1系统 会员卡信息...")
+    print(f"当前 StoreID: {PARAMS['storeId']}")
+    print("=" * 50)
+
+    # 获取第一页，确定总页数
+    print("正在获取总页数...")
+    first_html = get_page_html(1, COOKIES, PARAMS)
+    if not first_html:
+        print("❌ 无法获取第一页数据，请检查 Cookie 或网络。")
+        return
+
+    max_page = get_max_page(first_html)
+    print(f"✅ 成功获取最大页数：{max_page}")
+
+    # 爬取所有页面
+    all_data = []
+    merged_header = []
+
+    for page in range(1, max_page + 1):
+        print(f"正在爬取第 {page}/{max_page} 页...")
+
+        if page == 1:
+            html = first_html
+        else:
+            html = get_page_html(page, COOKIES, PARAMS)
+            if not html:
+                print(f"❌ 第 {page} 页获取失败，跳过。")
+                continue
+
+        header, rows = parse_cards_table(html)
+
+        if not header and not rows:
+            print(f"⚠️ 第 {page} 页未解析到表格数据。")
+            continue
+
+        # 合并表头（不同页的表头可能略有差异）
+        if header:
+            for h in header:
+                if h not in merged_header:
+                    merged_header.append(h)
+
+        all_data.extend(rows)
+
+        # 请求间隔，避免过于频繁
+        if page < max_page:
+            time.sleep(0.3)
+
+    if not all_data:
+        print("\n❌ 未获取到任何数据，请检查 Cookie 或网络。")
+        return
+
+    print(f"\n✅ 爬取完成，共获取 {len(all_data)} 条原始记录")
+
+    # 构建DataFrame
+    if merged_header:
+        # 标准化行长度
+        normalized_rows = []
+        width = len(merged_header)
+        for row in all_data:
+            if len(row) < width:
+                row = row + [''] * (width - len(row))
+            elif len(row) > width:
+                row = row[:width]
+            normalized_rows.append(row)
+        df = pd.DataFrame(normalized_rows, columns=merged_header)
+    else:
+        df = pd.DataFrame(all_data)
+
+    print(f"📋 原始列名：{list(df.columns)}")
+    print(f"📋 原始数据前3行：")
+    print(df.head(3).to_string())
+
+    # 数据规范化处理
+    print("\n开始数据规范化处理...")
+    df = normalize_dataframe(df)
+
+    # 保存结果
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+    time_str = datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = f"H1会员卡信息_{time_str}.xlsx"
+    filepath = os.path.join(OUTPUT_DIR, filename)
+
+    try:
+        df.to_excel(filepath, index=False)
+        print("=" * 50)
+        print(f"✅ 导出完成！")
+        print(f"📊 最终有效条数：{len(df)}")
+        print(f"📁 已保存到：{filepath}")
+        print("=" * 50)
+    except Exception as e:
+        print(f"❌ 保存Excel失败: {e}")
+        # 降级为CSV
+        csv_path = filepath.replace('.xlsx', '.csv')
+        df.to_csv(csv_path, index=False, encoding='utf-8-sig')
+        print(f"💡 已转为 CSV 保存至：{csv_path}")
+
+
+if __name__ == '__main__':
+    main()
@@ -10,6 +10,7 @@
  },
  {
   "cell_type": "code",
+   "execution_count": 1,
   "id": "70a8b0da",
   "metadata": {
    "ExecuteTime": {
@@ -17,6 +18,64 @@
     "start_time": "2026-03-25T03:51:31.198595700Z"
    }
   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "正在获取总页数...\n",
+      "总页数：34 页\n",
+      "正在爬取第 1/34 页...\n",
+      "正在爬取第 2/34 页...\n",
+      "正在爬取第 3/34 页...\n",
+      "正在爬取第 4/34 页...\n",
+      "正在爬取第 5/34 页...\n",
+      "正在爬取第 6/34 页...\n",
+      "正在爬取第 7/34 页...\n",
+      "正在爬取第 8/34 页...\n",
+      "正在爬取第 9/34 页...\n",
+      "正在爬取第 10/34 页...\n",
+      "正在爬取第 11/34 页...\n",
+      "正在爬取第 12/34 页...\n",
+      "正在爬取第 13/34 页...\n",
+      "正在爬取第 14/34 页...\n",
+      "正在爬取第 15/34 页...\n",
+      "正在爬取第 16/34 页...\n",
+      "正在爬取第 17/34 页...\n",
+      "正在爬取第 18/34 页...\n",
+      "正在爬取第 19/34 页...\n",
+      "正在爬取第 20/34 页...\n",
+      "正在爬取第 21/34 页...\n",
+      "正在爬取第 22/34 页...\n",
+      "正在爬取第 23/34 页...\n",
+      "正在爬取第 24/34 页...\n",
+      "正在爬取第 25/34 页...\n",
+      "正在爬取第 26/34 页...\n",
+      "正在爬取第 27/34 页...\n",
+      "正在爬取第 28/34 页...\n",
+      "正在爬取第 29/34 页...\n",
+      "正在爬取第 30/34 页...\n",
+      "正在爬取第 31/34 页...\n",
+      "正在爬取第 32/34 页...\n",
+      "正在爬取第 33/34 页...\n",
+      "正在爬取第 34/34 页...\n",
+      "\n",
+      "========== 爬取完成 ==========\n",
+      "总计数据：666 行\n",
+      "\n",
+      "✅ 文件已保存到桌面：\n",
+      "📊 Excel文件：C:\\Users\\hp_z66\\Desktop\\车辆数据_已拆分_20260325_1151531.csv\n",
+      "📄 文本文件：C:\\Users\\hp_z66\\Desktop\\车辆数据_已拆分_20260325_115153.txt\n",
+      "\n",
+      "前5行数据预览：\n",
+      "1 ['1', '豫NA477R', '卢忠厚', '', '', '', '/', '', '118933km', '', '', '消费记录 编辑 迁移 删除']\n",
+      "2 ['2', '豫NF3722', '刘建利', '', '', '', '/', '', '198609km', '', '', '消费记录 编辑 迁移 删除']\n",
+      "3 ['3', '豫N13B58', '石', '15090629992', '', '', '/', '', '22462km', '', '', '消费记录 编辑 迁移 删除']\n",
+      "4 ['4', '京PYB297', '科迪黄青春', '', '', '', '/', '', '119584km', '', '', '消费记录 编辑 迁移 删除']\n",
+      "5 ['5', '豫NN982M', '大众', '', '', '', '/', '', '197504km', '', '', '消费记录 编辑 迁移 删除']\n"
+     ]
+    }
+   ],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
@@ -215,66 +274,7 @@
    "    print(\"\\n前5行数据预览：\")\n",
    "    for i, row in enumerate(all_data[:5]):\n",
    "        print(i+1, row)"
-   ],
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "正在获取总页数...\n",
-      "总页数：34 页\n",
-      "正在爬取第 1/34 页...\n",
-      "正在爬取第 2/34 页...\n",
-      "正在爬取第 3/34 页...\n",
-      "正在爬取第 4/34 页...\n",
-      "正在爬取第 5/34 页...\n",
-      "正在爬取第 6/34 页...\n",
-      "正在爬取第 7/34 页...\n",
-      "正在爬取第 8/34 页...\n",
-      "正在爬取第 9/34 页...\n",
-      "正在爬取第 10/34 页...\n",
-      "正在爬取第 11/34 页...\n",
-      "正在爬取第 12/34 页...\n",
-      "正在爬取第 13/34 页...\n",
-      "正在爬取第 14/34 页...\n",
-      "正在爬取第 15/34 页...\n",
-      "正在爬取第 16/34 页...\n",
-      "正在爬取第 17/34 页...\n",
-      "正在爬取第 18/34 页...\n",
-      "正在爬取第 19/34 页...\n",
-      "正在爬取第 20/34 页...\n",
-      "正在爬取第 21/34 页...\n",
-      "正在爬取第 22/34 页...\n",
-      "正在爬取第 23/34 页...\n",
-      "正在爬取第 24/34 页...\n",
-      "正在爬取第 25/34 页...\n",
-      "正在爬取第 26/34 页...\n",
-      "正在爬取第 27/34 页...\n",
-      "正在爬取第 28/34 页...\n",
-      "正在爬取第 29/34 页...\n",
-      "正在爬取第 30/34 页...\n",
-      "正在爬取第 31/34 页...\n",
-      "正在爬取第 32/34 页...\n",
-      "正在爬取第 33/34 页...\n",
-      "正在爬取第 34/34 页...\n",
-      "\n",
-      "========== 爬取完成 ==========\n",
-      "总计数据：666 行\n",
-      "\n",
-      "✅ 文件已保存到桌面：\n",
-      "📊 Excel文件：C:\\Users\\hp_z66\\Desktop\\车辆数据_已拆分_20260325_1151531.csv\n",
-      "📄 文本文件：C:\\Users\\hp_z66\\Desktop\\车辆数据_已拆分_20260325_115153.txt\n",
-      "\n",
-      "前5行数据预览：\n",
-      "1 ['1', '豫NA477R', '卢忠厚', '', '', '', '/', '', '118933km', '', '', '消费记录 编辑 迁移 删除']\n",
-      "2 ['2', '豫NF3722', '刘建利', '', '', '', '/', '', '198609km', '', '', '消费记录 编辑 迁移 删除']\n",
-      "3 ['3', '豫N13B58', '石', '15090629992', '', '', '/', '', '22462km', '', '', '消费记录 编辑 迁移 删除']\n",
-      "4 ['4', '京PYB297', '科迪黄青春', '', '', '', '/', '', '119584km', '', '', '消费记录 编辑 迁移 删除']\n",
-      "5 ['5', '豫NN982M', '大众', '', '', '', '/', '', '197504km', '', '', '消费记录 编辑 迁移 删除']\n"
-     ]
-    }
-   ],
-   "execution_count": 1
+   ]
  },
  {
   "cell_type": "markdown",
@@ -286,6 +286,7 @@
  },
  {
   "cell_type": "code",
+   "execution_count": 2,
   "id": "5392bfc0",
   "metadata": {
    "ExecuteTime": {
@@ -293,6 +294,67 @@
     "start_time": "2026-03-25T03:53:18.688209100Z"
    }
   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "==================================================\n",
+      "开始爬取库存数据...\n",
+      "当前 StoreID: 13435\n",
+      "当前 HouseID: 9079\n",
+      "==================================================\n",
+      "✅ 成功获取最大页数：40\n",
+      "正在爬取第 1/40 页...\n",
+      "正在爬取第 2/40 页...\n",
+      "正在爬取第 3/40 页...\n",
+      "正在爬取第 4/40 页...\n",
+      "正在爬取第 5/40 页...\n",
+      "正在爬取第 6/40 页...\n",
+      "正在爬取第 7/40 页...\n",
+      "正在爬取第 8/40 页...\n",
+      "正在爬取第 9/40 页...\n",
+      "正在爬取第 10/40 页...\n",
+      "正在爬取第 11/40 页...\n",
+      "正在爬取第 12/40 页...\n",
+      "正在爬取第 13/40 页...\n",
+      "正在爬取第 14/40 页...\n",
+      "正在爬取第 15/40 页...\n",
+      "正在爬取第 16/40 页...\n",
+      "正在爬取第 17/40 页...\n",
+      "正在爬取第 18/40 页...\n",
+      "正在爬取第 19/40 页...\n",
+      "正在爬取第 20/40 页...\n",
+      "正在爬取第 21/40 页...\n",
+      "正在爬取第 22/40 页...\n",
+      "正在爬取第 23/40 页...\n",
+      "正在爬取第 24/40 页...\n",
+      "正在爬取第 25/40 页...\n",
+      "正在爬取第 26/40 页...\n",
+      "正在爬取第 27/40 页...\n",
+      "正在爬取第 28/40 页...\n",
+      "正在爬取第 29/40 页...\n",
+      "正在爬取第 30/40 页...\n",
+      "正在爬取第 31/40 页...\n",
+      "正在爬取第 32/40 页...\n",
+      "正在爬取第 33/40 页...\n",
+      "正在爬取第 34/40 页...\n",
+      "正在爬取第 35/40 页...\n",
+      "正在爬取第 36/40 页...\n",
+      "正在爬取第 37/40 页...\n",
+      "正在爬取第 38/40 页...\n",
+      "正在爬取第 39/40 页...\n",
+      "正在爬取第 40/40 页...\n",
+      "\n",
+      "🔍 去重完成 (基于列: 配件编码)：原始 782 条 → 去重后 782 条\n",
+      "==================================================\n",
+      "✅ 爬取 + 去重 完成！\n",
+      "📊 最终有效条数：782\n",
+      "📁 已保存到桌面：库存数据_13435_去重版1.xlsx\n",
+      "==================================================\n"
+     ]
+    }
+   ],
   "source": [
    "import requests\n",
    "import pandas as pd\n",
@@ -499,69 +561,7 @@
    "\n",
    "if __name__ == '__main__':\n",
    "    main()"
-   ],
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "==================================================\n",
-      "开始爬取库存数据...\n",
-      "当前 StoreID: 13435\n",
-      "当前 HouseID: 9079\n",
-      "==================================================\n",
-      "✅ 成功获取最大页数：40\n",
-      "正在爬取第 1/40 页...\n",
-      "正在爬取第 2/40 页...\n",
-      "正在爬取第 3/40 页...\n",
-      "正在爬取第 4/40 页...\n",
-      "正在爬取第 5/40 页...\n",
-      "正在爬取第 6/40 页...\n",
-      "正在爬取第 7/40 页...\n",
-      "正在爬取第 8/40 页...\n",
-      "正在爬取第 9/40 页...\n",
-      "正在爬取第 10/40 页...\n",
-      "正在爬取第 11/40 页...\n",
-      "正在爬取第 12/40 页...\n",
-      "正在爬取第 13/40 页...\n",
-      "正在爬取第 14/40 页...\n",
-      "正在爬取第 15/40 页...\n",
-      "正在爬取第 16/40 页...\n",
-      "正在爬取第 17/40 页...\n",
-      "正在爬取第 18/40 页...\n",
-      "正在爬取第 19/40 页...\n",
-      "正在爬取第 20/40 页...\n",
-      "正在爬取第 21/40 页...\n",
-      "正在爬取第 22/40 页...\n",
-      "正在爬取第 23/40 页...\n",
-      "正在爬取第 24/40 页...\n",
-      "正在爬取第 25/40 页...\n",
-      "正在爬取第 26/40 页...\n",
-      "正在爬取第 27/40 页...\n",
-      "正在爬取第 28/40 页...\n",
-      "正在爬取第 29/40 页...\n",
-      "正在爬取第 30/40 页...\n",
-      "正在爬取第 31/40 页...\n",
-      "正在爬取第 32/40 页...\n",
-      "正在爬取第 33/40 页...\n",
-      "正在爬取第 34/40 页...\n",
-      "正在爬取第 35/40 页...\n",
-      "正在爬取第 36/40 页...\n",
-      "正在爬取第 37/40 页...\n",
-      "正在爬取第 38/40 页...\n",
-      "正在爬取第 39/40 页...\n",
-      "正在爬取第 40/40 页...\n",
-      "\n",
-      "🔍 去重完成 (基于列: 配件编码)：原始 782 条 → 去重后 782 条\n",
-      "==================================================\n",
-      "✅ 爬取 + 去重 完成！\n",
-      "📊 最终有效条数：782\n",
-      "📁 已保存到桌面：库存数据_13435_去重版1.xlsx\n",
-      "==================================================\n"
-     ]
-    }
-   ],
-   "execution_count": 2
+   ]
  },
  {
   "cell_type": "markdown",
@@ -573,13 +573,191 @@
   ]
  },
  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "cbd4eeb0a30b3e15",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-03-25T03:58:48.443601700Z",
     "start_time": "2026-03-25T03:56:48.226330400Z"
    }
   },
-   "cell_type": "code",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "🔧 开始导出维修记录...\n",
+      "📄 正在获取第 1 页以分析页数...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists.html\n",
+      "📊 预估总页数: 53\n",
+      "🔄 正在处理第 1/53 页...\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 2/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_2.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 3/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_3.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 4/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_4.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 5/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_5.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 6/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_6.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 7/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_7.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 8/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_8.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 9/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_9.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 10/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_10.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 11/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_11.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 12/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_12.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 13/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_13.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 14/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_14.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 15/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_15.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 16/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_16.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 17/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_17.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 18/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_18.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 19/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_19.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 20/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_20.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 21/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_21.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 22/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_22.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 23/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_23.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 24/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_24.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 25/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_25.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 26/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_26.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 27/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_27.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 28/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_28.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 29/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_29.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 30/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_30.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 31/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_31.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 32/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_32.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 33/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_33.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 34/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_34.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 35/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_35.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 36/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_36.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 37/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_37.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 38/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_38.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 39/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_39.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 40/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_40.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 41/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_41.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 42/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_42.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 43/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_43.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 44/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_44.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 45/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_45.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 46/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_46.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 47/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_47.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 48/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_48.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 49/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_49.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 50/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_50.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 51/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_51.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 52/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_52.html\n",
+      "   ✅ 本页提取 20 条记录\n",
+      "🔄 正在处理第 53/53 页...\n",
+      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_53.html\n",
+      "   ✅ 本页提取 8 条记录\n",
+      "\n",
+      "==============================\n",
+      "✅ 导出成功！\n",
+      "📁 文件路径: D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\维修记录_完美导出版.xlsx\n",
+      "📈 总记录数: 1048\n",
+      "==============================\n"
+     ]
+    }
+   ],
   "source": [
    "import requests\n",
    "import pandas as pd\n",
@@ -885,185 +1063,344 @@
    "        print(\"💡 请运行以下命令安装: pip install \" + \" \".join(missing))\n",
    "    else:\n",
    "        main()"
-   ],
-   "id": "cbd4eeb0a30b3e15",
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "🔧 开始导出维修记录...\n",
-      "📄 正在获取第 1 页以分析页数...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists.html\n",
-      "📊 预估总页数: 53\n",
-      "🔄 正在处理第 1/53 页...\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 2/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_2.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 3/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_3.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 4/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_4.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 5/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_5.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 6/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_6.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 7/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_7.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 8/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_8.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 9/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_9.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 10/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_10.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 11/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_11.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 12/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_12.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 13/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_13.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 14/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_14.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 15/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_15.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 16/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_16.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 17/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_17.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 18/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_18.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 19/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_19.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 20/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_20.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 21/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_21.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 22/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_22.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 23/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_23.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 24/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_24.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 25/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_25.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 26/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_26.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 27/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_27.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 28/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_28.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 29/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_29.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 30/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_30.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 31/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_31.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 32/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_32.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 33/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_33.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 34/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_34.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 35/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_35.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 36/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_36.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 37/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_37.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 38/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_38.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 39/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_39.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 40/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_40.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 41/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_41.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 42/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_42.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 43/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_43.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 44/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_44.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 45/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_45.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 46/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_46.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 47/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_47.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 48/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_48.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 49/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_49.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 50/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_50.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 51/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_51.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 52/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_52.html\n",
-      "   ✅ 本页提取 20 条记录\n",
-      "🔄 正在处理第 53/53 页...\n",
-      "   正在请求: https://scrm.h1cd.com/admin/billings/Lists_53.html\n",
-      "   ✅ 本页提取 8 条记录\n",
-      "\n",
-      "==============================\n",
-      "✅ 导出成功！\n",
-      "📁 文件路径: D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\\维修记录_完美导出版.xlsx\n",
-      "📈 总记录数: 1048\n",
-      "==============================\n"
-     ]
-    }
-   ],
-   "execution_count": 3
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b3decf1d",
+   "metadata": {},
+   "source": [
+    "# 会员卡信息导出\n",
+    "\n",
+    "从 H1 系统导出会员卡信息（储值卡、套餐卡等），自动分页爬取并做数据规范化处理。\n",
+    "\n",
+    "> ⚠️ **注意**：H1系统导出的原始数据格式不规范（如姓名和手机号混在同一字段、操作列包含按钮文本等），脚本已内置清洗逻辑。\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ab86773",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import pandas as pd\n",
+    "from bs4 import BeautifulSoup\n",
+    "import os\n",
+    "import re\n",
+    "import time\n",
+    "from datetime import datetime\n",
+    "\n",
+    "# ===================== 【配置区】 =====================\n",
+    "# Cookie（请根据实际情况更新，登录后从浏览器DevTools复制）\n",
+    "COOKIES = {\n",
+    "    'showSmsActivity': '1',\n",
+    "    'showEasyMoney': '1',\n",
+    "    'LOGIN_URL': 'https%3A%2F%2Fscrm.h1cd.com%2Flogin-h1cd.html',\n",
+    "    'adminpd': 'jVISiRrtcJplFhLoCuUIxK9XG5ekdfwzq%2B0y482ZKxE%3D',\n",
+    "    'adminun': '15224781773',\n",
+    "    'uid': '10291',\n",
+    "    'PHPSESSID': 'nbn58laakng0rv5iqln82a6qpu',\n",
+    "}\n",
+    "\n",
+    "HEADERS = {\n",
+    "    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',\n",
+    "    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',\n",
+    "    'Connection': 'keep-alive',\n",
+    "    'Referer': 'https://scrm.h1cd.com/admin/members/cards.html',\n",
+    "    'Sec-Fetch-Dest': 'iframe',\n",
+    "    'Sec-Fetch-Mode': 'navigate',\n",
+    "    'Sec-Fetch-Site': 'same-origin',\n",
+    "    'Sec-Fetch-User': '?1',\n",
+    "    'Upgrade-Insecure-Requests': '1',\n",
+    "    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36 Edg/146.0.0.0',\n",
+    "    'sec-ch-ua': '\"Chromium\";v=\"146\", \"Not-A.Brand\";v=\"24\", \"Microsoft Edge\";v=\"146\"',\n",
+    "    'sec-ch-ua-mobile': '?0',\n",
+    "    'sec-ch-ua-platform': '\"Windows\"',\n",
+    "}\n",
+    "\n",
+    "# 查询参数\n",
+    "PARAMS = {\n",
+    "    'type': '',\n",
+    "    'expired': '',\n",
+    "    'storeId': '0',\n",
+    "    'search': '',\n",
+    "}\n",
+    "\n",
+    "# 输出目录\n",
+    "OUTPUT_DIR = r\"D:\\Idea Project\\F6+宜搭+其它(1)\\张阳脚本\\文件输出\"\n",
+    "# =====================================================\n",
+    "\n",
+    "\n",
+    "def get_page_html(page_num):\n",
+    "    \"\"\"Fetch one page of the member-card list and return its HTML text.\n",
+    "\n",
+    "    Page 1 is served from ``cards.html``; every later page from\n",
+    "    ``cards_{page_num}.html``. Returns ``None`` when the request fails or\n",
+    "    when the response looks like the login page (cookie probably expired).\n",
+    "    \"\"\"\n",
+    "    if page_num == 1:\n",
+    "        url = \"https://scrm.h1cd.com/admin/members/cards.html\"\n",
+    "    else:\n",
+    "        url = f\"https://scrm.h1cd.com/admin/members/cards_{page_num}.html\"\n",
+    "\n",
+    "    try:\n",
+    "        r = requests.get(url, headers=HEADERS, cookies=COOKIES, params=PARAMS, timeout=30)\n",
+    "\n",
+    "        # Pin the decoding BEFORE touching r.text: for a text/* response without\n",
+    "        # a charset header requests may fall back to ISO-8859-1, and the Chinese\n",
+    "        # login marker checked below could then never match.\n",
+    "        r.encoding = 'utf-8'\n",
+    "\n",
+    "        # Being bounced to the login page means the session cookie is no longer valid.\n",
+    "        if 'login' in r.url.lower() or '登录' in r.text[:2000]:\n",
+    "            print(f\"   ⚠️ 第{page_num}页检测到跳转登录，Cookie可能已失效。\")\n",
+    "            return None\n",
+    "\n",
+    "        r.raise_for_status()\n",
+    "        return r.text\n",
+    "    except requests.RequestException as e:\n",
+    "        print(f\"   ❌ 第{page_num}页请求失败: {str(e)}\")\n",
+    "        return None\n",
+    "\n",
+    "\n",
+    "def parse_cards_table(html):\n",
+    "    \"\"\"Extract the header and data rows of the member-card table.\n",
+    "\n",
+    "    Returns ``(header, data_rows)``: a list of header captions and a list of\n",
+    "    rows, each row being a list of cell strings. Both are empty when the page\n",
+    "    contains no table.\n",
+    "\n",
+    "    Cells made of several fragments (e.g. name and phone split by ``<br>``)\n",
+    "    are joined with ``|`` so later cleaning steps can take them apart again.\n",
+    "    \"\"\"\n",
+    "    soup = BeautifulSoup(html, 'html.parser')\n",
+    "    table = soup.find('table', class_='table') or soup.find(\"table\")\n",
+    "    if not table:\n",
+    "        return [], []\n",
+    "\n",
+    "    def captions(cells):\n",
+    "        return [cell.get_text(strip=True) for cell in cells]\n",
+    "\n",
+    "    # Header: prefer <thead>, otherwise the <th> cells of the first row.\n",
+    "    thead = table.find(\"thead\")\n",
+    "    header = captions(thead.find_all('th')) if thead else []\n",
+    "    if not header:\n",
+    "        first_tr = table.find(\"tr\")\n",
+    "        if first_tr:\n",
+    "            header = captions(first_tr.find_all('th'))\n",
+    "\n",
+    "    # Data rows: prefer <tbody>, otherwise every row of the table.\n",
+    "    tbody = table.find(\"tbody\")\n",
+    "    candidates = (tbody or table).find_all(\"tr\")\n",
+    "\n",
+    "    data_rows = []\n",
+    "    for tr in candidates:\n",
+    "        # Rows holding <th> cells are headers, not data.\n",
+    "        if tr.find(\"th\"):\n",
+    "            continue\n",
+    "        cells = tr.find_all('td')\n",
+    "        # Rows with fewer than three cells are skipped.\n",
+    "        if len(cells) < 3:\n",
+    "            continue\n",
+    "        values = [\n",
+    "            re.sub(r'\\s+', ' ', td.get_text(separator='|', strip=True)).strip()\n",
+    "            for td in cells\n",
+    "        ]\n",
+    "        if any(values):\n",
+    "            data_rows.append(values)\n",
+    "\n",
+    "    return header, data_rows\n",
+    "\n",
+    "\n",
+    "def normalize_dataframe(df):\n",
+    "    \"\"\"\n",
+    "    对整个DataFrame进行规范化处理。\n",
+    "    处理H1系统导出数据不规范的情况：\n",
+    "    1. 去重\n",
+    "    2. 拆分姓名+手机号合并字段\n",
+    "    3. 清理数值列\n",
+    "    4. 去除操作列和按钮文本残留\n",
+    "    \"\"\"\n",
+    "    # 去除完全重复的行\n",
+    "    before_count = len(df)\n",
+    "    df = df.drop_duplicates()\n",
+    "    after_count = len(df)\n",
+    "    if before_count != after_count:\n",
+    "        print(f\"   🔍 去重：{before_count} 条 → {after_count} 条（去除 {before_count - after_count} 条重复）\")\n",
+    "\n",
+    "    # 拆分合并列（如\"会员名\"列中同时包含姓名和手机号）\n",
+    "    for col in df.columns:\n",
+    "        if any(kw in col for kw in [\"会员名\", \"姓名\", \"客户名称\", \"车主\"]):\n",
+    "            # 检测该列是否同时包含姓名和手机号\n",
+    "            sample = df[col].astype(str).head(20)\n",
+    "            has_phone = sample.apply(lambda x: bool(re.search(r'1[3-9]\\d{9}', x))).any()\n",
+    "            if has_phone and '手机号' not in df.columns:\n",
+    "                df[\"客户名称\"] = df[col].apply(\n",
+    "                    lambda x: re.sub(r\"1[3-9]\\d{9}\", \"\", str(x)).replace(\"|\", \"\").strip()\n",
+    "                )\n",
+    "                df[\"手机号\"] = df[col].apply(\n",
+    "                    lambda x: (re.search(r\"1[3-9]\\d{9}\", str(x)).group() if re.search(r\"1[3-9]\\d{9}\", str(x)) else \"\")\n",
+    "                )\n",
+    "\n",
+    "    # 清理数值列\n",
+    "    for col in df.columns:\n",
+    "        if any(kw in col for kw in [\"余额\", \"充值\", \"消费\", \"金额\"]):\n",
+    "            df[col] = df[col].astype(str).apply(\n",
+    "                lambda x: (re.search(r\"[\\d.]+\", str(x).replace(\",\", \"\")).group() if re.search(r\"[\\d.]+\", str(x).replace(\",\", \"\")) else x)\n",
+    "            )\n",
+    "\n",
+    "    # 清理操作列\n",
+    "    cols_to_drop = [col for col in df.columns if any(kw in col for kw in [\"操作\", \"选择\", \"勾选\"])]\n",
+    "    if cols_to_drop:\n",
+    "        df = df.drop(columns=cols_to_drop)\n",
+    "\n",
+    "    # 清理所有列中的按钮文本残留\n",
+    "    btn_patterns = r\"(查看详情|编辑|删除|充值记录|消费记录|详情|迁移|查看)\"\n",
+    "    for col in df.columns:\n",
+    "        df[col] = df[col].astype(str).apply(\n",
+    "            lambda x: re.sub(btn_patterns, \"\", str(x)).strip()\n",
+    "        )\n",
+    "        df[col] = df[col].replace({'nan': '', 'None': ''})\n",
+    "\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def get_max_page(html):\n",
+    "    \"\"\"从页面中提取最大页数\"\"\"\n",
+    "    if not html:\n",
+    "        return 1\n",
+    "\n",
+    "    soup = BeautifulSoup(html, 'html.parser')\n",
+    "    text = soup.get_text()\n",
+    "\n",
+    "    match = re.search(r'共\\s*(\\d+)\\s*页', text)\n",
+    "    if match:\n",
+    "        return int(match.group(1))\n",
+    "\n",
+    "    match = re.search(r'页\\s*1/(\\d+)', text)\n",
+    "    if match:\n",
+    "        return int(match.group(1))\n",
+    "\n",
+    "    page_links = soup.find_all('a', href=re.compile(r'cards_\\d+\\.html'))\n",
+    "    if page_links:\n",
+    "        max_page = 1\n",
+    "        for a in page_links:\n",
+    "            num_match = re.search(r'cards_(\\d+)\\.html', a.get('href', ''))\n",
+    "            if num_match:\n",
+    "                max_page = max(max_page, int(num_match.group(1)))\n",
+    "        return max_page\n",
+    "\n",
+    "    return 1\n",
+    "\n",
+    "\n",
+    "def _build_cards_dataframe(header, rows):\n",
+    "    \"\"\"Build a DataFrame from positional rows and a positional header.\n",
+    "\n",
+    "    The header is padded to the widest row, blank captions become ``列N`` and\n",
+    "    repeated captions get a numeric suffix, so no cell is truncated and every\n",
+    "    column label is a unique string.\n",
+    "    \"\"\"\n",
+    "    width = max([len(header)] + [len(row) for row in rows])\n",
+    "    names = []\n",
+    "    for idx in range(width):\n",
+    "        base = (header[idx] if idx < len(header) else \"\") or f\"列{idx + 1}\"\n",
+    "        name, suffix = base, 2\n",
+    "        while name in names:\n",
+    "            name = f\"{base}_{suffix}\"\n",
+    "            suffix += 1\n",
+    "        names.append(name)\n",
+    "    padded = [list(row) + [\"\"] * (width - len(row)) for row in rows]\n",
+    "    return pd.DataFrame(padded, columns=names)\n",
+    "\n",
+    "\n",
+    "def main():\n",
+    "    \"\"\"Crawl every member-card page, normalise the data and save it.\n",
+    "\n",
+    "    The first page is fetched once and reused both for page-count detection\n",
+    "    and as page 1 of the crawl. The result is written to ``OUTPUT_DIR`` as a\n",
+    "    timestamped ``.xlsx`` file, falling back to CSV when the Excel export fails.\n",
+    "    \"\"\"\n",
+    "    print(\"=\" * 50)\n",
+    "    print(\"开始爬取 H1系统 会员卡信息...\")\n",
+    "    print(f\"当前 StoreID: {PARAMS['storeId']}\")\n",
+    "    print(\"=\" * 50)\n",
+    "\n",
+    "    # Fetch page 1 to learn how many pages there are.\n",
+    "    print(\"正在获取总页数...\")\n",
+    "    first_html = get_page_html(1)\n",
+    "    if not first_html:\n",
+    "        print(\"❌ 无法获取第一页数据，请检查 Cookie 或网络。\")\n",
+    "        return\n",
+    "\n",
+    "    max_page = get_max_page(first_html)\n",
+    "    print(f\"✅ 成功获取最大页数：{max_page}\")\n",
+    "\n",
+    "    all_data = []\n",
+    "    merged_header = []\n",
+    "    failed_pages = []\n",
+    "\n",
+    "    for page in range(1, max_page + 1):\n",
+    "        print(f\"正在爬取第 {page}/{max_page} 页...\")\n",
+    "\n",
+    "        html = first_html if page == 1 else get_page_html(page)\n",
+    "        if not html:\n",
+    "            print(f\"❌ 第 {page} 页获取失败，跳过。\")\n",
+    "            failed_pages.append(page)\n",
+    "            continue\n",
+    "\n",
+    "        header, rows = parse_cards_table(html)\n",
+    "\n",
+    "        if not header and not rows:\n",
+    "            print(f\"⚠️ 第 {page} 页未解析到表格数据。\")\n",
+    "            continue\n",
+    "\n",
+    "        # Rows are positional, so the header is merged by POSITION as well:\n",
+    "        # merging by caption would collapse repeated or blank captions and\n",
+    "        # shift every following column.\n",
+    "        for idx, caption in enumerate(header):\n",
+    "            if idx >= len(merged_header):\n",
+    "                merged_header.append(caption)\n",
+    "            elif not merged_header[idx]:\n",
+    "                merged_header[idx] = caption\n",
+    "\n",
+    "        all_data.extend(rows)\n",
+    "\n",
+    "        # Short pause between requests.\n",
+    "        if page < max_page:\n",
+    "            time.sleep(0.3)\n",
+    "\n",
+    "    if not all_data:\n",
+    "        print(\"\\n❌ 未获取到任何数据，请检查 Cookie 或网络。\")\n",
+    "        return\n",
+    "\n",
+    "    print(f\"\\n✅ 爬取完成，共获取 {len(all_data)} 条原始记录\")\n",
+    "    if failed_pages:\n",
+    "        # Make an incomplete export visible instead of reporting plain success.\n",
+    "        print(f\"⚠️ 以下页面获取失败，导出结果不完整：{failed_pages}\")\n",
+    "\n",
+    "    df = _build_cards_dataframe(merged_header, all_data)\n",
+    "\n",
+    "    print(f\"📋 原始列名：{list(df.columns)}\")\n",
+    "    print(\"📋 原始数据前3行：\")\n",
+    "    print(df.head(3).to_string())\n",
+    "\n",
+    "    print(\"\\n开始数据规范化处理...\")\n",
+    "    df = normalize_dataframe(df)\n",
+    "\n",
+    "    os.makedirs(OUTPUT_DIR, exist_ok=True)\n",
+    "    time_str = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
+    "    filename = f\"H1会员卡信息_{time_str}.xlsx\"\n",
+    "    filepath = os.path.join(OUTPUT_DIR, filename)\n",
+    "\n",
+    "    try:\n",
+    "        df.to_excel(filepath, index=False)\n",
+    "        print(\"=\" * 50)\n",
+    "        print(\"✅ 导出完成！\")\n",
+    "        print(f\"📊 最终有效条数：{len(df)}\")\n",
+    "        print(f\"📁 已保存到：{filepath}\")\n",
+    "        print(\"=\" * 50)\n",
+    "    except Exception as e:\n",
+    "        # Best-effort fallback so the crawled data is not lost when the Excel\n",
+    "        # export fails for any reason.\n",
+    "        print(f\"❌ 保存Excel失败: {e}\")\n",
+    "        csv_path = os.path.splitext(filepath)[0] + \".csv\"\n",
+    "        df.to_csv(csv_path, index=False, encoding=\"utf-8-sig\")\n",
+    "        print(f\"💡 已转为 CSV 保存至：{csv_path}\")\n",
+    "\n",
+    "\n",
+    "if __name__ == '__main__':\n",
+    "    main()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4c658267",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 运行导出\n",
+    "main()\n"
+   ]
  }
 ],
 "metadata": {
@@ -263,7 +263,21 @@
  - 逐个查询会员卡明细
  - 使用lxml解析HTML

-### 34. 途虎养车系统
+### 35. H1车店系统
+- **文件**: `H1车店数据导出.ipynb`, `H1会员卡.py`
+- **功能**:
+  - 车辆信息导出
+  - 库存信息导出
+  - 历史维修记录导出
+  - 开单管理数据导出
+  - 会员卡信息导出（储值卡、套餐卡等）
+- **接口**: `https://scrm.h1cd.com`
+- **特点**:
+  - 使用Cookie认证，需要定期更新
+  - HTML表格解析，分页URL模式为 `cards_{page}.html`
+  - **数据格式不规范**：导出的原始数据中姓名和手机号可能混在同一字段、操作列包含按钮文本、数值字段含非数字字符等，脚本内置了数据规范化处理（拆分合并列、清理按钮文本、数值标准化、去重等）
+
+### 36. 途虎养车系统
 - **文件**: `途虎养车脚本导出.ipynb`
 - **功能**:
  - 客户信息导出
@@ -460,6 +474,8 @@
 ├── 大唛云管理平台.ipynb           # 大唛云管理平台
 ├── 大大汽修token登录(1).ipynb     # 大大汽修Token登录
 ├── 大大汽修点击导出(1).ipynb       # 大大汽修点击导出
+├── H1车店数据导出.ipynb          # H1车店系统 - 车辆/库存/维修记录/会员卡导出
+├── H1会员卡.py                   # H1车店系统 - 会员卡信息导出（独立脚本）
 ├── 好店长.ipynb                   # 好店长系统
 ├── 客户无忧.ipynb                 # 客户无忧系统
 ├── 客管家数据导出(1).ipynb        # 客管家数据导出
@@ -0,0 +1,286 @@
+"""
+神汽链(sqzone.com)登录+数据导出 一体化脚本
+- 自动登录获取JSESSIONID（Playwright + ddddocr验证码识别）
+- 导出近10年历史维修记录
+"""
+import sys, io
+# Re-wrap the standard streams as UTF-8 text streams. Characters that cannot be
+# encoded are replaced instead of raising (errors='replace'); presumably this is
+# here so the Chinese log messages print on consoles whose default encoding is
+# not UTF-8.
+sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
+sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
+
+import requests
+import re
+import hashlib
+import time
+import json
+import base64
+import os
+from datetime import datetime, timedelta
+from tqdm import tqdm
+import pandas as pd
+import urllib3
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+# ============ Configuration ============
+# Credentials can be supplied through the SQZONE_ACCOUNT / SQZONE_PASSWORD
+# environment variables so they do not have to live in source control; the
+# literals are kept only as backward-compatible fallbacks.
+ACCOUNT = os.environ.get('SQZONE_ACCOUNT', '17690802976')
+PASSWORD = os.environ.get('SQZONE_PASSWORD', '123321')
+# Endpoint queried for the shop turnover records.
+URL = 'https://www.sqzone.com/launa/pc/dataCenter/queryShopTurnoverInfo'
+# Destination workbook; temporary checkpoints are written next to it.
+OUTPUT_FILE = r'D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\文件输出\神汽链10年历史数据.xlsx'
+DATE_RANGE_START = (2016, 4)   # (year, month) where the ten-year window starts
+DATE_RANGE_END = (2026, 3)     # (year, month) of the last month exported (inclusive)
+# =======================================
+
+def md5(text):
+    """Return the hexadecimal MD5 digest of ``text`` encoded as UTF-8."""
+    digest = hashlib.md5()
+    digest.update(text.encode('utf-8'))
+    return digest.hexdigest()
+
+def get_sqzone_cookies(account=ACCOUNT, password=PASSWORD, max_captcha_retries=5):
+    """Log in to sqzone through a headless Playwright browser and return its cookies.
+
+    The captcha image is requested from inside the page, decoded from base64
+    and recognised with ddddocr; the password is sent as its MD5 hex digest.
+
+    Args:
+        account: Value sent as ``mobilephone`` in the login request.
+        password: Plain-text password; hashed with ``md5`` before being sent.
+        max_captcha_retries: Maximum number of captcha/login attempts.
+
+    Returns:
+        A dict mapping cookie name to value for every browser cookie whose
+        domain contains ``sqzone``, or ``None`` when the page uuid cannot be
+        read, the login is rejected or runs out of attempts, no JSESSIONID
+        cookie is present, or the probe request does not succeed.
+    """
+    # Imported lazily so the heavy browser/OCR dependencies are only needed
+    # when a login is actually performed.
+    from playwright.sync_api import sync_playwright
+    import ddddocr
+
+    ocr = ddddocr.DdddOcr(show_ad=False)
+
+    with sync_playwright() as p:
+        print("[登录] 启动浏览器...")
+        browser = p.chromium.launch(headless=True)
+        context = browser.new_context(
+            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36 Edg/146.0.0.0',
+            viewport={'width': 1280, 'height': 720}
+        )
+        page = context.new_page()
+
+        print("[登录] 访问神汽链登录页...")
+        page.goto('https://www.sqzone.com/launa/pc/login', wait_until='domcontentloaded', timeout=30000)
+        # Fixed 3 s pause; presumably gives the page scripts time to define `uuid`.
+        page.wait_for_timeout(3000)
+
+        # Read the page-global `uuid`; an empty string means it is not defined.
+        uuid = page.evaluate('() => typeof uuid !== "undefined" ? uuid : ""')
+        if not uuid:
+            print("[登录] 未获取到uuid，登录页可能加载异常")
+            browser.close()
+            return None
+        print(f"[登录] uuid: {uuid}")
+
+        for attempt in range(max_captcha_retries):
+            print(f"[登录] 尝试 {attempt+1}/{max_captcha_retries}...")
+
+            # Fetch a fresh captcha from within the page so the request shares
+            # the browser context's cookies.
+            captcha_result = page.evaluate('''async () => {
+                const resp = await fetch('/fauna/qrloginserver/getPasswordCaptcha?t=' + Date.now());
+                return await resp.json();
+            }''')
+
+            captcha_code = ''
+            if captcha_result.get('success'):
+                # `data` is decoded from base64 and handed to the OCR model.
+                img_data = base64.b64decode(captcha_result['data'])
+                captcha_code = ocr.classification(img_data)
+                print(f"[登录] 验证码: {captcha_code}")
+            else:
+                print(f"[登录] 获取验证码失败")
+                continue
+
+            # Submit the login request from inside the page.
+            login_result = page.evaluate('''async (params) => {
+                const resp = await fetch('/fauna/qrloginserver/backOfficePwdLogin', {
+                    method: 'POST',
+                    headers: {'Content-Type': 'application/json;charset=UTF-8'},
+                    body: JSON.stringify(params)
+                });
+                return await resp.json();
+            }''', {
+                'mobilephone': account,
+                'password': md5(password),
+                'pwdCaptcha': captcha_code,
+                'referer': page.url,
+                'uuid': uuid,
+                'loginType': 'SAAS',
+            })
+
+            if login_result.get('success'):
+                print("[登录] SSO登录成功!")
+                break
+            elif '验证码错误' in (login_result.get('errorMsg') or ''):
+                # Wrong captcha: try again with a new image.
+                print("[登录] 验证码错误，重试...")
+                continue
+            elif '密码' in (login_result.get('errorMsg') or ''):
+                # A password-related error will not be fixed by retrying.
+                print(f"[登录] 密码错误: {login_result.get('errorMsg')}")
+                browser.close()
+                return None
+            else:
+                print(f"[登录] 错误: {login_result.get('errorMsg')}")
+                continue
+        else:
+            # for/else: reached only when no attempt hit `break`.
+            print("[登录] 验证码多次失败")
+            browser.close()
+            return None
+
+        # Reload the page so the authorize callback can complete.
+        print("[登录] 刷新页面完成授权...")
+        page.reload(wait_until='domcontentloaded', timeout=30000)
+        page.wait_for_timeout(2000)
+
+        # Navigate to the sqzone turnover page.
+        page.goto('https://www.sqzone.com/launa/web/dc/turnover', wait_until='domcontentloaded', timeout=30000)
+        page.wait_for_timeout(2000)
+
+        # Collect the cookies whose domain contains "sqzone".
+        cookies = context.cookies()
+        sqzone_cookies = {}
+        for c in cookies:
+            if 'sqzone' in c.get('domain', ''):
+                sqzone_cookies[c['name']] = c['value']
+
+        # Validate the cookies with a one-row probe of the turnover endpoint.
+        if sqzone_cookies.get('JSESSIONID'):
+            test_result = page.evaluate('''async () => {
+                const resp = await fetch('/launa/pc/dataCenter/queryShopTurnoverInfo', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json;charset=UTF-8',
+                        'X-Requested-With': 'XMLHttpRequest',
+                        'appName': 'SQLINK',
+                    },
+                    body: JSON.stringify({
+                        keyword: '', pageSize: 1, curPage: 1,
+                        shopId: '', payId: [], payStatus: '',
+                        startTime: '2026-03-01', endTime: '2026-03-31',
+                    })
+                });
+                return await resp.json();
+            }''')
+
+            if test_result.get('success') or test_result.get('data'):
+                print(f"[登录] Cookies有效! JSESSIONID={sqzone_cookies['JSESSIONID'][:20]}...")
+            else:
+                print(f"[登录] Cookies无效: {str(test_result)[:100]}")
+                browser.close()
+                return None
+        else:
+            print("[登录] 未获取到JSESSIONID")
+            browser.close()
+            return None
+
+        browser.close()
+        return sqzone_cookies
+
+
+def get_month_range(start_year, start_month, end_year, end_month):
+    """Return ``(first_day, last_day)`` date strings for each month in the span.
+
+    Both endpoints are inclusive and every date is formatted as ``YYYY-MM-DD``.
+    An empty list is returned when the start month lies after the end month.
+    """
+    fmt = '%Y-%m-%d'
+    spans = []
+    month_start = datetime(start_year, start_month, 1)
+    final_month = datetime(end_year, end_month, 1)
+    while not month_start > final_month:
+        # Day 28 plus four days always lands in the following month; snapping
+        # back to day 1 gives the first day of that month.
+        following = (month_start.replace(day=28) + timedelta(days=4)).replace(day=1)
+        month_end = following - timedelta(days=1)
+        spans.append((month_start.strftime(fmt), month_end.strftime(fmt)))
+        month_start = following
+    return spans
+
+
+def fetch_page_with_retry(cookies, headers, start_date, end_date, page_num, max_retries=3):
+    """Fetch one page of turnover records, retrying on failure.
+
+    Args:
+        cookies: Cookie dict sent with the request.
+        headers: Header dict sent with the request.
+        start_date: Value sent as ``startTime`` (``YYYY-MM-DD``).
+        end_date: Value sent as ``endTime`` (``YYYY-MM-DD``).
+        page_num: Value sent as ``curPage``.
+        max_retries: Number of attempts before giving up.
+
+    Returns:
+        The list under ``data.contents`` (an empty list when it is missing or
+        null), the string ``'SESSION_EXPIRED'`` when the response code is
+        -1302, or ``None`` after every attempt has failed.
+    """
+    json_data = {
+        'keyword': '', 'pageSize': 50, 'curPage': page_num,
+        'shopId': '', 'payId': [], 'payStatus': '',
+        'startTime': start_date, 'endTime': end_date,
+    }
+    for attempt in range(max_retries):
+        try:
+            response = requests.post(URL, cookies=cookies, headers=headers, json=json_data, timeout=15, verify=False)
+            if response.status_code == 200:
+                res_json = response.json()
+                if res_json.get('data'):
+                    # A null `contents` means "no rows", not a failed request.
+                    return res_json['data'].get('contents') or []
+                # Compare as text so both "-1302" and -1302 are recognised as
+                # the re-login signal.
+                if str(res_json.get('code')) == '-1302':
+                    print(f"    Session过期! 需要重新登录")
+                    return 'SESSION_EXPIRED'
+                print(f"    第{page_num}页业务异常: {res_json}")
+            else:
+                print(f"    第{page_num}页HTTP错误: {response.status_code}")
+        except requests.exceptions.RequestException as e:
+            print(f"    第{page_num}页请求失败 ({attempt+1}/{max_retries}): {e}")
+        except ValueError as e:
+            # Older requests versions raise a plain ValueError for a non-JSON
+            # body; treat it as a retryable failure instead of crashing.
+            print(f"    第{page_num}页响应不是有效JSON ({attempt+1}/{max_retries}): {e}")
+        if attempt < max_retries - 1:
+            # Linear back-off: 2 s, 4 s, ...
+            time.sleep((attempt + 1) * 2)
+    return None
+
+
+def export_data(cookies):
+    """Download the turnover records month by month and flatten them per part.
+
+    Every order is merged with each entry of its ``partsViews`` list, producing
+    one output row per part. A checkpoint workbook (``*_temp.xlsx``) is written
+    each time the row count passes another multiple of 5000.
+
+    Args:
+        cookies: Cookie dict of a logged-in session.
+
+    Returns:
+        ``('OK', rows)`` when every month was processed, or
+        ``('SESSION_EXPIRED', rows)`` with the rows gathered so far when the
+        server reports an expired session.
+    """
+    headers = {
+        'Accept': 'application/json, text/plain, */*',
+        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+        'Connection': 'keep-alive',
+        'Content-Type': 'application/json;charset=UTF-8',
+        'Origin': 'https://www.sqzone.com',
+        'Referer': 'https://www.sqzone.com/launa/web/dc/turnover',
+        'Sec-Fetch-Dest': 'empty',
+        'Sec-Fetch-Mode': 'cors',
+        'Sec-Fetch-Site': 'same-origin',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36 Edg/146.0.0.0',
+        'X-Requested-With': 'XMLHttpRequest',
+        'appName': 'SQLINK',
+    }
+
+    os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
+    all_data = []
+    # Checkpoints are threshold-based: an exact `len % 5000 == 0` test is
+    # almost never true because rows arrive in uneven batches.
+    checkpoint_every = 5000
+    next_checkpoint = checkpoint_every
+    month_ranges = get_month_range(*DATE_RANGE_START, *DATE_RANGE_END)
+    print(f"\n[导出] 共 {len(month_ranges)} 个月份需要抓取")
+
+    for start_date, end_date in tqdm(month_ranges, desc="正在抓取数据"):
+        page = 1
+        while True:
+            data_list = fetch_page_with_retry(cookies, headers, start_date, end_date, page)
+
+            if data_list == 'SESSION_EXPIRED':
+                return 'SESSION_EXPIRED', all_data
+
+            if data_list is None:
+                print(f"\n    {start_date}-{end_date} 第{page}页多次失败，跳过")
+                break
+
+            # An empty page marks the end of this month's data.
+            if not data_list:
+                break
+
+            for data in data_list:
+                customer_info = {k: v for k, v in data.items() if k != 'partsViews'}
+                # NOTE(review): orders without any parts produce no row at all;
+                # confirm this is intended.
+                for part in data.get('partsViews') or []:
+                    all_data.append({**customer_info, **part})
+
+            page += 1
+            time.sleep(1)
+
+        # Write a checkpoint once another 5000 rows have accumulated.
+        if len(all_data) >= next_checkpoint:
+            temp_file = OUTPUT_FILE.replace('.xlsx', '_temp.xlsx')
+            pd.DataFrame(all_data).to_excel(temp_file, index=False)
+            print(f"\n    临时保存: {len(all_data)} 条")
+            next_checkpoint = (len(all_data) // checkpoint_every + 1) * checkpoint_every
+
+    return 'OK', all_data
+
+
+if __name__ == '__main__':
+    # Obtain session cookies first; nothing can be exported without them.
+    cookies = get_sqzone_cookies()
+    if not cookies:
+        print("登录失败，退出")
+        sys.exit(1)
+
+    # Run the export, logging in again and starting over whenever the session
+    # expires. If the re-login fails, keep what the last run collected.
+    status, all_data = export_data(cookies)
+    while status == 'SESSION_EXPIRED':
+        print("\nSession过期，重新登录...")
+        cookies = get_sqzone_cookies()
+        if not cookies:
+            print("重新登录失败，保存已有数据")
+            break
+        status, all_data = export_data(cookies)
+
+    # Write the final workbook.
+    if not all_data:
+        print("\n未抓取到数据")
+    else:
+        os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
+        frame = pd.DataFrame(all_data)
+        frame.to_excel(OUTPUT_FILE, index=False)
+        print(f"\n全部完成! 共 {len(all_data)} 条数据，已保存至 {OUTPUT_FILE}")
@@ -0,0 +1,89 @@
+import requests
+import pandas as pd
+
+# Hard-coded cookies sent with every request.
+# NOTE(review): presumably copied from a logged-in browser session and in need
+# of refreshing once that session expires — confirm.
+cookies = {
+    'JSESSIONID': 'FA68674FDDA302C51E2775091B995EEA',
+    'td_cookie': '3009435466',
+}
+
+# Request headers sent with every call; the Referer points at the report page
+# for key=totalCount with a fixed 2026-01-01..2026-04-13 date range.
+headers = {
+    'Accept': 'application/json, text/javascript, */*; q=0.01',
+    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+    'Connection': 'keep-alive',
+    'Referer': 'http://www.idsz.xin:7070/report_member_verifi_list?detailtype=1&key=totalCount&datafrom=2026-01-01&datato=2026-04-13&sshopId=&type=1',
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36 Edg/146.0.0.0',
+    'X-Requested-With': 'XMLHttpRequest',
+}
+
+def get_data(page=0, page_size=50):
+    """Fetch one page of the ``kpi_memberVerifiAndSurplusQuery`` report (key=totalCount).
+
+    Args:
+        page: Page index sent as ``page``; callers in this script start at 0.
+        page_size: Number of rows requested per page.
+
+    Returns:
+        The decoded JSON body of the response.
+
+    Raises:
+        requests.exceptions.RequestException: On connection errors, timeouts
+            or a non-2xx status.
+        ValueError: When the body is not valid JSON.
+    """
+    url = f'http://www.idsz.xin:7070/posapi_invoke?apiname=kpi_memberVerifiAndSurplusQuery&detailtype=1&startTime=2026-01-01&endTime=2026-04-13&key=totalCount&sshopId=&timesCardId=&option=&page={page}&pageSize={page_size}'
+    # Without a timeout requests waits indefinitely on a stalled server.
+    response = requests.get(url=url, headers=headers, cookies=cookies, timeout=30)
+    response.raise_for_status()
+    return response.json()
+
+def _row_to_record(row):
+    """Map one API row onto the column headers of the exported sheet."""
+    return {
+        '车牌号': row.get('carNo', ''),
+        '卡名称': row.get('cardType', ''),
+        '到期时间': row.get('endTime', ''),
+        '发动机号': row.get('engineNumber', ''),
+        '剩余明细': row.get('goodsName', ''),
+        '剩余次数': row.get('qty', ''),
+        '手机号': row.get('mobilePhone', ''),
+        '客户姓名': row.get('name', ''),
+        '备注': row.get('remark', ''),
+        'Vin码': row.get('vin', '')
+    }
+
+
+print("正在请求会员卡数据...")
+try:
+    # The first page is fetched up front because it carries the total row count.
+    page_size = 50
+    first_page = get_data(page=0, page_size=page_size)
+    # `or 0` / `or []` guard against explicit nulls, which would otherwise
+    # raise a TypeError that the handlers below do not catch.
+    total = first_page.get('total', 0) or 0
+    total_pages = (total + page_size - 1) // page_size
+
+    print(f"数据总条数: {total}")
+    print(f"每页条数: {page_size}")
+    print(f"总页数: {total_pages}")
+
+    TCK = []
+    failed_pages = []
+
+    # Rows of the first page.
+    first_rows = first_page.get('rows') or []
+    if first_rows:
+        TCK.extend(_row_to_record(row) for row in first_rows)
+        print(f"已获取第1页数据，累计 {len(TCK)} 条")
+
+    # Remaining pages; a failing page is reported and skipped (best effort).
+    for page in range(1, total_pages):
+        try:
+            rows = get_data(page=page, page_size=page_size).get('rows') or []
+            if rows:
+                TCK.extend([_row_to_record(row) for row in rows])
+                print(f"已获取第{page+1}页数据，累计 {len(TCK)} 条")
+        except Exception as e:
+            print(f"获取第{page+1}页数据失败: {e}")
+            failed_pages.append(page + 1)
+            continue
+
+    # Export the collected rows.
+    df = pd.DataFrame(TCK)
+    output_path = '会员卡.xlsx'
+    df.to_excel(output_path, index=False)
+    print(f"\n成功导出 {len(TCK)} 条会员卡数据到 {output_path}")
+    if failed_pages:
+        # Make an incomplete export visible instead of leaving it buried in the log.
+        print(f"以下页获取失败，导出结果不完整: {failed_pages}")
+
+except requests.exceptions.RequestException as e:
+    print(f"请求失败: {e}")
+except ValueError as e:
+    print(f"JSON解析失败: {e}")
@@ -0,0 +1,89 @@
+import requests
+import pandas as pd
+
+# Hard-coded cookies sent with every request.
+# NOTE(review): presumably copied from a logged-in browser session and in need
+# of refreshing once that session expires — confirm.
+cookies = {
+    'JSESSIONID': 'FA68674FDDA302C51E2775091B995EEA',
+    'td_cookie': '3008847516',
+}
+
+# Request headers sent with every call; the Referer points at the report page
+# for key=totalBalance with a fixed 2026-01-01..2026-04-13 date range.
+headers = {
+    'Accept': 'application/json, text/javascript, */*; q=0.01',
+    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+    'Connection': 'keep-alive',
+    'Referer': 'http://www.idsz.xin:7070/report_member_verifi_list?detailtype=1&key=totalBalance&datafrom=2026-01-01&datato=2026-04-13&sshopId=&type=1',
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36 Edg/146.0.0.0',
+    'X-Requested-With': 'XMLHttpRequest',
+}
+
+def get_data(page=0, page_size=50):
+    """Fetch one page of the ``kpi_memberVerifiAndSurplusQuery`` report (key=totalBalance).
+
+    Args:
+        page: Page index sent as ``page``; callers in this script start at 0.
+        page_size: Number of rows requested per page.
+
+    Returns:
+        The decoded JSON body of the response.
+
+    Raises:
+        requests.exceptions.RequestException: On connection errors, timeouts
+            or a non-2xx status.
+        ValueError: When the body is not valid JSON.
+    """
+    url = f'http://www.idsz.xin:7070/posapi_invoke?apiname=kpi_memberVerifiAndSurplusQuery&detailtype=1&startTime=2026-01-01&endTime=2026-04-13&key=totalBalance&sshopId=&timesCardId=&option=&page={page}&pageSize={page_size}'
+    # Without a timeout requests waits indefinitely on a stalled server.
+    response = requests.get(url=url, headers=headers, cookies=cookies, timeout=30)
+    response.raise_for_status()
+    return response.json()
+
+def _row_to_record(row):
+    """Map one API row onto the column headers of the exported sheet."""
+    return {
+        '车牌号': row.get('carNo', ''),
+        '卡名称': row.get('cardType', ''),
+        '到期时间': row.get('endTime', ''),
+        '发动机号': row.get('engineNumber', ''),
+        '剩余金额': row.get('leftAmount', ''),
+        '剩余赠送金额': row.get('leftsendAmount', ''),
+        '手机号': row.get('mobilePhone', ''),
+        '客户姓名': row.get('name', ''),
+        '备注': row.get('remark', ''),
+        'Vin码': row.get('vin', '')
+    }
+
+
+print("正在请求数据...")
+try:
+    # The first page is fetched up front because it carries the total row count.
+    page_size = 50
+    first_page = get_data(page=0, page_size=page_size)
+    # `or 0` / `or []` guard against explicit nulls, which would otherwise
+    # raise a TypeError that the handlers below do not catch.
+    total = first_page.get('total', 0) or 0
+    total_pages = (total + page_size - 1) // page_size
+
+    print(f"数据总条数: {total}")
+    print(f"每页条数: {page_size}")
+    print(f"总页数: {total_pages}")
+
+    CZK = []
+    failed_pages = []
+
+    # Rows of the first page.
+    first_rows = first_page.get('rows') or []
+    if first_rows:
+        CZK.extend(_row_to_record(row) for row in first_rows)
+        print(f"已获取第1页数据，累计 {len(CZK)} 条")
+
+    # Remaining pages; a failing page is reported and skipped (best effort).
+    for page in range(1, total_pages):
+        try:
+            rows = get_data(page=page, page_size=page_size).get('rows') or []
+            if rows:
+                CZK.extend([_row_to_record(row) for row in rows])
+                print(f"已获取第{page+1}页数据，累计 {len(CZK)} 条")
+        except Exception as e:
+            print(f"获取第{page+1}页数据失败: {e}")
+            failed_pages.append(page + 1)
+            continue
+
+    # Export the collected rows.
+    df = pd.DataFrame(CZK)
+    output_path = '储值卡.xlsx'
+    df.to_excel(output_path, index=False)
+    print(f"\n成功导出 {len(CZK)} 条数据到 {output_path}")
+    if failed_pages:
+        # Make an incomplete export visible instead of leaving it buried in the log.
+        print(f"以下页获取失败，导出结果不完整: {failed_pages}")
+
+except requests.exceptions.RequestException as e:
+    print(f"请求失败: {e}")
+except ValueError as e:
+    print(f"JSON解析失败: {e}")