{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 续约回访90-180天调试分析\n", "\n", "本notebook用于调试续约回访派发数据为空的问题,每一步都会保存CSV以便分析。\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤1: 导入必要的库和初始化\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "当前工作目录: d:\\Idea Project\\SaaS_V1.7\\test\n", "项目根目录: d:\\Idea Project\\SaaS_V1.7\n", "Python路径: ['d:\\\\Idea Project\\\\SaaS_V1.7', 'd:\\\\Program Files\\\\anaconda3\\\\envs\\\\SaaS\\\\python313.zip', 'd:\\\\Program Files\\\\anaconda3\\\\envs\\\\SaaS\\\\DLLs']...\n", "输出目录: d:\\Idea Project\\SaaS_V1.7\\back_ground_module\\output\\debug_revisit_renew\n" ] } ], "source": [ "import os\n", "import sys\n", "import time\n", "import requests\n", "\n", "# 添加项目根目录到Python路径\n", "# 方法1: 如果notebook在test目录下,向上找一级\n", "current_dir = os.getcwd()\n", "if os.path.basename(current_dir) == 'test':\n", " project_root = os.path.dirname(current_dir)\n", "else:\n", " # 方法2: 向上查找直到找到api.py文件\n", " project_root = current_dir\n", " while project_root != os.path.dirname(project_root):\n", " if os.path.exists(os.path.join(project_root, 'api.py')):\n", " break\n", " project_root = os.path.dirname(project_root)\n", "\n", "if project_root not in sys.path:\n", " sys.path.insert(0, project_root)\n", "print(f\"当前工作目录: {current_dir}\")\n", "print(f\"项目根目录: {project_root}\")\n", "print(f\"Python路径: {sys.path[:3]}...\") # 只显示前3个路径\n", "\n", "from api import API\n", "from back_ground_module import CommonModule\n", "import pandas as pd\n", "import datetime\n", "import re\n", "from log_config import configure_task_logger, configure_error_task_logger\n", "\n", "api_instance = API()\n", "common_module = CommonModule()\n", "logger = configure_task_logger()\n", "error_task_logger = configure_error_task_logger()\n", "\n", "# 设置输出目录(相对于当前notebook目录)\n", "output_dir = \"output/debug_revisit_renew\"\n", "os.makedirs(output_dir, exist_ok=True)\n", "print(f\"输出目录: {os.path.abspath(output_dir)}\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤2: 加载所有数据(单独执行,数据量较大)\n", "\n", "**注意:这一步数据量较大,请单独执行此单元**\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(\"开始加载所有数据...\")\n", "print(f\"开始时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", "\n", "# 省市区人员关系表\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"676512ac3e54dc3159460c0a\"}\n", "json_dict = api_instance.entry_data_list(payload)\n", "if json_dict and \"data\" in json_dict:\n", " json_list = json_dict.get(\"data\")\n", " print(f\"省市区人员关系表: {len(json_list)} 条\")\n", " pd.DataFrame(json_list).to_csv(f\"{output_dir}/01_省市区人员关系表.csv\", index=False, encoding='utf-8-sig')\n", "else:\n", " print(\"加载省市区人员关系表失败\")\n", " json_list = []\n", "\n", "# 获取简道云员工id\n", "payload = {\"api_key\": \"6694d3c4fcb69ca9a111a6c4\", \"entry_id\": \"6769204a1902c9341340a1bc\"}\n", "staff_id = api_instance.entry_data_list(payload)\n", "staff_id_list = staff_id.get(\"data\")\n", "print(f\"简道云员工id: {len(staff_id_list)} 条\")\n", "pd.DataFrame(staff_id_list).to_csv(f\"{output_dir}/02_简道云员工id.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取权限表信息\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"675b96c14e839f90fef1647c\"}\n", "permissions_table = api_instance.entry_data_list(payload).get(\"data\")\n", "print(f\"权限表: {len(permissions_table)} 条\")\n", "pd.DataFrame(permissions_table).to_csv(f\"{output_dir}/03_权限表.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取NGV数据\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"675bb02bd2d53c2034c665e4\"}\n", "NGV_data_list = api_instance.entry_data_list(payload).get(\"data\")\n", "print(f\"NGV数据: {len(NGV_data_list)} 条\")\n", "pd.DataFrame(NGV_data_list).to_csv(f\"{output_dir}/04_NGV数据.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取服务提醒-数据支持表单数据\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"676bb7bda3029720f1083e99\"}\n", "service_remind = api_instance.entry_data_list(payload).get(\"data\")\n", "print(f\"服务提醒: {len(service_remind)} 条\")\n", "pd.DataFrame(service_remind).to_csv(f\"{output_dir}/05_服务提醒.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取智能检测-数据支持表单数据\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"676bb99649ab3ac975af6e39\"}\n", "Smart_detection = api_instance.entry_data_list(payload).get(\"data\")\n", "print(f\"智能检测: {len(Smart_detection)} 条\")\n", "pd.DataFrame(Smart_detection).to_csv(f\"{output_dir}/06_智能检测.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取功能使用情况表\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"6763bbf657bd8fb76fcb41b2\"}\n", "get_feature_usage = api_instance.entry_data_list(payload).get(\"data\", [])\n", "print(f\"功能使用情况: {len(get_feature_usage)} 条\")\n", "pd.DataFrame(get_feature_usage).to_csv(f\"{output_dir}/07_功能使用情况.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取保单识别表\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"6773a60d30ed87ff9f68d3c5\"}\n", "policy_recognition = api_instance.entry_data_list(payload).get(\"data\")\n", "print(f\"保单识别: {len(policy_recognition)} 条\")\n", "widget_list = [item['_widget_1735632397600'] for item in policy_recognition]\n", "pd.DataFrame(policy_recognition).to_csv(f\"{output_dir}/08_保单识别.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取私域小程序-数据支持表单数据\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"67e0f0fae622896749ba5087\"}\n", "private_domain = api_instance.entry_data_list(payload).get(\"data\", [])\n", "print(f\"私域小程序: {len(private_domain)} 条\")\n", "pd.DataFrame(private_domain).to_csv(f\"{output_dir}/09_私域小程序.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取公域小程序-数据支持表单数据\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"67e0c702c8f603b997980999\"}\n", "public_domain = api_instance.entry_data_list(payload).get(\"data\", [])\n", "public_domain_list = [item['_widget_1742784257506'] for item in public_domain]\n", "print(f\"公域小程序: {len(public_domain)} 条\")\n", "pd.DataFrame(public_domain).to_csv(f\"{output_dir}/10_公域小程序.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取异业合作-数据支持表单数据\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"67e24fdd8dfcfa918e17c30b\"}\n", "different_industries = api_instance.entry_data_list(payload).get(\"data\", [])\n", "different_industries_list = [item['_widget_1742884829007'] for item in different_industries]\n", "print(f\"异业合作: {len(different_industries)} 条\")\n", "pd.DataFrame(different_industries).to_csv(f\"{output_dir}/11_异业合作.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取短信-数据支持表单数据\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"67e5107198ba1b20d5df3974\"}\n", "groupnotification = api_instance.entry_data_list(payload).get(\"data\", [])\n", "print(f\"短信: {len(groupnotification)} 条\")\n", "pd.DataFrame(groupnotification).to_csv(f\"{output_dir}/12_短信.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 获取多公司过滤表\n", "payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"689bf5f8ba88a28cb0679ec9\"}\n", "get_filter_company_list = api_instance.entry_data_list(payload).get(\"data\", [])\n", "print(f\"多公司过滤表: {len(get_filter_company_list)} 条\")\n", "pd.DataFrame(get_filter_company_list).to_csv(f\"{output_dir}/13_多公司过滤表.csv\", index=False, encoding='utf-8-sig')\n", "\n", "print(f\"\\n数据加载完成!\")\n", "print(f\"结束时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤3: 获取节假日列表和计算date_one\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "节假日列表: 52 个日期\n", "当前日期: 2026-01-17\n", "遍历日期: 2026-01-16\n", "遍历次数: 1\n", "date_one = 1\n" ] } ], "source": [ "def calculate_date_one(date_list, start_offset=0):\n", " \"\"\"\n", " 计算从当前日期(或指定偏移量的日期)开始,往前遍历遇到date_list中日期的次数。\n", " \"\"\"\n", " now_time = datetime.datetime.now() + datetime.timedelta(days=start_offset)\n", " date_one = 1\n", " print(\"当前日期:\", now_time.strftime(\"%Y-%m-%d\"))\n", " \n", " if now_time.strftime(\"%Y-%m-%d\") in date_list:\n", " date_one = 0\n", " print(\"开始次数:\", date_one)\n", " else:\n", " for i in range(1, 10):\n", " new_date = now_time + datetime.timedelta(days=-i)\n", " new_date_str = new_date.strftime(\"%Y-%m-%d\")\n", " print(\"遍历日期:\", new_date_str)\n", " if new_date_str in date_list:\n", " date_one += 1\n", " print(\"节假日期:\", new_date_str)\n", " else:\n", " break\n", " \n", " print(\"遍历次数:\", date_one)\n", " return date_one\n", "\n", "# 获取节假日列表\n", "date_list = common_module.get_holiday_list()\n", "print(f\"节假日列表: {len(date_list)} 个日期\")\n", "pd.Series(date_list).to_csv(f\"{output_dir}/14_节假日列表.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 计算date_one\n", "date_one = calculate_date_one(date_list, start_offset=0)\n", "print(f\"date_one = {date_one}\")\n", "\n", "# 保存date_one\n", "pd.DataFrame([{\"date_one\": date_one}]).to_csv(f\"{output_dir}/15_date_one.csv\", index=False, encoding='utf-8-sig')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤4: 获取NGV明细数据\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "开始获取NGV明细数据...\n", "开始时间: 2026-01-17 13:34:48\n", "NGV明细数据: 45686 条\n", "NGV明细数据列数: 141\n", "已保存到: output/debug_revisit_renew/16_原始NGV数据.csv\n", "结束时间: 2026-01-17 13:35:00\n" ] } ], "source": [ "print(\"开始获取NGV明细数据...\")\n", "print(f\"开始时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", "\n", "data_NGV = common_module.get_ngv_details(days_back=1)\n", "print(f\"NGV明细数据: {len(data_NGV)} 条\")\n", "print(f\"NGV明细数据列数: {len(data_NGV.columns)}\")\n", "\n", "# 保存原始NGV数据\n", "data_NGV.to_csv(f\"{output_dir}/16_原始NGV数据.csv\", index=False, encoding='utf-8-sig')\n", "print(f\"已保存到: {output_dir}/16_原始NGV数据.csv\")\n", "\n", "print(f\"结束时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤5: 构建省市区索引\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "省市区索引构建完成: 3667 条\n" ] } ], "source": [ "def build_index(json_list):\n", " index = {}\n", " for json_item in json_list:\n", " try:\n", " key = (json_item['_widget_1734677164861'], json_item['_widget_1734677164862'],\n", " json_item['_widget_1734677164863']) # 省市区\n", " if '_widget_1734677164871' not in json_item: # 日常回访客服\n", " raise KeyError(\"缺少 '日常回访客服' 键\")\n", " index[key] = json_item\n", " except KeyError as e:\n", " print(f\"警告:{e},跳过该条记录: {json_item}\")\n", " continue\n", " return index\n", "\n", "# 从CSV加载省市区人员关系表(步骤2的输出)\n", "csv_path = f\"{output_dir}/01_省市区人员关系表.csv\"\n", "if os.path.exists(csv_path):\n", " print(f\"从CSV文件读取: {csv_path}\")\n", " json_list_df = pd.read_csv(csv_path, encoding='utf-8-sig')\n", " json_list = json_list_df.to_dict('records')\n", " print(f\"读取到 {len(json_list)} 条记录\")\n", "else:\n", " print(f\"警告: 文件不存在 {csv_path},请先执行步骤2\")\n", " json_list = []\n", "\n", "# 构建索引\n", "index = build_index(json_list)\n", "print(f\"省市区索引构建完成: {len(index)} 条\")\n", "\n", "# 保存索引信息\n", "index_df = pd.DataFrame([{\"省\": k[0], \"市\": k[1], \"区\": k[2], \"数据\": str(v)} for k, v in index.items()])\n", "index_df.to_csv(f\"{output_dir}/17_省市区索引.csv\", index=False, encoding='utf-8-sig')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤6: 获取多公司过滤列表\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "获取多公司过滤公司id\n", "过滤公司条数: 19\n" ] } ], "source": [ "print(\"获取多公司过滤公司id\")\n", "# 从CSV加载多公司过滤表(步骤2的输出)\n", "csv_path = f\"{output_dir}/13_多公司过滤表.csv\"\n", "if os.path.exists(csv_path):\n", " print(f\"从CSV文件读取: {csv_path}\")\n", " get_filter_company_list_df = pd.read_csv(csv_path, encoding='utf-8-sig')\n", " get_filter_company_list = get_filter_company_list_df.to_dict('records')\n", " print(f\"读取到 {len(get_filter_company_list)} 条记录\")\n", "else:\n", " print(f\"警告: 文件不存在 {csv_path},请先执行步骤2\")\n", " get_filter_company_list = []\n", "\n", "all_filter_company_list = []\n", "for company in get_filter_company_list:\n", " company_list = company.get(\"_widget_1755052002491\")\n", " if company_list:\n", " # 处理可能是字符串的情况\n", " if isinstance(company_list, str):\n", " import ast\n", " try:\n", " company_list = ast.literal_eval(company_list)\n", " except:\n", " continue\n", " for company_item in company_list:\n", " if company_item.get(\"_widget_1755052002496\") == \"否\":\n", " all_filter_company_list.append(company_item.get(\"_widget_1755052002495\"))\n", "\n", "print(f\"过滤公司条数: {len(all_filter_company_list)}\")\n", "pd.Series(all_filter_company_list).to_csv(f\"{output_dir}/18_过滤公司列表.csv\", index=False, encoding='utf-8-sig')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤7: 数据处理和过滤(第一部分:基础处理)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "过滤前数据量: 45686\n", "过滤后数据量: 45667\n" ] } ], "source": [ "# 从CSV加载NGV数据(步骤4的输出)\n", "csv_path = f\"{output_dir}/16_原始NGV数据.csv\"\n", "if os.path.exists(csv_path):\n", " print(f\"从CSV文件读取: {csv_path}\")\n", " data_NGV = pd.read_csv(csv_path, encoding='utf-8-sig')\n", " print(f\"读取到 {len(data_NGV)} 条记录\")\n", "else:\n", " print(f\"警告: 文件不存在 {csv_path},请先执行步骤4\")\n", " data_NGV = pd.DataFrame()\n", "\n", "# 将A列和B列的日期字符串转换为日期格式\n", "data_NGV_processed = data_NGV.copy()\n", "data_NGV_processed['A'] = pd.to_datetime(data_NGV_processed['expiry_time'])\n", "data_NGV_processed['B'] = pd.to_datetime(data_NGV_processed['renew_date'])\n", "\n", "def replace_values(series):\n", " return series.apply(lambda x: '' if pd.isna(x) or x in ['NA', 'None', ''] else x)\n", "\n", "# 处理字符串数据\n", "data_NGV_processed = data_NGV_processed.apply(replace_values)\n", "\n", "# 过滤多公司\n", "print(f\"过滤前数据量: {len(data_NGV_processed)}\")\n", "data_NGV_processed = data_NGV_processed[~data_NGV_processed['id_own_group'].isin(all_filter_company_list)]\n", "print(f\"过滤后数据量: {len(data_NGV_processed)}\")\n", "\n", "# 保存过滤后的数据\n", "data_NGV_processed.to_csv(f\"{output_dir}/19_过滤多公司后数据.csv\", index=False, encoding='utf-8-sig')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤8: 数据处理和过滤(第二部分:优先级排序和最佳值计算)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "合并最佳值后数据量: 45667\n" ] } ], "source": [ "# 从CSV加载前一步的数据(步骤7的输出)\n", "csv_path = f\"{output_dir}/19_过滤多公司后数据.csv\"\n", "if os.path.exists(csv_path):\n", " print(f\"从CSV文件读取: {csv_path}\")\n", " data_NGV_processed = pd.read_csv(csv_path, encoding='utf-8-sig')\n", " # 重新转换日期列\n", " data_NGV_processed['A'] = pd.to_datetime(data_NGV_processed['expiry_time'])\n", " data_NGV_processed['B'] = pd.to_datetime(data_NGV_processed['renew_date'])\n", " print(f\"读取到 {len(data_NGV_processed)} 条记录\")\n", "else:\n", " print(f\"警告: 文件不存在 {csv_path},请先执行步骤7\")\n", " data_NGV_processed = pd.DataFrame()\n", "\n", "# 定义优先级顺序\n", "edition_order = ['皇冠版', '至尊版', '尊享版', '旗舰版', '标准版', '进阶版', '基础版', '入门版']\n", "customer_type_order = [\"F\", \"E\", \"D\", \"C\", \"B\", \"A\"]\n", "group_grade_order = ['全国KA(FMVP)', '区域KA(MVP)', '重要客户(SVIP)', '普通客户(VIP)']\n", "\n", "# 创建映射字典\n", "edition_map = {edition: idx for idx, edition in enumerate(edition_order)}\n", "customer_type_map = {ctype: idx for idx, ctype in enumerate(customer_type_order)}\n", "group_grade_map = {grade: idx for idx, grade in enumerate(group_grade_order)}\n", "\n", "# 添加用于排序的新列\n", "data_NGV_processed['edition_rank'] = data_NGV_processed['saas_edition_fmt'].map(edition_map).fillna(0).astype(int)\n", "data_NGV_processed['customer_type_rank'] = data_NGV_processed['saas_customer_type'].map(customer_type_map).fillna(0).astype(int)\n", "data_NGV_processed['group_grade_rank'] = data_NGV_processed['group_grade'].map(group_grade_map).fillna(0).astype(int)\n", "\n", "# 找到每组中 edition_rank 最小值对应的行\n", "best_edition_idx = data_NGV_processed.groupby('id_own_group')['edition_rank'].idxmin()\n", "best_edition_rows = data_NGV_processed.loc[best_edition_idx]\n", "best_edition_rows['max_saas_edition'] = best_edition_rows['saas_edition_fmt']\n", "\n", "# 找到每组中 customer_type_rank 最小值对应的行\n", "best_customer_type_idx = data_NGV_processed.groupby('id_own_group')['customer_type_rank'].idxmin()\n", "best_customer_type_rows = data_NGV_processed.loc[best_customer_type_idx]\n", "best_customer_type_rows['max_saas_customer_type'] = best_customer_type_rows['customer_type_rank'].apply(\n", " lambda x: customer_type_order[x])\n", "\n", "# 找到每组中 group_grade_rank 最小值对应的行\n", "best_group_grade_idx = data_NGV_processed.groupby('id_own_group')['group_grade_rank'].idxmin()\n", "best_group_grade_rows = data_NGV_processed.loc[best_group_grade_idx]\n", "best_group_grade_rows['max_group_grade'] = best_group_grade_rows['group_grade']\n", "\n", "# 合并最佳值回到原数据集\n", "best_values = (\n", " best_edition_rows[['id_own_group', 'max_saas_edition']]\n", " .merge(best_customer_type_rows[['id_own_group', 'max_saas_customer_type']], on='id_own_group', how='outer')\n", " .merge(best_group_grade_rows[['id_own_group', 'max_group_grade']], on='id_own_group', how='outer')\n", ")\n", "\n", "# 将最佳值合并回原数据集\n", "data_NGV_processed = data_NGV_processed.merge(best_values, on='id_own_group', how='left')\n", "\n", "print(f\"合并最佳值后数据量: {len(data_NGV_processed)}\")\n", "data_NGV_processed.to_csv(f\"{output_dir}/20_合并最佳值后数据.csv\", index=False, encoding='utf-8-sig')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤8.5: 字段数据类型检查(调试用)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "============================================================\n", "字段数据类型和值检查\n", "============================================================\n", "\n", "当前 data_NGV_processed 数据量: 45667 条\n", "\n", "【is_main_org 字段检查】\n", "数据类型: object\n", "唯一值: ['0', '1']\n", "值分布:\n", "is_main_org\n", "1 37637\n", "0 8030\n", "Name: count, dtype: int64\n", "示例值(前5个): ['1', '0', '0', '0', '1']\n", "\n", "【org_status 字段检查】\n", "数据类型: object\n", "唯一值: ['留存', '过期']\n", "值分布:\n", "org_status\n", "留存 27997\n", "过期 17670\n", "Name: count, dtype: int64\n", "\n", "【org_type 字段检查】\n", "数据类型: object\n", "唯一值: ['一般', '天猫']\n", "值分布:\n", "org_type\n", "一般 42998\n", "天猫 2669\n", "Name: count, dtype: int64\n", "\n", "【area_manager 字段检查】\n", "数据类型: object\n", "唯一值数量: 16\n", "值分布(前10):\n", "area_manager\n", "肖军 10795\n", "景东强 8353\n", "陈庆伟 8301\n", "张凯 8232\n", "关磊 6994\n", "孙玉蕾 2007\n", "殷昊 745\n", "王涛 161\n", "刘伟 52\n", " 8\n", "Name: count, dtype: int64\n", "是否包含'殷昊': True\n", "是否包含'孙玉蕾': True\n", "\n", "【条件匹配测试】\n", "警告: is_main_org 是字符串类型,需要转换为数值或使用字符串比较\n", "使用数值比较 (== 1): 15066 条\n", "使用字符串比较 (== '1'): 15066 条\n", "\n", "============================================================\n" ] } ], "source": [ "# 从CSV加载前一步的数据(步骤8的输出)\n", "csv_path = f\"{output_dir}/20_合并最佳值后数据.csv\"\n", "if os.path.exists(csv_path):\n", " print(f\"从CSV文件读取: {csv_path}\")\n", " data_NGV_processed = pd.read_csv(csv_path, encoding='utf-8-sig')\n", " print(f\"读取到 {len(data_NGV_processed)} 条记录\")\n", "else:\n", " print(f\"警告: 文件不存在 {csv_path},请先执行步骤8\")\n", " data_NGV_processed = pd.DataFrame()\n", "\n", "# 检查关键字段的数据类型和实际值\n", "print(\"=\" * 60)\n", "print(\"字段数据类型和值检查\")\n", "print(\"=\" * 60)\n", "print(f\"\\n当前 data_NGV_processed 数据量: {len(data_NGV_processed)} 条\")\n", "\n", "# 检查 is_main_org\n", "print(f\"\\n【is_main_org 字段检查】\")\n", "print(f\"数据类型: {data_NGV_processed['is_main_org'].dtype}\")\n", "print(f\"唯一值: {sorted(data_NGV_processed['is_main_org'].unique())}\")\n", "print(f\"值分布:\\n{data_NGV_processed['is_main_org'].value_counts()}\")\n", "print(f\"示例值(前5个): {data_NGV_processed['is_main_org'].head().tolist()}\")\n", "\n", "# 检查 org_status\n", "print(f\"\\n【org_status 字段检查】\")\n", "print(f\"数据类型: {data_NGV_processed['org_status'].dtype}\")\n", "print(f\"唯一值: {sorted(data_NGV_processed['org_status'].unique())}\")\n", "print(f\"值分布:\\n{data_NGV_processed['org_status'].value_counts()}\")\n", "\n", "# 检查 org_type\n", "print(f\"\\n【org_type 字段检查】\")\n", "print(f\"数据类型: {data_NGV_processed['org_type'].dtype}\")\n", "print(f\"唯一值: {sorted(data_NGV_processed['org_type'].unique())}\")\n", "print(f\"值分布:\\n{data_NGV_processed['org_type'].value_counts()}\")\n", "\n", "# 检查 area_manager\n", "print(f\"\\n【area_manager 字段检查】\")\n", "print(f\"数据类型: {data_NGV_processed['area_manager'].dtype}\")\n", "print(f\"唯一值数量: {data_NGV_processed['area_manager'].nunique()}\")\n", "print(f\"值分布(前10):\\n{data_NGV_processed['area_manager'].value_counts().head(10)}\")\n", "print(f\"是否包含'殷昊': {'殷昊' in data_NGV_processed['area_manager'].values}\")\n", "print(f\"是否包含'孙玉蕾': {'孙玉蕾' in data_NGV_processed['area_manager'].values}\")\n", "\n", "# 测试条件匹配\n", "print(f\"\\n【条件匹配测试】\")\n", "# 测试主店过期条件\n", "if data_NGV_processed['is_main_org'].dtype == 'object':\n", " print(\"警告: is_main_org 是字符串类型,需要转换为数值或使用字符串比较\")\n", " # 尝试转换为数值\n", " is_main_org_numeric = pd.to_numeric(data_NGV_processed['is_main_org'], errors='coerce')\n", " condition1_test = (is_main_org_numeric == 1) & (data_NGV_processed['org_status'] == '过期')\n", " condition1_test_str = (data_NGV_processed['is_main_org'] == '1') & (data_NGV_processed['org_status'] == '过期')\n", " print(f\"使用数值比较 (== 1): {condition1_test.sum()} 条\")\n", " print(f\"使用字符串比较 (== '1'): {condition1_test_str.sum()} 条\")\n", "else:\n", " condition1_test = (data_NGV_processed['is_main_org'] == 1) & (data_NGV_processed['org_status'] == '过期')\n", " print(f\"主店过期条件匹配: {condition1_test.sum()} 条\")\n", "\n", "# 保存检查结果\n", "check_result = {\n", " 'is_main_org_dtype': str(data_NGV_processed['is_main_org'].dtype),\n", " 'is_main_org_unique_values': list(data_NGV_processed['is_main_org'].unique()),\n", " 'org_status_unique_values': list(data_NGV_processed['org_status'].unique()),\n", " 'org_type_unique_values': list(data_NGV_processed['org_type'].unique()),\n", " 'area_manager_unique_count': data_NGV_processed['area_manager'].nunique(),\n", " 'has_殷昊': '殷昊' in data_NGV_processed['area_manager'].values,\n", " 'has_孙玉蕾': '孙玉蕾' in data_NGV_processed['area_manager'].values,\n", "}\n", "pd.DataFrame([check_result]).to_csv(f\"{output_dir}/20.5_字段检查结果.csv\", index=False, encoding='utf-8-sig')\n", "\n", "print(\"\\n\" + \"=\" * 60)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤9: 数据处理和过滤(第三部分:主店过期处理)\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "主店过期数据量: 0\n", "警告: 主店过期数据为空,请检查 is_main_org 和 org_status 字段\n", "满足条件的分店数据量: 0\n", "警告: 主店过期数据为空,无法筛选分店数据\n", "满足条件的主店数据量: 20159\n", "警告: 没有分店数据可合并\n", "合并后总数据量: 20159\n" ] } ], "source": [ "# 主店过期,分店设置为主店\n", "# 修复:处理 is_main_org 可能是字符串类型的情况\n", "if data_NGV_processed['is_main_org'].dtype == 'object':\n", " # 如果是字符串类型,转换为数值或使用字符串比较\n", " is_main_org_numeric = pd.to_numeric(data_NGV_processed['is_main_org'], errors='coerce')\n", " condition = (is_main_org_numeric == 1) & (data_NGV_processed['org_status'] == '过期')\n", "else:\n", " condition = (data_NGV_processed['is_main_org'] == 1) & (data_NGV_processed['org_status'] == '过期')\n", "\n", "ngvv2 = data_NGV_processed[condition]\n", "print(f\"主店过期数据量: {len(ngvv2)}\")\n", "if len(ngvv2) > 0:\n", " ngvv2.to_csv(f\"{output_dir}/21_主店过期数据.csv\", index=False, encoding='utf-8-sig')\n", "else:\n", " print(\"警告: 主店过期数据为空,请检查 is_main_org 和 org_status 字段\")\n", "\n", "# 检查id_own_group是否存在于ngvv2中\n", "data_NGV_V2 = data_NGV_processed.copy()\n", "\n", "# 修复:处理 is_main_org 可能是字符串类型的情况\n", "if data_NGV_V2['is_main_org'].dtype == 'object':\n", " is_main_org_numeric_v2 = pd.to_numeric(data_NGV_V2['is_main_org'], errors='coerce')\n", " data_NGV_V2['条件'] = ((data_NGV_V2['org_type'] == \"一般\") & (data_NGV_V2['org_status'] == '留存') &\n", " (data_NGV_V2['area_manager'] != '殷昊') & (data_NGV_V2['area_manager'] != '孙玉蕾') &\n", " (is_main_org_numeric_v2 != 1))\n", "else:\n", " data_NGV_V2['条件'] = ((data_NGV_V2['org_type'] == \"一般\") & (data_NGV_V2['org_status'] == '留存') &\n", " (data_NGV_V2['area_manager'] != '殷昊') & (data_NGV_V2['area_manager'] != '孙玉蕾') &\n", " (data_NGV_V2['is_main_org'] != 1))\n", "\n", "data_NGV_V2 = data_NGV_V2.loc[data_NGV_V2[\"条件\"]]\n", "print(f\"满足条件的分店数据量: {len(data_NGV_V2)}\")\n", "\n", "# 过滤存在的记录\n", "if len(ngvv2) > 0:\n", " data_NGV_V2['exists_in_ngvv2'] = data_NGV_V2['id_own_group'].isin(ngvv2['id_own_group'])\n", " filtered_data = data_NGV_V2[data_NGV_V2['exists_in_ngvv2']]\n", " print(f\"存在于主店过期列表的分店数据量: {len(filtered_data)}\")\n", " \n", " if len(filtered_data) > 0:\n", " filtered_data.to_csv(f\"{output_dir}/22_分店数据.csv\", index=False, encoding='utf-8-sig')\n", " \n", " # 按版本排序并去重\n", " fixed_order = ['皇冠版', '至尊版', '尊享版', '旗舰版', '标准版', '进阶版', '基础版', '入门版']\n", " fixed_order_map = {edition: index for index, edition in enumerate(fixed_order)}\n", " filtered_data['sort_key'] = filtered_data['saas_edition_fmt'].map(fixed_order_map)\n", " filtered_data = filtered_data.sort_values(by='sort_key').drop('sort_key', axis=1)\n", " result = filtered_data.drop_duplicates(subset='id_own_group', keep='first')\n", " print(f\"去重后的分店数据量: {len(result)}\")\n", " if len(result) > 0:\n", " result.to_csv(f\"{output_dir}/23_去重后分店数据.csv\", index=False, encoding='utf-8-sig')\n", " else:\n", " print(\"警告: 没有分店数据存在于主店过期列表中\")\n", " result = pd.DataFrame()\n", "else:\n", " print(\"警告: 主店过期数据为空,无法筛选分店数据\")\n", " result = pd.DataFrame()\n", "\n", "# 合并主店和分店数据\n", "# 修复:处理 is_main_org 可能是字符串类型的情况\n", "if data_NGV_processed['is_main_org'].dtype == 'object':\n", " is_main_org_numeric_main = pd.to_numeric(data_NGV_processed['is_main_org'], errors='coerce')\n", " data_NGV_processed['条件'] = ((data_NGV_processed['org_type'] == \"一般\") & (data_NGV_processed['org_status'] == '留存') &\n", " (data_NGV_processed['area_manager'] != '殷昊') &\n", " (data_NGV_processed['area_manager'] != '孙玉蕾') &\n", " (is_main_org_numeric_main == 1))\n", "else:\n", " data_NGV_processed['条件'] = ((data_NGV_processed['org_type'] == \"一般\") & (data_NGV_processed['org_status'] == '留存') &\n", " (data_NGV_processed['area_manager'] != '殷昊') &\n", " (data_NGV_processed['area_manager'] != '孙玉蕾') &\n", " (data_NGV_processed['is_main_org'] == 1))\n", "\n", "data_NGV_processed = data_NGV_processed.loc[data_NGV_processed[\"条件\"]]\n", "print(f\"满足条件的主店数据量: {len(data_NGV_processed)}\")\n", "\n", "# 合并数据\n", "if len(result) > 0:\n", " data_NGV_processed = pd.concat([data_NGV_processed, result], axis=0)\n", "else:\n", " print(\"警告: 没有分店数据可合并\")\n", "print(f\"合并后总数据量: {len(data_NGV_processed)}\")\n", "\n", "if len(data_NGV_processed) > 0:\n", " data_NGV_processed.to_csv(f\"{output_dir}/24_合并主店分店后数据.csv\", index=False, encoding='utf-8-sig')\n", "else:\n", " print(\"警告: 合并后数据为空,请检查前面的过滤条件\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤10: 数据处理和过滤(第四部分:续约日期处理)\n" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "计算条件前数据量: 20159\n", "条件>0的数据量: 9740\n", "过滤后数据量: 9740\n", "数据量: 9740\n", "年数分布: {3: 7972, 2: 1388, 5: 259, 4: 98, 6: 14, 7: 4, 10: 4, 8: 1}\n" ] } ], "source": [ "data_details = data_NGV_processed.copy()\n", "data_details = data_details.reset_index(drop=True)\n", "\n", "# 判断A列的日期是否大于B列的日期730天\n", "data_details['条件'] = data_details.apply(\n", " lambda row: (\n", " (pd.to_datetime(row['A']) - pd.to_datetime(row['B'])).days\n", " if pd.to_datetime(row['A']) - pd.to_datetime(row['B']) >= pd.Timedelta(days=730)\n", " else 0\n", " ),\n", " axis=1\n", ")\n", "print(f\"计算条件前数据量: {len(data_details)}\")\n", "print(f\"条件>0的数据量: {len(data_details[data_details['条件'] > 0])}\")\n", "data_details = data_details.loc[data_details[\"条件\"] > 0]\n", "print(f\"过滤后数据量: {len(data_details)}\")\n", "data_details.to_csv(f\"{output_dir}/25_续约日期过滤后数据.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 计算年数\n", "def divide_by_365(x):\n", " if isinstance(x, (int, float)):\n", " return int(x / 365)\n", " else:\n", " return x\n", "\n", "data_details['年'] = data_details['条件'].apply(divide_by_365)\n", "data_details = data_details.reset_index(drop=True)\n", "print(f\"数据量: {len(data_details)}\")\n", "print(f\"年数分布: {data_details['年'].value_counts().to_dict()}\")\n", "data_details.to_csv(f\"{output_dir}/26_计算年数后数据.csv\", index=False, encoding='utf-8-sig')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤11: 数据处理和过滤(第五部分:生成历史续约日期)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 从CSV加载前一步的数据(步骤10的输出)\n", "csv_path = f\"{output_dir}/26_计算年数后数据.csv\"\n", "if os.path.exists(csv_path):\n", " print(f\"从CSV文件读取: {csv_path}\")\n", " data_details = pd.read_csv(csv_path, encoding='utf-8-sig')\n", " print(f\"读取到 {len(data_details)} 条记录\")\n", "else:\n", " print(f\"警告: 文件不存在 {csv_path},请先执行步骤10\")\n", " data_details = pd.DataFrame()\n", "\n", "# 创建新的DataFrame用于存储历史续约日期\n", "new_df = pd.DataFrame()\n", "# 使用 from datetime import datetime 避免冲突\n", "from datetime import datetime as dt\n", "\n", "for index, row in data_details.iterrows():\n", " if row[\"renew_date\"] != \"2024-02-29\":\n", " # 修复:确保 '年' 是整数类型,处理可能的NaN或float值\n", " try:\n", " year_value = int(row['年']) if pd.notna(row['年']) else 0\n", " except (ValueError, TypeError):\n", " year_value = 0\n", " \n", " # 只有当年数大于1时才生成历史续约日期\n", " if year_value > 1:\n", " for i_new in range(1, year_value):\n", " row_new = row.copy()\n", " c = row_new[\"renew_date\"]\n", " # 使用 dt.strptime (dt 是 datetime.datetime 的别名)\n", " date_obj = dt.strptime(str(c), \"%Y-%m-%d\")\n", " new_year = date_obj.year + i_new\n", " new_date_obj = date_obj.replace(year=new_year)\n", " new_c = new_date_obj.strftime(\"%Y-%m-%d\")\n", " row_new[\"renew_date\"] = new_c\n", " new_df = pd.concat([new_df, pd.DataFrame([row_new])], ignore_index=True)\n", "\n", "print(f\"生成的历史续约日期数据量: {len(new_df)}\")\n", "if len(new_df) > 0:\n", " new_df.to_csv(f\"{output_dir}/27_生成的历史续约日期数据.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 合并两个DataFrame\n", "merged_df = pd.concat([data_NGV_processed, new_df], axis=0, ignore_index=True)\n", "data_details = merged_df.copy()\n", "print(f\"合并后总数据量: {len(data_details)}\")\n", "data_details.to_csv(f\"{output_dir}/28_合并历史续约日期后数据.csv\", index=False, encoding='utf-8-sig')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤12: 数据处理和过滤(第六部分:最终过滤)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "renew_date不为空的数据量: 38932\n", "过滤前数据量: 38932\n", "创建年份等于续约年份的数据量: 8246\n", "最终过滤后数据量: 30686\n", "\n", "最终数据已保存到: output/debug_revisit_renew/30_最终过滤后数据.csv\n" ] } ], "source": [ "# 从CSV加载前一步的数据(步骤11的输出)\n", "csv_path = f\"{output_dir}/28_合并历史续约日期后数据.csv\"\n", "if os.path.exists(csv_path):\n", " print(f\"从CSV文件读取: {csv_path}\")\n", " data_details = pd.read_csv(csv_path, encoding='utf-8-sig')\n", " print(f\"读取到 {len(data_details)} 条记录\")\n", "else:\n", " print(f\"警告: 文件不存在 {csv_path},请先执行步骤11\")\n", " data_details = pd.DataFrame()\n", "\n", "# 过滤renew_date不为空的数据\n", "data_details_not_null = data_details[data_details['renew_date'].notnull()]\n", "data_details_not_null = data_details_not_null.reset_index(drop=True)\n", "print(f\"renew_date不为空的数据量: {len(data_details_not_null)}\")\n", "data_details_not_null.to_csv(f\"{output_dir}/29_renew_date不为空数据.csv\", index=False, encoding='utf-8-sig')\n", "\n", "data_details = data_details_not_null.copy()\n", "\n", "# 过滤掉创建年份等于续约年份的数据\n", "data_details['saas_create_time'] = data_details['saas_create_time'].str[:4]\n", "data_details['renew_date_new'] = data_details['renew_date'].str[:4]\n", "print(f\"过滤前数据量: {len(data_details)}\")\n", "print(f\"创建年份等于续约年份的数据量: {len(data_details[data_details['saas_create_time'] == data_details['renew_date_new']])}\")\n", "data_details = data_details[data_details['saas_create_time'] != data_details['renew_date_new']]\n", "data_details = data_details.reset_index(drop=True)\n", "print(f\"最终过滤后数据量: {len(data_details)}\")\n", "\n", "data_details.to_csv(f\"{output_dir}/30_最终过滤后数据.csv\", index=False, encoding='utf-8-sig')\n", "print(f\"\\n最终数据已保存到: {output_dir}/30_最终过滤后数据.csv\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤13: 日期计算和循环处理(90/120/180天数据筛选)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "type object 'datetime.datetime' has no attribute 'datetime'", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[27]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 2\u001b[39m date_120 = \u001b[32m113\u001b[39m\n\u001b[32m 3\u001b[39m date_180 = \u001b[32m173\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m start_time = \u001b[43mdatetime\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdatetime\u001b[49m.now()\n\u001b[32m 6\u001b[39m now_time = start_time.replace()\n\u001b[32m 8\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m now_time.strftime(\u001b[33m\"\u001b[39m\u001b[33m%\u001b[39m\u001b[33mY-\u001b[39m\u001b[33m%\u001b[39m\u001b[33mm-\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m\"\u001b[39m) \u001b[38;5;129;01min\u001b[39;00m date_list:\n", "\u001b[31mAttributeError\u001b[39m: type object 'datetime.datetime' has no attribute 'datetime'" ] } ], "source": [ "# 从CSV加载最终过滤后的数据(步骤12的输出)\n", "csv_path = f\"{output_dir}/30_最终过滤后数据.csv\"\n", "if os.path.exists(csv_path):\n", " print(f\"从CSV文件读取: {csv_path}\")\n", " data_details = pd.read_csv(csv_path, encoding='utf-8-sig')\n", " print(f\"读取到 {len(data_details)} 条记录\")\n", "else:\n", " print(f\"警告: 文件不存在 {csv_path},请先执行步骤12\")\n", " data_details = pd.DataFrame()\n", "\n", "# 从CSV加载节假日列表和date_one(步骤3的输出)\n", "date_list_csv = f\"{output_dir}/14_节假日列表.csv\"\n", "if os.path.exists(date_list_csv):\n", " date_list = pd.read_csv(date_list_csv, encoding='utf-8-sig').iloc[:, 0].tolist()\n", "else:\n", " print(f\"警告: 文件不存在 {date_list_csv},请先执行步骤3\")\n", " date_list = []\n", "\n", "date_one_csv = f\"{output_dir}/15_date_one.csv\"\n", "if os.path.exists(date_one_csv):\n", " date_one = pd.read_csv(date_one_csv, encoding='utf-8-sig').iloc[0, 0]\n", "else:\n", " print(f\"警告: 文件不存在 {date_one_csv},请先执行步骤3\")\n", " date_one = 1\n", "\n", "date_90 = 83\n", "date_120 = 113\n", "date_180 = 173\n", "\n", "start_time = datetime.datetime.now()\n", "now_time = start_time.replace()\n", "\n", "if now_time.strftime(\"%Y-%m-%d\") in date_list:\n", " date_one = 0\n", " print(\"开始次数:\", date_one)\n", " print(\"当前日期:\", now_time)\n", "\n", "print(f\"遍历次数:{date_one}\")\n", "\n", "# 存储所有派发数据\n", "all_distribution_data = []\n", "\n", "for i in range(0, date_one):\n", " print(f\"\\n========== 这是第{i}次遍历 ==========\")\n", " now_time = datetime.datetime.now() + datetime.timedelta(days=-(i + 1))\n", " \n", " today = now_time + datetime.timedelta(days=-date_90)\n", " formatted_today_90 = today.strftime(\"%Y-%m-%d\")\n", " today = now_time + datetime.timedelta(days=-date_120)\n", " formatted_today_120 = today.strftime(\"%Y-%m-%d\")\n", " today = now_time + datetime.timedelta(days=-date_180)\n", " formatted_today_180 = today.strftime(\"%Y-%m-%d\")\n", " \n", " print(f\"90天为{formatted_today_90},120天为{formatted_today_120},180天为{formatted_today_180}\")\n", " \n", " # 获取90天数据\n", " data_details_90 = data_details.copy()\n", " data_details_90['条件'] = ((data_details_90['renew_date'] == formatted_today_90) & \n", " (data_details_90['group_grade'] != \"普通客户(VIP)\"))\n", " data_details_90 = data_details_90.loc[data_details_90[\"条件\"]]\n", " print(f\"90天数据量: {len(data_details_90)}\")\n", " \n", " # 获取120天数据\n", " data_details_120 = data_details.copy()\n", " data_details_120['条件'] = ((data_details_120['renew_date'] == formatted_today_120) &\n", " ((data_details_120['saas_edition_fmt'] == '基础版') |\n", " (data_details_120['saas_edition_fmt'] == '入门版')))\n", " data_details_120 = data_details_120.loc[data_details_120[\"条件\"]]\n", " print(f\"120天数据量: {len(data_details_120)}\")\n", " \n", " # 获取180天数据\n", " data_details_180 = data_details.copy()\n", " data_details_180['条件'] = (data_details_180['renew_date'] == formatted_today_180)\n", " data_details_180 = data_details_180.loc[data_details_180[\"条件\"]]\n", " print(f\"180天数据量: {len(data_details_180)}\")\n", " \n", " # 添加跟进阶段和主要目的\n", " data_details_90[\"跟进阶段\"] = \"续约后90天回访\"\n", " data_details_90[\"主要目的\"] = \"关怀使用情况,促进更多功能使用,提升系统使用深度。\"\n", " data_details_120[\"跟进阶段\"] = \"续约后120天回访\"\n", " data_details_120[\"主要目的\"] = \"暂无\"\n", " data_details_180[\"跟进阶段\"] = \"续约后180天回访\"\n", " data_details_180[\"主要目的\"] = \"关怀使用情况,促进增购商机转化,识别潜在风险,及时提报。\"\n", " \n", " # 合并三个DataFrame(去除续约120天回访)\n", " data_result = pd.concat([data_details_90, data_details_180], ignore_index=True)\n", " print(f\"合并后派发数据长度:{len(data_result)}\")\n", " \n", " if len(data_result) > 0:\n", " # 保存每次循环的派发数据\n", " data_result.to_csv(f\"{output_dir}/31_第{i}次遍历_派发数据.csv\", index=False, encoding='utf-8-sig')\n", " all_distribution_data.append(data_result)\n", " else:\n", " print(f\"警告:第{i}次遍历没有派发数据!\")\n", " # 保存空数据的原因分析\n", " analysis = {\n", " '遍历次数': i,\n", " '日期': now_time.strftime('%Y-%m-%d'),\n", " '90天日期': formatted_today_90,\n", " '180天日期': formatted_today_180,\n", " '90天匹配数量': len(data_details_90),\n", " '180天匹配数量': len(data_details_180),\n", " '总数据量': len(data_details),\n", " 'renew_date唯一值数量': data_details['renew_date'].nunique() if len(data_details) > 0 else 0\n", " }\n", " pd.DataFrame([analysis]).to_csv(f\"{output_dir}/32_第{i}次遍历_空数据原因分析.csv\", index=False, encoding='utf-8-sig')\n", "\n", "# 合并所有派发数据\n", "if all_distribution_data:\n", " final_distribution_data = pd.concat(all_distribution_data, ignore_index=True)\n", " print(f\"\\n总派发数据量: {len(final_distribution_data)}\")\n", " final_distribution_data.to_csv(f\"{output_dir}/33_最终派发数据.csv\", index=False, encoding='utf-8-sig')\n", " print(f\"最终派发数据已保存到: {output_dir}/33_最终派发数据.csv\")\n", "else:\n", " print(\"\\n警告:所有遍历都没有派发数据!\")\n", " # 分析为什么没有派发数据\n", " analysis = {\n", " '总数据量': len(data_details),\n", " 'renew_date唯一值': data_details['renew_date'].nunique() if len(data_details) > 0 else 0,\n", " 'renew_date范围': f\"{data_details['renew_date'].min()} 到 {data_details['renew_date'].max()}\" if len(data_details) > 0 else '无数据',\n", " 'date_one': date_one,\n", " '当前日期': datetime.datetime.now().strftime('%Y-%m-%d')\n", " }\n", " pd.DataFrame([analysis]).to_csv(f\"{output_dir}/34_派发数据为空原因分析.csv\", index=False, encoding='utf-8-sig')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 步骤14: 派发数据为空的原因分析\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 分析派发数据为空的原因\n", "print(\"========== 派发数据为空原因分析 ==========\")\n", "\n", "if len(data_details) == 0:\n", " print(\"问题1: 最终过滤后数据为空\")\n", " print(\"请检查步骤12的过滤条件\")\n", "else:\n", " print(f\"最终数据量: {len(data_details)}\")\n", " \n", " # 检查renew_date的分布\n", " print(f\"\\nrenew_date唯一值数量: {data_details['renew_date'].nunique()}\")\n", " print(f\"renew_date范围: {data_details['renew_date'].min()} 到 {data_details['renew_date'].max()}\")\n", " \n", " # 计算目标日期范围\n", " now_time = datetime.datetime.now()\n", " target_date_90 = (now_time - datetime.timedelta(days=83)).strftime(\"%Y-%m-%d\")\n", " target_date_180 = (now_time - datetime.timedelta(days=173)).strftime(\"%Y-%m-%d\")\n", " \n", " print(f\"\\n目标90天日期: {target_date_90}\")\n", " print(f\"目标180天日期: {target_date_180}\")\n", " \n", " # 检查是否有匹配的数据\n", " match_90 = data_details[data_details['renew_date'] == target_date_90]\n", " match_180 = data_details[data_details['renew_date'] == target_date_180]\n", " \n", " print(f\"\\n匹配90天日期的数据量: {len(match_90)}\")\n", " print(f\"匹配180天日期的数据量: {len(match_180)}\")\n", " \n", " if len(match_90) > 0:\n", " # 检查90天数据的group_grade过滤\n", " match_90_filtered = match_90[match_90['group_grade'] != \"普通客户(VIP)\"]\n", " print(f\"90天数据过滤后(排除普通客户): {len(match_90_filtered)}\")\n", " if len(match_90_filtered) == 0:\n", " print(\"问题: 90天数据全部被group_grade过滤掉\")\n", " print(f\"90天数据的group_grade分布: {match_90['group_grade'].value_counts().to_dict()}\")\n", " \n", " # 检查renew_date的日期分布\n", " print(f\"\\nrenew_date日期分布(前20个):\")\n", " print(data_details['renew_date'].value_counts().head(20))\n", " \n", " # 保存分析结果\n", " analysis_result = {\n", " '最终数据量': len(data_details),\n", " 'renew_date唯一值数量': data_details['renew_date'].nunique(),\n", " 'renew_date最小值': data_details['renew_date'].min(),\n", " 'renew_date最大值': data_details['renew_date'].max(),\n", " '目标90天日期': target_date_90,\n", " '目标180天日期': target_date_180,\n", " '匹配90天日期数量': len(match_90),\n", " '匹配180天日期数量': len(match_180),\n", " 'date_one': date_one,\n", " '当前日期': datetime.datetime.now().strftime('%Y-%m-%d')\n", " }\n", " pd.DataFrame([analysis_result]).to_csv(f\"{output_dir}/35_派发数据为空原因分析.csv\", index=False, encoding='utf-8-sig')\n", " \n", " # 保存renew_date的详细分布\n", " renew_date_dist = data_details['renew_date'].value_counts().reset_index()\n", " renew_date_dist.columns = ['renew_date', 'count']\n", " renew_date_dist.to_csv(f\"{output_dir}/36_renew_date分布.csv\", index=False, encoding='utf-8-sig')\n" ] } ], "metadata": { "kernelspec": { "display_name": "SaaS", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.11" } }, "nbformat": 4, "nbformat_minor": 2 }