From b799d383df627e371d5d50533f0f70ac7f539056 Mon Sep 17 00:00:00 2001 From: z66 <1415243231@qq.com> Date: Wed, 15 Oct 2025 15:19:36 +0800 Subject: [PATCH] =?UTF-8?q?ngv=E6=B7=BB=E5=8A=A0=E4=BF=9D=E5=AD=98?= =?UTF-8?q?=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api.py | 8 +- back_ground_module/update_NGV_data.py | 9 +- config.py | 2 +- test/logs/task.log | 7 + test/ngv更新.ipynb | 895 +++++++++++--------------- 5 files changed, 400 insertions(+), 521 deletions(-) diff --git a/api.py b/api.py index 816b960..19985ce 100644 --- a/api.py +++ b/api.py @@ -90,7 +90,6 @@ class API: all_data_batches = [] # 用于存储每次请求返回的数据批次 last_data_id = None exit_flag = False - count = 0 while True: payload = json.dumps({ "app_id": data['api_key'], # 应用ID @@ -217,12 +216,6 @@ class API: 'Authorization': Config.JIANDAOYUN_API_TOKEN, # 曹伟应用api测试 app_key 'Content-Type': 'application/json' } - - """ - data 样式 # 后续优化发送数据样式 目前输入字段,后续优化输入表单名称 - jiandaoyun_data['data'] = {"_widget_1731650067055":{"value":f'{username}{password}'}, - "_widget_1731650067056":{"value": f"{group}"}} - """ # noinspection DuplicatedCode payload = json.dumps({ "app_id": data['api_key'], # 应用ID @@ -376,6 +369,7 @@ class API: def entry_data_banch_update(data: dict, max_retries: int = 20, chunk_size: int = 90) -> list[dict]: # 修改数据 """ 批量修改数据 + :param chunk_size: 批量修改块大小 :param max_retries: 最大重试次数,此处设置100次 :param data: 简道云插件发送过来的data,包含应用id、表单id、数据id等信息 :return: 修改数据后简道云返回的结果 diff --git a/back_ground_module/update_NGV_data.py b/back_ground_module/update_NGV_data.py index c441727..abe51c9 100644 --- a/back_ground_module/update_NGV_data.py +++ b/back_ground_module/update_NGV_data.py @@ -13,7 +13,10 @@ error_task_logger = configure_error_task_logger() start_time = datetime.datetime.now() api_instance = API() common_module = CommonModule() - +output_dir = "output" # 设置输出目录 +# 创建输出目录(如果不存在) +import os +os.makedirs(output_dir, exist_ok=True) class UpdateNGVData: """NGV数据每日新增""" @@ -103,6 +106,10 @@ class UpdateNGVData: # all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j1.iterrows()] # 前两天的全部数据 # all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j.iterrows()] # 前一天的全部数据 all_data = [self.row_to_dict(row, self.field_mapping) for index, row in filtered_df.iterrows()] # 增量数据 + try: + filtered_df.to_csv(output_dir + "\\" + f"{task_start_time}NGV.csv") + except Exception as e: + error_task_logger.error(f"NGV过滤后数据保存异常: {e}") # data = {'api_key': Config.SaaS_Tasks_APP_ID, 'entry_id': Config.NGV_TASKS_ENTRY_ID, "data_list": all_data} diff --git a/config.py b/config.py index 991dff2..f3fddc9 100644 --- a/config.py +++ b/config.py @@ -9,7 +9,7 @@ class Config: "database": "f6_bi", "user": "LTAI5tMJsijFA9BS1R6uBpUT", "password": "PajEQMIRWNRcipd8mYvlud2KHWJr6N", - "host": "hgprecn-cn-to34by1d0001-cn-shanghai.hologres.aliyuncs.com", + "host": "hgpostcn-cn-m1e4gikbu00l-cn-shanghai.hologres.aliyuncs.com", "port": "80" } # SaaS-NGV 数据库链接配置-postgresql diff --git a/test/logs/task.log b/test/logs/task.log index b8d5c59..897ea16 100644 --- a/test/logs/task.log +++ b/test/logs/task.log @@ -1048,3 +1048,10 @@ 2025-09-26 17:54:18,517 - api.py - task_logger - INFO - 获取了145条数据 2025-09-26 17:54:18,517 - ngv更新.py - task_logger - INFO - 已获取数据 2025-09-26 17:54:45,037 - common_module.py - task_logger - INFO - 任务错误发生成功: {'data': {'creator': {'name': 'F6汽车科技', 'username': '#admin', 'status': 1, 'type': 0}, 'updater': {'name': 'F6汽车科技', 'username': '#admin', 'status': 1, 'type': 0}, 'deleter': None, 'createTime': '2025-09-26T09:54:42.260Z', 'updateTime': '2025-09-26T09:54:42.260Z', 'deleteTime': None, '_widget_1744873387500': '2025-09-26T00:00:00.000Z', '_widget_1743644977694': 'NGV更新数据', '_widget_1744873387501': '2025-09-26T09:50:53.000Z', '_widget_1744873387502': '2025-09-26T09:54:44.000Z', '_widget_1744873387504': '231', '_widget_1754981992215': "'org_code'", '_id': '68d662e29d29ff42168403fc', 'appId': '6694d3c4fcb69ca9a111a6c4', 'entryId': '689ae65da00c17578e27cd74'}} +2025-10-15 15:02:50,242 - api.py - task_logger - INFO - 获取了145条数据 +2025-10-15 15:08:43,148 - api.py - task_logger - INFO - 获取了145条数据 +2025-10-15 15:08:43,149 - 4214907192.py - task_logger - INFO - 数据加载完成 +2025-10-15 15:09:13,069 - api.py - task_logger - INFO - 获取了145条数据 +2025-10-15 15:09:13,069 - 477214183.py - task_logger - INFO - 数据加载完成 +2025-10-15 15:09:39,129 - 477214183.py - task_logger - INFO - 时间转换完成 +2025-10-15 15:09:39,140 - 477214183.py - task_logger - INFO - 人员转换完成 diff --git a/test/ngv更新.ipynb b/test/ngv更新.ipynb index 92de36e..60c6b3a 100644 --- a/test/ngv更新.ipynb +++ b/test/ngv更新.ipynb @@ -425,575 +425,446 @@ { "metadata": { "ExecuteTime": { - "end_time": "2025-08-04T08:30:00.126830Z", - "start_time": "2025-08-04T08:27:19.632414Z" + "end_time": "2025-10-15T07:03:16.566548Z", + "start_time": "2025-10-15T07:02:49.835394Z" } }, "cell_type": "code", "source": [ "# -*- coding: utf-8 -*-\n", - "import os\n", - "import datetime\n", - "import concurrent.futures\n", "import pandas as pd\n", + "import datetime\n", + "from config import Config\n", + "from api import API\n", + "from back_ground_module import CommonModule\n", + "from log_config import configure_task_logger, configure_error_task_logger\n", + "import concurrent.futures\n", + "\n", + "# 获取已经配置好的常规日志记录器\n", + "logger = configure_task_logger()\n", + "\n", + "# 获取已经配置好的错误任务日志记录器\n", + "error_task_logger = configure_error_task_logger()\n", + "\n", + "start_time = datetime.datetime.now()\n", + "api_instance = API()\n", + "common_module = CommonModule()\n", + "\n", + "\n", + "class UpdateAllNGVDataDaily:\n", + " \"\"\"NGV数据每日更新\"\"\"\n", + "\n", + " def __init__(self):\n", + " self.field_mapping = {}\n", + " # self.fields()\n", + "\n", + " def main(self):\n", + " # 保存为CSV文件\n", + " output_dir = \"output\" # 设置输出目录\n", + "\n", + " # 创建输出目录(如果不存在)\n", + " import os\n", + " os.makedirs(output_dir, exist_ok=True)\n", + "\n", + " task_start_time = datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", + " # 获取NGV数据\n", + " payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"675bb02bd2d53c2034c665e4\"}\n", + " # NGV_data_list = api_instance.entry_data_list(payload).get(\"data\", [])\n", + " # jdy_NGV_data = pd.DataFrame(NGV_data_list)\n", + "\n", + " payload = {\"api_key\": \"6694d3c4fcb69ca9a111a6c4\",\n", + " \"entry_id\": \"6769204a1902c9341340a1bc\",\n", + " }\n", + " staff_id = api_instance.entry_data_list(payload)\n", + " staff_id_list = staff_id.get(\"data\") # api请求格式,将数据封装在data字典里\n", + "\n", + " # for i in range(1,2):\n", + " data_NGV_j = common_module.get_ngv_details(days_back=1)\n", + " data_NGV_j.to_csv(os.path.join(output_dir, f\"data_NGV_j.csv\"), index=False)\n", + " data_NGV_j1 = common_module.get_ngv_details(days_back=2)\n", + "\n", + " # 对 data_NGV 进行进一步的过滤,只保留 org_type 为 \"一般\" 的记录\n", + " data_NGV_j = data_NGV_j[data_NGV_j['org_type'] == '一般']\n", + " data_NGV_j1 = data_NGV_j1[data_NGV_j1['org_type'] == '一般']\n", + "\n", + " # 去除不需要的列\n", + " columns_to_remove = {'date_id', 'date_fmt', 'pt', 'etl_time'}\n", + "\n", + " # 获取所有列名并计算要保留的列\n", + " columns_to_keep_df1 = list(set(data_NGV_j.columns) - columns_to_remove)\n", + " columns_to_keep_df2 = list(set(data_NGV_j1.columns) - columns_to_remove)\n", + "\n", + " # 过滤DataFrame以去除指定列\n", + " df1_filtered = data_NGV_j[columns_to_keep_df1]\n", + " df2_filtered = data_NGV_j1[columns_to_keep_df2]\n", + "\n", + "\n", + "if __name__ == '__main__':\n", + " start = UpdateAllNGVDataDaily()\n", + " start.main()\n" + ], + "id": "82d58cced4a6e02", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "已获取 100 条数据\n", + "已获取 145 条数据\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[92m2025-10-15 15:02:50,242 - api.py - task_logger - INFO - 获取了145条数据\u001B[0m\n" + ] + } + ], + "execution_count": 4 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-10-15T07:14:36.277142Z", + "start_time": "2025-10-15T07:14:11.975620Z" + } + }, + "cell_type": "code", + "source": [ + "# -*- coding: utf-8 -*-\n", + "import pandas as pd\n", + "import datetime\n", "from config import Config\n", "from api import API\n", "from back_ground_module import CommonModule\n", "from log_config import configure_task_logger, configure_error_task_logger\n", "\n", - "# 获取日志记录器\n", "logger = configure_task_logger()\n", - "error_logger = configure_error_task_logger()\n", "\n", - "class NGVDataUpdater:\n", - " \"\"\"NGV数据每日更新处理器\"\"\"\n", + "# 获取已经配置好的错误任务日志记录器\n", + "error_task_logger = configure_error_task_logger()\n", + "start_time = datetime.datetime.now()\n", + "api_instance = API()\n", + "common_module = CommonModule()\n", + " # 保存为CSV文件\n", + "output_dir = \"output\" # 设置输出目录\n", + "\n", + "# 创建输出目录(如果不存在)\n", + "import os\n", + "os.makedirs(output_dir, exist_ok=True)\n", + "\n", + "class UpdateNGVData:\n", + " \"\"\"NGV数据每日新增\"\"\"\n", "\n", " def __init__(self):\n", - " self.api = API()\n", - " self.common = CommonModule()\n", - " self.output_dir = \"output\"\n", - " self.start_time = datetime.datetime.now()\n", - " self.field_mapping = self._initialize_field_mapping()\n", + " self.staff_id_list = None\n", + " self.field_mapping = {}\n", + " self.fields()\n", "\n", - " # 创建输出目录\n", - " os.makedirs(self.output_dir, exist_ok=True)\n", + " def load_all_data(self):\n", + " # 获取简道云员工id\n", + " payload = {\"api_key\": \"6694d3c4fcb69ca9a111a6c4\",\n", + " \"entry_id\": \"6769204a1902c9341340a1bc\",\n", + " }\n", + " staff_id = api_instance.entry_data_list(payload)\n", + " self.staff_id_list = staff_id.get(\"data\") # api请求格式,将数据封装在data字典里\n", "\n", - " def _initialize_field_mapping(self):\n", - " \"\"\"初始化字段映射关系\"\"\"\n", - " return dict(date_id='_widget_1734062123065', date_fmt='_widget_1734062123066',\n", - " id_own_group='_widget_1734062123067', group_name='_widget_1734062123068',\n", - " id_own_org='_widget_1734062123069', org_name='_widget_1734062123070',\n", - " org_code='_widget_1734062123071', group_grade='_widget_1734062123072',\n", - " org_type='_widget_1734062123073', org_status='_widget_1734062123074',\n", - " saas_version='_widget_1734062123075', is_wechat='_widget_1734062123076',\n", - " is_mini_app='_widget_1734062123077', is_wx_shop='_widget_1734062123078',\n", - " is_camera_service='_widget_1734062123079',\n", - " is_maintenance_service='_widget_1734062123080',\n", - " saas_create_time='_widget_1734062123081', expiry_time='_widget_1734062123082',\n", - " saas_use_days='_widget_1734062123083', saas_use_year='_widget_1734062123084',\n", - " is_main_org='_widget_1734062123085', license_code='_widget_1734062123086',\n", - " license_name='_widget_1734062123087', org_crm_id='_widget_1734062123088',\n", - " province_id='_widget_1734062123089', province_name='_widget_1734062123090',\n", - " city_id='_widget_1734062123091', city_name='_widget_1734062123092',\n", - " area_id='_widget_1734062123093', area_name='_widget_1734062123094',\n", - " region_name='_widget_1734062123095', region_short_name='_widget_1734062123096',\n", - " branch_name='_widget_1734062123097', carzone_store_id='_widget_1734062123098',\n", - " carzone_store_name='_widget_1734062123099',\n", - " customer_carzone_id='_widget_1734062123100', salesmen='_widget_1734062123101',\n", - " area_manager='_widget_1734062123102', service_salesmen='_widget_1734062123103',\n", - " impl_principal='_widget_1734062123104',\n", - " service_impl_principal='_widget_1734062123105',\n", - " active_user_count='_widget_1734062123106', active_user_type='_widget_1734062123107',\n", - " limit_user_count='_widget_1734062123108', limit_user_type='_widget_1734062123109',\n", - " is_n='_widget_1734062123110', is_g='_widget_1734062123111',\n", - " is_v='_widget_1734062123112', is_visited='_widget_1734062123113',\n", - " is_active='_widget_1734062123114', active_status_fmt='_widget_1734062123115',\n", - " bill_count_last_30_day='_widget_1734062123116',\n", - " bill_day_count_last_30_day='_widget_1734062123117',\n", - " bill_day_count_this_month='_widget_1734062123118',\n", - " bill_count_last_7_day='_widget_1734062123119',\n", - " bill_day_count_last_7_day='_widget_1734062123120', pv_count='_widget_1734062123121',\n", - " uv_count='_widget_1734062123122', bill_count_1d='_widget_1734062123123',\n", - " bill_count_2d='_widget_1734062123124', bill_count_3d='_widget_1734062123125',\n", - " bill_count_4d='_widget_1734062123126', bill_count_5d='_widget_1734062123127',\n", - " bill_count_6d='_widget_1734062123128', bill_count_7d='_widget_1734062123129',\n", - " bill_count_8d='_widget_1734062123130', bill_count_9d='_widget_1734062123131',\n", - " bill_count_10d='_widget_1734062123132', bill_count_11d='_widget_1734062123133',\n", - " bill_count_12d='_widget_1734062123134', bill_count_13d='_widget_1734062123135',\n", - " bill_count_14d='_widget_1734062123136', bill_count_15d='_widget_1734062123137',\n", - " bill_count_16d='_widget_1734062123138', bill_count_17d='_widget_1734062123139',\n", - " bill_count_18d='_widget_1734062123140', bill_count_19d='_widget_1734062123141',\n", - " bill_count_20d='_widget_1734062123142', bill_count_21d='_widget_1734062123143',\n", - " bill_count_22d='_widget_1734062123144', bill_count_23d='_widget_1734062123145',\n", - " bill_count_24d='_widget_1734062123146', bill_count_25d='_widget_1734062123147',\n", - " bill_count_26d='_widget_1734062123148', bill_count_27d='_widget_1734062123149',\n", - " bill_count_28d='_widget_1734062123150', bill_count_29d='_widget_1734062123151',\n", - " bill_count_30d='_widget_1734062123152', bill_count_31d='_widget_1734062123153',\n", - " etl_time='_widget_1734062123154',\n", - " maintain_bill_count_last_30_day='_widget_1734062123155',\n", - " washing_bill_count_last_30_day='_widget_1734062123156',\n", - " maintain_bill_day_count_last_30_day='_widget_1734062123157',\n", - " washing_bill_day_count_last_30_day='_widget_1734062123158',\n", - " retail_bill_count_last_30_day='_widget_1734062123159',\n", - " retail_bill_day_count_last_30_day='_widget_1734062123160',\n", - " purchase_bill_count_last_30_day='_widget_1734062123161',\n", - " purchase_bill_day_count_last_30_day='_widget_1734062123162',\n", - " card_bill_count_last_30_day='_widget_1734062123163',\n", - " card_bill_day_count_last_30_day='_widget_1734062123164',\n", - " gd_sales_bill_count_last_30_day='_widget_1734062123165',\n", - " gd_sales_bill_day_count_last_30_day='_widget_1734062123166',\n", - " g_change_flag='_widget_1734062123167', saas_package='_widget_1734062123168',\n", - " manage_model='_widget_1734062123169', contacts='_widget_1734062123170',\n", - " contact_number='_widget_1734062123171', contact_mobile='_widget_1734062123172',\n", - " g_month_count='_widget_1734062123173', g_month_percentage='_widget_1734062123174',\n", - " is_install_service='_widget_1734062123175',\n", - " install_create_time='_widget_1734062123176', last_end_date='_widget_1734062123177',\n", - " renew_date='_widget_1734062123178', is_chain_owner='_widget_1734062123179',\n", - " group_org_count='_widget_1734062123180',\n", - " recent_bill_warning_days='_widget_1734062123181',\n", - " g_change_flag_d='_widget_1734062123182', g_lost_warning_days='_widget_1734062123183',\n", - " saas_edition_fmt='_widget_1734062123184', g_flag_1m='_widget_1734062123185',\n", - " g_flag_2m='_widget_1734062123186', g_flag_3m='_widget_1734062123187',\n", - " g_flag_4m='_widget_1734062123188', g_flag_5m='_widget_1734062123189',\n", - " g_flag_6m='_widget_1734062123190', g_flag_day_count='_widget_1734062123191',\n", - " add_org_flag='_widget_1734062123192', pt='_widget_1734062123193',\n", - " org_size='_widget_1734062123194', qualification_type_fmt='_widget_1734062123195',\n", - " business_scope_fmt='_widget_1734062123196', store_type_fmt='_widget_1734062123197',\n", - " area='_widget_1734062123198', station_number='_widget_1734062123199',\n", - " header_type_fmt='_widget_1734062123200', org_stage='_widget_1734062123201',\n", - " g_count_this_month='_widget_1734062123202',\n", - " saas_customer_type='_widget_1734062123203', technician='_widget_1734062123204',\n", - " tmall_maintain_service_status_desc='_widget_1734062123205',\n", - " date_fmt_date='_widget_1749000071375',\n", - " area_manager_staff_id='_widget_1748496855779',\n", - " service_impl_principal_staff_id=\"_widget_1748496855780\",\n", - " service_salesmen_staff_id=\"_widget_1748496855778\",\n", - " technician_staff_id=\"_widget_1751877712235\",\n", - " saas_create_time_date=\"_widget_1749000071377\",\n", - " expiry_time_date=\"_widget_1749000071382\",\n", - " install_create_time_date=\"_widget_1749000071384\",\n", - " last_end_date_date=\"_widget_1749000071389\", renew_date_date=\"_widget_1749000071391\")\n", - "\n", - " def _get_ngv_data(self, days_back):\n", - " \"\"\"获取NGV数据\"\"\"\n", - " try:\n", - " data = self.common.get_ngv_details(days_back=days_back)\n", - " return data[data['org_type'] == '一般']\n", - " except Exception as e:\n", - " error_logger.error(f\"获取NGV数据失败: {str(e)}\")\n", - " raise\n", - "\n", - " def _get_jdy_data(self):\n", - " \"\"\"获取简道云数据\"\"\"\n", - " try:\n", - " payload = {\n", - " \"api_key\": \"675b900991ad2491c69389ca\",\n", - " \"entry_id\": \"675bb02bd2d53c2034c665e4\"\n", - " }\n", - " response = self.api.entry_data_list(payload)\n", - " return pd.DataFrame(response.get(\"data\", []))\n", - " except Exception as e:\n", - " error_logger.error(f\"获取简道云数据失败: {str(e)}\")\n", - " raise\n", - "\n", - " def _get_staff_data(self):\n", - " \"\"\"获取员工数据\"\"\"\n", - " try:\n", - " payload = {\n", - " \"api_key\": \"6694d3c4fcb69ca9a111a6c4\",\n", - " \"entry_id\": \"6769204a1902c9341340a1bc\",\n", - " }\n", - " response = self.api.entry_data_list(payload)\n", - " return response.get(\"data\", [])\n", - " except Exception as e:\n", - " error_logger.error(f\"获取员工数据失败: {str(e)}\")\n", - " raise\n", - "\n", - " def _prepare_dataframes(self, df1, df2):\n", - " \"\"\"准备数据框进行比较\"\"\"\n", - " # 去除不需要的列\n", - " columns_to_remove = {'date_id', 'date_fmt', 'pt', 'etl_time'}\n", - " df1 = df1.drop(columns=columns_to_remove.intersection(df1.columns))\n", - " df2 = df2.drop(columns=columns_to_remove.intersection(df2.columns))\n", - "\n", - " # 设置索引并处理空值\n", - " df1 = df1.set_index('id_own_org').astype(str).replace(['nan', 'None'], '').fillna(\"\")\n", - " df2 = df2.set_index('id_own_org').astype(str).replace(['nan', 'None'], '').fillna(\"\")\n", - "\n", - " return df1, df2\n", - "\n", - " def _compare_dataframes(self, df1, df2):\n", - " \"\"\"比较两个数据框\"\"\"\n", - " # 找到共有的索引\n", - " common_index = df1.index.intersection(df2.index)\n", - "\n", - " # 重新索引并确保列顺序一致\n", - " common_columns = df1.columns.intersection(df2.columns)\n", - " df1_common = df1.loc[common_index, common_columns]\n", - " df2_common = df2.loc[common_index, common_columns]\n", - "\n", - " # 比较内容\n", - " matches = (df1_common == df2_common).all(axis=1)\n", - " df1_common['match_status'] = matches.map({True: '一致', False: '不一致'})\n", - "\n", - " # 获取仅在某一数据框中的行\n", - " df1_only = df1.loc[df1.index.difference(df2.index)]\n", - " df2_only = df2.loc[df2.index.difference(df1.index)]\n", - "\n", - " return df1_common, df1_only, df2_only\n", - "\n", - " def _process_jdy_data(self, jdy_data):\n", - " \"\"\"处理简道云数据\"\"\"\n", - " jdy_data = jdy_data.copy()\n", - " if '_widget_1734062123069' not in jdy_data.columns:\n", - " logger.warning(\"列 '门店id' 不存在\")\n", - " jdy_data = jdy_data.rename(columns={'_widget_1734062123069': 'id_own_org'})\n", - " return jdy_data.set_index('id_own_org')\n", - "\n", - " def _mark_deleted_stores(self, jdy_data, ngv_data):\n", - " \"\"\"标记已删除的门店\"\"\"\n", - " ids_in_jdy_not_in_ngv = jdy_data.index[~jdy_data.index.isin(ngv_data.index)]\n", - " only_in_jdy = jdy_data.loc[ids_in_jdy_not_in_ngv]\n", - "\n", - " for _, row in only_in_jdy.iterrows():\n", - " if '_id' in row and not pd.isna(row['_id']):\n", - " data = {\n", - " 'api_key': Config.SaaS_Tasks_APP_ID,\n", - " 'entry_id': Config.NGV_TASKS_ENTRY_ID,\n", - " \"data_id\": str(row['_id']),\n", - " \"data\": {\"_widget_1754285499851\": {\"value\": \"已删除\"}}\n", - " }\n", - " self.api.entry_data_update(data=data, max_retries=20)\n", - "\n", - " def _process_datetime_fields(self, df):\n", - " \"\"\"处理日期时间字段\"\"\"\n", - " time_columns = ['saas_create_time', 'expiry_time', 'install_create_time', \"last_end_date\", \"renew_date\"]\n", - " df = df.copy()\n", - "\n", - " for col in time_columns:\n", - " if col in df.columns:\n", - " # 转换为datetime类型\n", - " df[col] = pd.to_datetime(df[col], errors='coerce', utc=False)\n", - " # 本地化为北京时间并转换为UTC\n", - " df[col + '_date'] = (\n", - " df[col]\n", - " .dt.tz_localize('Asia/Shanghai', ambiguous='infer', nonexistent='NaT')\n", - " .dt.tz_convert('UTC')\n", - " .dt.strftime('%Y-%m-%dT%H:%M:%SZ')\n", - " )\n", - " return df\n", - "\n", - " def _process_staff_fields(self, df, staff_data):\n", - " \"\"\"处理员工字段\"\"\"\n", - " staff_columns = ['area_manager', 'service_impl_principal', \"service_salesmen\", \"technician\"]\n", - "\n", - " for col in staff_columns:\n", - " if col in df.columns:\n", - " # 创建员工ID映射\n", - " staff_map = {\n", - " str(staff['_widget_1734942794144']): staff['_widget_1734942794145']\n", - " for staff in staff_data\n", - " }\n", - " # 映射员工ID\n", - " df[col + \"_staff_id\"] = df[col].map(staff_map)\n", - "\n", - " return df\n", - "\n", - " def _update_ngv_data(self, df):\n", - " \"\"\"更新NGV数据\"\"\"\n", - " futures = []\n", - "\n", - " for _, row in df.iterrows():\n", - " data_dict = {}\n", - "\n", - " # 构建数据字典\n", - " for col_name, widget_id in self.field_mapping.items():\n", - " if col_name in df.columns:\n", - " value = row[col_name]\n", - " clean_value = None if pd.isna(value) else value\n", - " data_dict[widget_id] = {\"value\": clean_value}\n", - "\n", - " # 根据是否有_id决定是更新还是创建\n", - " if '_id' in row and not pd.isna(row['_id']):\n", - " data = {\n", - " 'api_key': Config.SaaS_Tasks_APP_ID,\n", - " 'entry_id': Config.NGV_TASKS_ENTRY_ID,\n", - " \"data_id\": str(row['_id']),\n", - " \"data\": data_dict\n", - " }\n", - " futures.append(self.api.entry_data_update(data=data, max_retries=20))\n", - " else:\n", - " data = {\n", - " 'api_key': Config.SaaS_Tasks_APP_ID,\n", - " 'entry_id': Config.NGV_TASKS_ENTRY_ID,\n", - " \"data\": data_dict\n", - " }\n", - " futures.append(self.api.data_batch_create(data=data, max_retries=20))\n", - "\n", - " # 等待所有请求完成\n", - " for future in concurrent.futures.as_completed(futures):\n", - " try:\n", - " future.result()\n", - " except Exception as exc:\n", - " error_logger.error(f\"请求发生异常: {exc}\")\n", + " @staticmethod\n", + " def get_staff_id(row_item, name):\n", + " \"\"\"辅助函数,用于获取员工ID\"\"\"\n", + " if str(row_item[\"_widget_1734942794144\"]) == str(name): # 检查姓名是否匹配\n", + " return row_item[\"_widget_1734942794145\"] # 返回员工ID\n", + " return None\n", "\n", " def main(self):\n", - " \"\"\"执行数据更新流程\"\"\"\n", " task_start_time = datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", - "\n", " try:\n", - " logger.info(\"开始NGV数据更新流程\")\n", + " self.load_all_data()\n", + " logger.info(f\"数据加载完成\")\n", "\n", - " # 获取数据\n", - " data_ngv_today = self._get_ngv_data(days_back=1)\n", - " data_ngv_yesterday = self._get_ngv_data(days_back=2)\n", - " jdy_data = self._get_jdy_data()\n", - " staff_data = self._get_staff_data()\n", + " data_NGV_j = common_module.get_ngv_details(days_back=2)\n", + " data_NGV_j1 = common_module.get_ngv_details(days_back=3)\n", "\n", - " # 准备和比较数据\n", - " df1, df2 = self._prepare_dataframes(data_ngv_today, data_ngv_yesterday)\n", - " df_common, _, _ = self._compare_dataframes(df1, df2)\n", + " # 找出在 data_NGV_j 中存在但在 data_NGV_j1 中不存在的 data_id\n", + " unique_data_ids = data_NGV_j[~data_NGV_j['org_code'].isin(data_NGV_j1['org_code'])]\n", "\n", - " # 处理简道云数据\n", - " jdy_data_processed = self._process_jdy_data(jdy_data)\n", + " # 创建一个新的 DataFrame 保存这些唯一的 data_id 及其对应的数据\n", + " new_df = data_NGV_j[data_NGV_j['org_code'].isin(unique_data_ids['org_code'])]\n", "\n", - " # 标记已删除的门店\n", - " self._mark_deleted_stores(jdy_data_processed, df_common)\n", + " # 对 new_df 进行进一步的过滤,只保留 org_type 为 \"一般\" 的记录\n", + " data_NGV_j = data_NGV_j[data_NGV_j['org_type'] == '一般']\n", + " data_NGV_j1 = data_NGV_j1[data_NGV_j1['org_type'] == '一般']\n", + " filtered_df = new_df[new_df['org_type'] == '一般']\n", "\n", - " # 合并简道云ID\n", - " df_common = df_common.join(jdy_data_processed[\"_id\"], how='left')\n", - " df_common = df_common[df_common['match_status'] == '不一致']\n", + " # 日期字段转换为日期格式\n", + " time_columns = ['date_fmt', 'saas_create_time', 'expiry_time', 'install_create_time', \"last_end_date\",\n", + " \"renew_date\"]\n", + " new_filtered_df = filtered_df.copy() # 复制df,以调整时间\n", + " for col in time_columns:\n", + " # 1. 转换为datetime类型(带错误处理)\n", + " # 使用.loc安全赋值\n", + " new_filtered_df[col] = pd.to_datetime(filtered_df[col], errors='coerce', utc=False)\n", "\n", - " # 处理特殊字段\n", - " df_common = self._process_datetime_fields(df_common)\n", - " df_common = self._process_staff_fields(df_common, staff_data)\n", + " # 2. 优化后的时区转换(高效向量化操作)\n", + " filtered_df[col + '_date'] = (\n", + " new_filtered_df[col]\n", + " # 本地化为北京时间(东八区)\n", + " .dt.tz_localize('Asia/Shanghai', ambiguous='infer', nonexistent='NaT')\n", + " # 转换为UTC时区\n", + " .dt.tz_convert('UTC')\n", + " # 格式化为ISO8601字符串\n", + " .dt.strftime('%Y-%m-%dT%H:%M:%SZ')\n", + " )\n", + " logger.info(f\"时间转换完成\")\n", "\n", - " # 更新数据\n", - " self._update_ngv_data(df_common)\n", + " # 人员字段转换为人员字段\n", + " staff_columns = ['area_manager', 'service_impl_principal', \"service_salesmen\", \"technician\"]\n", + " # 将员工列表转为DataFrame\n", + " # 三重循环临时方案(确保可写入)\n", + " for col in staff_columns:\n", + " staff_ids = []\n", + " for _, row in filtered_df.iterrows():\n", + " matched = False\n", + " for staff in self.staff_id_list:\n", + " if str(staff['_widget_1734942794144']) == str(row[col]):\n", + " staff_ids.append(staff['_widget_1734942794145'])\n", + " matched = True\n", + " break\n", + " if not matched:\n", + " staff_ids.append(None)\n", + " filtered_df[col + \"_staff_id\"] = staff_ids\n", + " logger.info(f\"人员转换完成\")\n", "\n", - " # 记录执行时间\n", - " end_time = datetime.datetime.now()\n", - " time_diff = end_time - self.start_time\n", - " logger.info(f\"执行时间: {time_diff.days} 天, {time_diff.seconds} 秒, {time_diff.microseconds} 微秒\")\n", + " # filtered_df.to_csv(r\"D:\\Idea Project\\SaaS_V1.3\\back_ground_module\\output\\NGV.csv\")\n", "\n", - " # 发送任务状态\n", - " self.common.send_task_status(task_start_time, \"NGV更新数据\")\n", + " # 生成包含所有行转换后的字典列表\n", + " # all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j1.iterrows()] # 前两天的全部数据\n", + " # all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j.iterrows()] # 前一天的全部数据\n", + " all_data = [self.row_to_dict(row, self.field_mapping) for index, row in filtered_df.iterrows()] # 增量数据\n", + " filtered_df.to_csv(output_dir + \"\\\\filtered_df.csv\", index=False)\n", "\n", + "\n", + " #\n", + " # data = {'api_key': Config.SaaS_Tasks_APP_ID, 'entry_id': Config.NGV_TASKS_ENTRY_ID, \"data_list\": all_data}\n", + " #\n", + " # result = api_instance.entry_data_batch_create(data)\n", + " # logger.info(f\"数据已推送:{result}\")\n", + " # result_str = str(result)\n", + " # print(result_str[:500])\n", + "\n", + " # 保存到Excel文件\n", + " # output_path = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\ngv明细1.xlsx'\n", + " # filtered_df.to_excel(output_path, index=False)\n", + " # data_NGV_j1.to_excel( r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\ngv明细j1.xlsx', index=False)\n", + " # data_NGV_j.to_excel( r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\ngv明细j.xlsx', index=False)\n", + " # new_df.to_excel(r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\ngv明细ndf.xlsx', index=False)\n", + "\n", + " # common_module.send_task_status(task_start_time, \"NGV新增数据\")\n", + " # logger.info(f\"任务完成。\")\n", " except Exception as e:\n", - " error_logger.error(f\"NGV数据更新流程失败: {str(e)}\")\n", - " raise\n", + " error_task_logger.error(f\"任务执行时发生异常: {e}\")\n", + " common_module.send_task_error(task_start_time, \"NGV新增数据\", str(e))\n", + "\n", + " @staticmethod\n", + " def row_to_dict(row, field_mapping):\n", + " \"\"\"将一行数据转换为指定格式的字典\"\"\"\n", + " result = {}\n", + " for col_name, widget_id in field_mapping.items():\n", + " if col_name in row:\n", + " value = row[col_name]\n", + " clean_value = None if pd.isna(value) else value\n", + " result[widget_id] = {\"value\": clean_value}\n", + " return result\n", + "\n", + " def fields(self):\n", + " self.field_mapping = dict(date_id='_widget_1734062123065', date_fmt='_widget_1734062123066',\n", + " id_own_group='_widget_1734062123067', group_name='_widget_1734062123068',\n", + " id_own_org='_widget_1734062123069', org_name='_widget_1734062123070',\n", + " org_code='_widget_1734062123071', group_grade='_widget_1734062123072',\n", + " org_type='_widget_1734062123073', org_status='_widget_1734062123074',\n", + " saas_version='_widget_1734062123075', is_wechat='_widget_1734062123076',\n", + " is_mini_app='_widget_1734062123077', is_wx_shop='_widget_1734062123078',\n", + " is_camera_service='_widget_1734062123079',\n", + " is_maintenance_service='_widget_1734062123080',\n", + " saas_create_time='_widget_1734062123081', expiry_time='_widget_1734062123082',\n", + " saas_use_days='_widget_1734062123083', saas_use_year='_widget_1734062123084',\n", + " is_main_org='_widget_1734062123085', license_code='_widget_1734062123086',\n", + " license_name='_widget_1734062123087', org_crm_id='_widget_1734062123088',\n", + " province_id='_widget_1734062123089', province_name='_widget_1734062123090',\n", + " city_id='_widget_1734062123091', city_name='_widget_1734062123092',\n", + " area_id='_widget_1734062123093', area_name='_widget_1734062123094',\n", + " region_name='_widget_1734062123095', region_short_name='_widget_1734062123096',\n", + " branch_name='_widget_1734062123097', carzone_store_id='_widget_1734062123098',\n", + " carzone_store_name='_widget_1734062123099',\n", + " customer_carzone_id='_widget_1734062123100', salesmen='_widget_1734062123101',\n", + " area_manager='_widget_1734062123102', service_salesmen='_widget_1734062123103',\n", + " impl_principal='_widget_1734062123104',\n", + " service_impl_principal='_widget_1734062123105',\n", + " active_user_count='_widget_1734062123106', active_user_type='_widget_1734062123107',\n", + " limit_user_count='_widget_1734062123108', limit_user_type='_widget_1734062123109',\n", + " is_n='_widget_1734062123110', is_g='_widget_1734062123111',\n", + " is_v='_widget_1734062123112', is_visited='_widget_1734062123113',\n", + " is_active='_widget_1734062123114', active_status_fmt='_widget_1734062123115',\n", + " bill_count_last_30_day='_widget_1734062123116',\n", + " bill_day_count_last_30_day='_widget_1734062123117',\n", + " bill_day_count_this_month='_widget_1734062123118',\n", + " bill_count_last_7_day='_widget_1734062123119',\n", + " bill_day_count_last_7_day='_widget_1734062123120', pv_count='_widget_1734062123121',\n", + " uv_count='_widget_1734062123122', bill_count_1d='_widget_1734062123123',\n", + " bill_count_2d='_widget_1734062123124', bill_count_3d='_widget_1734062123125',\n", + " bill_count_4d='_widget_1734062123126', bill_count_5d='_widget_1734062123127',\n", + " bill_count_6d='_widget_1734062123128', bill_count_7d='_widget_1734062123129',\n", + " bill_count_8d='_widget_1734062123130', bill_count_9d='_widget_1734062123131',\n", + " bill_count_10d='_widget_1734062123132', bill_count_11d='_widget_1734062123133',\n", + " bill_count_12d='_widget_1734062123134', bill_count_13d='_widget_1734062123135',\n", + " bill_count_14d='_widget_1734062123136', bill_count_15d='_widget_1734062123137',\n", + " bill_count_16d='_widget_1734062123138', bill_count_17d='_widget_1734062123139',\n", + " bill_count_18d='_widget_1734062123140', bill_count_19d='_widget_1734062123141',\n", + " bill_count_20d='_widget_1734062123142', bill_count_21d='_widget_1734062123143',\n", + " bill_count_22d='_widget_1734062123144', bill_count_23d='_widget_1734062123145',\n", + " bill_count_24d='_widget_1734062123146', bill_count_25d='_widget_1734062123147',\n", + " bill_count_26d='_widget_1734062123148', bill_count_27d='_widget_1734062123149',\n", + " bill_count_28d='_widget_1734062123150', bill_count_29d='_widget_1734062123151',\n", + " bill_count_30d='_widget_1734062123152', bill_count_31d='_widget_1734062123153',\n", + " etl_time='_widget_1734062123154',\n", + " maintain_bill_count_last_30_day='_widget_1734062123155',\n", + " washing_bill_count_last_30_day='_widget_1734062123156',\n", + " maintain_bill_day_count_last_30_day='_widget_1734062123157',\n", + " washing_bill_day_count_last_30_day='_widget_1734062123158',\n", + " retail_bill_count_last_30_day='_widget_1734062123159',\n", + " retail_bill_day_count_last_30_day='_widget_1734062123160',\n", + " purchase_bill_count_last_30_day='_widget_1734062123161',\n", + " purchase_bill_day_count_last_30_day='_widget_1734062123162',\n", + " card_bill_count_last_30_day='_widget_1734062123163',\n", + " card_bill_day_count_last_30_day='_widget_1734062123164',\n", + " gd_sales_bill_count_last_30_day='_widget_1734062123165',\n", + " gd_sales_bill_day_count_last_30_day='_widget_1734062123166',\n", + " g_change_flag='_widget_1734062123167', saas_package='_widget_1734062123168',\n", + " manage_model='_widget_1734062123169', contacts='_widget_1734062123170',\n", + " contact_number='_widget_1734062123171', contact_mobile='_widget_1734062123172',\n", + " g_month_count='_widget_1734062123173', g_month_percentage='_widget_1734062123174',\n", + " is_install_service='_widget_1734062123175',\n", + " install_create_time='_widget_1734062123176', last_end_date='_widget_1734062123177',\n", + " renew_date='_widget_1734062123178', is_chain_owner='_widget_1734062123179',\n", + " group_org_count='_widget_1734062123180',\n", + " recent_bill_warning_days='_widget_1734062123181',\n", + " g_change_flag_d='_widget_1734062123182', g_lost_warning_days='_widget_1734062123183',\n", + " saas_edition_fmt='_widget_1734062123184', g_flag_1m='_widget_1734062123185',\n", + " g_flag_2m='_widget_1734062123186', g_flag_3m='_widget_1734062123187',\n", + " g_flag_4m='_widget_1734062123188', g_flag_5m='_widget_1734062123189',\n", + " g_flag_6m='_widget_1734062123190', g_flag_day_count='_widget_1734062123191',\n", + " add_org_flag='_widget_1734062123192', pt='_widget_1734062123193',\n", + " org_size='_widget_1734062123194', qualification_type_fmt='_widget_1734062123195',\n", + " business_scope_fmt='_widget_1734062123196', store_type_fmt='_widget_1734062123197',\n", + " area='_widget_1734062123198', station_number='_widget_1734062123199',\n", + " header_type_fmt='_widget_1734062123200', org_stage='_widget_1734062123201',\n", + " g_count_this_month='_widget_1734062123202',\n", + " saas_customer_type='_widget_1734062123203', technician='_widget_1734062123204',\n", + " tmall_maintain_service_status_desc='_widget_1734062123205',\n", + " date_fmt_date='_widget_1749000071375',\n", + " area_manager_staff_id='_widget_1748496855779',\n", + " service_impl_principal_staff_id=\"_widget_1748496855780\",\n", + " service_salesmen_staff_id=\"_widget_1748496855778\",\n", + " technician_staff_id=\"_widget_1751877712235\",\n", + " saas_create_time_date=\"_widget_1749000071377\",\n", + " expiry_time_date=\"_widget_1749000071382\",\n", + " install_create_time_date=\"_widget_1749000071384\",\n", + " last_end_date_date=\"_widget_1749000071389\", renew_date_date=\"_widget_1749000071391\")\n", + "\n", "\n", "if __name__ == '__main__':\n", - " updater = NGVDataUpdater()\n", - " updater.main()" + " start = UpdateNGVData()\n", + " start.main()\n" ], - "id": "82d58cced4a6e02", + "id": "6ce49b7686e91712", "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2025-08-04 16:27:26,074 - task_logger - INFO - 开始NGV数据更新流程\n" + "\u001B[92m2025-10-15 15:14:12,293 - api.py - task_logger - INFO - 获取了145条数据\u001B[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "1de1bc8bae6d3111bb0f6332472b8cd4\n", - "1de1bc8bae6d3111bb0f6332472b8cd4\n", "已获取 100 条数据\n", - "已获取 200 条数据\n", - "已获取 300 条数据\n", - "已获取 400 条数据\n", - "已获取 500 条数据\n", - "已获取 600 条数据\n", - "已获取 700 条数据\n", - "已获取 800 条数据\n", - "已获取 900 条数据\n", - "已获取 1000 条数据\n", - "已获取 1100 条数据\n", - "已获取 1200 条数据\n", - "已获取 1300 条数据\n", - "已获取 1400 条数据\n", - "已获取 1500 条数据\n", - "已获取 1600 条数据\n", - "已获取 1700 条数据\n", - "已获取 1800 条数据\n", - "已获取 1900 条数据\n", - "已获取 2000 条数据\n", - "已获取 2100 条数据\n", - "已获取 2200 条数据\n", - "已获取 2300 条数据\n", - "已获取 2400 条数据\n", - "已获取 2500 条数据\n", - "已获取 2600 条数据\n", - "已获取 2700 条数据\n", - "已获取 2800 条数据\n", - "已获取 2900 条数据\n", - "已获取 3000 条数据\n", - "已获取 3100 条数据\n", - "已获取 3200 条数据\n", - "已获取 3300 条数据\n", - "已获取 3400 条数据\n", - "已获取 3500 条数据\n", - "已获取 3600 条数据\n", - "已获取 3700 条数据\n", - "已获取 3800 条数据\n", - "已获取 3900 条数据\n", - "已获取 4000 条数据\n", - "已获取 4100 条数据\n", - "已获取 4200 条数据\n", - "已获取 4300 条数据\n", - "已获取 4400 条数据\n", - "已获取 4500 条数据\n", - "已获取 4600 条数据\n", - "已获取 4700 条数据\n", - "已获取 4800 条数据\n", - "已获取 4900 条数据\n", - "已获取 5000 条数据\n", - "已获取 5100 条数据\n", - "已获取 5200 条数据\n", - "已获取 5300 条数据\n", - "已获取 5400 条数据\n", - "已获取 5500 条数据\n", - "已获取 5600 条数据\n", - "已获取 5700 条数据\n", - "已获取 5800 条数据\n", - "已获取 5900 条数据\n", - "已获取 6000 条数据\n", - "已获取 6100 条数据\n", - "已获取 6200 条数据\n", - "已获取 6300 条数据\n", - "已获取 6400 条数据\n", - "已获取 6500 条数据\n", - "已获取 6600 条数据\n", - "已获取 6700 条数据\n", - "已获取 6800 条数据\n", - "已获取 6900 条数据\n", - "已获取 7000 条数据\n", - "已获取 7100 条数据\n", - "已获取 7200 条数据\n", - "已获取 7300 条数据\n", - "已获取 7400 条数据\n", - "已获取 7500 条数据\n", - "已获取 7600 条数据\n", - "已获取 7700 条数据\n", - "已获取 7800 条数据\n", - "已获取 7900 条数据\n", - "已获取 8000 条数据\n", - "已获取 8100 条数据\n", - "已获取 8200 条数据\n", - "已获取 8300 条数据\n", - "已获取 8400 条数据\n", - "已获取 8500 条数据\n", - "已获取 8600 条数据\n", - "已获取 8700 条数据\n", - "已获取 8800 条数据\n", - "已获取 8900 条数据\n", - "已获取 9000 条数据\n", - "已获取 9100 条数据\n", - "已获取 9200 条数据\n", - "已获取 9300 条数据\n", - "已获取 9400 条数据\n", - "已获取 9500 条数据\n", - "已获取 9600 条数据\n", - "已获取 9700 条数据\n", - "已获取 9800 条数据\n", - "已获取 9900 条数据\n", - "已获取 10000 条数据\n", - "已获取 10100 条数据\n", - "已获取 10200 条数据\n", - "已获取 10300 条数据\n", - "已获取 10400 条数据\n", - "已获取 10500 条数据\n", - "已获取 10600 条数据\n", - "已获取 10700 条数据\n", - "已获取 10800 条数据\n", - "已获取 10900 条数据\n", - "已获取 11000 条数据\n", - "已获取 11100 条数据\n", - "已获取 11200 条数据\n", - "已获取 11300 条数据\n", - "已获取 11400 条数据\n", - "已获取 11500 条数据\n", - "已获取 11600 条数据\n", - "已获取 11700 条数据\n", - "已获取 11800 条数据\n", - "已获取 11900 条数据\n", - "已获取 12000 条数据\n", - "已获取 12100 条数据\n", - "已获取 12200 条数据\n", - "已获取 12300 条数据\n", - "已获取 12400 条数据\n", - "已获取 12500 条数据\n", - "已获取 12600 条数据\n", - "已获取 12700 条数据\n", - "已获取 12800 条数据\n", - "已获取 12900 条数据\n", - "已获取 13000 条数据\n", - "已获取 13100 条数据\n", - "已获取 13200 条数据\n", - "已获取 13300 条数据\n", - "已获取 13400 条数据\n", - "已获取 13500 条数据\n", - "已获取 13600 条数据\n", - "已获取 13700 条数据\n", - "已获取 13800 条数据\n", - "已获取 13900 条数据\n", - "已获取 14000 条数据\n", - "已获取 14100 条数据\n", - "已获取 14200 条数据\n", - "已获取 14300 条数据\n", - "已获取 14400 条数据\n", - "已获取 14500 条数据\n", - "已获取 14600 条数据\n", - "已获取 14700 条数据\n", - "已获取 14800 条数据\n", - "已获取 14900 条数据\n", - "已获取 15000 条数据\n", - "已获取 15100 条数据\n", - "已获取 15200 条数据\n", - "已获取 15300 条数据\n", - "已获取 15400 条数据\n", - "已获取 15500 条数据\n", - "已获取 15600 条数据\n", - "已获取 15700 条数据\n", - "已获取 15800 条数据\n", - "已获取 15900 条数据\n", - "已获取 16000 条数据\n", - "已获取 16100 条数据\n", - "已获取 16200 条数据\n", - "已获取 16300 条数据\n", - "已获取 16400 条数据\n", - "已获取 16500 条数据\n", - "已获取 16600 条数据\n", - "已获取 16700 条数据\n", - "已获取 16800 条数据\n", - "已获取 16900 条数据\n", - "已获取 17000 条数据\n", - "已获取 17100 条数据\n", - "已获取 17200 条数据\n", - "已获取 17300 条数据\n", - "已获取 17400 条数据\n", - "已获取 17500 条数据\n", - "已获取 17600 条数据\n", - "已获取 17700 条数据\n", - "已获取 17800 条数据\n", - "已获取 17900 条数据\n", - "已获取 18000 条数据\n", - "已获取 18100 条数据\n", - "已获取 18200 条数据\n", - "已获取 18300 条数据\n", - "已获取 18400 条数据\n", - "已获取 18500 条数据\n", - "已获取 18600 条数据\n", - "已获取 18700 条数据\n", - "已获取 18800 条数据\n", - "已获取 18900 条数据\n", - "已获取 19000 条数据\n", - "已获取 19100 条数据\n", - "已获取 19200 条数据\n", - "已获取 19300 条数据\n", - "已获取 19400 条数据\n", - "已获取 19500 条数据\n", - "已获取 19600 条数据\n", - "已获取 19700 条数据\n", - "已获取 19800 条数据\n", - "已获取 19900 条数据\n" + "已获取 145 条数据\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ + "\u001B[92m2025-10-15 15:14:12,294 - 4281365028.py - task_logger - INFO - 数据加载完成\u001B[0m\n", + "C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", - "KeyboardInterrupt\n", - "\n" + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " filtered_df[col + '_date'] = (\n", + "C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " filtered_df[col + '_date'] = (\n", + "C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " filtered_df[col + '_date'] = (\n", + "C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " filtered_df[col + '_date'] = (\n", + "C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " filtered_df[col + '_date'] = (\n", + "C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " filtered_df[col + '_date'] = (\n", + "\u001B[92m2025-10-15 15:14:36,032 - 4281365028.py - task_logger - INFO - 时间转换完成\u001B[0m\n", + "C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:102: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " filtered_df[col + \"_staff_id\"] = staff_ids\n", + "C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:102: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " filtered_df[col + \"_staff_id\"] = staff_ids\n", + "C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:102: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " filtered_df[col + \"_staff_id\"] = staff_ids\n", + "C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:102: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " filtered_df[col + \"_staff_id\"] = staff_ids\n", + "\u001B[92m2025-10-15 15:14:36,045 - 4281365028.py - task_logger - INFO - 人员转换完成\u001B[0m\n" ] } ], - "execution_count": 1 + "execution_count": 2 } ], "metadata": {