NGV查缺补漏更新代码

This commit is contained in:
z66
2025-11-06 15:51:51 +08:00
parent 027a66b973
commit a8d0a2d564
7 changed files with 430 additions and 23 deletions
-1
View File
@@ -196,7 +196,6 @@ class API:
# 复制 data_get,避免修改原始数据 # 复制 data_get,避免修改原始数据
data_get_copy = json.loads(json.dumps(data_get)) # 深拷贝 data_get_copy = json.loads(json.dumps(data_get)) # 深拷贝
# 替换 data 字段下的所有键
if 'data' in data_get_copy: if 'data' in data_get_copy:
data_get_copy['data'] = replace_keys(data_get_copy['data']) data_get_copy['data'] = replace_keys(data_get_copy['data'])
+53
View File
@@ -8472,3 +8472,56 @@
2025-10-28 09:22:31,893 - update_email_to_store_daily_use.py - error_task_logger - ERROR - 邮件 2 POP3协议错误: b'RUpUZEVKVFZFSlRkRUpUSkRKVGRDSlRJeWEyeGhjM01sTWpJbE0wRWxNakpzWldGbUpUSXlKVEpE',跳过该邮件继续处理 2025-10-28 09:22:31,893 - update_email_to_store_daily_use.py - error_task_logger - ERROR - 邮件 2 POP3协议错误: b'RUpUZEVKVFZFSlRkRUpUSkRKVGRDSlRJeWEyeGhjM01sTWpJbE0wRWxNakpzWldGbUpUSXlKVEpE',跳过该邮件继续处理
2025-10-28 09:22:31,893 - update_email_to_store_daily_use.py - error_task_logger - ERROR - 邮件 1 POP3协议错误: b'SlRJeWRHVjRkQ1V5TWlVelFTVXlNakV3TGpFMkpVVTBKVUk0SlRoQkpVVTNKVUpCSlVKR0pUSXlK',跳过该邮件继续处理 2025-10-28 09:22:31,893 - update_email_to_store_daily_use.py - error_task_logger - ERROR - 邮件 1 POP3协议错误: b'SlRJeWRHVjRkQ1V5TWlVelFTVXlNakV3TGpFMkpVVTBKVUk0SlRoQkpVVTNKVUpCSlVKR0pUSXlK',跳过该邮件继续处理
2025-10-28 09:22:32,014 - update_email_to_store_daily_use.py - error_task_logger - ERROR - 任务执行时发生异常: b'VEpESlRJeWJXRnlhM01sTWpJbE0wRWxOVUlsTjBJbE1qSnJiR0Z6Y3lVeU1pVXpRU1V5TW0xaGNt' 2025-10-28 09:22:32,014 - update_email_to_store_daily_use.py - error_task_logger - ERROR - 任务执行时发生异常: b'VEpESlRJeWJXRnlhM01sTWpJbE0wRWxOVUlsTjBJbE1qSnJiR0Z6Y3lVeU1pVXpRU1V5TW0xaGNt'
2025-11-06 14:08:00,667 - update_all_NGV_data_daily.py - error_task_logger - ERROR - NGV更新数据执行时发生异常: [Errno 22] Invalid argument: '2025-11-06 14:01:34_ngv_data_today.csv'
Traceback (most recent call last):
File "D:\Idea Project\SaaS_V1.7\back_ground_module\update_all_NGV_data_daily.py", line 157, in main
ngv_data_today, ngv_data_yesterday = self._load_ngv_source_data(task_start_time)
~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "D:\Idea Project\SaaS_V1.7\back_ground_module\update_all_NGV_data_daily.py", line 307, in _load_ngv_source_data
ngv_data_1.to_csv(f"{task_start_time}_ngv_data_today.csv", index=False)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\util\_decorators.py", line 333, in wrapper
return func(*args, **kwargs)
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\core\generic.py", line 3986, in to_csv
return DataFrameRenderer(formatter).to_csv(
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
path_or_buf,
^^^^^^^^^^^^
...<14 lines>...
storage_options=storage_options,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\io\formats\format.py", line 1014, in to_csv
csv_formatter.save()
~~~~~~~~~~~~~~~~~~^^
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\io\formats\csvs.py", line 251, in save
with get_handle(
~~~~~~~~~~^
self.filepath_or_buffer,
^^^^^^^^^^^^^^^^^^^^^^^^
...<4 lines>...
storage_options=self.storage_options,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
) as handles:
^
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\io\common.py", line 873, in get_handle
handle = open(
handle,
...<3 lines>...
newline="",
)
OSError: [Errno 22] Invalid argument: '2025-11-06 14:01:34_ngv_data_today.csv'
2025-11-06 14:10:36,473 - update_all_NGV_data_daily.py - error_task_logger - ERROR - NGV更新数据执行时发生异常: 'org_code'
Traceback (most recent call last):
File "D:\Idea Project\SaaS_V1.7\back_ground_module\update_all_NGV_data_daily.py", line 160, in main
self._handle_deleted_stores(jdy_ngv_data, ngv_data_today)
~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\Idea Project\SaaS_V1.7\back_ground_module\update_all_NGV_data_daily.py", line 403, in _handle_deleted_stores
ngv_org_codes = set(ngv_current_data['org_code'].dropna().unique())
~~~~~~~~~~~~~~~~^^^^^^^^^^^^
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\core\frame.py", line 4107, in __getitem__
indexer = self.columns.get_loc(key)
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\core\indexes\range.py", line 417, in get_loc
raise KeyError(key)
KeyError: 'org_code'
+19 -9
View File
@@ -5,6 +5,7 @@ from config import Config
from api import API from api import API
from back_ground_module import CommonModule from back_ground_module import CommonModule
from log_config import configure_task_logger, configure_error_task_logger from log_config import configure_task_logger, configure_error_task_logger
import time
logger = configure_task_logger() logger = configure_task_logger()
# 获取已经配置好的错误任务日志记录器 # 获取已经配置好的错误任务日志记录器
@@ -15,8 +16,10 @@ common_module = CommonModule()
output_dir = "output" # 设置输出目录 output_dir = "output" # 设置输出目录
# 创建输出目录(如果不存在) # 创建输出目录(如果不存在)
import os import os
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
class UpdateNGVData: class UpdateNGVData:
"""NGV数据每日新增""" """NGV数据每日新增"""
@@ -48,9 +51,11 @@ class UpdateNGVData:
data_NGV_j = common_module.get_ngv_details(days_back=1) data_NGV_j = common_module.get_ngv_details(days_back=1)
data_NGV_j1 = common_module.get_ngv_details(days_back=2) data_NGV_j1 = common_module.get_ngv_details(days_back=2)
timestamp = time.time()
data_NGV_j.to_csv(os.path.join(output_dir, f"{task_start_time}up_NGV_j.csv"))
data_NGV_j1.to_csv(os.path.join(output_dir, f"{task_start_time}up_NGV_j1.csv")) # data_NGV_j.to_csv(os.path.join(output_dir, f"{timestamp}up_NGV_j.csv"))
# data_NGV_j1.to_csv(os.path.join(output_dir, f"{timestamp}up_NGV_j1.csv"))
# 找出在 data_NGV_j 中存在但在 data_NGV_j1 中不存在的 data_id # 找出在 data_NGV_j 中存在但在 data_NGV_j1 中不存在的 data_id
unique_data_ids = data_NGV_j[~data_NGV_j['org_code'].isin(data_NGV_j1['org_code'])] unique_data_ids = data_NGV_j[~data_NGV_j['org_code'].isin(data_NGV_j1['org_code'])]
@@ -62,6 +67,9 @@ class UpdateNGVData:
data_NGV_j = data_NGV_j[data_NGV_j['org_type'] == '一般'] data_NGV_j = data_NGV_j[data_NGV_j['org_type'] == '一般']
data_NGV_j1 = data_NGV_j1[data_NGV_j1['org_type'] == '一般'] data_NGV_j1 = data_NGV_j1[data_NGV_j1['org_type'] == '一般']
filtered_df = new_df[new_df['org_type'] == '一般'] filtered_df = new_df[new_df['org_type'] == '一般']
filtered_df = filtered_df.copy()
# 默认未删除
filtered_df['源ngv是否已删除'] = '未删除'
# 日期字段转换为日期格式 # 日期字段转换为日期格式
time_columns = ['date_fmt', 'saas_create_time', 'expiry_time', 'install_create_time', "last_end_date", time_columns = ['date_fmt', 'saas_create_time', 'expiry_time', 'install_create_time', "last_end_date",
@@ -109,14 +117,15 @@ class UpdateNGVData:
# all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j.iterrows()] # 前一天的全部数据 # all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j.iterrows()] # 前一天的全部数据
all_data = [self.row_to_dict(row, self.field_mapping) for index, row in filtered_df.iterrows()] # 增量数据 all_data = [self.row_to_dict(row, self.field_mapping) for index, row in filtered_df.iterrows()] # 增量数据
try: # try:
filtered_df.to_csv(os.path.join(output_dir, f"{task_start_time}NGV.csv")) # filtered_df.to_csv(os.path.join(output_dir, f"{timestamp}NGV.csv"))
except Exception as e: # except Exception as e:
error_task_logger.error(f"NGV过滤后数据保存异常: {e}") # error_task_logger.error(f"NGV过滤后数据保存异常: {e}")
pass # pass
# #
data = {'api_key': Config.SaaS_Tasks_APP_ID, 'entry_id': Config.NGV_TASKS_ENTRY_ID, "data_list": all_data,"is_start_trigger":"true"} data = {'api_key': Config.SaaS_Tasks_APP_ID, 'entry_id': Config.NGV_TASKS_ENTRY_ID, "data_list": all_data,
"is_start_trigger": "true"}
result = api_instance.entry_data_batch_create(data) result = api_instance.entry_data_batch_create(data)
logger.info(f"数据已推送:{result}") logger.info(f"数据已推送:{result}")
@@ -240,7 +249,8 @@ class UpdateNGVData:
saas_create_time_date="_widget_1749000071377", saas_create_time_date="_widget_1749000071377",
expiry_time_date="_widget_1749000071382", expiry_time_date="_widget_1749000071382",
install_create_time_date="_widget_1749000071384", install_create_time_date="_widget_1749000071384",
last_end_date_date="_widget_1749000071389", renew_date_date="_widget_1749000071391") last_end_date_date="_widget_1749000071389", renew_date_date="_widget_1749000071391"
, 源NGV是否已删除="_widget_1754285499851")
if __name__ == '__main__': if __name__ == '__main__':
+59 -13
View File
@@ -101,7 +101,7 @@ RESET_ALL_DELETED_STATUS = False # 首次运行设为True,之后设为False
# 【3. 并发更新配置】 # 【3. 并发更新配置】
# 是否使用并发更新(多线程同时更新,速度快) # 是否使用并发更新(多线程同时更新,速度快)
USE_CONCURRENT_UPDATE = False # True=并发更新(快),False=串行更新(慢) USE_CONCURRENT_UPDATE = True # True=并发更新(快),False=串行更新(慢)
# 并发线程数(同时执行的更新任务数) # 并发线程数(同时执行的更新任务数)
# 建议值:5-20,过大可能被API限流,过小影响速度 # 建议值:5-20,过大可能被API限流,过小影响速度
@@ -177,6 +177,46 @@ class UpdateAllNGVDataDaily:
common_module.send_task_error(task_start_time, "NGV更新数据", str(e)) common_module.send_task_error(task_start_time, "NGV更新数据", str(e))
raise raise
def _compose_key_values(self, org_name, group_name, org_code, id_own_group, id_own_org):
    """Join five identifying fields into one stable composite key.

    Each field is converted with ``str()`` and stripped of surrounding
    whitespace; missing scalar values (``None``, ``NaN``, ``NaT``,
    ``pd.NA``) become an empty string so the key always has five slots.

    Args:
        org_name: Store (organisation) name.
        group_name: Group name.
        org_code: Organisation code.
        id_own_group: Owning-group identifier.
        id_own_org: Owning-organisation identifier.

    Returns:
        str: The five normalised parts joined with ``'||'``.
    """
    def normalize(value):
        # pd.isna() returns an element-wise array for list-like input, which
        # cannot be used in a boolean context; only scalars can be "missing".
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ''
        return str(value).strip()

    fields = (org_name, group_name, org_code, id_own_group, id_own_org)
    return '||'.join(normalize(value) for value in fields)
def _compose_key_df_ngv(self, df):
    """Add a ``composite_key`` column to an NGV DataFrame.

    The key is built by ``self._compose_key_values`` from the columns
    ``org_name``, ``group_name``, ``org_code``, ``id_own_group`` and
    ``id_own_org``. Any of these columns that is absent is created and
    filled with empty strings first.

    Note:
        ``df`` is modified in place (missing columns and ``composite_key``
        are added to the caller's frame) and the same object is returned.

    Args:
        df: DataFrame holding NGV records.

    Returns:
        The same DataFrame, now containing ``composite_key``.
    """
    key_columns = ['org_name', 'group_name', 'org_code', 'id_own_group', 'id_own_org']
    for column in key_columns:
        if column not in df.columns:
            df[column] = ''
    # Walk the five columns in lockstep instead of iterrows(): no per-row
    # Series is built and each column keeps its own dtype.
    df['composite_key'] = [
        self._compose_key_values(*values)
        for values in zip(*(df[column] for column in key_columns))
    ]
    return df
def _compose_key_df_jdy(self, df):
    """Return a renamed copy of Jiandaoyun data with a ``composite_key`` column.

    The five widget-id columns listed below are renamed to their field
    names and a ``composite_key`` column is built from them with
    ``self._compose_key_values``. Widget columns that are absent are
    created and filled with empty strings. The input ``df`` is not
    modified.

    Args:
        df: DataFrame of Jiandaoyun records keyed by widget-id column names.

    Returns:
        A new DataFrame with the widget columns renamed and
        ``composite_key`` added.
    """
    # Widget id -> field name, listed in the argument order expected by
    # _compose_key_values.
    widget_to_field = {
        '_widget_1734062123070': 'org_name',
        '_widget_1734062123068': 'group_name',
        '_widget_1734062123071': 'org_code',
        '_widget_1734062123067': 'id_own_group',
        '_widget_1734062123069': 'id_own_org',
    }
    prepared = df.copy()
    for widget_id in widget_to_field:
        if widget_id not in prepared.columns:
            prepared[widget_id] = ''
    # Build the keys from the widget columns *before* renaming: if the frame
    # already has a column called e.g. 'org_code', looking it up after the
    # rename would return several values per row instead of one.
    composite_keys = [
        self._compose_key_values(*values)
        for values in zip(*(prepared[widget_id] for widget_id in widget_to_field))
    ]
    renamed = prepared.rename(columns=widget_to_field)
    renamed['composite_key'] = composite_keys
    return renamed
def _load_base_data(self): def _load_base_data(self):
""" """
步骤1: 加载基础数据 步骤1: 加载基础数据
@@ -263,9 +303,12 @@ class UpdateAllNGVDataDaily:
ngv_data_1 = common_module.get_ngv_details(days_back=1) ngv_data_1 = common_module.get_ngv_details(days_back=1)
ngv_data_2 = common_module.get_ngv_details(days_back=2) ngv_data_2 = common_module.get_ngv_details(days_back=2)
import time
nowtime = time.time()
# 存储每天获取到的数据 # 存储每天获取到的数据
ngv_data_1.to_csv(f"{task_start_time}_ngv_data_today.csv", index=False) ngv_data_1.to_csv(f"{nowtime}_ngv_data_today.csv", index=False)
ngv_data_2.to_csv(f"{task_start_time}_ngv_data_yesterday.csv", index=False) ngv_data_2.to_csv(f"{nowtime}_ngv_data_yesterday.csv", index=False)
# 只保留 org_type 为 "一般" 的记录 # 只保留 org_type 为 "一般" 的记录
ngv_data_1 = ngv_data_1[ngv_data_1['org_type'] == '一般'] ngv_data_1 = ngv_data_1[ngv_data_1['org_type'] == '一般']
@@ -360,7 +403,7 @@ class UpdateAllNGVDataDaily:
ngv_org_codes = set(ngv_current_data['org_code'].dropna().unique()) ngv_org_codes = set(ngv_current_data['org_code'].dropna().unique())
jdy_org_codes_unique = set(temp_jdy_data['org_code'].dropna().unique()) jdy_org_codes_unique = set(temp_jdy_data['org_code'].dropna().unique())
# 找出在简道云存在但NGV中不存在的门店(唯一org_code # 找出在简道云存在但NGV中不存在的门店(唯一复合索引
missing_org_codes = jdy_org_codes_unique - ngv_org_codes missing_org_codes = jdy_org_codes_unique - ngv_org_codes
if len(missing_org_codes) == 0: if len(missing_org_codes) == 0:
@@ -401,7 +444,7 @@ class UpdateAllNGVDataDaily:
logger.info("步骤4: 开始对比数据变化...") logger.info("步骤4: 开始对比数据变化...")
# 移除不需要对比的列 # 移除不需要对比的列
columns_to_remove = {'date_id', 'date_fmt', 'pt', 'etl_time'} columns_to_remove = {'date_id', 'date_fmt', 'pt', 'etl_time','id_own_org'}
# 过滤列 # 过滤列
df1_filtered = ngv_today[[col for col in ngv_today.columns if col not in columns_to_remove]] df1_filtered = ngv_today[[col for col in ngv_today.columns if col not in columns_to_remove]]
@@ -440,7 +483,7 @@ class UpdateAllNGVDataDaily:
# 只保留不一致的数据 # 只保留不一致的数据
changed_data = df1_common[df1_common['match_status'] == '不一致'].copy() changed_data = df1_common[df1_common['match_status'] == '不一致'].copy()
# 关联简道云的_id # 关联简道云的_id(基于org_code
temp_jdy = jdy_ngv_data.copy() temp_jdy = jdy_ngv_data.copy()
temp_jdy.reset_index(drop=True, inplace=True) temp_jdy.reset_index(drop=True, inplace=True)
@@ -653,13 +696,16 @@ class UpdateAllNGVDataDaily:
# 执行创建 # 执行创建
create_count = 0 create_count = 0
if len(create_data_list) > 0:
if USE_BATCH_CREATE: # create_df = pd.DataFrame(create_data_list)
create_count = self._batch_create(create_data_list) # create_df.to_csv(f"create_data.csv", index=False)
else: # if len(create_data_list) > 0:
create_count = self._single_create(create_data_list) # if USE_BATCH_CREATE:
# 输出新增数据 # create_count = self._batch_create(create_data_list)
self._save_create_data(create_data_list) # else:
# create_count = self._single_create(create_data_list)
# # 输出新增数据
# self._save_create_data(create_data_list)
logger.info(f" ✓ 同步完成: 更新 {update_count} 条, 创建 {create_count}") logger.info(f" ✓ 同步完成: 更新 {update_count} 条, 创建 {create_count}")
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long