NGV查缺补漏更新代码
This commit is contained in:
@@ -8472,3 +8472,56 @@
|
||||
2025-10-28 09:22:31,893 - update_email_to_store_daily_use.py - error_task_logger - ERROR - 邮件 2 POP3协议错误: b'RUpUZEVKVFZFSlRkRUpUSkRKVGRDSlRJeWEyeGhjM01sTWpJbE0wRWxNakpzWldGbUpUSXlKVEpE',跳过该邮件继续处理
|
||||
2025-10-28 09:22:31,893 - update_email_to_store_daily_use.py - error_task_logger - ERROR - 邮件 1 POP3协议错误: b'SlRJeWRHVjRkQ1V5TWlVelFTVXlNakV3TGpFMkpVVTBKVUk0SlRoQkpVVTNKVUpCSlVKR0pUSXlK',跳过该邮件继续处理
|
||||
2025-10-28 09:22:32,014 - update_email_to_store_daily_use.py - error_task_logger - ERROR - 任务执行时发生异常: b'VEpESlRJeWJXRnlhM01sTWpJbE0wRWxOVUlsTjBJbE1qSnJiR0Z6Y3lVeU1pVXpRU1V5TW0xaGNt'
|
||||
2025-11-06 14:08:00,667 - update_all_NGV_data_daily.py - error_task_logger - ERROR - NGV更新数据执行时发生异常: [Errno 22] Invalid argument: '2025-11-06 14:01:34_ngv_data_today.csv'
|
||||
Traceback (most recent call last):
|
||||
File "D:\Idea Project\SaaS_V1.7\back_ground_module\update_all_NGV_data_daily.py", line 157, in main
|
||||
ngv_data_today, ngv_data_yesterday = self._load_ngv_source_data(task_start_time)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
|
||||
File "D:\Idea Project\SaaS_V1.7\back_ground_module\update_all_NGV_data_daily.py", line 307, in _load_ngv_source_data
|
||||
ngv_data_1.to_csv(f"{task_start_time}_ngv_data_today.csv", index=False)
|
||||
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\util\_decorators.py", line 333, in wrapper
|
||||
return func(*args, **kwargs)
|
||||
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\core\generic.py", line 3986, in to_csv
|
||||
return DataFrameRenderer(formatter).to_csv(
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^
|
||||
path_or_buf,
|
||||
^^^^^^^^^^^^
|
||||
...<14 lines>...
|
||||
storage_options=storage_options,
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
)
|
||||
^
|
||||
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\io\formats\format.py", line 1014, in to_csv
|
||||
csv_formatter.save()
|
||||
~~~~~~~~~~~~~~~~~~^^
|
||||
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\io\formats\csvs.py", line 251, in save
|
||||
with get_handle(
|
||||
~~~~~~~~~~^
|
||||
self.filepath_or_buffer,
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
...<4 lines>...
|
||||
storage_options=self.storage_options,
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
) as handles:
|
||||
^
|
||||
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\io\common.py", line 873, in get_handle
|
||||
handle = open(
|
||||
handle,
|
||||
...<3 lines>...
|
||||
newline="",
|
||||
)
|
||||
OSError: [Errno 22] Invalid argument: '2025-11-06 14:01:34_ngv_data_today.csv'
|
||||
2025-11-06 14:10:36,473 - update_all_NGV_data_daily.py - error_task_logger - ERROR - NGV更新数据执行时发生异常: 'org_code'
|
||||
Traceback (most recent call last):
|
||||
File "D:\Idea Project\SaaS_V1.7\back_ground_module\update_all_NGV_data_daily.py", line 160, in main
|
||||
self._handle_deleted_stores(jdy_ngv_data, ngv_data_today)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Idea Project\SaaS_V1.7\back_ground_module\update_all_NGV_data_daily.py", line 403, in _handle_deleted_stores
|
||||
ngv_org_codes = set(ngv_current_data['org_code'].dropna().unique())
|
||||
~~~~~~~~~~~~~~~~^^^^^^^^^^^^
|
||||
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\core\frame.py", line 4107, in __getitem__
|
||||
indexer = self.columns.get_loc(key)
|
||||
File "D:\ProgramTools\anaconda3\envs\saas\Lib\site-packages\pandas\core\indexes\range.py", line 417, in get_loc
|
||||
raise KeyError(key)
|
||||
KeyError: 'org_code'
|
||||
|
||||
@@ -5,6 +5,7 @@ from config import Config
|
||||
from api import API
|
||||
from back_ground_module import CommonModule
|
||||
from log_config import configure_task_logger, configure_error_task_logger
|
||||
import time
|
||||
|
||||
logger = configure_task_logger()
|
||||
# 获取已经配置好的错误任务日志记录器
|
||||
@@ -15,8 +16,10 @@ common_module = CommonModule()
|
||||
output_dir = "output" # 设置输出目录
|
||||
# 创建输出目录(如果不存在)
|
||||
import os
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
|
||||
class UpdateNGVData:
|
||||
"""NGV数据每日新增"""
|
||||
|
||||
@@ -48,9 +51,11 @@ class UpdateNGVData:
|
||||
|
||||
data_NGV_j = common_module.get_ngv_details(days_back=1)
|
||||
data_NGV_j1 = common_module.get_ngv_details(days_back=2)
|
||||
timestamp = time.time()
|
||||
|
||||
data_NGV_j.to_csv(os.path.join(output_dir, f"{task_start_time}up_NGV_j.csv"))
|
||||
data_NGV_j1.to_csv(os.path.join(output_dir, f"{task_start_time}up_NGV_j1.csv"))
|
||||
|
||||
# data_NGV_j.to_csv(os.path.join(output_dir, f"{timestamp}up_NGV_j.csv"))
|
||||
# data_NGV_j1.to_csv(os.path.join(output_dir, f"{timestamp}up_NGV_j1.csv"))
|
||||
|
||||
# 找出在 data_NGV_j 中存在但在 data_NGV_j1 中不存在的 data_id
|
||||
unique_data_ids = data_NGV_j[~data_NGV_j['org_code'].isin(data_NGV_j1['org_code'])]
|
||||
@@ -62,6 +67,9 @@ class UpdateNGVData:
|
||||
data_NGV_j = data_NGV_j[data_NGV_j['org_type'] == '一般']
|
||||
data_NGV_j1 = data_NGV_j1[data_NGV_j1['org_type'] == '一般']
|
||||
filtered_df = new_df[new_df['org_type'] == '一般']
|
||||
filtered_df = filtered_df.copy()
|
||||
# 默认未删除
|
||||
filtered_df['源ngv是否已删除'] = '未删除'
|
||||
|
||||
# 日期字段转换为日期格式
|
||||
time_columns = ['date_fmt', 'saas_create_time', 'expiry_time', 'install_create_time', "last_end_date",
|
||||
@@ -109,14 +117,15 @@ class UpdateNGVData:
|
||||
# all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j.iterrows()] # 前一天的全部数据
|
||||
all_data = [self.row_to_dict(row, self.field_mapping) for index, row in filtered_df.iterrows()] # 增量数据
|
||||
|
||||
try:
|
||||
filtered_df.to_csv(os.path.join(output_dir, f"{task_start_time}NGV.csv"))
|
||||
except Exception as e:
|
||||
error_task_logger.error(f"NGV过滤后数据保存异常: {e}")
|
||||
pass
|
||||
# try:
|
||||
# filtered_df.to_csv(os.path.join(output_dir, f"{timestamp}NGV.csv"))
|
||||
# except Exception as e:
|
||||
# error_task_logger.error(f"NGV过滤后数据保存异常: {e}")
|
||||
# pass
|
||||
|
||||
#
|
||||
data = {'api_key': Config.SaaS_Tasks_APP_ID, 'entry_id': Config.NGV_TASKS_ENTRY_ID, "data_list": all_data,"is_start_trigger":"true"}
|
||||
data = {'api_key': Config.SaaS_Tasks_APP_ID, 'entry_id': Config.NGV_TASKS_ENTRY_ID, "data_list": all_data,
|
||||
"is_start_trigger": "true"}
|
||||
|
||||
result = api_instance.entry_data_batch_create(data)
|
||||
logger.info(f"数据已推送:{result}")
|
||||
@@ -240,7 +249,8 @@ class UpdateNGVData:
|
||||
saas_create_time_date="_widget_1749000071377",
|
||||
expiry_time_date="_widget_1749000071382",
|
||||
install_create_time_date="_widget_1749000071384",
|
||||
last_end_date_date="_widget_1749000071389", renew_date_date="_widget_1749000071391")
|
||||
last_end_date_date="_widget_1749000071389", renew_date_date="_widget_1749000071391"
|
||||
, 源NGV是否已删除="_widget_1754285499851")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -101,7 +101,7 @@ RESET_ALL_DELETED_STATUS = False # 首次运行设为True,之后设为False
|
||||
|
||||
# 【3. 并发更新配置】
|
||||
# 是否使用并发更新(多线程同时更新,速度快)
|
||||
USE_CONCURRENT_UPDATE = False # True=并发更新(快),False=串行更新(慢)
|
||||
USE_CONCURRENT_UPDATE = True # True=并发更新(快),False=串行更新(慢)
|
||||
|
||||
# 并发线程数(同时执行的更新任务数)
|
||||
# 建议值:5-20,过大可能被API限流,过小影响速度
|
||||
@@ -177,6 +177,46 @@ class UpdateAllNGVDataDaily:
|
||||
common_module.send_task_error(task_start_time, "NGV更新数据", str(e))
|
||||
raise
|
||||
|
||||
def _compose_key_values(self, org_name, group_name, org_code, id_own_group, id_own_org):
|
||||
"""将五个字段组合为稳定索引键,空值用空字符串,占位并去除首尾空格。"""
|
||||
def nv(x):
|
||||
return '' if pd.isna(x) else str(x).strip()
|
||||
parts = [nv(org_name), nv(group_name), nv(org_code), nv(id_own_group), nv(id_own_org)]
|
||||
return '||'.join(parts)
|
||||
|
||||
def _compose_key_df_ngv(self, df):
    """Add a ``composite_key`` column to an NGV DataFrame (by field name).

    The frame is modified in place and also returned. Any of the five key
    columns that is absent is first created and filled with empty strings,
    so the key can always be built.

    Args:
        df: DataFrame whose key columns are named ``org_name``,
            ``group_name``, ``org_code``, ``id_own_group`` and
            ``id_own_org``.

    Returns:
        The same DataFrame object, now carrying a ``composite_key`` column
        produced by ``self._compose_key_values`` for every row.
    """
    key_columns = ['org_name', 'group_name', 'org_code', 'id_own_group', 'id_own_org']
    for column in key_columns:
        if column not in df.columns:
            df[column] = ''

    # Walk the five key columns in lockstep instead of using iterrows():
    # iterrows() builds a Series per row (slow) and coerces the whole row to
    # one dtype, which can turn an integer code into "1.0" in the key.
    df['composite_key'] = [
        self._compose_key_values(*values)
        for values in zip(*(df[column] for column in key_columns))
    ]
    return df
|
||||
|
||||
def _compose_key_df_jdy(self, df):
    """Add a ``composite_key`` column to Jiandaoyun data (by widget column).

    Works on a copy, so the caller's frame is left untouched. The five
    widget columns listed in ``col_map`` are created as empty-string columns
    when absent, then renamed to their NGV field names, and the key is built
    from the renamed columns with ``self._compose_key_values``.

    Args:
        df: DataFrame whose columns are Jiandaoyun widget ids.

    Returns:
        A new DataFrame with the five widget columns renamed to
        ``org_name``, ``group_name``, ``org_code``, ``id_own_group`` and
        ``id_own_org``, plus a ``composite_key`` column.
    """
    # Widget id -> field name used when composing the key.
    col_map = {
        '_widget_1734062123070': 'org_name',
        '_widget_1734062123068': 'group_name',
        '_widget_1734062123071': 'org_code',
        '_widget_1734062123067': 'id_own_group',
        '_widget_1734062123069': 'id_own_org',
    }
    # Argument order expected by _compose_key_values.
    key_fields = ('org_name', 'group_name', 'org_code', 'id_own_group', 'id_own_org')

    tmp = df.copy()
    for wid in col_map:
        if wid not in tmp.columns:
            tmp[wid] = ''
    tmp_renamed = tmp.rename(columns=col_map)

    # After the fill + rename above all five fields exist, so iterate the
    # columns in lockstep rather than via iterrows(), which is slow and
    # coerces each row to a single dtype before the values are stringified.
    tmp_renamed['composite_key'] = [
        self._compose_key_values(*values)
        for values in zip(*(tmp_renamed[name] for name in key_fields))
    ]
    return tmp_renamed
|
||||
|
||||
def _load_base_data(self):
|
||||
"""
|
||||
步骤1: 加载基础数据
|
||||
@@ -263,9 +303,12 @@ class UpdateAllNGVDataDaily:
|
||||
ngv_data_1 = common_module.get_ngv_details(days_back=1)
|
||||
ngv_data_2 = common_module.get_ngv_details(days_back=2)
|
||||
|
||||
import time
|
||||
nowtime = time.time()
|
||||
|
||||
# 存储每天获取到的数据
|
||||
ngv_data_1.to_csv(f"{task_start_time}_ngv_data_today.csv", index=False)
|
||||
ngv_data_2.to_csv(f"{task_start_time}_ngv_data_yesterday.csv", index=False)
|
||||
ngv_data_1.to_csv(f"{nowtime}_ngv_data_today.csv", index=False)
|
||||
ngv_data_2.to_csv(f"{nowtime}_ngv_data_yesterday.csv", index=False)
|
||||
|
||||
# 只保留 org_type 为 "一般" 的记录
|
||||
ngv_data_1 = ngv_data_1[ngv_data_1['org_type'] == '一般']
|
||||
@@ -360,7 +403,7 @@ class UpdateAllNGVDataDaily:
|
||||
ngv_org_codes = set(ngv_current_data['org_code'].dropna().unique())
|
||||
jdy_org_codes_unique = set(temp_jdy_data['org_code'].dropna().unique())
|
||||
|
||||
# 找出在简道云存在但NGV中不存在的门店(唯一org_code)
|
||||
# 找出在简道云存在但NGV中不存在的门店(唯一复合索引)
|
||||
missing_org_codes = jdy_org_codes_unique - ngv_org_codes
|
||||
|
||||
if len(missing_org_codes) == 0:
|
||||
@@ -401,7 +444,7 @@ class UpdateAllNGVDataDaily:
|
||||
logger.info("步骤4: 开始对比数据变化...")
|
||||
|
||||
# 移除不需要对比的列
|
||||
columns_to_remove = {'date_id', 'date_fmt', 'pt', 'etl_time'}
|
||||
columns_to_remove = {'date_id', 'date_fmt', 'pt', 'etl_time','id_own_org'}
|
||||
|
||||
# 过滤列
|
||||
df1_filtered = ngv_today[[col for col in ngv_today.columns if col not in columns_to_remove]]
|
||||
@@ -440,7 +483,7 @@ class UpdateAllNGVDataDaily:
|
||||
# 只保留不一致的数据
|
||||
changed_data = df1_common[df1_common['match_status'] == '不一致'].copy()
|
||||
|
||||
# 关联简道云的_id
|
||||
# 关联简道云的_id(基于org_code)
|
||||
temp_jdy = jdy_ngv_data.copy()
|
||||
temp_jdy.reset_index(drop=True, inplace=True)
|
||||
|
||||
@@ -653,13 +696,16 @@ class UpdateAllNGVDataDaily:
|
||||
|
||||
# 执行创建
|
||||
create_count = 0
|
||||
if len(create_data_list) > 0:
|
||||
if USE_BATCH_CREATE:
|
||||
create_count = self._batch_create(create_data_list)
|
||||
else:
|
||||
create_count = self._single_create(create_data_list)
|
||||
# 输出新增数据
|
||||
self._save_create_data(create_data_list)
|
||||
|
||||
# create_df = pd.DataFrame(create_data_list)
|
||||
# create_df.to_csv(f"create_data.csv", index=False)
|
||||
# if len(create_data_list) > 0:
|
||||
# if USE_BATCH_CREATE:
|
||||
# create_count = self._batch_create(create_data_list)
|
||||
# else:
|
||||
# create_count = self._single_create(create_data_list)
|
||||
# # 输出新增数据
|
||||
# self._save_create_data(create_data_list)
|
||||
|
||||
logger.info(f" ✓ 同步完成: 更新 {update_count} 条, 创建 {create_count} 条")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user