Compare commits

...

10 Commits

Author SHA1 Message Date
panda 73e91234e7 ngv添加保存数据 2025-10-20 10:39:45 +08:00
panda b799d383df ngv添加保存数据 2025-10-15 15:19:36 +08:00
panda d6c6ead9f3 宜搭api完善 2025-10-11 15:35:15 +08:00
panda 47c42d1a19 非标业绩提报、合伙人新增字段 2025-09-24 14:09:08 +08:00
panda 54dc3d02ec 异常回访新增过滤逻辑 2025-09-24 13:46:22 +08:00
panda 6321bb4cf8 高德匹配手机号 2025-09-23 16:35:34 +08:00
panda 30cacc7da2 借车宝数据链接空值处理更新 2025-09-18 10:22:52 +08:00
panda a33518e078 泰国新增2字段 2025-09-10 15:08:57 +08:00
panda c24e5cfceb 非标业绩提报新增2字段 2025-09-02 10:48:23 +08:00
panda 2840d4871a 简道云成员id与字段监控分离 2025-08-28 11:03:46 +08:00
35 changed files with 30454 additions and 771 deletions
+21
View File
@@ -3,6 +3,20 @@
<component name="CsvFileAttributes">
<option name="attributeMap">
<map>
<entry key="\back_ground_module\CRM.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="\back_ground_module\DF.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="\db\task_queue.csv">
<value>
<Attribute>
@@ -17,6 +31,13 @@
</Attribute>
</value>
</entry>
<entry key="\test\feibiao.csv">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
</map>
</option>
</component>
+61 -9
View File
@@ -106,7 +106,7 @@ class API:
if data_get["data"]:
all_data_batches.extend(data_get['data'])
last_data_id = data_get['data'][-1].get('_id')
logger.info(f"已获取 {len(all_data_batches)} 条数据")
print(f"已获取 {len(all_data_batches)} 条数据")
break # 成功则跳出循环
else:
if 'data' not in data_get or len(data_get['data']) == 0:
@@ -130,6 +130,7 @@ class API:
final_data = {
'data': all_data_batches # 'data' 键对应的值是列表的列表
}
logger.info(f"获取了{len(all_data_batches)}条数据")
if replace:
print("进行了替换")
return_data = self.field_replacement(data, final_data) # 字段替换,由id替换为标签名
@@ -215,12 +216,6 @@ class API:
'Authorization': Config.JIANDAOYUN_API_TOKEN, # 曹伟应用api测试 app_key
'Content-Type': 'application/json'
}
"""
data 样式 # 后续优化发送数据样式 目前输入字段,后续优化输入表单名称
jiandaoyun_data['data'] = {"_widget_1731650067055":{"value":f'{username}{password}'},
"_widget_1731650067056":{"value": f"{group}"}}
"""
# noinspection DuplicatedCode
payload = json.dumps({
"app_id": data['api_key'], # 应用ID
@@ -303,6 +298,7 @@ class API:
while retries <= max_retries:
try:
res: requests.Response = requests.post(url=url, data=payload, headers=headers, timeout=10)
# print(res.json())
res.raise_for_status() # 检查HTTP响应状态码,如果不等于200会抛出异常
data_get = res.json()
if data_get["status"] == "success":
@@ -353,6 +349,7 @@ class API:
res: requests.Response = requests.post(url=url, data=payload, headers=headers, timeout=10)
res.raise_for_status() # 检查HTTP响应状态码,如果不等于200会抛出异常
data_get = res.json()
# print(data_get)
if res.status_code == 200:
break # 成功则跳出循环
else:
@@ -368,6 +365,61 @@ class API:
continue
return data_get
@staticmethod
def entry_data_banch_update(data: dict, max_retries: int = 20, chunk_size: int = 90) -> list[dict]:
    """
    Batch-update form records, sending the record ids in chunks.

    :param data: request description; this method reads ``api_key`` (sent as
        ``app_id``), ``entry_id``, ``data_ids`` (ids of the records to update)
        and ``data`` (the field values sent with every chunk)
    :param max_retries: maximum number of retries per chunk (default 20)
    :param chunk_size: number of record ids sent per request (default 90)
    :return: the decoded JSON response of every chunk answered with HTTP 200;
        a chunk that exhausts its retries contributes no entry
    """
    url = 'https://api.jiandaoyun.com/api/v5/app/entry/data/batch_update'
    headers = {
        'Authorization': Config.JIANDAOYUN_API_TOKEN,
        'Content-Type': 'application/json'
    }
    data_ids = data['data_ids']
    total_length = len(data_ids)
    logger.info(f"多数据写入行数: {total_length}")

    data_get_list = []
    # Stepping by chunk_size covers every id, including a short final chunk.
    for start_index in range(0, total_length, chunk_size):
        chunk_ids = data_ids[start_index:start_index + chunk_size]
        # NOTE(review): the ids are sent under "data_list"; confirm against the
        # Jiandaoyun batch_update API reference that this is the expected field
        # name (it may need to be "data_ids").
        payload = json.dumps({
            "app_id": data['api_key'],  # application id
            "entry_id": data['entry_id'],  # form id
            "data_list": chunk_ids,
            "data": data['data']
        }, cls=NpEncoder)

        retries = 0
        while retries <= max_retries:
            try:
                res: requests.Response = requests.post(url=url, data=payload, headers=headers, timeout=10)
                res.raise_for_status()  # raises for 4xx/5xx responses
                data_get = res.json()
            except requests.exceptions.RequestException as e:
                logger.warning(f"请求异常: {e}, 将重新请求")
                retries += 1
                time.sleep(10)  # pause before retrying after a transport/HTTP error
                continue
            if res.status_code == 200:
                data_get_list.append(data_get)
                break  # chunk done
            # Non-error status other than 200: retry after a shorter pause.
            logger.warning("请求异常, 将重新请求")
            retries += 1
            time.sleep(3)

        if retries > max_retries:
            # The batch payload carries 'data_ids', not 'data_id'; the original
            # lookup raised KeyError exactly when a failure had to be reported.
            task_ref = data.get('data_id', chunk_ids)
            error_task_logger.error(f"任务 {task_ref} 连续{max_retries}次请求失败,放弃此次请求。")
    return data_get_list
@staticmethod
def entry_data_delete(data: dict, max_retries: int = 20, ) -> dict:
"""
@@ -407,7 +459,7 @@ class API:
# 检查其他状态码
res.raise_for_status() # 只对非 4001 的状态码进行检查
logger.info(f"返回结果:, {delete_status}")
# logger.info(f"返回结果:, {delete_status}")
if res.status_code == 200:
break # 成功则跳出循环
else:
@@ -548,7 +600,7 @@ class API:
"username": data["username"],
"instance_id": data["instance_id"],
"task_id": data['task_id'],
"comment": "自动转交"
"comment": ""
}
)
+76 -1
View File
@@ -16,7 +16,8 @@ logger = configure_task_logger()
# 获取已经配置好的错误任务日志记录器
error_task_logger = configure_error_task_logger()
output_dir = "output" # 设置输出目录
os.makedirs(output_dir, exist_ok=True)
class NewExceptionTask:
"""
@@ -24,6 +25,7 @@ class NewExceptionTask:
"""
def __init__(self):
self.exception_service_todo = None
self.get_feature_usage = None
self.saas_create_time = None
self.index = None
@@ -44,6 +46,47 @@ class NewExceptionTask:
self.different_industries = None
self.different_industries_list = None
self.groupnotification = None
self.fields_mapping = {
"门店名称": "_widget_1748241895830",
"联系人": "_widget_1748241895831",
"开户时间": "_widget_1748241895839",
"门店编码": "_widget_1748241895842",
"联系方式": "_widget_1748241895832",
"系统版本": "_widget_1748241895850",
"公司名称": "_widget_1748241895844",
"运营顾问": "_widget_1748246808679",
"区域经理": "_widget_1748246808682",
"公司等级": "_widget_1748241895846",
"运营专家": "_widget_1748246808681",
"操作模式E.L/E.S": "_widget_1748241895853",
"活跃健康状态变化": "_widget_1748241895829",
"初始日": "_widget_1748241895833",
"推进日": "_widget_1748241895834",
"异常跟进情况描述": "_widget_1748512176640",
"异常变化原因": "_widget_1748512176641",
"正常使用": "_widget_1748512176643",
"门店原因": "_widget_1748512176645",
"服务原因": "_widget_1748512176647",
"产品原因": "_widget_1748512176649",
"未正式切换": "_widget_1748512176651",
"跟进状态": "_widget_1748512176655",
"是否可激活": "_widget_1758615839701",
"是否有续约风险": "_widget_1758615839703",
"当前跟进人": "_widget_1748246808678",
"激活策略": "_widget_1758615839717",
"跟进时间": "_widget_1748512176654",
"是否跟进完成": "_widget_1751273412737",
"区域客服": "_widget_1748246808680",
"大区": "_widget_1748241895847",
"": "_widget_1748241895848",
"城市": "_widget_1748241895855",
"门店类型": "_widget_1748241895849",
"saas客户类型": "_widget_1748241895851",
"门店阶段": "_widget_1748241895852",
"提交人": "creator",
"提交时间": "createTime",
"更新时间": "updateTime"
}
def calculate_date_one(self, start_offset=0):
"""
@@ -131,6 +174,11 @@ class NewExceptionTask:
self.NGV_data_list = api_instance.entry_data_list(payload).get("data", [])
# print("NGV获取后的类型:", type(self.NGV_data_list))
# 获取异常服务待办
payload = {"api_key": "675b900991ad2491c69389ca", "entry_id": "68340de79f116c0b66b6b0cc"}
self.exception_service_todo = api_instance.entry_data_list(payload).get("data", [])
print(self.exception_service_todo)
@staticmethod
def build_index(json_list):
index = {}
@@ -186,6 +234,7 @@ class NewExceptionTask:
logger.info("开始运行SaaS异常回访")
data_yichang = self.data_yichang_S.copy()
# data_yichang.to_csv(os.path.join(output_dir,"data_yichang.csv"), index=False)
def replace_values(series):
# 使用条件判断来进行替换
@@ -196,6 +245,16 @@ class NewExceptionTask:
for index_num, row in data_yichang.iterrows(): # 对过滤后的每一条进行派发
try:
is_pass = False
for exception_service in self.exception_service_todo :
if exception_service['_widget_1748241895842'] == row['org_code'] and exception_service['_widget_1748512176655'] in ['未处理', '处理中']:
is_pass = True
break
if is_pass:
logger.info(f"已存在待办,跳过该条记录: {row}")
continue
payload_dict = {}
distribution_date = datetime.datetime.now(datetime.timezone.utc)
@@ -225,6 +284,8 @@ class NewExceptionTask:
UUid = time.strftime("%Y%m%d%H%M%S", time.localtime())
NGV_data_id = None
reason = None
create_exception =None
# 获取关联数据
for NGV_Data in self.NGV_data_list:
# NGV_Data = NGV_Data.get("data")
@@ -233,7 +294,20 @@ class NewExceptionTask:
province_name = NGV_Data.get("_widget_1734062123090")
city_name = NGV_Data.get("_widget_1734062123092")
area_name = NGV_Data.get("_widget_1734062123094")
# 门店原因
reason = NGV_Data.get("_widget_1758617393828")
logger.info(f"获取关联数据成功:{NGV_data_id}, {province_name}, {city_name}, {area_name}")
# 是否生成异常待办
create_exception = NGV_Data.get("_widget_1758769279995")
# 判断门店原因
# if reason in ["门店倒闭", "门店转让", "加盟其他连锁","切换竞品","虚拟门店","重新开户","已退款","二套系统"]:
# continue
# 判断是否继续生成异常待办
if create_exception == "":
continue
if not NGV_data_id:
logger.warning(f"未找到关联数据,请检查门店编码: {row['org_code']}")
@@ -261,6 +335,7 @@ class NewExceptionTask:
# 推进日
"_widget_1748246808678": {"value": customer_service}, # 当前跟进人
# "_widget_1748246808678": {"value": "083726094935447433"}, # 当前跟进人
"_widget_1748246808679": {"value": relationship_manager}, # 运营负责人
+211
View File
@@ -0,0 +1,211 @@
from datetime import datetime
import os
from config import Config
import pandas as pd
from back_ground_module import CommonModule
from api import API
from log_config import configure_task_logger, configure_error_task_logger
import requests
import numpy as np # 确保导入numpy(如果涉及numpy数组)
# Module-level singletons shared by the task below.
# Regular task logger and the separate logger used for failed tasks.
logger = configure_task_logger()
error_task_logger = configure_error_task_logger()
output_dir = "output" # output directory
# Create the output directory at import time if it does not exist yet.
os.makedirs(output_dir, exist_ok=True)
# Helper used by main() to report the task status.
common_module = CommonModule()
# API wrapper used for every form read/write in this module.
api_instance = API()
class GDMatchPhoneNumber:
def __init__(self):
# Rows fetched by load_all_data(); stays None until that method has run.
self.loader_company_data = None
# Column label -> widget id of the source form. match_phone_number() inverts
# this mapping to rename the fetched widget-id columns to labels.
# NOTE(review): three entries below share the empty-string key, so the dict
# keeps only the last one ("_widget_1758594869259"); the real labels appear
# to be missing in this view -- confirm against the repository.
self.fild_mapping = {
"是否已查询": "_widget_1758594869262",
"": "_widget_1758594869257",
"": "_widget_1758594869258",
"": "_widget_1758594869259",
"公司名称": "_widget_1758594869260",
"详细地址": "_widget_1758594869261",
}
# Result column -> widget id of the upload form. upload_df() passes this
# mapping to row_to_dict() to build the upload payload; the first five keys
# are the source-row columns that match_phone_number() adds to every POI.
self.upload_fild_mapping = {
"源文件省": "_widget_1758598285406",
"源文件市": "_widget_1758598285407",
"源文件区": "_widget_1758598285408",
"源文件地址": "_widget_1758598285409",
"源文件门店店名": "_widget_1758598285410",
"名称相似度": "_widget_1758598285411",
"地址相似度": "_widget_1758598285412",
"综合相似度": "_widget_1758598285413",
"address": "_widget_1758598285387",
"pname": "_widget_1758598285389",
"cityname": "_widget_1758598285393",
"adname": "_widget_1758598285400",
"name": "_widget_1758598285401",
"tel": "_widget_1758598285403",
"parent": "_widget_1758598285386",
"distance": "_widget_1758598285388",
"importance": "_widget_1758598285390",
"biz_ext": "_widget_1758598285391",
"biz_type": "_widget_1758598285392",
"type": "_widget_1758598285394",
"photos": "_widget_1758598285395",
"typecode": "_widget_1758598285396",
"shopinfo": "_widget_1758598285397",
"poiweight": "_widget_1758598285398",
"childtype": "_widget_1758598285399",
"location": "_widget_1758598285402",
"shopid": "_widget_1758598285404",
"id": "_widget_1758598285405"
}
def load_all_data(self):
    """Fetch the rows of the source form and cache them on the instance.

    The API wrapper returns a dict whose ``"data"`` entry holds the rows;
    that entry is stored in ``self.loader_company_data``.
    """
    request_args = {
        "api_key": "66f3a68c6e56814df2c6b1af",
        "entry_id": "68d20734a9add4c6126ee9f2",
    }
    self.loader_company_data = api_instance.entry_data_list(request_args).get("data")
@staticmethod
def row_to_dict(row, field_mapping):
"""将一行数据转换为指定格式的字典"""
result = {}
for col_name, widget_id in field_mapping.items():
if col_name in row:
value = row[col_name]
# 处理空数组/列表的情况
if isinstance(value, (list, np.ndarray)):
if len(value) == 0:
clean_value = None # 空数组视为None
else:
clean_value = value # 非空数组保留原值
# 处理缺失值
elif pd.isna(value):
clean_value = None
# 处理时间戳
elif isinstance(value, pd.Timestamp):
clean_value = value.strftime('%Y-%m-%dT%H:%M:%SZ')
else:
clean_value = value
result[widget_id] = {"value": clean_value}
return result
def match_phone_number(self):
# Query the Amap place-text endpoint for the fetched rows and return every
# returned POI, annotated with the source-row columns, as one DataFrame.
# NOTE(review): indentation is missing in this view, so the exact nesting of
# `count += 1`, the status write-back and the `count > 300` check cannot be
# determined here -- confirm against the repository before restructuring.
# NOTE(review): several literals below are empty strings ("" / row[""]);
# they look like lost single-character labels -- confirm the real values.
# Rename the widget-id columns to labels by inverting fild_mapping.
df = pd.DataFrame(self.loader_company_data)
reserve_mapping = {v: k for k, v in self.fild_mapping.items()}
df.rename(columns=reserve_mapping, inplace=True)
# Query counter: above 150 the search switches API key, above 300 the loop stops.
count = 0
# NOTE(review): the URL ends with a literal "?parameters"; the real query
# parameters are supplied separately via `params` in search_amap().
url = "https://restapi.amap.com/v3/place/text?parameters"
all_data = []
for index, row in df.iterrows():
# As written, rows whose "是否已查询" cell equals "" are skipped.
if row["是否已查询"] == "":
continue
# Clean the detailed address: remove two substrings taken from the row,
# then collapse runs of whitespace into single spaces.
cleaned = row['详细地址'].replace(row[''], '').strip()
cleaned = cleaned.replace(row[''], '').strip()
cleaned = ' '.join(cleaned.split())
row["详细地址"] = cleaned
# Special handling for the four municipalities when a field equals "市辖区".
if row[""] in ["天津市", "上海市", "重庆市", "北京市"] and row[""] == "市辖区":
row[""] = row[""]
# Search keyword: company name without the "(个体工商户)" suffix.
key_words = row["公司名称"].replace("(个体工商户)", "").strip()
region = row[""]
# Assigned but not read anywhere in this method.
detail_address = row["详细地址"]
# Helper defined inside the loop: one GET against `url`, returning the
# "pois" list of the JSON response (empty list when the key is absent).
# It reads `count` from the enclosing scope; no timeout is passed.
# NOTE(review): API keys are hard-coded here; consider moving them to configuration.
# NOTE(review): the endpoint is v3 while city_limit/page_size/page_num look
# like v5 parameter names -- confirm against the Amap documentation.
def search_amap(keywords, region, page_num):
params = {
# "key": "f61b09d406ac49f8a034bf585e60c442",
"key": "273b328f2e85b7e1ad6faa0d4f33ccf2",
"keywords": keywords,
"types": "010400|010500|010800|020000|030000",
"city":region,
# "region": region,
"city_limit": "true",
"page_size": "20",
"page_num": str(page_num)
}
# Switch to the second key once more than 150 queries have been counted.
if count > 150:
params.update({"key": "f61b09d406ac49f8a034bf585e60c442"})
res = requests.get(url=url, params=params)
# print(res.json())
return res.json().get("pois", [])
# Initial search keyword.
current_keywords = key_words
max_pages = 2 # request at most 2 pages
for page_num in range(1, max_pages + 1):
pois = search_amap(current_keywords, region, page_num)
# Tag every POI with the source-row columns so the upload form can show both.
for poi in pois:
poi.update({"源文件省": row[""]})
poi.update({"源文件市": row[""]})
poi.update({"源文件区": row[""]})
poi.update({"源文件地址": row["详细地址"]})
poi.update({"源文件门店店名": row["公司名称"]})
all_data.append(poi)
count += 1
# Write the "是否已查询" widget (_widget_1758594869262) back to the source row.
modify_payload = {
"api_key": "66f3a68c6e56814df2c6b1af",
"entry_id": "68d20734a9add4c6126ee9f2",
"data_id": row["_id"],
"data":
{
"_widget_1758594869262": {"value":""}
}
}
# print(modify_payload)
api_instance.entry_data_update(modify_payload)
# Stop once more than 300 queries have been counted.
if count > 300:
break
result_df = pd.DataFrame(all_data)
return result_df
def upload_df(self, result_df):
    """Upload every row of ``result_df`` to the result form in one batch call.

    Each row is converted with ``row_to_dict`` using ``upload_fild_mapping``
    and the resulting list is handed to ``api_instance.entry_data_batch_create``.

    :param result_df: DataFrame returned by ``match_phone_number``
    """
    records = []
    for _, record in result_df.iterrows():
        records.append(self.row_to_dict(record, self.upload_fild_mapping))
    api_instance.entry_data_batch_create({
        "api_key": "66f3a68c6e56814df2c6b1af",
        "entry_id": "68d2148d8bcb4d1716b1c03f",
        "data_list": records,
    })
def main(self):
    """
    Run the task end to end: load the source rows, query Amap, upload the matches.

    On success the task status is reported through
    ``common_module.send_task_status`` with the start time captured here.

    :raises Exception: any error from the three steps is written to the
        error-task logger, now including the exception itself, and re-raised
    """
    task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    try:
        # Fetch the source rows.
        self.load_all_data()
        logger.info("数据加载完成。")
        # Query the Amap API for every pending row.
        result_df = self.match_phone_number()
        logger.info("数据匹配完成。")
        # Upload the matches to the result form.
        self.upload_df(result_df)
        logger.info("数据上传完成。")
    except Exception as e:
        # The original bound the exception but logged only a fixed message,
        # leaving the error log without any cause. Reporting through
        # common_module.send_task_error stays disabled, as in the original.
        error_task_logger.error(f"任务高德匹配手机号执行失败。{e!r}")
        raise
    common_module.send_task_status(task_start_time, "高德匹配手机号")
# Run the matching task when this module is executed as a script.
if __name__ == '__main__':
    GDMatchPhoneNumber().main()
+34 -19
View File
@@ -73,7 +73,7 @@ class JCBEfficientCarPickup:
# 调换两个子列表的位置并重新组合
today_customer_service_start_list = first_part + second_part
else:
# 如果没有找到“是”,保持原列表不变
# 如果没有找到"是",保持原列表不变
today_customer_service_start_list = all_customer_service_list
pass
@@ -84,6 +84,10 @@ class JCBEfficientCarPickup:
return today_customer_service_list, is_customer_service_data_id, all_customer_service_list
def send_request(self, df):
if df is None or df.empty: # 检查DataFrame是否为None或空
logger.info("当前派发数据为空或None,跳过此派发")
return
today_customer_service_list, is_customer_service_data_id, all_customer_service_list = self.today_customer_service_list()
# 初始化派发索引
next_dispatcher_index = 0
@@ -138,6 +142,10 @@ class JCBEfficientCarPickup:
try:
logger.info(f"接车宝日常回访开始执行")
data_JCB = common_module.get_jcb_details()
if data_JCB is None:
logger.error("获取接车宝数据失败,返回None")
raise ValueError("获取接车宝数据失败,返回None")
logger.info(f"数据加载完成")
# data_JCB.to_csv(os.path.join(output_dir, 'JCB_all_data.csv'), index=False)
@@ -164,12 +172,14 @@ class JCBEfficientCarPickup:
row['日期'] = row["日期"].strftime("%Y-%m-%d")
new_sign_abnormal.append(row)
new_sign_abnormal = pd.DataFrame(new_sign_abnormal)
new_sign_abnormal["表单类型"] = "新签异常待办"
new_sign_abnormal["派发日期"] = current_date_str
self.send_request(new_sign_abnormal) # 发送请求
logger.info(f"新签异常待办回访完成")
new_sign_abnormal = pd.DataFrame(new_sign_abnormal) if new_sign_abnormal else None
if new_sign_abnormal is not None and not new_sign_abnormal.empty:
new_sign_abnormal["表单类型"] = "新签异常待办"
new_sign_abnormal["派发日期"] = current_date_str
self.send_request(new_sign_abnormal) # 发送请求
logger.info(f"新签异常待办回访完成")
else:
logger.info(f"新签异常待办回访无数据,跳过")
# 优质客户转商机
# current_date = datetime.now()
@@ -199,12 +209,14 @@ class JCBEfficientCarPickup:
# 推送给客服
pass
customer_to_opportunity = pd.DataFrame(customer_to_opportunity)
customer_to_opportunity["表单类型"] = "续约优质客户转商机"
customer_to_opportunity["派发日期"] = current_date_str
self.send_request(customer_to_opportunity)
logger.info(f"优质客户转商机完成")
customer_to_opportunity = pd.DataFrame(customer_to_opportunity) if customer_to_opportunity else None
if customer_to_opportunity is not None and not customer_to_opportunity.empty:
customer_to_opportunity["表单类型"] = "续约优质客户转商机"
customer_to_opportunity["派发日期"] = current_date_str
self.send_request(customer_to_opportunity)
logger.info(f"优质客户转商机完成")
else:
logger.info(f"优质客户转商机无数据,跳过")
# 过期7天客服回访
# current_date = datetime.now()
@@ -224,11 +236,14 @@ class JCBEfficientCarPickup:
# 推送给客服
pass
outdated_30 = pd.DataFrame(outdated_30)
outdated_30["表单类型"] = "过期7天回访"
outdated_30["派发日期"] = current_date_str
self.send_request(outdated_30)
logger.info(f"过期7天客服回访完成")
outdated_30 = pd.DataFrame(outdated_30) if outdated_30 else None
if outdated_30 is not None and not outdated_30.empty:
outdated_30["表单类型"] = "过期7天回访"
outdated_30["派发日期"] = current_date_str
self.send_request(outdated_30)
logger.info(f"过期7天客服回访完成")
else:
logger.info(f"过期7天客服回访无数据,跳过")
common_module.send_task_status(task_start_time, "接车宝日常派发")
logger.info(f"接车宝日常派发执行完成")
@@ -266,4 +281,4 @@ class JCBEfficientCarPickup:
if __name__ == "__main__":
start = JCBEfficientCarPickup()
start.main()
start.main()
+1
View File
@@ -26,3 +26,4 @@ from back_ground_module.data_monitor import DataMonitor
from back_ground_module.new_dealer_service_order_to_bi import NewDealerServiceOrderToBI
from back_ground_module.non_standar_performance_to_BI import NonStandardPerformanceToBI
from back_ground_module.partner_settlement_to_BI import PartnerSettlementToBI
from back_ground_module.GD_match_phone_number import GDMatchPhoneNumber
+1
View File
@@ -7,6 +7,7 @@ import pymysql
from api import API
from log_config import configure_task_logger, configure_error_task_logger
api_instance = API()
# 获取已经配置好的常规日志记录器
logger = configure_task_logger()
+321 -123
View File
@@ -1,21 +1,40 @@
"""字段监控(多平台适配版)"""
import sys
import platform
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pandas as pd
import zipfile
import json
from datetime import datetime, timezone, timedelta, date
import requests
from typing import Optional, List, Dict, Any
from decimal import Decimal
import time
from api import API
from log_config import configure_task_logger, configure_error_task_logger
from back_ground_module import CommonModule
import numpy as np
import json
def replace_decimals(obj):
    """Return ``obj`` with every ``Decimal`` converted to ``float``.

    Dicts and lists are rebuilt recursively (dict keys are kept as they are);
    any other value, including tuples, is returned unchanged.
    """
    if isinstance(obj, Decimal):
        return float(obj)
    if isinstance(obj, list):
        return [replace_decimals(element) for element in obj]
    if isinstance(obj, dict):
        return {key: replace_decimals(value) for key, value in obj.items()}
    return obj
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values to built-in Python types.

    Handles NumPy booleans, integers, floating-point scalars and arrays;
    anything else is delegated to ``json.JSONEncoder``, which raises
    ``TypeError`` for unsupported objects.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        :param obj: a value the standard encoder could not serialise
        :return: ``bool``/``int``/``float``/``list`` for NumPy inputs
        :raises TypeError: for objects of any other unsupported type
        """
        # np.bool_ is not a subclass of np.integer, so it needs its own branch;
        # without it a boolean scalar raised TypeError during serialisation.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
# 初始化日志记录器
logger = configure_task_logger()
error_task_logger = configure_error_task_logger()
common_tools = CommonModule()
# ---------------------------- 配置项(多平台适配)---------------------------
class Config:
@@ -27,12 +46,21 @@ class Config:
DATA_DIR = OUTPUT_DIR / "data_snapshots"
ARCHIVE_DIR = OUTPUT_DIR / "archives"
# API配置
JIANDAOYUN_API_TOKEN = "Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN"
# 运行参数
RETAIN_DAYS = 7
COMPRESS_FORMAT = "zip"
MAX_RETRIES = 3
RETRY_DELAY = 0.5 # 秒
# 监控表单配置
MONITOR_APP_ID = "6694d3c4fcb69ca9a111a6c4"
MONITOR_ENTRY_ID = "6850c044f17c934b3ec01fea"
CHANGES_ENTRY_ID = "6863a402a77925690a470cc5"
STATUS_ENTRY_ID = "67ede908eb9c22261016466e"
@classmethod
def get_log_file(cls):
"""获取跨平台兼容的日志文件路径"""
@@ -43,6 +71,7 @@ class Config:
"""获取跨平台兼容的变更记录文件路径"""
return cls.OUTPUT_DIR / "changes_summary.csv"
# ---------------------- 工具函数(多平台兼容)-----------------------
class Utils:
@staticmethod
@@ -51,10 +80,9 @@ class Utils:
path = Path(path) if not isinstance(path, Path) else path
try:
path.mkdir(parents=True, exist_ok=True)
logger.debug(f"Directory ensured: {path}")
return True
except Exception as e:
error_task_logger.error(f"Failed to create directory {path}: {str(e)}")
print(f"创建目录失败: {e}")
return False
@staticmethod
@@ -83,19 +111,19 @@ class Utils:
temp_file.rename(file_path)
return True
except Exception as e:
error_task_logger.error(f"Failed to write {file_path}: {str(e)}")
print(f"CSV写入失败: {e}")
if temp_file.exists():
temp_file.unlink()
return False
# ---------------------- API客户端(多平台兼容)-----------------------
class APIClient:
def __init__(self):
self.headers = {
'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN',
'Authorization': Config.JIANDAOYUN_API_TOKEN,
'Content-Type': 'application/json'
}
self.api = API()
def request(self, url, payload, method='POST'):
"""带重试机制的API请求"""
@@ -113,7 +141,106 @@ class APIClient:
if retry == Config.MAX_RETRIES:
raise
time.sleep(Config.RETRY_DELAY)
logger.warning(f"Request failed (attempt {retry + 1}/{Config.MAX_RETRIES}): {str(e)}")
def data_batch_create(self, data: dict, max_retries: int = 20) -> Optional[Dict]:
    """Create a single form record (despite the name, one record per call).

    :param data: reads ``api_key`` (sent as ``app_id``), ``entry_id`` and
        ``data``; ``is_start_workflow``, ``is_start_trigger`` and
        ``transaction_id`` are optional
    :param max_retries: number of retries after the first attempt
    :return: decoded JSON response, or ``None`` when every attempt failed
    """
    url = 'https://api.jiandaoyun.com/api/v5/app/entry/data/create'
    body = json.dumps({
        "app_id": data['api_key'],
        "entry_id": data['entry_id'],
        "data": data['data'],
        "is_start_workflow": data.get('is_start_workflow', "false"),
        "is_start_trigger": data.get('is_start_trigger', "false"),
        "transaction_id": data.get('transaction_id', "")
    })

    attempt = 0
    while attempt <= max_retries:
        try:
            response = requests.post(url=url, data=body, headers=self.headers, timeout=10)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            # Give up on the last attempt; otherwise wait before retrying.
            if attempt == max_retries:
                print(f"创建数据失败: {e}")
                return None
            time.sleep(3)
        attempt += 1
    return None
def entry_data_list(self, data: dict, max_retries: int = 20) -> Dict:
    """Fetch all rows of a form, 100 per request.

    The ``_id`` of the last row of each page is sent as ``data_id`` with the
    next request. Paging stops on an empty page or a page with fewer than
    100 rows. If a page still fails after all retries, the rows collected so
    far are returned.

    :param data: reads ``api_key`` (sent as ``app_id``) and ``entry_id``
    :param max_retries: number of retries per page after the first attempt
    :return: ``{"data": [...]}`` with every row collected
    """
    url = 'https://api.jiandaoyun.com/api/v5/app/entry/data/list'
    collected = []
    cursor = None

    while True:
        body = json.dumps({
            "app_id": data['api_key'],
            "entry_id": data['entry_id'],
            "limit": 100,
            "data_id": cursor
        })

        attempt = 0
        while attempt <= max_retries:
            try:
                response = requests.post(url=url, data=body, headers=self.headers, timeout=10)
                response.raise_for_status()
                data_get = response.json()
                rows = data_get.get("data")
                if not rows:
                    # Empty page: nothing more to read.
                    return {"data": collected}
                collected.extend(rows)
                cursor = rows[-1].get('_id')
                break
            except requests.exceptions.RequestException as e:
                if attempt == max_retries:
                    print(f"获取数据列表失败: {e}")
                    return {"data": collected}
                time.sleep(0.1)
            attempt += 1

        # A short page means the last page has been reached.
        if not data_get.get("data") or len(data_get['data']) < 100:
            break

    return {"data": collected}
def entry_data_batch_create(self, data: dict, chunk_size: int = 90, max_retries: int = 20) -> List[Optional[Dict]]:
    """Create many form records, sending ``data_list`` in chunks.

    ``Decimal`` values are converted to ``float`` first and NumPy values are
    serialised through ``NpEncoder``.

    :param data: reads ``api_key`` (sent as ``app_id``), ``entry_id`` and
        ``data_list``; ``is_start_workflow`` and ``is_start_trigger`` are optional
    :param chunk_size: number of records sent per request
    :param max_retries: number of retries per chunk after the first attempt
    :return: one entry per chunk, in order: the decoded response whose
        ``status`` is ``"success"``, or ``None`` when the chunk failed on
        every attempt
    """
    data = replace_decimals(data)
    url = 'https://api.jiandaoyun.com/api/v5/app/entry/data/batch_create'
    records = data['data_list']
    data_get_list = []

    for start_index in range(0, len(records), chunk_size):
        payload = json.dumps({
            "app_id": data['api_key'],
            "entry_id": data['entry_id'],
            "data_list": records[start_index:start_index + chunk_size],
            "is_start_workflow": data.get('is_start_workflow', "false"),
            "is_start_trigger": data.get('is_start_trigger', "false"),
        }, cls=NpEncoder)

        chunk_result = None
        failure = None
        for attempt in range(max_retries + 1):
            try:
                response = requests.post(url=url, data=payload, headers=self.headers, timeout=10)
                response.raise_for_status()
                data_get = response.json()
            except requests.exceptions.RequestException as e:
                failure = e
            else:
                if data_get.get("status") == "success":
                    chunk_result = data_get
                    break
                # A reply without status "success" used to be retried silently
                # and then dropped; record it so the failure is reported.
                failure = data_get
            if attempt < max_retries:
                time.sleep(0.1)  # brief pause between attempts

        if chunk_result is None:
            print(f"批量创建数据失败: {failure}")
        # Always append, so the result list lines up with the chunks.
        data_get_list.append(chunk_result)

    return data_get_list
# ---------------------- 数据处理基类 -----------------------
class DataHandler:
@@ -141,7 +268,7 @@ class DataHandler:
last_widget = pd.read_csv(self.last_widget_file) if self.last_widget_file.exists() else None
return last_data, last_widget
except Exception as e:
error_task_logger.error(f"Failed to load last data: {str(e)}")
print(f"加载上次数据失败: {e}")
return None, None
def save_last_data(self, data, widget_data):
@@ -151,9 +278,16 @@ class DataHandler:
success &= Utils.safe_csv_write(widget_data, self.last_widget_file)
return success
except Exception as e:
error_task_logger.error(f"Failed to save current data: {str(e)}")
print(f"保存数据失败: {e}")
return False
def field_replacement(self, data, final_data):
"""Placeholder for replacing field ids with label names; returns ``final_data`` unchanged."""
# The concrete replacement rules are not implemented here,
# so ``data`` is unused and the input is handed back as-is.
return final_data
# ---------------------- 数据监控主类 -----------------------
class DataMonitor(DataHandler):
def __init__(self):
@@ -164,8 +298,12 @@ class DataMonitor(DataHandler):
"""获取所有应用列表"""
url = "https://api.jiandaoyun.com/api/v5/app/list"
payload = json.dumps({"skip": 0, "limit": 100})
response = self.api.request(url, payload)
return pd.DataFrame(response.json().get("apps", []))
try:
response = self.api.request(url, payload)
return pd.DataFrame(response.json().get("apps", []))
except Exception as e:
print(f"获取应用列表失败: {e}")
return pd.DataFrame()
def fetch_entries(self, app_df):
"""获取所有表单条目"""
@@ -174,15 +312,19 @@ class DataMonitor(DataHandler):
for _, app in app_df.iterrows():
payload = json.dumps({"app_id": app['app_id']})
response = self.api.request(url, payload)
entries = response.json().get("forms", [])
try:
response = self.api.request(url, payload)
entries = response.json().get("forms", [])
if entries:
entry_df = pd.DataFrame(entries)
entry_df['app_id'] = app['app_id']
all_entries.append(entry_df)
if entries:
entry_df = pd.DataFrame(entries)
entry_df['app_id'] = app['app_id']
all_entries.append(entry_df)
except Exception as e:
print(f"获取表单条目失败: {e}")
continue
return pd.concat(all_entries, ignore_index=True) if all_entries else None
return pd.concat(all_entries, ignore_index=True) if all_entries else pd.DataFrame()
def fetch_widgets(self, entry_df):
"""获取所有字段组件"""
@@ -194,37 +336,50 @@ class DataMonitor(DataHandler):
"app_id": entry['app_id'],
"entry_id": entry['entry_id']
})
response = self.api.request(url, payload)
widgets = response.json().get('widgets', [])
try:
response = self.api.request(url, payload)
widgets = response.json().get('widgets', [])
if widgets:
widget_df = pd.DataFrame(widgets)
widget_df['app_id'] = entry['app_id']
widget_df['entry_id'] = entry['entry_id']
all_widgets.append(widget_df)
if widgets:
widget_df = pd.DataFrame(widgets)
widget_df['app_id'] = entry['app_id']
widget_df['entry_id'] = entry['entry_id']
all_widgets.append(widget_df)
except Exception as e:
print(f"获取字段组件失败: {e}")
continue
return pd.concat(all_widgets, ignore_index=True) if all_widgets else None
return pd.concat(all_widgets, ignore_index=True) if all_widgets else pd.DataFrame()
def fetch_monitor_data(self):
"""获取监控数据"""
payload = {
"api_key": "6694d3c4fcb69ca9a111a6c4",
"entry_id": "6850c044f17c934b3ec01fea"
"api_key": Config.MONITOR_APP_ID,
"entry_id": Config.MONITOR_ENTRY_ID
}
data = self.api.api.entry_data_list(payload).get("data")
data_list = pd.DataFrame(data)
try:
data = self.api.entry_data_list(payload).get("data", [])
data_list = pd.DataFrame(data)
# 处理复杂数据类型
for col in data_list.columns:
if data_list[col].apply(lambda x: isinstance(x, (dict, list))).any():
data_list[col] = data_list[col].astype(str)
# 处理复杂数据类型
for col in data_list.columns:
if data_list[col].apply(lambda x: isinstance(x, (dict, list))).any():
data_list[col] = data_list[col].astype(str)
return data_list.drop_duplicates()
return data_list.drop_duplicates()
except Exception as e:
print(f"获取监控数据失败: {e}")
return pd.DataFrame()
def match_widgets(self, data_list, widget_list):
"""匹配数据列表和组件列表"""
if '_widget_1750122565203' not in data_list.columns:
raise ValueError("Missing required column '_widget_1750122565203'")
print("缺少必需的列 '_widget_1750122565203'")
return pd.DataFrame()
if widget_list.empty:
return pd.DataFrame()
return widget_list[widget_list['entry_id'].isin(data_list['_widget_1750122565203'])]
def archive_old_data(self):
@@ -234,12 +389,11 @@ class DataMonitor(DataHandler):
for i in range(Config.RETAIN_DAYS)
]
# 查找需要归档的文件
files_to_archive = [
f for f in self.data_dir.iterdir()
if f.is_file() and
(f.name.startswith("snapshot_") or f.name.startswith("all_widgets_")) and
f.suffix == '.csv'
(f.name.startswith("snapshot_") or f.name.startswith("all_widgets_")) and
f.suffix == '.csv'
]
for file_path in files_to_archive:
@@ -253,80 +407,101 @@ class DataMonitor(DataHandler):
with zipfile.ZipFile(archive_path, 'a', zipfile.ZIP_DEFLATED) as zipf:
zipf.write(file_path, arcname=file_path.name)
file_path.unlink()
logger.debug(f"Archived {file_path.name} to {archive_path}")
except Exception as e:
error_task_logger.error(f"Failed to archive {file_path}: {str(e)}")
print(f"归档文件失败: {e}")
def compare_data(self, current_data):
"""比较新旧数据差异"""
if not self.last_data_file.exists():
if not self.last_data_file.exists() or current_data.empty:
return None
last_data = pd.read_csv(self.last_data_file)
last_data['unique_id'] = last_data['name'].astype(str) + last_data['app_id'].astype(str)
current_data['unique_id'] = current_data['name'].astype(str) + current_data['app_id'].astype(str)
try:
last_data = pd.read_csv(self.last_data_file)
if last_data.empty:
return None
merged = pd.merge(
last_data, current_data,
on=['unique_id'],
how='outer',
suffixes=('_last', '_current'),
indicator=True
)
last_data['unique_id'] = last_data['name'].astype(str) + last_data['app_id'].astype(str)
current_data['unique_id'] = current_data['name'].astype(str) + current_data['app_id'].astype(str)
changes = {
'added': merged[merged['_merge'] == 'right_only'],
'deleted': merged[merged['_merge'] == 'left_only'],
'modified': pd.DataFrame()
}
merged = pd.merge(
last_data, current_data,
on=['unique_id'],
how='outer',
suffixes=('_last', '_current'),
indicator=True
)
for col in ['label', 'type']:
last_col = f"{col}_last"
current_col = f"{col}_current"
changes = {
'added': merged[merged['_merge'] == 'right_only'],
'deleted': merged[merged['_merge'] == 'left_only'],
'modified': pd.DataFrame()
}
if last_col in merged.columns and current_col in merged.columns:
mask = (merged['_merge'] == 'both') & (merged[last_col] != merged[current_col])
mask = mask & ~merged[last_col].isna() & ~merged[current_col].isna()
for col in ['label', 'type']:
last_col = f"{col}_last"
current_col = f"{col}_current"
if mask.any():
modified = merged.loc[mask].copy()
modified['changed_field'] = col
modified['old_value'] = modified[last_col]
modified['new_value'] = modified[current_col]
modified['change_status'] = 'update'
changes['modified'] = pd.concat([changes['modified'], modified])
if last_col in merged.columns and current_col in merged.columns:
mask = (merged['_merge'] == 'both') & (merged[last_col] != merged[current_col])
mask = mask & ~merged[last_col].isna() & ~merged[current_col].isna()
return changes
if mask.any():
modified = merged.loc[mask].copy()
modified['changed_field'] = col
modified['old_value'] = modified[last_col]
modified['new_value'] = modified[current_col]
modified['change_status'] = 'update'
changes['modified'] = pd.concat([changes['modified'], modified])
return changes
except Exception as e:
print(f"比较数据失败: {e}")
return None
def save_changes(self, changes, apps, entries):
"""保存变更记录"""
if not changes or all(len(v) == 0 for v in changes.values()):
return False
result_rows = []
for change_type in ['added', 'deleted', 'modified']:
df = changes[change_type]
if df.empty:
continue
suffix = 'current' if change_type in ['added', 'modified'] else 'last'
for _, row in changes[change_type].iterrows():
app_id = row[f'app_id_{suffix}']
entry_id = row[f'entry_id_{suffix}']
for _, row in df.iterrows():
app_id = row.get(f'app_id_{suffix}')
entry_id = row.get(f'entry_id_{suffix}')
app_name = apps.loc[apps['app_id'] == app_id, 'name'].values[0] if not apps[
apps['app_id'] == app_id].empty else 'Unknown App'
entry_name = entries.loc[
(entries['app_id'] == app_id) & (entries['entry_id'] == entry_id), 'name'
].values[0] if not entries[
(entries['app_id'] == app_id) & (entries['entry_id'] == entry_id)
].empty else 'Unknown Entry'
if not app_id or not entry_id:
continue
app_name = 'Unknown App'
entry_name = 'Unknown Entry'
if not apps.empty:
app_match = apps[apps['app_id'] == app_id]
if not app_match.empty:
app_name = app_match['name'].values[0]
if not entries.empty:
entry_match = entries[(entries['app_id'] == app_id) & (entries['entry_id'] == entry_id)]
if not entry_match.empty:
entry_name = entry_match['name'].values[0]
if change_type == 'added':
content = f"Added field: {row['label_current']}"
content = f"Added field: {row.get('label_current', 'Unknown')}"
elif change_type == 'deleted':
content = f"Deleted field: {row['label_last']}"
content = f"Deleted field: {row.get('label_last', 'Unknown')}"
else:
content = f"Changed from \"{row['old_value']}\" to \"{row['new_value']}\""
content = f"Changed from \"{row.get('old_value', 'Unknown')}\" to \"{row.get('new_value', 'Unknown')}\""
result_rows.append({
'timestamp': self.execution_time,
'unique_id': row['unique_id'],
'unique_id': row.get('unique_id', ''),
'app_id': app_id,
'app_name': app_name,
'entry_id': entry_id,
@@ -339,7 +514,6 @@ class DataMonitor(DataHandler):
result_df = pd.DataFrame(result_rows)
changes_file = Config.get_changes_file()
try:
# 追加模式写入,保留历史记录
result_df.to_csv(
changes_file,
mode='a',
@@ -350,7 +524,7 @@ class DataMonitor(DataHandler):
self.add_to_jiandaoyun(result_df)
return True
except Exception as e:
error_task_logger.error(f"Failed to save changes: {str(e)}")
print(f"保存变更记录失败: {e}")
return False
return False
@@ -365,24 +539,51 @@ class DataMonitor(DataHandler):
} for _, row in result_df.iterrows()]
payload = {
"api_key": "6694d3c4fcb69ca9a111a6c4",
"entry_id": "6863a402a77925690a470cc5",
"api_key": Config.MONITOR_APP_ID,
"entry_id": Config.CHANGES_ENTRY_ID,
"data_list": all_data
}
response = self.api.api.entry_data_batch_create(payload)
if isinstance(response, list):
logger.info(f"Successfully wrote {len(response)} records to Jiandaoyun")
return True
else:
error_task_logger.error(f"Failed to write to Jiandaoyun: {response.get('message', 'Unknown error')}")
try:
response = self.api.entry_data_batch_create(payload)
return isinstance(response, list) and len(response) > 0
except Exception as e:
print(f"写入简道云失败: {e}")
return False
def send_task_status(self, task_start_time: str, task_name: str) -> None:
    """Report a finished task run (start, end, duration) to Jiandaoyun.

    :param task_start_time: start time formatted as "%Y-%m-%d %H:%M:%S";
        the code subtracts 8 hours from it before labelling it UTC.
    :param task_name: value written to the task-name widget.

    Any exception is caught and printed; nothing is raised to the caller.
    """
    try:
        finished_at = datetime.now(timezone.utc)

        # Shift the naive start time back 8 hours, then mark it as UTC so it
        # can be subtracted from the timezone-aware end time.
        started_local = datetime.strptime(task_start_time, "%Y-%m-%d %H:%M:%S")
        started_at = (started_local - timedelta(hours=8)).replace(tzinfo=timezone.utc)

        elapsed_seconds = int((finished_at - started_at).total_seconds())

        iso_format = "%Y-%m-%dT%H:%M:%SZ"
        widgets = {
            "_widget_1744873387500": {"value": finished_at.strftime("%Y-%m-%d")},
            "_widget_1743644977694": {"value": task_name},
            "_widget_1744873387501": {"value": started_at.strftime(iso_format)},
            "_widget_1744873387502": {"value": finished_at.strftime(iso_format)},
            "_widget_1744873387504": {"value": elapsed_seconds},
        }
        self.api.data_batch_create({
            "api_key": Config.MONITOR_APP_ID,
            "entry_id": Config.STATUS_ENTRY_ID,
            "data": widgets,
        })
    except Exception as e:
        print(f"发送任务状态失败: {e}")
def run_daily_snapshot(self):
"""执行每日快照任务"""
logger.info("=== Starting daily snapshot task ===")
try:
apps = self.fetch_apps()
entries = self.fetch_entries(apps)
@@ -390,28 +591,30 @@ class DataMonitor(DataHandler):
monitor_data = self.fetch_monitor_data()
matched_data = self.match_widgets(monitor_data, widgets)
# 保存数据
if matched_data.empty:
print("没有匹配到数据")
return False
today_file = self.data_dir / f"snapshot_{self.today}.csv"
widget_file = self.data_dir / f"all_widgets_{self.today}.csv"
if not Utils.safe_csv_write(matched_data, today_file):
raise Exception("Failed to save snapshot data")
print("保存快照数据失败")
return False
if not Utils.safe_csv_write(widgets, widget_file):
raise Exception("Failed to save widget data")
print("保存组件数据失败")
return False
self.archive_old_data()
self.save_last_data(matched_data, widgets)
logger.info("=== Daily snapshot task completed successfully ===")
return True
except Exception as e:
error_task_logger.error(f"Daily snapshot task failed: {str(e)}")
print(f"每日快照任务失败: {e}")
return False
def run_hourly_check(self):
"""执行每小时检查任务"""
logger.info("=== Starting hourly check task ===")
try:
apps = self.fetch_apps()
entries = self.fetch_entries(apps)
@@ -425,32 +628,27 @@ class DataMonitor(DataHandler):
self.save_last_data(current_data, widgets)
logger.info("=== Hourly check task completed successfully ===")
return True
except Exception as e:
error_task_logger.error(f"Hourly check task failed: {str(e)}")
print(f"每小时检查任务失败: {e}")
return False
def main(self):
"""主运行逻辑"""
task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
try:
logger.info(f"=== Starting data monitoring task ({self.execution_time}) ===")
if Utils.is_first_run_today():
success = self.run_daily_snapshot()
else:
success = self.run_hourly_check()
common_tools.send_task_status(task_start_time, "字段监控")
logger.info("=== Data monitoring task completed ===")
self.send_task_status(task_start_time, "字段监控")
return success
except Exception as e:
error_task_logger.error(f"Data monitoring task failed: {e}")
common_tools.send_task_error(task_start_time, "字段监控", str(e))
print(f"主运行逻辑失败: {e}")
return False
if __name__ == "__main__":
# 确保输出目录存在
Utils.ensure_dir(Config.OUTPUT_DIR)
+6
View File
@@ -8,3 +8,9 @@
2025-08-21 09:38:43,196 - data_Support_Commission.py - error_task_logger - ERROR - 小六提成数据支撑任务执行出错:'Logger' object has no attribute 'warrning'
2025-08-21 09:38:43,333 - data_Support_Commission.py - error_task_logger - ERROR - 小六提成数据支撑任务执行出错:cannot access local variable 'data_commission' where it is not associated with a value
2025-08-21 09:54:25,836 - update_denominator_reporting_adjustment_to_bi.py - error_task_logger - ERROR - 写入数据时发生错误: 1054 (42S22): Unknown column 'nan' in 'field list'
2025-09-05 14:57:31,453 - update_BI_CRM_info.py - error_task_logger - ERROR - 导入数据时发生错误: 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'end date, Contract start date, url) VALUES ('C-241113-001', 'Haikarnyang', '全' at line 1
2025-09-05 14:57:31,894 - update_BI_CRM_info.py - error_task_logger - ERROR - 任务简道云海外项目CRM客户档案迁移BI执行失败。
2025-09-15 14:06:08,361 - JCB_efficient_car_pickup.py - error_task_logger - ERROR - 接车宝日常派发执行出错:'NoneType' object has no attribute 'iterrows'
2025-09-15 14:14:19,894 - JCB_efficient_car_pickup.py - error_task_logger - ERROR - 接车宝日常派发执行出错:'NoneType' object has no attribute 'iterrows'
2025-09-15 14:17:26,819 - JCB_efficient_car_pickup.py - error_task_logger - ERROR - 接车宝日常派发执行出错:获取接车宝数据失败,返回None
2025-09-26 11:03:13,587 - update_all_NGV_data_daily.py - error_task_logger - ERROR - NGV更新数据执行时发生异常: cannot reindex on an axis with duplicate labels
File diff suppressed because one or more lines are too long
@@ -50,8 +50,8 @@ class NewDealerServiceOrderToBI:
'不上货原因': '_widget_1742268351779', '是否培训系统使用': '_widget_1749717287367',
'不培训系统使用原因': '_widget_1749717287369', '是否补货': '_widget_1749717287373',
'不补货原因': '_widget_1749717287375',
'是否进行滞销回抽+盘点介绍': '_widget_1742200372561',
'不进行滞销回抽+盘点介绍原因': '_widget_1742268351780',
'是否进行滞销回抽盘点介绍': '_widget_1742200372561',
'不进行滞销回抽盘点介绍原因': '_widget_1742268351780',
'服务是否满意': '_widget_1743148999298', '服务不满意原因': '_widget_1743148999308',
'产品是否满意': '_widget_1743148999300', '产品不满意原因': '_widget_1743148999309',
# '上传评价图片': '_widget_1743148999310',
@@ -56,9 +56,13 @@ class NonStandardPerformanceToBI:
"新签提成比例-首年": "_widget_1753778922503",
"新签提成比例-非首年": "_widget_1753778922548",
"新签阶段及提成比例": "_widget_1753778656359",
"业绩动作":"_widget_1756708722933","提成动作":"_widget_1756708722932",
"新签阶段及提成比例.选择提成阶段": "_widget_1753778656359._widget_1753778656361",
"新签阶段及提成比例.新签阶段": "_widget_1753778656359._widget_1753948745962",
"新签阶段及提成比例.提成比例": "_widget_1753778656359._widget_1753778656362",
"提交人": "creator",
"提交时间": "createTime",
"更新时间": "updateTime"
}
# 定义需要特殊处理的列表字段及其内部字段映射
@@ -121,7 +125,7 @@ class NonStandardPerformanceToBI:
df[col] = df[col].map(lambda x: x.get("name", "") if isinstance(x, dict) else "")
# 3.日期字段转为北京时间
time_columns = ["支付日期", "开户/处理日期"]
time_columns = ["支付日期", "开户/处理日期","提交时间","更新时间"]
df[time_columns] = df[time_columns].apply(
lambda col: pd.to_datetime(col, errors='coerce')
@@ -158,12 +162,7 @@ class NonStandardPerformanceToBI:
def write_to_bi(self, df):
# 数据库连接信息
HS_DB_Config = {
'host': "f6-public.rwlb.rds.aliyuncs.com",
'user': "rw_operation_data_relay",
'password': "m+q5Z4%IVuF9bf",
'database': "f6operation_data_relay"
}
HS_DB_Config = Config.HS_DB_Config
table_name = "non_standard_performance_to_BI" # 替换为你的实际表名
# 建立数据库连接
+14 -3
View File
@@ -40,7 +40,7 @@ class PartnerSettlementToBI:
"合伙人分类": "_widget_1753943042503",
"战区": "_widget_1754530653275",
"订单登记表": "_widget_1712803222905",
"订单登记表.订单编号": "_widget_1712803222905._widget_1712803222907",
"订单登记表.订单编号": "_widget_1712803222905._widget_1756870421570",
"订单登记表.销售阶段": "_widget_1712803222905._widget_1712805391009",
"订单登记表.版本": "_widget_1712803222905._widget_1712803222908",
"订单登记表.年限": "_widget_1712803222905._widget_1712815331264",
@@ -53,12 +53,14 @@ class PartnerSettlementToBI:
"特殊情况备注": "_widget_1712805391035",
"合伙人介绍证明(微信聊天截图等)": "_widget_1712815331256",
"合伙人类型": "_widget_1753957844818",
"提交时间": "createTime",
"更新时间": "updateTime"
}
# 定义需要特殊处理的列表字段及其内部字段映射
self.list_fields_config = {
"订单登记表": {
"_widget_1712803222907": "订单编号",
"_widget_1756870421570": "订单编号",
"_widget_1712805391009": "销售阶段",
"_widget_1712803222908": "版本",
"_widget_1712815331264": "年限",
@@ -124,7 +126,16 @@ class PartnerSettlementToBI:
for col in user_columns:
df[col] = df[col].map(lambda x: x.get("name", "") if isinstance(x, dict) else "")
# 3.处理订单登记表列表字段,将其拆分成多行
# 3.日期字段转为北京时间
time_columns = ["提交时间", "更新时间"]
df[time_columns] = df[time_columns].apply(
lambda col: pd.to_datetime(col, errors='coerce')
.dt.tz_localize(None)
.dt.strftime('%Y-%m-%d %H:%M:%S')
)
# 4.处理订单登记表列表字段,将其拆分成多行
if "订单登记表" in df.columns:
# 先处理订单登记表字段
df["订单登记表"] = df["订单登记表"].apply(
+8 -6
View File
@@ -111,6 +111,8 @@ class CRMDataProcessor:
'_widget_1709110881016': '银行账户',
"_widget_1749204198412": "原系统情况描述",
"_widget_1749632087678": "付款金额_泰铢",
"_widget_1756951762528": "Contract end date",
"_widget_1756951762527": "Contract start date",
'creator': '提交人',
'createTime': '提交时间',
'updateTime': '更新时间'
@@ -146,7 +148,9 @@ class CRMDataProcessor:
'_widget_1747123933310', # 客户签约日期
'_widget_1747123933311', # 客户付款日期
'_widget_0201002000007', # 领取时间
'_widget_0201002000008' # 最后跟进时间
'_widget_0201002000008', # 最后跟进时间
'_widget_1756951762528', # Contract end date
'_widget_1756951762527', # Contract start date
]
def connect_db(self):
@@ -178,7 +182,7 @@ class CRMDataProcessor:
CRM_data_list = self.api_instance.entry_data_list(payload).get("data")
CRM_data = pd.DataFrame(CRM_data_list)
print("成功从API获取CRM数据")
CRM_data.to_csv("CRM.csv")
# CRM_data.to_csv("CRM.csv")
return CRM_data
def process_data(self, df):
@@ -302,8 +306,6 @@ class CRMDataProcessor:
# 处理空值 - 将NaN/NaT/空字符串统一转为None
df = df.map(lambda x: None if pd.isna(x) or str(x).strip() == '' else x)
# # 确保所有空值(包括NaN、None、空字符串)转为None
df = df.replace([np.nan, None, r'^\s*$'], None, regex=True)
# 检查表结构是否匹配
@@ -313,8 +315,8 @@ class CRMDataProcessor:
# 只保留表中存在的列
df = df[[col for col in df.columns if col in table_columns]]
# 生成插入语句
columns = ', '.join(df.columns)
# 修改这里:为所有列名添加反引号
columns = ', '.join([f'`{col}`' for col in df.columns]) # 添加反引号
placeholders = ', '.join(['%s'] * len(df.columns))
insert_query = f"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})"
+408 -37
View File
@@ -1,22 +1,36 @@
from typing import Optional, List, Dict, Any
import requests
import json
import pandas as pd
from api import API
from config import Config
from log_config import configure_task_logger, configure_error_task_logger
from back_ground_module import CommonModule
from datetime import datetime
from datetime import datetime, timezone, timedelta, date, UTC
import time
from decimal import Decimal
import numpy as np
import requests
import json
# 初始化API实例
api_instance = API()
# 获取已经配置好的常规日志记录器
logger = configure_task_logger()
def replace_decimals(obj):
    """Recursively convert every ``Decimal`` inside *obj* to ``float``.

    Dicts, lists and tuples are rebuilt with their contents converted; any
    other value is returned unchanged. The result is meant to be passed to
    ``json.dumps``, which cannot serialise ``Decimal``.

    :param obj: arbitrary (possibly nested) value.
    :return: a structure of the same shape with ``Decimal`` replaced by ``float``.
    """
    if isinstance(obj, Decimal):
        return float(obj)
    if isinstance(obj, dict):
        return {key: replace_decimals(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [replace_decimals(item) for item in obj]
    if isinstance(obj, tuple):
        # Tuples were previously passed through untouched, leaving nested
        # Decimals in place for json.dumps to choke on.
        return tuple(replace_decimals(item) for item in obj)
    return obj
# 获取已经配置好的错误任务日志记录器
error_task_logger = configure_error_task_logger()
common_module = CommonModule()
class NpEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays.

    Use as ``json.dumps(payload, cls=NpEncoder)``.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of *obj*.

        NumPy booleans, integers and floats become the matching Python
        scalar and arrays become nested lists. Anything else is delegated
        to the base class, which raises ``TypeError``.
        """
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
class update_ID_form:
@@ -24,7 +38,7 @@ class update_ID_form:
def __init__(self):
self.headers = {
'Authorization': Config.JIANDAOYUN_API_TOKEN, # 曹伟应用api测试 app_key
'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN', # 曹伟应用api测试 app_key
'Content-Type': 'application/json'
}
self.url = "https://api.jiandaoyun.com/api/v5/corp/department/user/list"
@@ -52,27 +66,367 @@ class update_ID_form:
search_department_member = response.json()
departments_members = search_department_member.get('users')
df1 = pd.DataFrame(departments_members)
logger.info("部门成员及ID表已成功获取")
return df1
except Exception as e:
error_task_logger.error(f"获取部门成员及ID表失败:{e}")
task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
common_module.send_task_error(task_start_time, "简道云员工ID表更新", str(e))
return None
@staticmethod
def data_batch_create(data: dict, max_retries: int = 20) -> Optional[Dict[str, Any]]:
    """Create a single form record through the Jiandaoyun v5 API.

    :param data: must contain ``api_key`` (application id), ``entry_id``
        (form id) and ``data`` (the record, keyed by widget id). Optional
        keys: ``is_start_workflow``, ``is_start_trigger``, ``transaction_id``.
    :param max_retries: number of retries after the first attempt.
    :return: the parsed JSON response on HTTP 200, or ``None`` once every
        attempt has failed.
    """
    url = 'https://api.jiandaoyun.com/api/v5/app/entry/data/create'
    headers = {
        # NOTE(review): credential is hard-coded in source; consider loading
        # it from configuration instead.
        'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN',
        'Content-Type': 'application/json'
    }
    # Expected shape of data['data'] (widget id -> {"value": ...}), e.g.
    #   {"_widget_1731650067055": {"value": "..."},
    #    "_widget_1731650067056": {"value": "..."}}
    payload = json.dumps({
        "app_id": data['api_key'],
        "entry_id": data['entry_id'],
        "data": data['data'],
        "is_start_workflow": data.get('is_start_workflow', "false"),
        "is_start_trigger": data.get('is_start_trigger', "false"),
        "transaction_id": data.get('transaction_id', "")
    })

    for attempt in range(max_retries + 1):
        try:
            res = requests.post(url=url, data=payload, headers=headers, timeout=10)
            res.raise_for_status()  # non-2xx -> HTTPError -> retry
            if res.status_code == 200:
                return res.json()
        except (requests.exceptions.RequestException, ValueError):
            # ValueError covers a non-JSON body on requests versions whose
            # decode error is not a RequestException; treat it as retryable.
            pass
        if attempt < max_retries:
            time.sleep(3)  # pause between attempts, but not after the last one
    return None
@staticmethod
def entry_widget_list(data: dict) -> Optional[Dict[str, Any]]:
    """Fetch the widget (field) definitions of a form.

    :param data: must contain ``api_key`` (application id) and ``entry_id``
        (form id).
    :return: the parsed JSON body of the widget-list response.
    """
    endpoint = 'https://api.jiandaoyun.com/api/v5/app/entry/widget/list'
    request_headers = {
        'Authorization': "Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN",
        'Content-Type': 'application/json'
    }
    body = json.dumps({
        "app_id": data['api_key'],
        "entry_id": data['entry_id'],
    })
    response = requests.post(url=endpoint, data=body, headers=request_headers, timeout=10)
    return response.json()
@staticmethod
def field_replacement(data: dict, data_get: dict) -> dict:
    """Return a copy of *data_get* whose widget-id keys are replaced by labels.

    :param data: request descriptor passed to ``entry_widget_list``
        (application id and form id).
    :param data_get: response-like dict; only its ``'data'`` entry is rewritten.
    :return: a JSON round-tripped copy of *data_get* with keys under
        ``'data'`` mapped from widget name to widget label, recursively.
    :raises ValueError: when the widget list is missing or malformed.
    """
    widget_info = update_ID_form.entry_widget_list(data)

    has_widgets = (
        bool(widget_info)
        and 'widgets' in widget_info
        and isinstance(widget_info['widgets'], list)
    )
    if not has_widgets:
        raise ValueError("映射表没有接受到数据")

    # widget name (e.g. "_widget_...") -> human-readable label
    label_by_name = {w['name']: w['label'] for w in widget_info['widgets']}

    def relabel(node):
        """Rename dict keys at every nesting level; leave other values alone."""
        if isinstance(node, dict):
            return {label_by_name.get(k, k): relabel(v) for k, v in node.items()}
        if isinstance(node, list):
            return [relabel(element) for element in node]
        return node

    # JSON round trip gives an independent copy so the caller's data is untouched.
    result = json.loads(json.dumps(data_get))
    if 'data' in result:
        result['data'] = relabel(result['data'])
    return result
@staticmethod
def entry_data_list(data: dict, replace: bool = False, max_retries: int = 20) -> Dict:
    """Fetch all records of a form, paging 100 at a time.

    :param data: must contain ``api_key`` (application id) and ``entry_id``
        (form id).
    :param replace: when true, widget-id keys are replaced by their labels
        via ``field_replacement``.
    :param max_retries: number of retries per page after the first attempt.
    :return: ``{'data': [...]}``, a flat list of records. If a page still
        fails after all retries, a single ``None`` is appended as a failure
        marker and paging stops.
    """
    url = 'https://api.jiandaoyun.com/api/v5/app/entry/data/list'
    headers = {
        # NOTE(review): credential is hard-coded in source; consider loading
        # it from configuration instead.
        'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN',
        'Content-Type': 'application/json'
    }
    all_data_batches = []
    last_data_id = None  # id of the last record seen; the paging cursor

    while True:
        payload = json.dumps({
            "app_id": data['api_key'],
            "entry_id": data['entry_id'],
            "limit": 100,
            "data_id": last_data_id
        })

        batch = None
        fetched = False
        for attempt in range(max_retries + 1):
            try:
                res = requests.post(url=url, data=payload, headers=headers, timeout=10)
                res.raise_for_status()
                # .get() so a response without 'data' ends paging instead of
                # raising an uncaught KeyError.
                batch = res.json().get('data')
                fetched = True
                break
            except (requests.exceptions.RequestException, ValueError):
                if attempt < max_retries:
                    time.sleep(0.1)  # brief pause between attempts

        if not fetched:
            # Previously this path looped forever, re-requesting the same
            # page and appending None each time. Record the failure once
            # and stop.
            all_data_batches.append(None)
            break
        if not batch:
            break  # empty or missing page: no more data

        all_data_batches.extend(batch)
        last_data_id = batch[-1].get('_id')

    final_data = {'data': all_data_batches}
    if replace:
        print("进行了替换")
        return update_ID_form.field_replacement(data, final_data)
    return final_data
def send_task_status(self, task_start_time: str, task_name: str) -> None:
    """Report a finished task run to Jiandaoyun.

    The start time is given in Beijing time (UTC+8) and converted to UTC;
    the end time is "now" in UTC.

    :param task_start_time: start time, "%Y-%m-%d %H:%M:%S", Beijing time.
    :param task_name: task name written to the status form.

    Reporting is best-effort: any failure is printed and not re-raised.
    """
    try:
        # timezone.utc rather than datetime.UTC: same object, but available
        # on Python versions older than 3.11.
        end_time_utc = datetime.now(timezone.utc)

        # Beijing time -> UTC: subtract 8 hours, then attach the UTC tzinfo
        # so it can be subtracted from the aware end time.
        task_start_naive = datetime.strptime(task_start_time, "%Y-%m-%d %H:%M:%S")
        task_start_utc = (task_start_naive - timedelta(hours=8)).replace(tzinfo=timezone.utc)

        run_time_sec = int((end_time_utc - task_start_utc).total_seconds())

        # ISO 8601 with a trailing "Z" for the timestamps, plain date for the day.
        today_utc = end_time_utc.strftime("%Y-%m-%d")
        task_end_iso = end_time_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
        task_start_iso = task_start_utc.strftime("%Y-%m-%dT%H:%M:%SZ")

        payload = {
            "api_key": "6694d3c4fcb69ca9a111a6c4",
            "entry_id": "67ede908eb9c22261016466e",
            "data": {
                "_widget_1744873387500": {"value": today_utc},       # UTC date
                "_widget_1743644977694": {"value": task_name},
                "_widget_1744873387501": {"value": task_start_iso},  # UTC start
                "_widget_1744873387502": {"value": task_end_iso},    # UTC end
                "_widget_1744873387504": {"value": run_time_sec},
            }
        }
        update_ID_form.data_batch_create(payload)
    except Exception as e:
        # Still best-effort, but no longer silent.
        print(f"发送任务状态失败: {e}")
@staticmethod
def entry_data_batch_create(
        data: dict,
        chunk_size: int = 90,
        max_retries: int = 20
) -> List[Optional[Dict[str, Any]]]:
    """Create many form records, sending them in chunks.

    Jiandaoyun limits a batch create to 100 records per request, hence the
    default chunk size of 90.

    :param data: must contain ``api_key`` (application id), ``entry_id``
        (form id) and ``data_list`` (list of records keyed by widget id).
        Optional keys: ``is_start_workflow``, ``is_start_trigger``.
    :param chunk_size: maximum number of records per request.
    :param max_retries: number of retries per chunk after the first attempt.
    :return: one entry per chunk: the parsed JSON response on success, or
        ``None`` if the chunk failed on every attempt.
    """
    data = replace_decimals(data)
    url = 'https://api.jiandaoyun.com/api/v5/app/entry/data/batch_create'
    headers = {
        # NOTE(review): credential is hard-coded in source; consider loading
        # it from configuration instead.
        'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN',
        'Content-Type': 'application/json'
    }
    # Expected shape of data['data_list']:
    #   [{"_widget_1731650067055": {"value": "..."},
    #     "_widget_1731650067056": {"value": "..."}}, ...]
    records = data['data_list']
    results = []

    for start in range(0, len(records), chunk_size):
        payload = json.dumps({
            "app_id": data['api_key'],
            "entry_id": data['entry_id'],
            "data_list": records[start:start + chunk_size],
            "is_start_workflow": data.get('is_start_workflow', "false"),
            "is_start_trigger": data.get('is_start_trigger', "false"),
        }, cls=NpEncoder)

        outcome = None
        for attempt in range(max_retries + 1):
            try:
                res = requests.post(url=url, data=payload, headers=headers, timeout=10)
                res.raise_for_status()
                body = res.json()
                # .get() so a body without "status" is retried instead of
                # raising an uncaught KeyError.
                if isinstance(body, dict) and body.get("status") == "success":
                    outcome = body
                    break
                delay = 3  # server answered but did not report success
            except (requests.exceptions.RequestException, ValueError):
                delay = 0.1  # transport or decode failure
            if attempt < max_retries:
                time.sleep(delay)
        results.append(outcome)

    return results
def get_existing_id_form(self):
"""读取现有的ID表"""
try:
now_ID_form = api_instance.entry_data_list(self.payload1).get('data')
now_ID_form = update_ID_form.entry_data_list(self.payload1).get('data')
df = pd.DataFrame(now_ID_form)
logger.info("现有的ID表已成功读取")
return df
except Exception as e:
error_task_logger.error(f"读取现有的ID表失败:{e}")
task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
common_module.send_task_error(task_start_time, "简道云员工ID表更新", str(e))
return None
@staticmethod
def entry_data_batch_delete(
        data: dict,
        chunk_size: int = 90,
        max_retries: int = 20
) -> List[Optional[Dict[str, Any]]]:
    """Delete many form records, sending the ids in chunks.

    :param data: must contain ``api_key`` (application id), ``entry_id``
        (form id) and ``data_ids`` (list of record ids to delete).
    :param chunk_size: maximum number of ids per request (default 90).
    :param max_retries: number of retries per chunk after the first attempt.
    :return: one entry per chunk: the parsed JSON response on success, or
        ``None`` if the chunk failed on every attempt.
    """
    data = replace_decimals(data)
    url = 'https://api.jiandaoyun.com/api/v5/app/entry/data/batch_delete'
    headers = {
        # NOTE(review): credential is hard-coded in source; consider loading
        # it from configuration instead.
        'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN',
        'Content-Type': 'application/json'
    }
    record_ids = data['data_ids']
    results = []

    for start in range(0, len(record_ids), chunk_size):
        payload = json.dumps({
            "app_id": data['api_key'],
            "entry_id": data['entry_id'],
            "data_ids": record_ids[start:start + chunk_size],
        }, cls=NpEncoder)

        outcome = None
        for attempt in range(max_retries + 1):
            try:
                res = requests.post(url=url, data=payload, headers=headers, timeout=10)
                res.raise_for_status()
                body = res.json()
                # .get() so a body without "status" is retried instead of
                # raising an uncaught KeyError.
                if isinstance(body, dict) and body.get("status") == "success":
                    outcome = body
                    break
                delay = 3  # server answered but did not report success
            except (requests.exceptions.RequestException, ValueError):
                delay = 0.1  # transport or decode failure
            if attempt < max_retries:
                time.sleep(delay)
        results.append(outcome)

    return results
def delete_existing_data(self, df):
"""批量删除现有数据"""
try:
@@ -80,12 +434,9 @@ class update_ID_form:
for index, i in df.iterrows():
all_data.append(i["_id"])
self.delete_payload["data_ids"] = all_data
api_instance.entry_data_batch_delete(self.delete_payload)
logger.info("现有数据已成功删除")
res = update_ID_form.entry_data_batch_delete(self.delete_payload)
except Exception as e:
error_task_logger.error(f"批量删除现有数据失败:{e}")
task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
common_module.send_task_error(task_start_time, "简道云员工ID表更新", str(e))
pass
def update_data(self, df1):
"""批量写入新数据"""
@@ -97,29 +448,49 @@ class update_ID_form:
"_widget_1734942794145": {"value": i["username"]},
})
self.update_payload["data_list"] = all_data1
api_instance.entry_data_batch_create(self.update_payload)
logger.info("新数据已成功写入")
update_ID_form.entry_data_batch_create(self.update_payload)
except Exception as e:
error_task_logger.error(f"批量写入新数据失败:{e}")
task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
common_module.send_task_error(task_start_time, "简道云员工ID表更新", str(e))
pass
def get_out_department_memberships(self):
    """Fetch external (guest) members and their ids.

    :return: a DataFrame with columns ``name`` and ``username`` built from
        the response's ``member_list``, or ``None`` if the request or
        parsing fails (the error is printed).
    """
    url = "https://api.jiandaoyun.com/api/v5/corp/guest/user/list"
    headers = {
        # NOTE(review): credential is hard-coded in source; consider loading
        # it from configuration instead.
        'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN',
        'Content-Type': 'application/json'
    }
    try:
        # Timeout matches the other Jiandaoyun calls in this class so a
        # stalled connection cannot hang the task.
        response = requests.post(url, headers=headers, timeout=10)
        member_list = response.json().get("member_list", [])
        rows = [
            {"name": member.get("name"), "username": member.get("username")}
            for member in member_list
        ]
        return pd.DataFrame(rows)
    except Exception as e:
        # Was a bare `except:` that discarded the cause.
        print(f"获取外部公司成员及ID表失败: {e}")
        return None
def main(self):
"""主函数"""
task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
try:
logger.info("每日任务开始执行")
df1 = self.get_department_members()
df2 = self.get_out_department_memberships()
if df1 is not None:
df = self.get_existing_id_form()
if df is not None:
self.delete_existing_data(df)
self.update_data(df1)
logger.info("每日任务执行完成")
common_module.send_task_status(task_start_time, "简道云员工ID表更新")
self.update_data(df2)
self.send_task_status(task_start_time, "简道云员工ID表更新")
except Exception as e:
error_task_logger.error(f"简道云员工ID表更新任务执行失败:{e}")
common_module.send_task_error(task_start_time, "简道云员工ID表更新", str(e))
print(str(e))
if __name__ == '__main__':
+8 -2
View File
@@ -7,13 +7,15 @@ from back_ground_module import CommonModule
from log_config import configure_task_logger, configure_error_task_logger
logger = configure_task_logger()
# 获取已经配置好的错误任务日志记录器
error_task_logger = configure_error_task_logger()
start_time = datetime.datetime.now()
api_instance = API()
common_module = CommonModule()
output_dir = "output" # 设置输出目录
# 创建输出目录(如果不存在)
import os
os.makedirs(output_dir, exist_ok=True)
class UpdateNGVData:
"""NGV数据每日新增"""
@@ -103,6 +105,10 @@ class UpdateNGVData:
# all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j1.iterrows()] # 前两天的全部数据
# all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j.iterrows()] # 前一天的全部数据
all_data = [self.row_to_dict(row, self.field_mapping) for index, row in filtered_df.iterrows()] # 增量数据
try:
filtered_df.to_csv(output_dir + "\\" + f"{task_start_time}NGV.csv")
except Exception as e:
error_task_logger.error(f"NGV过滤后数据保存异常: {e}")
#
data = {'api_key': Config.SaaS_Tasks_APP_ID, 'entry_id': Config.NGV_TASKS_ENTRY_ID, "data_list": all_data}
+65 -13
View File
@@ -6,6 +6,7 @@ from api import API
from back_ground_module import CommonModule
from log_config import configure_task_logger, configure_error_task_logger
import concurrent.futures
from tqdm import tqdm
# 获取已经配置好的常规日志记录器
logger = configure_task_logger()
@@ -39,7 +40,9 @@ class UpdateAllNGVDataDaily:
# 获取NGV数据
payload = {"api_key": "675b900991ad2491c69389ca", "entry_id": "675bb02bd2d53c2034c665e4"}
NGV_data_list = api_instance.entry_data_list(payload).get("data", [])
jdy_NGV_data = pd.DataFrame(NGV_data_list)
jdy_NGV_data.to_csv(os.path.join(output_dir, f"jdy_NGV_data.csv"))
payload = {"api_key": "6694d3c4fcb69ca9a111a6c4",
"entry_id": "6769204a1902c9341340a1bc",
@@ -57,6 +60,55 @@ class UpdateAllNGVDataDaily:
data_NGV_j = data_NGV_j[data_NGV_j['org_type'] == '一般']
data_NGV_j1 = data_NGV_j1[data_NGV_j1['org_type'] == '一般']
temp_jdy_NGV_data = jdy_NGV_data.copy()
temp_jdy_NGV_data.reset_index(inplace=True) # 如果 '门店id' 是索引,则先将其转换为普通列
if '_widget_1734062123071' not in temp_jdy_NGV_data.columns:
error_task_logger.error("'门店编码' 不存在")
temp_jdy_NGV_data.rename(columns={'_widget_1734062123071': 'org_code'}, inplace=True)
temp_jdy_NGV_data.set_index('org_code', inplace=True)
# 如果简道云存在,NGV不存在则标记NGV已删除
# 找出在 temp_jdy_NGV_data 中存在,但在 data_NGV_j 中不存在的索引
df1_index = data_NGV_j.set_index('org_code')
ids_in_jdy_not_in_df1 = temp_jdy_NGV_data.index[~temp_jdy_NGV_data.index.isin(df1_index.index)]
# 提取这些行,形成新的 DataFrame
only_in_temp_jdy = temp_jdy_NGV_data.loc[ids_in_jdy_not_in_df1]
# only_in_temp_jdy.to_csv(os.path.join(output_dir, 'only_in_temp_jdy.csv'), index_label='org_code')
# 对数据源已经去掉的门店进行标记
# 标记list
update_list = []
for index, item in only_in_temp_jdy.iterrows():
update_list.append(item["_id"])
data = {
'api_key': Config.SaaS_Tasks_APP_ID,
'entry_id': Config.NGV_TASKS_ENTRY_ID,
"data_ids": update_list,
"data": {"_widget_1754285499851": {"value": "未删除"}}
}
api_instance.entry_data_banch_update(data=data, max_retries=20)
mark_list = []
for index, only_row in only_in_temp_jdy.iterrows():
result = {}
if '_id' in only_in_temp_jdy.columns:
_id_value = str(only_row['_id']) if not pd.isna(only_row['_id']) else None
result["_id"] = _id_value
if result["_id"]:
data = {
'api_key': Config.SaaS_Tasks_APP_ID,
'entry_id': Config.NGV_TASKS_ENTRY_ID,
"data_id": result["_id"],
"data": {"_widget_1754285499851": {"value": "已删除"}}
}
append = {"data_id": result["_id"], "org_code": only_row["org_code"]}
mark_list.append(append)
# print(result["_id"])
api_instance.entry_data_update(data=data, max_retries=20)
# mark_df = pd.DataFrame(mark_list)
# mark_df.to_csv(os.path.join(output_dir, 'mark_list.csv'), index=False)
# 去除不需要的列
columns_to_remove = {'date_id', 'date_fmt', 'pt', 'etl_time'}
@@ -69,8 +121,8 @@ class UpdateAllNGVDataDaily:
df2_filtered = data_NGV_j1[columns_to_keep_df2]
# 设置唯一标识列作为索引
df1_set_index = df1_filtered.set_index('id_own_org')
df2_set_index = df2_filtered.set_index('id_own_org')
df1_set_index = df1_filtered.set_index('org_code')
df2_set_index = df2_filtered.set_index('org_code')
df1_set_index = df1_set_index.astype(str).replace(['nan', 'None'], '', ).fillna("")
df2_set_index = df2_set_index.astype(str).replace(['nan', 'None'], '', ).fillna("")
@@ -108,16 +160,16 @@ class UpdateAllNGVDataDaily:
df2_only_rows = df2_set_index.loc[df2_only_index].copy()
# 保存匹配结果
# df1_common.to_csv(os.path.join(output_dir, 'matched_results.csv'), index_label='id_own_org')
# df1_common.to_csv(os.path.join(output_dir, 'matched_results.csv'), index_label='org_type')
# 保存仅在df1中的行
# df1_only_rows.to_csv(os.path.join(output_dir, 'df1_only_rows.csv'), index_label='id_own_org')
# df1_only_rows.to_csv(os.path.join(output_dir, 'df1_only_rows.csv'), index_label='org_type')
# 保存仅在df2中的行
# df2_only_rows.to_csv(os.path.join(output_dir, 'df2_only_rows.csv'), index_label='id_own_org')
# data_NGV_j.to_csv(os.path.join(output_dir, 'data_NGV_j.csv'), index_label='id_own_org')
# data_NGV_j1.to_csv(os.path.join(output_dir, 'data_NGV_j1.csv'), index_label='id_own_org')
# jdy_NGV_data.to_csv(os.path.join(output_dir, 'jdy_NGV_data.csv'), index_label='id_own_org')
# df2_only_rows.to_csv(os.path.join(output_dir, 'df2_only_rows.csv'), index_label='org_type')
# data_NGV_j.to_csv(os.path.join(output_dir, 'data_NGV_j.csv'), index_label='org_type')
# data_NGV_j1.to_csv(os.path.join(output_dir, 'data_NGV_j1.csv'), index_label='org_type')
# jdy_NGV_data.to_csv(os.path.join(output_dir, 'jdy_NGV_data.csv'), index_label='org_type')
# print(f"\nCSV文件已保存到目录: {output_dir}")
@@ -126,10 +178,10 @@ class UpdateAllNGVDataDaily:
# temp_jdy_NGV_data.to_csv(os.path.join(output_dir, 'jdy_NGV_data.csv'), index=False)
temp_jdy_NGV_data.reset_index(inplace=True) # 如果 '门店id' 是索引,则先将其转换为普通列
# temp_jdy_NGV_data.to_csv(os.path.join(output_dir, 'jdy_NGV_data1.csv'), index=False)
if '_widget_1734062123069' not in temp_jdy_NGV_data.columns:
error_task_logger.error("'门店id' 不存在")
temp_jdy_NGV_data.rename(columns={'_widget_1734062123069': 'id_own_org'}, inplace=True)
temp_jdy_NGV_data.set_index('id_own_org', inplace=True)
if '_widget_1734062123071' not in temp_jdy_NGV_data.columns:
error_task_logger.error("'门店编码' 不存在")
temp_jdy_NGV_data.rename(columns={'_widget_1734062123071': 'org_code'}, inplace=True)
temp_jdy_NGV_data.set_index('org_code', inplace=True)
# 如果简道云存在,NGV不存在则标记NGV已删除
# 找出在 temp_jdy_NGV_data 中存在,但在 df1_common 中不存在的索引
@@ -201,7 +253,7 @@ class UpdateAllNGVDataDaily:
all_data = []
logger.info(f"今日更新数据量为:{len(df1_common)}")
for idx, row in df1_common.iterrows():
for idx, row in tqdm(df1_common.iterrows(), total=len(df1_common), desc="更新数据"):
result = {}
data_dict = {}
+1 -1
View File
@@ -9,7 +9,7 @@ class Config:
"database": "f6_bi",
"user": "LTAI5tMJsijFA9BS1R6uBpUT",
"password": "PajEQMIRWNRcipd8mYvlud2KHWJr6N",
"host": "hgprecn-cn-to34by1d0001-cn-shanghai.hologres.aliyuncs.com",
"host": "hgpostcn-cn-m1e4gikbu00l-cn-shanghai.hologres.aliyuncs.com",
"port": "80"
} # SaaS-NGV 数据库链接配置-postgresql
+11 -1
View File
@@ -52,7 +52,17 @@ common_module 增加日期字段转UTC 方法,支持处理Timestamp输入与
## 1.7更新
新增合伙人结算登记同步到BI、非标业绩提报脚本
新增合伙人结算登记同步到BI、非标业绩提报脚本、高德api匹配手机号脚本
数据源账号更新
简道云成员id与字段监控分离
完善接车宝空数据处理
异常回访新增过滤逻辑
+1 -1
View File
@@ -27,7 +27,7 @@ def main():
# 主循环,用于持续检查和执行定时任务
while True:
schedule.run_pending()
time.sleep(1)
time.sleep(60)
# 每 60 秒检查一次
now = datetime.now()
if now.hour == 23:
+15 -3
View File
@@ -306,7 +306,7 @@ class Module:
@staticmethod
def new_dealer_service_order_to_bi():
print("data_monitor")
print("new_dealer_service_order_to_bi")
try:
new_dealer_service_order_to_bi = back_ground_module.NewDealerServiceOrderToBI()
thread = threading.Thread(target=new_dealer_service_order_to_bi.main)
@@ -318,7 +318,7 @@ class Module:
@staticmethod
def non_standar_performance_to_BI():
print("data_monitor")
print("non_standar_performance_to_BI")
try:
non_standar_performance_to_BI = back_ground_module.NonStandardPerformanceToBI()
thread = threading.Thread(target=non_standar_performance_to_BI.main)
@@ -330,7 +330,7 @@ class Module:
@staticmethod
def partner_settlement_to_BI():
print("data_monitor")
print("partner_settlement_to_BI")
try:
partner_settlement_to_BI = back_ground_module.PartnerSettlementToBI()
thread = threading.Thread(target=partner_settlement_to_BI.main)
@@ -340,6 +340,18 @@ class Module:
print("data_Exception_Task", e)
return False
@staticmethod
def GD_match_phone_number():
    """Start the GDMatchPhoneNumber job on a background thread.

    Returns:
        The string "data_Exception_Task" once the worker thread has been
        started, or False if constructing the job or starting the thread
        raised an exception (the exception is printed, not re-raised).
    """
    print("GD_match_phone_number")
    try:
        matcher = back_ground_module.GDMatchPhoneNumber()
        # Fire-and-forget: the caller does not wait for the job to finish.
        threading.Thread(target=matcher.main).start()
    except Exception as exc:
        print("data_Exception_Task", exc)
        return False
    return "data_Exception_Task"
@staticmethod
def text3():
print("text3")
+1
View File
@@ -41,6 +41,7 @@ def execute_task(task_id) -> bool:
"经销商新签服务单转BI": Module.new_dealer_service_order_to_bi,
"合伙人结算登记同步到BI": Module.partner_settlement_to_BI,
"非标业绩提报转BI": Module.non_standar_performance_to_BI,
"高德匹配手机号": Module.GD_match_phone_number,
# 添加更多任务函数映射...
}
+111 -14
View File
@@ -122,6 +122,7 @@
"} # 衡时数据库链接配置-mysql\n",
"table_name = \"thailand_store_data_email\" # 请替换为实际的表名\n",
"# table_name = \"yida_process_time_statistics\"\n",
"\n",
"# 连接\n",
"connection = mysql.connector.connect(\n",
" host=HS_DB_Config[\"host\"],\n",
@@ -130,12 +131,12 @@
" database=HS_DB_Config[\"database\"]\n",
")\n",
"\n",
"print(f\"成功连接 {HS_DB_Config[\"database\"]}\")\n",
"print(f\"成功连接 {HS_DB_Config['database']}\")\n",
"cursor = connection.cursor()\n",
"\n",
"# 读取Excel文件\n",
"df = pd.read_excel(\n",
" r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Downloads\\门店日使用数据Workshops_Daily_Usage_Data_20250805101517.xlsx\",\n",
" r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Downloads\\门店日使用数据Workshop's_Daily_Usage_Data_20250805101517.xlsx\",\n",
" sheet_name=\"Sheet1\")\n",
"\n",
"# 处理空值 - 将NaN/NaT/空字符串统一转为None\n",
@@ -146,16 +147,23 @@
"placeholders = ', '.join(['%s'] * len(df.columns))\n",
"insert_query = f\"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})\"\n",
"\n",
"# 批量插入数据\n",
"# 批量插入数据,每次1000条\n",
"records = [tuple(row) for row in df.values]\n",
"cursor.executemany(insert_query, records)\n",
"connection.commit()\n",
"batch_size = 1000\n",
"total_records = len(records)\n",
"inserted_count = 0\n",
"\n",
"print(f\"成功导入 {cursor.rowcount} 条记录到 {table_name} 表\")\n",
"for i in range(0, total_records, batch_size):\n",
" batch = records[i:i+batch_size]\n",
" cursor.executemany(insert_query, batch)\n",
" connection.commit()\n",
" inserted_count += len(batch)\n",
" print(f\"已成功导入 {inserted_count}/{total_records} 条记录\")\n",
"\n",
"print(f\"总共成功导入 {inserted_count} 条记录到 {table_name} 表\")\n",
"\n",
"cursor.close()\n",
"\n",
"connection.close()\n"
"connection.close()"
],
"id": "a98f8dd324b53eeb",
"outputs": [
@@ -249,8 +257,8 @@
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-20T08:07:48.856164Z",
"start_time": "2025-08-20T08:07:48.650261Z"
"end_time": "2025-09-24T05:56:32.216878Z",
"start_time": "2025-09-24T05:56:31.974390Z"
}
},
"cell_type": "code",
@@ -266,8 +274,8 @@
"} # 衡时数据库链接配置-mysql\n",
"# table_name = \"new_dealer_service_order_to_bi\" # 替换为你的实际表名\n",
"\n",
"table_name = \"non_standard_performance_to_BI\"\n",
"column_name = \"开户/处理日期\"\n",
"table_name = \"partner_settlement_to_BI\"\n",
"column_name = \"提交时间\"\n",
"# new_column_type = \"VARCHAR(255)\" # 目标数据类型\n",
"new_column_type = \"DATETIME\" # 目标数据类型\n",
"\n",
@@ -328,13 +336,102 @@
"name": "stdout",
"output_type": "stream",
"text": [
"❌ 操作失败:1146 (42S02): Table 'f6operation_data_relay.non_standard_performance_to_bi' doesn't exist\n",
"✅ 成功添加字段: `提交时间`\n",
"数据库连接已关闭\n"
]
}
],
"execution_count": 4
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## 修改数据库列名",
"id": "16823b535b354ced"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-09-08T08:59:14.444529Z",
"start_time": "2025-09-08T08:59:14.153424Z"
}
},
"cell_type": "code",
"source": [
"import mysql.connector\n",
"from mysql.connector import Error\n",
"\n",
"HS_DB_Config = {\n",
" 'host': \"f6-public.rwlb.rds.aliyuncs.com\",\n",
" 'user': \"rw_operation_data_relay\",\n",
" 'password': \"m+q5Z4%IVuF9bf\",\n",
" 'database': \"f6operation_data_relay\"\n",
" } # 衡时数据库链接配置-mysql\n",
"\n",
"table_name = \"new_dealer_service_order_to_bi\"\n",
"old_column_name = \"开户/处理日期\"\n",
"new_column_name = \"开户处理日期\" # 替换为你想要的新列名\n",
"column_type = \"DATETIME\" # 列的数据类型\n",
"# column_type = \"VARCHAR(255)\" # 列的数据类型\n",
"\n",
"try:\n",
" # 连接数据库\n",
" connection = mysql.connector.connect(\n",
" host=HS_DB_Config[\"host\"],\n",
" user=HS_DB_Config[\"user\"],\n",
" password=HS_DB_Config[\"password\"],\n",
" database=HS_DB_Config[\"database\"]\n",
" )\n",
" cursor = connection.cursor()\n",
"\n",
" # 检查字段是否存在\n",
" cursor.execute(f\"\"\"\n",
" SELECT COLUMN_NAME\n",
" FROM INFORMATION_SCHEMA.COLUMNS\n",
" WHERE TABLE_SCHEMA = DATABASE()\n",
" AND TABLE_NAME = '{table_name}'\n",
" AND COLUMN_NAME = '{old_column_name}';\n",
" \"\"\")\n",
"\n",
" column_info = cursor.fetchone()\n",
"\n",
" if column_info:\n",
" # 字段存在时重命名\n",
" alter_query = f\"\"\"\n",
" ALTER TABLE `{table_name}`\n",
" CHANGE COLUMN `{old_column_name}` `{new_column_name}` {column_type};\n",
" \"\"\"\n",
" cursor.execute(alter_query)\n",
" print(f\"✅ 成功将字段 `{old_column_name}` 重命名为 `{new_column_name}`\")\n",
" else:\n",
" print(f\"❌ 字段 `{old_column_name}` 不存在,无法重命名\")\n",
"\n",
" connection.commit()\n",
"\n",
"except Error as e:\n",
" print(f\"❌ 操作失败:{e}\")\n",
" if connection.is_connected():\n",
" connection.rollback()\n",
"\n",
"finally:\n",
" if connection.is_connected():\n",
" cursor.close()\n",
" connection.close()\n",
" print(\"数据库连接已关闭\")"
],
"id": "1e4789710abfa1de",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ 成功将字段 `开户/处理日期` 重命名为 `开户处理日期`\n",
"数据库连接已关闭\n"
]
}
],
"execution_count": 9
},
{
"metadata": {},
"cell_type": "markdown",
@@ -407,7 +504,7 @@
{
"metadata": {},
"cell_type": "markdown",
"source": "## 删除区间数据",
"source": "### 删除区间数据",
"id": "8192d432b3f65bc2"
},
{
+255
View File
@@ -0,0 +1,255 @@
import pandas as pd
import requests
import json
from time import sleep
from module import F6_module
import mysql.connector
from mysql.connector import Error
from datetime import datetime
class CouponDataProcessor:
    """Fetch coupon usage records from the F6 web API and reload them into MySQL.

    The pipeline (see ``execute_pipeline``) logs in, pages through the coupon
    usage endpoint, flattens the records into a DataFrame and replaces the
    content of the target table with the fresh data.
    """

    def __init__(self):
        self.f6_module = F6_module()
        self.base_url = "https://yunxiu.f6car.cn/macan/coupon/info/pagingCouponUsageRecord"
        self.headers = {
            'accept': 'application/json, text/plain, */*',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'referer': 'https://yunxiu.f6car.cn/erp/view/index.html'
        }
        # NOTE(security): database and login credentials are hard-coded and
        # committed to version control; they should be moved to configuration
        # or environment variables and rotated.
        self.db_config = {
            'host': "f6-public.rwlb.rds.aliyuncs.com",
            'user': "rw_operation_data_relay",
            'password': "m+q5Z4%IVuF9bf",
            'database': "f6operation_data_relay"
        }  # MySQL connection settings
        self.username = "15222738424"
        self.password = "cw25966929@"

    def drop_column(self, cursor, table_name, column_name):
        """Drop ``column_name`` from ``table_name`` if the column exists.

        Args:
            cursor: An open MySQL cursor.
            table_name: Table to alter.
            column_name: Column to remove.

        Database errors are printed, not raised.
        """
        # NOTE: identifiers are interpolated into the SQL text, so this must
        # only be called with trusted, internally defined names.
        try:
            # Check whether the column exists before trying to drop it.
            cursor.execute(f"SHOW COLUMNS FROM {table_name} LIKE '{column_name}'")
            if cursor.fetchone():
                drop_query = f"ALTER TABLE {table_name} DROP COLUMN {column_name}"
                cursor.execute(drop_query)
                print(f"成功从表 {table_name} 中删除列 {column_name}")
            else:
                print(f"{table_name} 中不存在列 {column_name}")
        except Error as e:
            print(f"删除列失败: {e}")

    def _fetch_all_coupons(self, page_size=100):
        """Download every page of coupon usage records.

        Args:
            page_size: Number of records requested per page.

        Returns:
            A list with the records of all pages that could be fetched, or
            None when the first page fails or reports zero records.
        """
        cookies = self._login()

        params = {
            'keyword': '',
            'couponName': '',
            'currentPage': '1',
            'pageSize': str(page_size),
            'sorts': ''
        }

        # The first page tells us how many records (and thus pages) exist.
        first_page = self._fetch_page(params, cookies)
        if not first_page:
            return None

        total_records = first_page.get('info', {}).get('total', 0)
        if total_records == 0:
            return None

        total_pages = (total_records + page_size - 1) // page_size  # ceiling division
        print(f"共发现 {total_records} 条记录,{total_pages}")

        all_data = first_page.get('info', {}).get('list', [])
        for page in range(2, total_pages + 1):
            params['currentPage'] = str(page)
            print(f"正在获取第 {page}/{total_pages} 页...")
            page_data = self._fetch_page(params, cookies)
            # A page that still fails after all retries is skipped, so the
            # result may be incomplete (the failure is printed by _fetch_page).
            if page_data:
                all_data.extend(page_data.get('info', {}).get('list', []))
            sleep(0.5)  # be polite to the server

        return all_data

    def _login(self):
        """Log in through ``F6_module`` and return the session cookies as a dict."""
        res = self.f6_module.login_in(self.username, self.password)
        return requests.utils.dict_from_cookiejar(res.cookies)

    def _fetch_page(self, params, cookies, max_retries=3):
        """GET one page of results, retrying on failure.

        Args:
            params: Query parameters for the request.
            cookies: Session cookies obtained from ``_login``.
            max_retries: Maximum number of attempts.

        Returns:
            The decoded JSON body, or None if every attempt failed.
        """
        for attempt in range(max_retries):
            try:
                response = requests.get(
                    self.base_url,
                    params=params,
                    cookies=cookies,
                    headers=self.headers,
                    timeout=10
                )
                response.raise_for_status()
                return response.json()
            except Exception as e:
                print(f"请求失败(尝试 {attempt + 1}/{max_retries}: {str(e)}")
                if attempt < max_retries - 1:
                    sleep(2)
        return None

    def _process_data(self, raw_data):
        """Flatten the raw API records into a DataFrame ready for import.

        * ``couponCarList`` is serialised to a JSON string and the ``carId`` /
          ``carNo`` of its first entry are extracted into their own columns.
        * ``couponInfo`` is serialised to a JSON string and its keys are also
          expanded into ``couponInfo.<key>`` columns (best effort).
        * ``takeTime`` / ``useTime`` are converted from epoch milliseconds.
        * ``id`` is renamed to ``id1``.

        Args:
            raw_data: List of record dicts as returned by the API.

        Returns:
            The processed DataFrame (empty if ``raw_data`` is empty).
        """
        df = pd.DataFrame(raw_data)
        if df.empty:
            return df

        def is_missing(value):
            # pd.isna() returns an element-wise array for list input, which
            # cannot be used in a boolean context; only scalars can be "NA".
            if isinstance(value, (list, tuple)):
                # An empty list is stored as NULL rather than "[]".
                return len(value) == 0
            return bool(pd.api.types.is_scalar(value) and pd.isna(value))

        def to_json(value):
            return None if is_missing(value) else json.dumps(value, ensure_ascii=False)

        def first_car_field(value, key):
            # Only a non-empty list whose first entry is a dict has a "first car".
            if isinstance(value, (list, tuple)) and value and isinstance(value[0], dict):
                return value[0].get(key)
            return None

        if 'couponCarList' in df.columns:
            cars = df['couponCarList']
            # Extract from the raw objects, then serialise the column in place.
            df['carId'] = cars.apply(lambda v: first_car_field(v, 'carId'))
            df['carNo'] = cars.apply(lambda v: first_car_field(v, 'carNo'))
            df['couponCarList'] = cars.apply(to_json)

        if 'couponInfo' in df.columns:
            info = df['couponInfo']
            expanded = None
            try:
                expanded = pd.json_normalize(
                    [v if isinstance(v, dict) else {} for v in info]
                )
                # Keep the expanded columns row-aligned with df for the concat.
                expanded.index = df.index
                expanded = expanded.add_prefix('couponInfo.')
            except Exception as e:
                # Expansion is a convenience only; keep going without it.
                print(f"展开couponInfo时出错: {str(e)}")
                expanded = None
            df['couponInfo'] = info.apply(to_json)
            if expanded is not None:
                df = pd.concat([df, expanded], axis=1)

        # Timestamps arrive as epoch milliseconds.
        if 'takeTime' in df.columns:
            df['takeTime'] = pd.to_datetime(df['takeTime'], unit='ms')
        if 'useTime' in df.columns:
            df['useTime'] = pd.to_datetime(df['useTime'], unit='ms')

        # The table has its own "id" column (skipped on import), so the API id
        # is stored as "id1".
        if 'id' in df.columns:
            df = df.rename(columns={'id': 'id1'})

        return df

    def _build_records(self, df, db_columns):
        """Convert ``df`` into a list of tuples ordered like ``db_columns``.

        NaN/NaT become None and pandas timestamps become ``datetime`` objects
        so the MySQL driver can bind them.

        Raises:
            KeyError: If ``df`` lacks any of ``db_columns``.
        """
        missing = [col for col in db_columns if col not in df.columns]
        if missing:
            raise KeyError(f"数据缺少数据库列: {missing}")
        # Cast to object first so None is not coerced back to NaN/NaT.
        frame = df[db_columns].astype(object)
        return [tuple(self._convert_datetime(val) for val in row) for row in frame.values]

    def _import_to_database(self, df, table_name="coupon_usage_record_details", batch_size=1000):
        """Replace the content of ``table_name`` with the rows of ``df``.

        The table schema is read and the rows are prepared *before* the table
        is cleared, so a DataFrame that does not match the schema can no longer
        leave the table empty. Rows are inserted in batches; a failing batch is
        retried row by row to report the offending records.

        Args:
            df: Processed DataFrame; must contain every table column except ``id``.
            table_name: Target table.
            batch_size: Number of rows per ``executemany`` call.

        All errors are printed, not raised.
        """
        conn = None
        cursor = None
        try:
            conn = mysql.connector.connect(**self.db_config)
            cursor = conn.cursor()

            # Validate against the real schema before touching existing data.
            cursor.execute(f"SHOW COLUMNS FROM {table_name}")
            db_columns = [col[0] for col in cursor.fetchall() if col[0] != 'id']
            records = self._build_records(df, db_columns)

            print(f"正在清空表 {table_name} 中的数据...")
            cursor.execute(f"DELETE FROM {table_name}")
            cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1")
            conn.commit()
            print(f"已成功清空表 {table_name} 中的所有数据")

            columns = ', '.join([f"`{col}`" for col in db_columns])
            placeholders = ', '.join(['%s'] * len(db_columns))
            insert_query = f"INSERT INTO `{table_name}` ({columns}) VALUES ({placeholders})"

            print("开始导入数据...")
            total_rows = len(records)
            imported = 0
            for i in range(0, total_rows, batch_size):
                batch = records[i:i + batch_size]
                try:
                    cursor.executemany(insert_query, batch)
                    conn.commit()
                    imported += len(batch)
                    print(f"已导入 {min(i + batch_size, total_rows)}/{total_rows} 条记录")
                except Error as e:
                    conn.rollback()
                    print(f"批量导入失败: {e}")
                    # Fall back to row-by-row inserts to pinpoint the bad rows.
                    for idx, record in enumerate(batch):
                        try:
                            cursor.execute(insert_query, record)
                            conn.commit()
                            imported += 1
                        except Error as row_error:
                            print(f"{i + idx + 1} 行导入失败: {row_error}")
                            print(f"问题数据: {record}")
                            conn.rollback()

            # Report the rows that were actually committed, not the attempted total.
            print(f"成功导入 {imported} 条记录到 {table_name}")

        except Error as e:
            print(f"数据库操作失败: {e}")
        except Exception as e:
            print(f"发生错误: {e}")
        finally:
            if cursor:
                cursor.close()
            if conn:
                conn.close()

    @staticmethod
    def _convert_datetime(value):
        """Return a MySQL-bindable value: None for NA, ``datetime`` for Timestamps.

        Non-scalar values (lists, dicts, ...) are returned unchanged because
        ``pd.isna`` is element-wise for them.
        """
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return None
        if isinstance(value, pd.Timestamp):
            return value.to_pydatetime()
        return value

    def execute_pipeline(self):
        """Run fetch -> process -> import; failures are printed, not raised."""
        try:
            # 1. Fetch
            print("开始获取优惠券数据...")
            raw_data = self._fetch_all_coupons()
            if not raw_data:
                raise Exception("未能获取有效数据")

            # 2. Transform
            print("处理数据中...")
            processed_df = self._process_data(raw_data)

            # 3. Load
            self._import_to_database(processed_df)

            print("数据处理流程完成!")
        except Exception as e:
            print(f"流程执行失败: {e}")
# Script entry point: run the fetch -> process -> import pipeline once.
if __name__ == "__main__":
    processor = CouponDataProcessor()
    processor.execute_pipeline()
+334
View File
@@ -0,0 +1,334 @@
import sys
import pandas as pd
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
QHBoxLayout, QPushButton, QLabel, QFileDialog,
QTableWidget, QTableWidgetItem, QComboBox, QProgressBar,
QStatusBar, QGroupBox, QFormLayout, QMessageBox)
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from PyQt5.QtGui import QFont
from thefuzz import fuzz
# 确保中文正常显示
import matplotlib
matplotlib.rcParams["font.family"] = ["SimHei", "WenQuanYi Micro Hei", "Heiti TC"]
class CalculationThread(QThread):
    """Worker thread that scores name/address similarity so the UI stays responsive.

    Signals:
        progress_updated(int): Percentage of rows processed so far.
        calculation_finished(DataFrame): The input data plus the three score columns.
        error_occurred(str): Message of any exception raised while scoring.
    """
    progress_updated = pyqtSignal(int)
    calculation_finished = pyqtSignal(pd.DataFrame)
    error_occurred = pyqtSignal(str)

    def __init__(self, df, source_name_col, source_loc_col, target_name_col, target_loc_col):
        """Store a private copy of ``df`` and the four column names to compare."""
        super().__init__()
        self.df = df.copy()
        self.source_name_col = source_name_col
        self.source_loc_col = source_loc_col
        self.target_name_col = target_name_col
        self.target_loc_col = target_loc_col

    def run(self):
        """Compute the similarity scores for every row and emit the result."""
        try:
            total_rows = len(self.df)
            name_scores = []
            loc_scores = []
            combined_scores = []
            last_progress = -1

            # Use the row position (not the index label) for progress, so the
            # percentage is right for any index.
            for position, (_, row) in enumerate(self.df.iterrows()):
                progress = int((position / total_rows) * 100)
                # Emit only when the percentage changes to avoid flooding the
                # UI thread with one signal per row.
                if progress != last_progress:
                    self.progress_updated.emit(progress)
                    last_progress = progress

                name_similarity = fuzz.ratio(
                    str(row[self.source_name_col]), str(row[self.target_name_col])
                )
                loc_similarity = fuzz.ratio(
                    str(row[self.source_loc_col]), str(row[self.target_loc_col])
                )

                name_scores.append(name_similarity)
                loc_scores.append(loc_similarity)
                combined_scores.append((name_similarity + loc_similarity) / 2)

            # Assign positionally. Building a DataFrame from a list of Series
            # with new column names aligns on the Series labels and yields NaN,
            # and concat would duplicate the columns on a second run.
            self.df['名称相似度'] = name_scores
            self.df['地址相似度'] = loc_scores
            self.df['综合相似度'] = combined_scores

            self.calculation_finished.emit(self.df)

        except Exception as e:
            # Report the failure to the UI thread instead of killing the thread silently.
            self.error_occurred.emit(str(e))
class SimilarityCalculator(QMainWindow):
    """Main window: load an Excel file, pick columns, score similarity, save results."""

    def __init__(self):
        super().__init__()
        self.df = None  # currently loaded data (and results once computed)
        self.init_ui()

    def init_ui(self):
        """Build the widgets and wire up their signals."""
        self.setWindowTitle('地址名称模糊匹配相似度计算工具')
        self.setGeometry(100, 100, 1200, 800)

        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QVBoxLayout(central_widget)

        # File selection row
        file_layout = QHBoxLayout()
        self.file_path_label = QLabel('未选择文件')
        self.file_path_label.setWordWrap(True)
        self.select_file_btn = QPushButton('选择Excel文件')
        self.select_file_btn.clicked.connect(self.select_file)
        file_layout.addWidget(self.select_file_btn)
        file_layout.addWidget(self.file_path_label, 1)
        main_layout.addLayout(file_layout)

        # Column configuration group
        self.column_group = QGroupBox('列配置')
        column_layout = QFormLayout()

        self.source_name_combo = QComboBox()
        self.source_loc_combo = QComboBox()
        self.target_name_combo = QComboBox()
        self.target_loc_combo = QComboBox()

        column_layout.addRow('源名称列:', self.source_name_combo)
        column_layout.addRow('源位置列:', self.source_loc_combo)
        column_layout.addRow('目标名称列:', self.target_name_combo)
        column_layout.addRow('目标位置列:', self.target_loc_combo)

        self.column_group.setLayout(column_layout)
        self.column_group.setEnabled(False)  # enabled once a file is loaded
        main_layout.addWidget(self.column_group)

        # Action buttons
        btn_layout = QHBoxLayout()
        self.calculate_btn = QPushButton('开始计算相似度')
        self.calculate_btn.clicked.connect(self.start_calculation)
        self.calculate_btn.setEnabled(False)

        self.save_btn = QPushButton('保存结果')
        self.save_btn.clicked.connect(self.save_results)
        self.save_btn.setEnabled(False)

        btn_layout.addWidget(self.calculate_btn)
        btn_layout.addWidget(self.save_btn)
        main_layout.addLayout(btn_layout)

        # Progress bar (hidden until a calculation starts)
        self.progress_bar = QProgressBar()
        self.progress_bar.setVisible(False)
        main_layout.addWidget(self.progress_bar)

        # Result table
        self.result_table = QTableWidget()
        self.result_table.horizontalHeader().setStretchLastSection(True)
        main_layout.addWidget(self.result_table)

        # Status bar
        self.setStatusBar(QStatusBar())
        self.statusBar().showMessage('就绪')

    def select_file(self):
        """Ask for an Excel file, load it and refresh the column pickers and table."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, '选择Excel文件', '', 'Excel Files (*.xlsx *.xls)'
        )

        if file_path:
            try:
                self.df = pd.read_excel(file_path)
                self.file_path_label.setText(file_path)
                self.statusBar().showMessage(f'已加载文件,共 {len(self.df)} 行数据')

                # Fill the combo boxes and preselect the default columns.
                self.populate_column_combos()

                self.column_group.setEnabled(True)
                self.calculate_btn.setEnabled(True)

                self.display_data(self.df)

            except Exception as e:
                QMessageBox.critical(self, '错误', f'无法读取文件: {str(e)}')
                self.statusBar().showMessage('文件读取失败')

    def populate_column_combos(self):
        """Fill the four combo boxes with the loaded columns and apply defaults."""
        columns = self.df.columns.tolist()

        self.source_name_combo.clear()
        self.source_loc_combo.clear()
        self.target_name_combo.clear()
        self.target_loc_combo.clear()

        for col in columns:
            self.source_name_combo.addItem(col)
            self.source_loc_combo.addItem(col)
            self.target_name_combo.addItem(col)
            self.target_loc_combo.addItem(col)

        # Preselect the expected column when present; otherwise keep the
        # combo box's own default selection.
        default_cols = {
            self.source_name_combo: "源文件门店店名",
            self.source_loc_combo: "源文件地址",
            self.target_name_combo: "name",
            self.target_loc_combo: "address"
        }

        for combo, default_col in default_cols.items():
            if default_col in columns:
                combo.setCurrentText(default_col)

    def display_data(self, df):
        """Show up to the first 1000 rows of ``df`` in the table.

        Cells of the three similarity columns are shaded from red (0) to
        green (100).
        """
        # Local import: the file only imports QFont from QtGui at module level.
        from PyQt5.QtGui import QColor

        # Cap the number of rendered rows to keep the UI responsive.
        display_df = df.head(1000)

        self.result_table.setRowCount(len(display_df))
        self.result_table.setColumnCount(len(display_df.columns))

        # Header labels must be strings; Excel headers may be numbers or dates.
        self.result_table.setHorizontalHeaderLabels([str(col) for col in display_df.columns])

        score_columns = ('名称相似度', '地址相似度', '综合相似度')

        for row_idx, (_, row) in enumerate(display_df.iterrows()):
            for col_idx, value in enumerate(row):
                item = QTableWidgetItem(str(value))
                item.setTextAlignment(Qt.AlignCenter)

                if display_df.columns[col_idx] in score_columns:
                    try:
                        val = float(value)
                    except (TypeError, ValueError):
                        val = None
                    # "val == val" is False only for NaN, which has no colour.
                    if val is not None and val == val:
                        level = int(max(0.0, min(100.0, val)) * 2.55)
                        # setBackground needs a QBrush/QColor; a CSS-like
                        # string raises TypeError.
                        item.setBackground(QColor(255 - level, level, 100))
                        item.setForeground(Qt.white if val < 50 else Qt.black)

                self.result_table.setItem(row_idx, col_idx, item)

        self.result_table.resizeColumnsToContents()

    def start_calculation(self):
        """Validate the column selection and start the background calculation."""
        source_name_col = self.source_name_combo.currentText()
        source_loc_col = self.source_loc_combo.currentText()
        target_name_col = self.target_name_combo.currentText()
        target_loc_col = self.target_loc_combo.currentText()

        # The combo boxes only offer existing columns, so a non-empty
        # selection is sufficient.
        if not all([source_name_col, source_loc_col, target_name_col, target_loc_col]):
            QMessageBox.warning(self, '警告', '请选择所有列')
            return

        # Lock the UI while the worker is running.
        self.calculate_btn.setEnabled(False)
        self.select_file_btn.setEnabled(False)
        self.save_btn.setEnabled(False)

        self.progress_bar.setVisible(True)
        self.progress_bar.setValue(0)
        self.statusBar().showMessage('正在计算相似度...')

        # Keep a reference on self so the thread is not garbage-collected.
        self.calc_thread = CalculationThread(
            self.df, source_name_col, source_loc_col, target_name_col, target_loc_col
        )
        self.calc_thread.progress_updated.connect(self.update_progress)
        self.calc_thread.calculation_finished.connect(self.on_calculation_finished)
        self.calc_thread.error_occurred.connect(self.on_calculation_error)
        self.calc_thread.start()

    def update_progress(self, value):
        """Reflect the worker's progress percentage in the bar and status text."""
        self.progress_bar.setValue(value)
        self.statusBar().showMessage(f'正在计算相似度... {value}%')

    def on_calculation_finished(self, result_df):
        """Show the results and re-enable the controls."""
        self.df = result_df
        self.display_data(self.df)
        self.progress_bar.setValue(100)
        self.statusBar().showMessage('相似度计算完成')

        self.calculate_btn.setEnabled(True)
        self.select_file_btn.setEnabled(True)
        self.save_btn.setEnabled(True)

        QMessageBox.information(self, '完成', '相似度计算已完成')

    def on_calculation_error(self, error_msg):
        """Report a worker failure and re-enable the controls."""
        self.statusBar().showMessage('计算出错')
        QMessageBox.critical(self, '计算错误', f'计算过程中发生错误: {error_msg}')

        self.calculate_btn.setEnabled(True)
        self.select_file_btn.setEnabled(True)

    def save_results(self):
        """Ask for a target path and write the current data to an .xlsx file."""
        if self.df is None:
            QMessageBox.warning(self, '警告', '没有可保存的数据')
            return

        file_path, _ = QFileDialog.getSaveFileName(
            self, '保存结果', '', 'Excel Files (*.xlsx)'
        )

        if file_path:
            try:
                # Compare case-insensitively so "report.XLSX" does not become
                # "report.XLSX.xlsx".
                if not file_path.lower().endswith('.xlsx'):
                    file_path += '.xlsx'

                self.df.to_excel(file_path, index=False)
                self.statusBar().showMessage(f'结果已保存到 {file_path}')
                QMessageBox.information(self, '成功', f'结果已成功保存到 {file_path}')

            except PermissionError:
                QMessageBox.critical(self, '权限错误',
                                     '保存失败:没有写入权限,请检查文件是否被占用,或选择其他路径/文件名。')
            except FileNotFoundError:
                QMessageBox.critical(self, '路径错误',
                                     '保存失败:目标路径不存在,请选择有效的保存位置。')
            except Exception as e:
                QMessageBox.critical(self, '未知错误', f'保存文件失败: {str(e)}')
                self.statusBar().showMessage('保存文件失败')
# GUI entry point: create the Qt application, show the main window and run
# the event loop until the window is closed.
if __name__ == '__main__':
    app = QApplication(sys.argv)
    # Set a global font so that Chinese text renders correctly.
    font = QFont()
    font.setFamily("SimHei")
    app.setFont(font)
    window = SimilarityCalculator()
    window.show()
    sys.exit(app.exec_())
+1
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+1037
View File
File diff suppressed because it is too large Load Diff
+383 -512
View File
@@ -425,575 +425,446 @@
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-04T08:30:00.126830Z",
"start_time": "2025-08-04T08:27:19.632414Z"
"end_time": "2025-10-15T07:03:16.566548Z",
"start_time": "2025-10-15T07:02:49.835394Z"
}
},
"cell_type": "code",
"source": [
"# -*- coding: utf-8 -*-\n",
"import os\n",
"import datetime\n",
"import concurrent.futures\n",
"import pandas as pd\n",
"import datetime\n",
"from config import Config\n",
"from api import API\n",
"from back_ground_module import CommonModule\n",
"from log_config import configure_task_logger, configure_error_task_logger\n",
"import concurrent.futures\n",
"\n",
"# 获取已经配置好的常规日志记录器\n",
"logger = configure_task_logger()\n",
"\n",
"# 获取已经配置好的错误任务日志记录器\n",
"error_task_logger = configure_error_task_logger()\n",
"\n",
"start_time = datetime.datetime.now()\n",
"api_instance = API()\n",
"common_module = CommonModule()\n",
"\n",
"\n",
"class UpdateAllNGVDataDaily:\n",
" \"\"\"NGV数据每日更新\"\"\"\n",
"\n",
" def __init__(self):\n",
" self.field_mapping = {}\n",
" # self.fields()\n",
"\n",
" def main(self):\n",
" # 保存为CSV文件\n",
" output_dir = \"output\" # 设置输出目录\n",
"\n",
" # 创建输出目录(如果不存在)\n",
" import os\n",
" os.makedirs(output_dir, exist_ok=True)\n",
"\n",
" task_start_time = datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
" # 获取NGV数据\n",
" payload = {\"api_key\": \"675b900991ad2491c69389ca\", \"entry_id\": \"675bb02bd2d53c2034c665e4\"}\n",
" # NGV_data_list = api_instance.entry_data_list(payload).get(\"data\", [])\n",
" # jdy_NGV_data = pd.DataFrame(NGV_data_list)\n",
"\n",
" payload = {\"api_key\": \"6694d3c4fcb69ca9a111a6c4\",\n",
" \"entry_id\": \"6769204a1902c9341340a1bc\",\n",
" }\n",
" staff_id = api_instance.entry_data_list(payload)\n",
" staff_id_list = staff_id.get(\"data\") # api请求格式,将数据封装在data字典里\n",
"\n",
" # for i in range(1,2):\n",
" data_NGV_j = common_module.get_ngv_details(days_back=1)\n",
" data_NGV_j.to_csv(os.path.join(output_dir, f\"data_NGV_j.csv\"), index=False)\n",
" data_NGV_j1 = common_module.get_ngv_details(days_back=2)\n",
"\n",
" # 对 data_NGV 进行进一步的过滤,只保留 org_type 为 \"一般\" 的记录\n",
" data_NGV_j = data_NGV_j[data_NGV_j['org_type'] == '一般']\n",
" data_NGV_j1 = data_NGV_j1[data_NGV_j1['org_type'] == '一般']\n",
"\n",
" # 去除不需要的列\n",
" columns_to_remove = {'date_id', 'date_fmt', 'pt', 'etl_time'}\n",
"\n",
" # 获取所有列名并计算要保留的列\n",
" columns_to_keep_df1 = list(set(data_NGV_j.columns) - columns_to_remove)\n",
" columns_to_keep_df2 = list(set(data_NGV_j1.columns) - columns_to_remove)\n",
"\n",
" # 过滤DataFrame以去除指定列\n",
" df1_filtered = data_NGV_j[columns_to_keep_df1]\n",
" df2_filtered = data_NGV_j1[columns_to_keep_df2]\n",
"\n",
"\n",
"if __name__ == '__main__':\n",
" start = UpdateAllNGVDataDaily()\n",
" start.main()\n"
],
"id": "82d58cced4a6e02",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"已获取 100 条数据\n",
"已获取 145 条数据\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001B[92m2025-10-15 15:02:50,242 - api.py - task_logger - INFO - 获取了145条数据\u001B[0m\n"
]
}
],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-10-15T07:14:36.277142Z",
"start_time": "2025-10-15T07:14:11.975620Z"
}
},
"cell_type": "code",
"source": [
"# -*- coding: utf-8 -*-\n",
"import pandas as pd\n",
"import datetime\n",
"from config import Config\n",
"from api import API\n",
"from back_ground_module import CommonModule\n",
"from log_config import configure_task_logger, configure_error_task_logger\n",
"\n",
"# 获取日志记录器\n",
"logger = configure_task_logger()\n",
"error_logger = configure_error_task_logger()\n",
"\n",
"class NGVDataUpdater:\n",
" \"\"\"NGV数据每日更新处理器\"\"\"\n",
"# 获取已经配置好的错误任务日志记录器\n",
"error_task_logger = configure_error_task_logger()\n",
"start_time = datetime.datetime.now()\n",
"api_instance = API()\n",
"common_module = CommonModule()\n",
" # 保存为CSV文件\n",
"output_dir = \"output\" # 设置输出目录\n",
"\n",
"# 创建输出目录(如果不存在)\n",
"import os\n",
"os.makedirs(output_dir, exist_ok=True)\n",
"\n",
"class UpdateNGVData:\n",
" \"\"\"NGV数据每日新增\"\"\"\n",
"\n",
" def __init__(self):\n",
" self.api = API()\n",
" self.common = CommonModule()\n",
" self.output_dir = \"output\"\n",
" self.start_time = datetime.datetime.now()\n",
" self.field_mapping = self._initialize_field_mapping()\n",
" self.staff_id_list = None\n",
" self.field_mapping = {}\n",
" self.fields()\n",
"\n",
" # 创建输出目录\n",
" os.makedirs(self.output_dir, exist_ok=True)\n",
" def load_all_data(self):\n",
" # 获取简道云员工id\n",
" payload = {\"api_key\": \"6694d3c4fcb69ca9a111a6c4\",\n",
" \"entry_id\": \"6769204a1902c9341340a1bc\",\n",
" }\n",
" staff_id = api_instance.entry_data_list(payload)\n",
" self.staff_id_list = staff_id.get(\"data\") # api请求格式,将数据封装在data字典里\n",
"\n",
" def _initialize_field_mapping(self):\n",
" \"\"\"初始化字段映射关系\"\"\"\n",
" return dict(date_id='_widget_1734062123065', date_fmt='_widget_1734062123066',\n",
" id_own_group='_widget_1734062123067', group_name='_widget_1734062123068',\n",
" id_own_org='_widget_1734062123069', org_name='_widget_1734062123070',\n",
" org_code='_widget_1734062123071', group_grade='_widget_1734062123072',\n",
" org_type='_widget_1734062123073', org_status='_widget_1734062123074',\n",
" saas_version='_widget_1734062123075', is_wechat='_widget_1734062123076',\n",
" is_mini_app='_widget_1734062123077', is_wx_shop='_widget_1734062123078',\n",
" is_camera_service='_widget_1734062123079',\n",
" is_maintenance_service='_widget_1734062123080',\n",
" saas_create_time='_widget_1734062123081', expiry_time='_widget_1734062123082',\n",
" saas_use_days='_widget_1734062123083', saas_use_year='_widget_1734062123084',\n",
" is_main_org='_widget_1734062123085', license_code='_widget_1734062123086',\n",
" license_name='_widget_1734062123087', org_crm_id='_widget_1734062123088',\n",
" province_id='_widget_1734062123089', province_name='_widget_1734062123090',\n",
" city_id='_widget_1734062123091', city_name='_widget_1734062123092',\n",
" area_id='_widget_1734062123093', area_name='_widget_1734062123094',\n",
" region_name='_widget_1734062123095', region_short_name='_widget_1734062123096',\n",
" branch_name='_widget_1734062123097', carzone_store_id='_widget_1734062123098',\n",
" carzone_store_name='_widget_1734062123099',\n",
" customer_carzone_id='_widget_1734062123100', salesmen='_widget_1734062123101',\n",
" area_manager='_widget_1734062123102', service_salesmen='_widget_1734062123103',\n",
" impl_principal='_widget_1734062123104',\n",
" service_impl_principal='_widget_1734062123105',\n",
" active_user_count='_widget_1734062123106', active_user_type='_widget_1734062123107',\n",
" limit_user_count='_widget_1734062123108', limit_user_type='_widget_1734062123109',\n",
" is_n='_widget_1734062123110', is_g='_widget_1734062123111',\n",
" is_v='_widget_1734062123112', is_visited='_widget_1734062123113',\n",
" is_active='_widget_1734062123114', active_status_fmt='_widget_1734062123115',\n",
" bill_count_last_30_day='_widget_1734062123116',\n",
" bill_day_count_last_30_day='_widget_1734062123117',\n",
" bill_day_count_this_month='_widget_1734062123118',\n",
" bill_count_last_7_day='_widget_1734062123119',\n",
" bill_day_count_last_7_day='_widget_1734062123120', pv_count='_widget_1734062123121',\n",
" uv_count='_widget_1734062123122', bill_count_1d='_widget_1734062123123',\n",
" bill_count_2d='_widget_1734062123124', bill_count_3d='_widget_1734062123125',\n",
" bill_count_4d='_widget_1734062123126', bill_count_5d='_widget_1734062123127',\n",
" bill_count_6d='_widget_1734062123128', bill_count_7d='_widget_1734062123129',\n",
" bill_count_8d='_widget_1734062123130', bill_count_9d='_widget_1734062123131',\n",
" bill_count_10d='_widget_1734062123132', bill_count_11d='_widget_1734062123133',\n",
" bill_count_12d='_widget_1734062123134', bill_count_13d='_widget_1734062123135',\n",
" bill_count_14d='_widget_1734062123136', bill_count_15d='_widget_1734062123137',\n",
" bill_count_16d='_widget_1734062123138', bill_count_17d='_widget_1734062123139',\n",
" bill_count_18d='_widget_1734062123140', bill_count_19d='_widget_1734062123141',\n",
" bill_count_20d='_widget_1734062123142', bill_count_21d='_widget_1734062123143',\n",
" bill_count_22d='_widget_1734062123144', bill_count_23d='_widget_1734062123145',\n",
" bill_count_24d='_widget_1734062123146', bill_count_25d='_widget_1734062123147',\n",
" bill_count_26d='_widget_1734062123148', bill_count_27d='_widget_1734062123149',\n",
" bill_count_28d='_widget_1734062123150', bill_count_29d='_widget_1734062123151',\n",
" bill_count_30d='_widget_1734062123152', bill_count_31d='_widget_1734062123153',\n",
" etl_time='_widget_1734062123154',\n",
" maintain_bill_count_last_30_day='_widget_1734062123155',\n",
" washing_bill_count_last_30_day='_widget_1734062123156',\n",
" maintain_bill_day_count_last_30_day='_widget_1734062123157',\n",
" washing_bill_day_count_last_30_day='_widget_1734062123158',\n",
" retail_bill_count_last_30_day='_widget_1734062123159',\n",
" retail_bill_day_count_last_30_day='_widget_1734062123160',\n",
" purchase_bill_count_last_30_day='_widget_1734062123161',\n",
" purchase_bill_day_count_last_30_day='_widget_1734062123162',\n",
" card_bill_count_last_30_day='_widget_1734062123163',\n",
" card_bill_day_count_last_30_day='_widget_1734062123164',\n",
" gd_sales_bill_count_last_30_day='_widget_1734062123165',\n",
" gd_sales_bill_day_count_last_30_day='_widget_1734062123166',\n",
" g_change_flag='_widget_1734062123167', saas_package='_widget_1734062123168',\n",
" manage_model='_widget_1734062123169', contacts='_widget_1734062123170',\n",
" contact_number='_widget_1734062123171', contact_mobile='_widget_1734062123172',\n",
" g_month_count='_widget_1734062123173', g_month_percentage='_widget_1734062123174',\n",
" is_install_service='_widget_1734062123175',\n",
" install_create_time='_widget_1734062123176', last_end_date='_widget_1734062123177',\n",
" renew_date='_widget_1734062123178', is_chain_owner='_widget_1734062123179',\n",
" group_org_count='_widget_1734062123180',\n",
" recent_bill_warning_days='_widget_1734062123181',\n",
" g_change_flag_d='_widget_1734062123182', g_lost_warning_days='_widget_1734062123183',\n",
" saas_edition_fmt='_widget_1734062123184', g_flag_1m='_widget_1734062123185',\n",
" g_flag_2m='_widget_1734062123186', g_flag_3m='_widget_1734062123187',\n",
" g_flag_4m='_widget_1734062123188', g_flag_5m='_widget_1734062123189',\n",
" g_flag_6m='_widget_1734062123190', g_flag_day_count='_widget_1734062123191',\n",
" add_org_flag='_widget_1734062123192', pt='_widget_1734062123193',\n",
" org_size='_widget_1734062123194', qualification_type_fmt='_widget_1734062123195',\n",
" business_scope_fmt='_widget_1734062123196', store_type_fmt='_widget_1734062123197',\n",
" area='_widget_1734062123198', station_number='_widget_1734062123199',\n",
" header_type_fmt='_widget_1734062123200', org_stage='_widget_1734062123201',\n",
" g_count_this_month='_widget_1734062123202',\n",
" saas_customer_type='_widget_1734062123203', technician='_widget_1734062123204',\n",
" tmall_maintain_service_status_desc='_widget_1734062123205',\n",
" date_fmt_date='_widget_1749000071375',\n",
" area_manager_staff_id='_widget_1748496855779',\n",
" service_impl_principal_staff_id=\"_widget_1748496855780\",\n",
" service_salesmen_staff_id=\"_widget_1748496855778\",\n",
" technician_staff_id=\"_widget_1751877712235\",\n",
" saas_create_time_date=\"_widget_1749000071377\",\n",
" expiry_time_date=\"_widget_1749000071382\",\n",
" install_create_time_date=\"_widget_1749000071384\",\n",
" last_end_date_date=\"_widget_1749000071389\", renew_date_date=\"_widget_1749000071391\")\n",
"\n",
" def _get_ngv_data(self, days_back):\n",
" \"\"\"获取NGV数据\"\"\"\n",
" try:\n",
" data = self.common.get_ngv_details(days_back=days_back)\n",
" return data[data['org_type'] == '一般']\n",
" except Exception as e:\n",
" error_logger.error(f\"获取NGV数据失败: {str(e)}\")\n",
" raise\n",
"\n",
" def _get_jdy_data(self):\n",
" \"\"\"获取简道云数据\"\"\"\n",
" try:\n",
" payload = {\n",
" \"api_key\": \"675b900991ad2491c69389ca\",\n",
" \"entry_id\": \"675bb02bd2d53c2034c665e4\"\n",
" }\n",
" response = self.api.entry_data_list(payload)\n",
" return pd.DataFrame(response.get(\"data\", []))\n",
" except Exception as e:\n",
" error_logger.error(f\"获取简道云数据失败: {str(e)}\")\n",
" raise\n",
"\n",
" def _get_staff_data(self):\n",
" \"\"\"获取员工数据\"\"\"\n",
" try:\n",
" payload = {\n",
" \"api_key\": \"6694d3c4fcb69ca9a111a6c4\",\n",
" \"entry_id\": \"6769204a1902c9341340a1bc\",\n",
" }\n",
" response = self.api.entry_data_list(payload)\n",
" return response.get(\"data\", [])\n",
" except Exception as e:\n",
" error_logger.error(f\"获取员工数据失败: {str(e)}\")\n",
" raise\n",
"\n",
" def _prepare_dataframes(self, df1, df2):\n",
" \"\"\"准备数据框进行比较\"\"\"\n",
" # 去除不需要的列\n",
" columns_to_remove = {'date_id', 'date_fmt', 'pt', 'etl_time'}\n",
" df1 = df1.drop(columns=columns_to_remove.intersection(df1.columns))\n",
" df2 = df2.drop(columns=columns_to_remove.intersection(df2.columns))\n",
"\n",
" # 设置索引并处理空值\n",
" df1 = df1.set_index('id_own_org').astype(str).replace(['nan', 'None'], '').fillna(\"\")\n",
" df2 = df2.set_index('id_own_org').astype(str).replace(['nan', 'None'], '').fillna(\"\")\n",
"\n",
" return df1, df2\n",
"\n",
" def _compare_dataframes(self, df1, df2):\n",
" \"\"\"比较两个数据框\"\"\"\n",
" # 找到共有的索引\n",
" common_index = df1.index.intersection(df2.index)\n",
"\n",
" # 重新索引并确保列顺序一致\n",
" common_columns = df1.columns.intersection(df2.columns)\n",
" df1_common = df1.loc[common_index, common_columns]\n",
" df2_common = df2.loc[common_index, common_columns]\n",
"\n",
" # 比较内容\n",
" matches = (df1_common == df2_common).all(axis=1)\n",
" df1_common['match_status'] = matches.map({True: '一致', False: '不一致'})\n",
"\n",
" # 获取仅在某一数据框中的行\n",
" df1_only = df1.loc[df1.index.difference(df2.index)]\n",
" df2_only = df2.loc[df2.index.difference(df1.index)]\n",
"\n",
" return df1_common, df1_only, df2_only\n",
"\n",
" def _process_jdy_data(self, jdy_data):\n",
" \"\"\"处理简道云数据\"\"\"\n",
" jdy_data = jdy_data.copy()\n",
" if '_widget_1734062123069' not in jdy_data.columns:\n",
" logger.warning(\"列 '门店id' 不存在\")\n",
" jdy_data = jdy_data.rename(columns={'_widget_1734062123069': 'id_own_org'})\n",
" return jdy_data.set_index('id_own_org')\n",
"\n",
" def _mark_deleted_stores(self, jdy_data, ngv_data):\n",
" \"\"\"标记已删除的门店\"\"\"\n",
" ids_in_jdy_not_in_ngv = jdy_data.index[~jdy_data.index.isin(ngv_data.index)]\n",
" only_in_jdy = jdy_data.loc[ids_in_jdy_not_in_ngv]\n",
"\n",
" for _, row in only_in_jdy.iterrows():\n",
" if '_id' in row and not pd.isna(row['_id']):\n",
" data = {\n",
" 'api_key': Config.SaaS_Tasks_APP_ID,\n",
" 'entry_id': Config.NGV_TASKS_ENTRY_ID,\n",
" \"data_id\": str(row['_id']),\n",
" \"data\": {\"_widget_1754285499851\": {\"value\": \"已删除\"}}\n",
" }\n",
" self.api.entry_data_update(data=data, max_retries=20)\n",
"\n",
" def _process_datetime_fields(self, df):\n",
" \"\"\"处理日期时间字段\"\"\"\n",
" time_columns = ['saas_create_time', 'expiry_time', 'install_create_time', \"last_end_date\", \"renew_date\"]\n",
" df = df.copy()\n",
"\n",
" for col in time_columns:\n",
" if col in df.columns:\n",
" # 转换为datetime类型\n",
" df[col] = pd.to_datetime(df[col], errors='coerce', utc=False)\n",
" # 本地化为北京时间并转换为UTC\n",
" df[col + '_date'] = (\n",
" df[col]\n",
" .dt.tz_localize('Asia/Shanghai', ambiguous='infer', nonexistent='NaT')\n",
" .dt.tz_convert('UTC')\n",
" .dt.strftime('%Y-%m-%dT%H:%M:%SZ')\n",
" )\n",
" return df\n",
"\n",
" def _process_staff_fields(self, df, staff_data):\n",
" \"\"\"处理员工字段\"\"\"\n",
" staff_columns = ['area_manager', 'service_impl_principal', \"service_salesmen\", \"technician\"]\n",
"\n",
" for col in staff_columns:\n",
" if col in df.columns:\n",
" # 创建员工ID映射\n",
" staff_map = {\n",
" str(staff['_widget_1734942794144']): staff['_widget_1734942794145']\n",
" for staff in staff_data\n",
" }\n",
" # 映射员工ID\n",
" df[col + \"_staff_id\"] = df[col].map(staff_map)\n",
"\n",
" return df\n",
"\n",
" def _update_ngv_data(self, df):\n",
" \"\"\"更新NGV数据\"\"\"\n",
" futures = []\n",
"\n",
" for _, row in df.iterrows():\n",
" data_dict = {}\n",
"\n",
" # 构建数据字典\n",
" for col_name, widget_id in self.field_mapping.items():\n",
" if col_name in df.columns:\n",
" value = row[col_name]\n",
" clean_value = None if pd.isna(value) else value\n",
" data_dict[widget_id] = {\"value\": clean_value}\n",
"\n",
" # 根据是否有_id决定是更新还是创建\n",
" if '_id' in row and not pd.isna(row['_id']):\n",
" data = {\n",
" 'api_key': Config.SaaS_Tasks_APP_ID,\n",
" 'entry_id': Config.NGV_TASKS_ENTRY_ID,\n",
" \"data_id\": str(row['_id']),\n",
" \"data\": data_dict\n",
" }\n",
" futures.append(self.api.entry_data_update(data=data, max_retries=20))\n",
" else:\n",
" data = {\n",
" 'api_key': Config.SaaS_Tasks_APP_ID,\n",
" 'entry_id': Config.NGV_TASKS_ENTRY_ID,\n",
" \"data\": data_dict\n",
" }\n",
" futures.append(self.api.data_batch_create(data=data, max_retries=20))\n",
"\n",
" # 等待所有请求完成\n",
" for future in concurrent.futures.as_completed(futures):\n",
" try:\n",
" future.result()\n",
" except Exception as exc:\n",
" error_logger.error(f\"请求发生异常: {exc}\")\n",
" @staticmethod\n",
" def get_staff_id(row_item, name):\n",
" \"\"\"辅助函数,用于获取员工ID\"\"\"\n",
" if str(row_item[\"_widget_1734942794144\"]) == str(name): # 检查姓名是否匹配\n",
" return row_item[\"_widget_1734942794145\"] # 返回员工ID\n",
" return None\n",
"\n",
" def main(self):\n",
" \"\"\"执行数据更新流程\"\"\"\n",
" task_start_time = datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
"\n",
" try:\n",
" logger.info(\"开始NGV数据更新流程\")\n",
" self.load_all_data()\n",
" logger.info(f\"数据加载完成\")\n",
"\n",
" # 获取数据\n",
" data_ngv_today = self._get_ngv_data(days_back=1)\n",
" data_ngv_yesterday = self._get_ngv_data(days_back=2)\n",
" jdy_data = self._get_jdy_data()\n",
" staff_data = self._get_staff_data()\n",
" data_NGV_j = common_module.get_ngv_details(days_back=2)\n",
" data_NGV_j1 = common_module.get_ngv_details(days_back=3)\n",
"\n",
" # 准备和比较数据\n",
" df1, df2 = self._prepare_dataframes(data_ngv_today, data_ngv_yesterday)\n",
" df_common, _, _ = self._compare_dataframes(df1, df2)\n",
" # 找出在 data_NGV_j 中存在但在 data_NGV_j1 中不存在的 data_id\n",
" unique_data_ids = data_NGV_j[~data_NGV_j['org_code'].isin(data_NGV_j1['org_code'])]\n",
"\n",
" # 处理简道云数据\n",
" jdy_data_processed = self._process_jdy_data(jdy_data)\n",
" # 创建一个新的 DataFrame 保存这些唯一的 data_id 及其对应的数据\n",
" new_df = data_NGV_j[data_NGV_j['org_code'].isin(unique_data_ids['org_code'])]\n",
"\n",
" # 标记已删除的门店\n",
" self._mark_deleted_stores(jdy_data_processed, df_common)\n",
" # 对 new_df 进行进一步的过滤,只保留 org_type 为 \"一般\" 的记录\n",
" data_NGV_j = data_NGV_j[data_NGV_j['org_type'] == '一般']\n",
" data_NGV_j1 = data_NGV_j1[data_NGV_j1['org_type'] == '一般']\n",
" filtered_df = new_df[new_df['org_type'] == '一般']\n",
"\n",
" # 合并简道云ID\n",
" df_common = df_common.join(jdy_data_processed[\"_id\"], how='left')\n",
" df_common = df_common[df_common['match_status'] == '不一致']\n",
" # 日期字段转换为日期格式\n",
" time_columns = ['date_fmt', 'saas_create_time', 'expiry_time', 'install_create_time', \"last_end_date\",\n",
" \"renew_date\"]\n",
" new_filtered_df = filtered_df.copy() # 复制df,以调整时间\n",
" for col in time_columns:\n",
" # 1. 转换为datetime类型(带错误处理)\n",
" # 使用.loc安全赋值\n",
" new_filtered_df[col] = pd.to_datetime(filtered_df[col], errors='coerce', utc=False)\n",
"\n",
" # 处理特殊字段\n",
" df_common = self._process_datetime_fields(df_common)\n",
" df_common = self._process_staff_fields(df_common, staff_data)\n",
" # 2. 优化后的时区转换(高效向量化操作)\n",
" filtered_df[col + '_date'] = (\n",
" new_filtered_df[col]\n",
" # 本地化为北京时间(东八区)\n",
" .dt.tz_localize('Asia/Shanghai', ambiguous='infer', nonexistent='NaT')\n",
" # 转换为UTC时区\n",
" .dt.tz_convert('UTC')\n",
" # 格式化为ISO8601字符串\n",
" .dt.strftime('%Y-%m-%dT%H:%M:%SZ')\n",
" )\n",
" logger.info(f\"时间转换完成\")\n",
"\n",
" # 更新数据\n",
" self._update_ngv_data(df_common)\n",
" # 人员字段转换为人员字段\n",
" staff_columns = ['area_manager', 'service_impl_principal', \"service_salesmen\", \"technician\"]\n",
" # 将员工列表转为DataFrame\n",
" # 三重循环临时方案(确保可写入)\n",
" for col in staff_columns:\n",
" staff_ids = []\n",
" for _, row in filtered_df.iterrows():\n",
" matched = False\n",
" for staff in self.staff_id_list:\n",
" if str(staff['_widget_1734942794144']) == str(row[col]):\n",
" staff_ids.append(staff['_widget_1734942794145'])\n",
" matched = True\n",
" break\n",
" if not matched:\n",
" staff_ids.append(None)\n",
" filtered_df[col + \"_staff_id\"] = staff_ids\n",
" logger.info(f\"人员转换完成\")\n",
"\n",
" # 记录执行时间\n",
" end_time = datetime.datetime.now()\n",
" time_diff = end_time - self.start_time\n",
" logger.info(f\"执行时间: {time_diff.days} 天, {time_diff.seconds} 秒, {time_diff.microseconds} 微秒\")\n",
" # filtered_df.to_csv(r\"D:\\Idea Project\\SaaS_V1.3\\back_ground_module\\output\\NGV.csv\")\n",
"\n",
" # 发送任务状态\n",
" self.common.send_task_status(task_start_time, \"NGV更新数据\")\n",
" # 生成包含所有行转换后的字典列表\n",
" # all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j1.iterrows()] # 前两天的全部数据\n",
" # all_data = [self.row_to_dict(row, self.field_mapping) for index, row in data_NGV_j.iterrows()] # 前一天的全部数据\n",
" all_data = [self.row_to_dict(row, self.field_mapping) for index, row in filtered_df.iterrows()] # 增量数据\n",
" filtered_df.to_csv(output_dir + \"\\\\filtered_df.csv\", index=False)\n",
"\n",
"\n",
" #\n",
" # data = {'api_key': Config.SaaS_Tasks_APP_ID, 'entry_id': Config.NGV_TASKS_ENTRY_ID, \"data_list\": all_data}\n",
" #\n",
" # result = api_instance.entry_data_batch_create(data)\n",
" # logger.info(f\"数据已推送:{result}\")\n",
" # result_str = str(result)\n",
" # print(result_str[:500])\n",
"\n",
" # 保存到Excel文件\n",
" # output_path = r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\ngv明细1.xlsx'\n",
" # filtered_df.to_excel(output_path, index=False)\n",
" # data_NGV_j1.to_excel( r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\ngv明细j1.xlsx', index=False)\n",
" # data_NGV_j.to_excel( r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\ngv明细j.xlsx', index=False)\n",
" # new_df.to_excel(r'D:\\Idea Project\\F6+宜搭+其它(1)\\new\\文件输出\\ngv明细ndf.xlsx', index=False)\n",
"\n",
" # common_module.send_task_status(task_start_time, \"NGV新增数据\")\n",
" # logger.info(f\"任务完成。\")\n",
" except Exception as e:\n",
" error_logger.error(f\"NGV数据更新流程失败: {str(e)}\")\n",
" raise\n",
" error_task_logger.error(f\"任务执行时发生异常: {e}\")\n",
" common_module.send_task_error(task_start_time, \"NGV新增数据\", str(e))\n",
"\n",
" @staticmethod\n",
" def row_to_dict(row, field_mapping):\n",
" \"\"\"将一行数据转换为指定格式的字典\"\"\"\n",
" result = {}\n",
" for col_name, widget_id in field_mapping.items():\n",
" if col_name in row:\n",
" value = row[col_name]\n",
" clean_value = None if pd.isna(value) else value\n",
" result[widget_id] = {\"value\": clean_value}\n",
" return result\n",
"\n",
" def fields(self):\n",
" self.field_mapping = dict(date_id='_widget_1734062123065', date_fmt='_widget_1734062123066',\n",
" id_own_group='_widget_1734062123067', group_name='_widget_1734062123068',\n",
" id_own_org='_widget_1734062123069', org_name='_widget_1734062123070',\n",
" org_code='_widget_1734062123071', group_grade='_widget_1734062123072',\n",
" org_type='_widget_1734062123073', org_status='_widget_1734062123074',\n",
" saas_version='_widget_1734062123075', is_wechat='_widget_1734062123076',\n",
" is_mini_app='_widget_1734062123077', is_wx_shop='_widget_1734062123078',\n",
" is_camera_service='_widget_1734062123079',\n",
" is_maintenance_service='_widget_1734062123080',\n",
" saas_create_time='_widget_1734062123081', expiry_time='_widget_1734062123082',\n",
" saas_use_days='_widget_1734062123083', saas_use_year='_widget_1734062123084',\n",
" is_main_org='_widget_1734062123085', license_code='_widget_1734062123086',\n",
" license_name='_widget_1734062123087', org_crm_id='_widget_1734062123088',\n",
" province_id='_widget_1734062123089', province_name='_widget_1734062123090',\n",
" city_id='_widget_1734062123091', city_name='_widget_1734062123092',\n",
" area_id='_widget_1734062123093', area_name='_widget_1734062123094',\n",
" region_name='_widget_1734062123095', region_short_name='_widget_1734062123096',\n",
" branch_name='_widget_1734062123097', carzone_store_id='_widget_1734062123098',\n",
" carzone_store_name='_widget_1734062123099',\n",
" customer_carzone_id='_widget_1734062123100', salesmen='_widget_1734062123101',\n",
" area_manager='_widget_1734062123102', service_salesmen='_widget_1734062123103',\n",
" impl_principal='_widget_1734062123104',\n",
" service_impl_principal='_widget_1734062123105',\n",
" active_user_count='_widget_1734062123106', active_user_type='_widget_1734062123107',\n",
" limit_user_count='_widget_1734062123108', limit_user_type='_widget_1734062123109',\n",
" is_n='_widget_1734062123110', is_g='_widget_1734062123111',\n",
" is_v='_widget_1734062123112', is_visited='_widget_1734062123113',\n",
" is_active='_widget_1734062123114', active_status_fmt='_widget_1734062123115',\n",
" bill_count_last_30_day='_widget_1734062123116',\n",
" bill_day_count_last_30_day='_widget_1734062123117',\n",
" bill_day_count_this_month='_widget_1734062123118',\n",
" bill_count_last_7_day='_widget_1734062123119',\n",
" bill_day_count_last_7_day='_widget_1734062123120', pv_count='_widget_1734062123121',\n",
" uv_count='_widget_1734062123122', bill_count_1d='_widget_1734062123123',\n",
" bill_count_2d='_widget_1734062123124', bill_count_3d='_widget_1734062123125',\n",
" bill_count_4d='_widget_1734062123126', bill_count_5d='_widget_1734062123127',\n",
" bill_count_6d='_widget_1734062123128', bill_count_7d='_widget_1734062123129',\n",
" bill_count_8d='_widget_1734062123130', bill_count_9d='_widget_1734062123131',\n",
" bill_count_10d='_widget_1734062123132', bill_count_11d='_widget_1734062123133',\n",
" bill_count_12d='_widget_1734062123134', bill_count_13d='_widget_1734062123135',\n",
" bill_count_14d='_widget_1734062123136', bill_count_15d='_widget_1734062123137',\n",
" bill_count_16d='_widget_1734062123138', bill_count_17d='_widget_1734062123139',\n",
" bill_count_18d='_widget_1734062123140', bill_count_19d='_widget_1734062123141',\n",
" bill_count_20d='_widget_1734062123142', bill_count_21d='_widget_1734062123143',\n",
" bill_count_22d='_widget_1734062123144', bill_count_23d='_widget_1734062123145',\n",
" bill_count_24d='_widget_1734062123146', bill_count_25d='_widget_1734062123147',\n",
" bill_count_26d='_widget_1734062123148', bill_count_27d='_widget_1734062123149',\n",
" bill_count_28d='_widget_1734062123150', bill_count_29d='_widget_1734062123151',\n",
" bill_count_30d='_widget_1734062123152', bill_count_31d='_widget_1734062123153',\n",
" etl_time='_widget_1734062123154',\n",
" maintain_bill_count_last_30_day='_widget_1734062123155',\n",
" washing_bill_count_last_30_day='_widget_1734062123156',\n",
" maintain_bill_day_count_last_30_day='_widget_1734062123157',\n",
" washing_bill_day_count_last_30_day='_widget_1734062123158',\n",
" retail_bill_count_last_30_day='_widget_1734062123159',\n",
" retail_bill_day_count_last_30_day='_widget_1734062123160',\n",
" purchase_bill_count_last_30_day='_widget_1734062123161',\n",
" purchase_bill_day_count_last_30_day='_widget_1734062123162',\n",
" card_bill_count_last_30_day='_widget_1734062123163',\n",
" card_bill_day_count_last_30_day='_widget_1734062123164',\n",
" gd_sales_bill_count_last_30_day='_widget_1734062123165',\n",
" gd_sales_bill_day_count_last_30_day='_widget_1734062123166',\n",
" g_change_flag='_widget_1734062123167', saas_package='_widget_1734062123168',\n",
" manage_model='_widget_1734062123169', contacts='_widget_1734062123170',\n",
" contact_number='_widget_1734062123171', contact_mobile='_widget_1734062123172',\n",
" g_month_count='_widget_1734062123173', g_month_percentage='_widget_1734062123174',\n",
" is_install_service='_widget_1734062123175',\n",
" install_create_time='_widget_1734062123176', last_end_date='_widget_1734062123177',\n",
" renew_date='_widget_1734062123178', is_chain_owner='_widget_1734062123179',\n",
" group_org_count='_widget_1734062123180',\n",
" recent_bill_warning_days='_widget_1734062123181',\n",
" g_change_flag_d='_widget_1734062123182', g_lost_warning_days='_widget_1734062123183',\n",
" saas_edition_fmt='_widget_1734062123184', g_flag_1m='_widget_1734062123185',\n",
" g_flag_2m='_widget_1734062123186', g_flag_3m='_widget_1734062123187',\n",
" g_flag_4m='_widget_1734062123188', g_flag_5m='_widget_1734062123189',\n",
" g_flag_6m='_widget_1734062123190', g_flag_day_count='_widget_1734062123191',\n",
" add_org_flag='_widget_1734062123192', pt='_widget_1734062123193',\n",
" org_size='_widget_1734062123194', qualification_type_fmt='_widget_1734062123195',\n",
" business_scope_fmt='_widget_1734062123196', store_type_fmt='_widget_1734062123197',\n",
" area='_widget_1734062123198', station_number='_widget_1734062123199',\n",
" header_type_fmt='_widget_1734062123200', org_stage='_widget_1734062123201',\n",
" g_count_this_month='_widget_1734062123202',\n",
" saas_customer_type='_widget_1734062123203', technician='_widget_1734062123204',\n",
" tmall_maintain_service_status_desc='_widget_1734062123205',\n",
" date_fmt_date='_widget_1749000071375',\n",
" area_manager_staff_id='_widget_1748496855779',\n",
" service_impl_principal_staff_id=\"_widget_1748496855780\",\n",
" service_salesmen_staff_id=\"_widget_1748496855778\",\n",
" technician_staff_id=\"_widget_1751877712235\",\n",
" saas_create_time_date=\"_widget_1749000071377\",\n",
" expiry_time_date=\"_widget_1749000071382\",\n",
" install_create_time_date=\"_widget_1749000071384\",\n",
" last_end_date_date=\"_widget_1749000071389\", renew_date_date=\"_widget_1749000071391\")\n",
"\n",
"\n",
"if __name__ == '__main__':\n",
" updater = NGVDataUpdater()\n",
" updater.main()"
" start = UpdateNGVData()\n",
" start.main()\n"
],
"id": "82d58cced4a6e02",
"id": "6ce49b7686e91712",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-08-04 16:27:26,074 - task_logger - INFO - 开始NGV数据更新流程\n"
"\u001B[92m2025-10-15 15:14:12,293 - api.py - task_logger - INFO - 获取了145条数据\u001B[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1de1bc8bae6d3111bb0f6332472b8cd4\n",
"1de1bc8bae6d3111bb0f6332472b8cd4\n",
"已获取 100 条数据\n",
"已获取 200 条数据\n",
"已获取 300 条数据\n",
"已获取 400 条数据\n",
"已获取 500 条数据\n",
"已获取 600 条数据\n",
"已获取 700 条数据\n",
"已获取 800 条数据\n",
"已获取 900 条数据\n",
"已获取 1000 条数据\n",
"已获取 1100 条数据\n",
"已获取 1200 条数据\n",
"已获取 1300 条数据\n",
"已获取 1400 条数据\n",
"已获取 1500 条数据\n",
"已获取 1600 条数据\n",
"已获取 1700 条数据\n",
"已获取 1800 条数据\n",
"已获取 1900 条数据\n",
"已获取 2000 条数据\n",
"已获取 2100 条数据\n",
"已获取 2200 条数据\n",
"已获取 2300 条数据\n",
"已获取 2400 条数据\n",
"已获取 2500 条数据\n",
"已获取 2600 条数据\n",
"已获取 2700 条数据\n",
"已获取 2800 条数据\n",
"已获取 2900 条数据\n",
"已获取 3000 条数据\n",
"已获取 3100 条数据\n",
"已获取 3200 条数据\n",
"已获取 3300 条数据\n",
"已获取 3400 条数据\n",
"已获取 3500 条数据\n",
"已获取 3600 条数据\n",
"已获取 3700 条数据\n",
"已获取 3800 条数据\n",
"已获取 3900 条数据\n",
"已获取 4000 条数据\n",
"已获取 4100 条数据\n",
"已获取 4200 条数据\n",
"已获取 4300 条数据\n",
"已获取 4400 条数据\n",
"已获取 4500 条数据\n",
"已获取 4600 条数据\n",
"已获取 4700 条数据\n",
"已获取 4800 条数据\n",
"已获取 4900 条数据\n",
"已获取 5000 条数据\n",
"已获取 5100 条数据\n",
"已获取 5200 条数据\n",
"已获取 5300 条数据\n",
"已获取 5400 条数据\n",
"已获取 5500 条数据\n",
"已获取 5600 条数据\n",
"已获取 5700 条数据\n",
"已获取 5800 条数据\n",
"已获取 5900 条数据\n",
"已获取 6000 条数据\n",
"已获取 6100 条数据\n",
"已获取 6200 条数据\n",
"已获取 6300 条数据\n",
"已获取 6400 条数据\n",
"已获取 6500 条数据\n",
"已获取 6600 条数据\n",
"已获取 6700 条数据\n",
"已获取 6800 条数据\n",
"已获取 6900 条数据\n",
"已获取 7000 条数据\n",
"已获取 7100 条数据\n",
"已获取 7200 条数据\n",
"已获取 7300 条数据\n",
"已获取 7400 条数据\n",
"已获取 7500 条数据\n",
"已获取 7600 条数据\n",
"已获取 7700 条数据\n",
"已获取 7800 条数据\n",
"已获取 7900 条数据\n",
"已获取 8000 条数据\n",
"已获取 8100 条数据\n",
"已获取 8200 条数据\n",
"已获取 8300 条数据\n",
"已获取 8400 条数据\n",
"已获取 8500 条数据\n",
"已获取 8600 条数据\n",
"已获取 8700 条数据\n",
"已获取 8800 条数据\n",
"已获取 8900 条数据\n",
"已获取 9000 条数据\n",
"已获取 9100 条数据\n",
"已获取 9200 条数据\n",
"已获取 9300 条数据\n",
"已获取 9400 条数据\n",
"已获取 9500 条数据\n",
"已获取 9600 条数据\n",
"已获取 9700 条数据\n",
"已获取 9800 条数据\n",
"已获取 9900 条数据\n",
"已获取 10000 条数据\n",
"已获取 10100 条数据\n",
"已获取 10200 条数据\n",
"已获取 10300 条数据\n",
"已获取 10400 条数据\n",
"已获取 10500 条数据\n",
"已获取 10600 条数据\n",
"已获取 10700 条数据\n",
"已获取 10800 条数据\n",
"已获取 10900 条数据\n",
"已获取 11000 条数据\n",
"已获取 11100 条数据\n",
"已获取 11200 条数据\n",
"已获取 11300 条数据\n",
"已获取 11400 条数据\n",
"已获取 11500 条数据\n",
"已获取 11600 条数据\n",
"已获取 11700 条数据\n",
"已获取 11800 条数据\n",
"已获取 11900 条数据\n",
"已获取 12000 条数据\n",
"已获取 12100 条数据\n",
"已获取 12200 条数据\n",
"已获取 12300 条数据\n",
"已获取 12400 条数据\n",
"已获取 12500 条数据\n",
"已获取 12600 条数据\n",
"已获取 12700 条数据\n",
"已获取 12800 条数据\n",
"已获取 12900 条数据\n",
"已获取 13000 条数据\n",
"已获取 13100 条数据\n",
"已获取 13200 条数据\n",
"已获取 13300 条数据\n",
"已获取 13400 条数据\n",
"已获取 13500 条数据\n",
"已获取 13600 条数据\n",
"已获取 13700 条数据\n",
"已获取 13800 条数据\n",
"已获取 13900 条数据\n",
"已获取 14000 条数据\n",
"已获取 14100 条数据\n",
"已获取 14200 条数据\n",
"已获取 14300 条数据\n",
"已获取 14400 条数据\n",
"已获取 14500 条数据\n",
"已获取 14600 条数据\n",
"已获取 14700 条数据\n",
"已获取 14800 条数据\n",
"已获取 14900 条数据\n",
"已获取 15000 条数据\n",
"已获取 15100 条数据\n",
"已获取 15200 条数据\n",
"已获取 15300 条数据\n",
"已获取 15400 条数据\n",
"已获取 15500 条数据\n",
"已获取 15600 条数据\n",
"已获取 15700 条数据\n",
"已获取 15800 条数据\n",
"已获取 15900 条数据\n",
"已获取 16000 条数据\n",
"已获取 16100 条数据\n",
"已获取 16200 条数据\n",
"已获取 16300 条数据\n",
"已获取 16400 条数据\n",
"已获取 16500 条数据\n",
"已获取 16600 条数据\n",
"已获取 16700 条数据\n",
"已获取 16800 条数据\n",
"已获取 16900 条数据\n",
"已获取 17000 条数据\n",
"已获取 17100 条数据\n",
"已获取 17200 条数据\n",
"已获取 17300 条数据\n",
"已获取 17400 条数据\n",
"已获取 17500 条数据\n",
"已获取 17600 条数据\n",
"已获取 17700 条数据\n",
"已获取 17800 条数据\n",
"已获取 17900 条数据\n",
"已获取 18000 条数据\n",
"已获取 18100 条数据\n",
"已获取 18200 条数据\n",
"已获取 18300 条数据\n",
"已获取 18400 条数据\n",
"已获取 18500 条数据\n",
"已获取 18600 条数据\n",
"已获取 18700 条数据\n",
"已获取 18800 条数据\n",
"已获取 18900 条数据\n",
"已获取 19000 条数据\n",
"已获取 19100 条数据\n",
"已获取 19200 条数据\n",
"已获取 19300 条数据\n",
"已获取 19400 条数据\n",
"已获取 19500 条数据\n",
"已获取 19600 条数据\n",
"已获取 19700 条数据\n",
"已获取 19800 条数据\n",
"已获取 19900 条数据\n"
"已获取 145 条数据\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001B[92m2025-10-15 15:14:12,294 - 4281365028.py - task_logger - INFO - 数据加载完成\u001B[0m\n",
"C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"KeyboardInterrupt\n",
"\n"
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" filtered_df[col + '_date'] = (\n",
"C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" filtered_df[col + '_date'] = (\n",
"C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" filtered_df[col + '_date'] = (\n",
"C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" filtered_df[col + '_date'] = (\n",
"C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" filtered_df[col + '_date'] = (\n",
"C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:76: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" filtered_df[col + '_date'] = (\n",
"\u001B[92m2025-10-15 15:14:36,032 - 4281365028.py - task_logger - INFO - 时间转换完成\u001B[0m\n",
"C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:102: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" filtered_df[col + \"_staff_id\"] = staff_ids\n",
"C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:102: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" filtered_df[col + \"_staff_id\"] = staff_ids\n",
"C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:102: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" filtered_df[col + \"_staff_id\"] = staff_ids\n",
"C:\\Users\\zy187\\AppData\\Local\\Temp\\ipykernel_5028\\4281365028.py:102: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" filtered_df[col + \"_staff_id\"] = staff_ids\n",
"\u001B[92m2025-10-15 15:14:36,045 - 4281365028.py - task_logger - INFO - 人员转换完成\u001B[0m\n"
]
}
],
"execution_count": 1
"execution_count": 2
}
],
"metadata": {
+387
View File
@@ -0,0 +1,387 @@
# -*- coding: utf-8 -*-
import concurrent.futures
import datetime
import os

import pandas as pd
from tqdm import tqdm

from config import Config
from api import API
from back_ground_module import CommonModule
from log_config import configure_task_logger, configure_error_task_logger
# Module-level setup shared by the daily NGV update task defined below.
# NOTE: every statement here executes when the module is imported.

# Regular task logger, already configured by the project's log_config module.
logger = configure_task_logger()
# Logger dedicated to task errors, also configured by log_config.
error_task_logger = configure_error_task_logger()
# Timestamp captured when this module is loaded.
start_time = datetime.datetime.now()
# Shared API client and helper instances used by the task class.
api_instance = API()
common_module = CommonModule()
# Directory that receives the CSV files written by the task.
output_dir = "output"
# Create the output directory if it does not exist yet.
os.makedirs(output_dir, exist_ok=True)
class UpdateAllNGVDataDaily:
    """Daily NGV data update job ("NGV数据每日更新").

    Compares two NGV detail snapshots with the NGV records held in the
    JianDaoYun form, marks form records whose store code is absent from the
    newest snapshot as "已删除", and prepares the rows whose content differs
    between the two snapshots (UTC date strings, staff ids) for write-back.
    """

    def __init__(self):
        # Maps NGV column names to JianDaoYun widget ids; filled by fields().
        self.field_mapping = {}
        self.fields()

    @staticmethod
    def _deleted_store_marks(only_in_temp_jdy):
        """Return ``{"data_id", "org_code"}`` dicts for rows that carry an ``_id``.

        :param only_in_temp_jdy: DataFrame indexed by ``org_code``; rows whose
            ``_id`` is missing (or when the column does not exist) are skipped.
        """
        if '_id' not in only_in_temp_jdy.columns:
            return []
        marks = []
        # The store code lives in the index (set_index('org_code')), so it is
        # taken from the iterrows() label rather than from a row column.
        for org_code, only_row in only_in_temp_jdy.iterrows():
            if pd.isna(only_row['_id']):
                continue
            data_id = str(only_row['_id'])
            if data_id:
                marks.append({"data_id": data_id, "org_code": org_code})
        return marks

    @staticmethod
    def _build_staff_lookup(staff_id_list):
        """Map ``str(staff name)`` to the staff id; the first record per name wins.

        :param staff_id_list: iterable of staff records (dicts) or ``None``.
        """
        lookup = {}
        for staff in staff_id_list or []:
            if '_widget_1734942794144' not in staff:
                continue
            lookup.setdefault(str(staff['_widget_1734942794144']),
                              staff.get('_widget_1734942794145'))
        return lookup

    def main(self):
        """Run the daily comparison; any failure is logged and reported via
        ``common_module.send_task_error``."""
        task_start_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            logger.info("开始执行任务:{}".format(task_start_time))
            # Fetch the NGV records currently stored in the JianDaoYun form.
            payload = {"api_key": "675b900991ad2491c69389ca", "entry_id": "675bb02bd2d53c2034c665e4"}
            NGV_data_list = api_instance.entry_data_list(payload).get("data", [])
            jdy_NGV_data = pd.DataFrame(NGV_data_list)
            jdy_NGV_data.to_csv(os.path.join(output_dir, "jdy_NGV_data.csv"))
            # Fetch the staff-id table (the API wraps the records in a "data" key).
            payload = {"api_key": "6694d3c4fcb69ca9a111a6c4",
                       "entry_id": "6769204a1902c9341340a1bc",
                       }
            staff_id = api_instance.entry_data_list(payload)
            staff_id_list = staff_id.get("data")
            logger.info("已获取数据")
            # Two NGV detail snapshots, requested with days_back=1 and days_back=2.
            data_NGV_j = common_module.get_ngv_details(days_back=1)
            data_NGV_j.to_csv(os.path.join(output_dir, "data_NGV_j.csv"), index=False)
            data_NGV_j1 = common_module.get_ngv_details(days_back=2)
            data_NGV_j1.to_csv(os.path.join(output_dir, "data_NGV_j1.csv"), index=False)
            # Keep only records whose org_type is "一般".
            data_NGV_j = data_NGV_j[data_NGV_j['org_type'] == '一般']
            data_NGV_j1 = data_NGV_j1[data_NGV_j1['org_type'] == '一般']

            temp_jdy_NGV_data = jdy_NGV_data.copy()
            temp_jdy_NGV_data.reset_index(inplace=True)
            if '_widget_1734062123071' not in temp_jdy_NGV_data.columns:
                # Without the store-code column nothing below can work; fail
                # explicitly instead of via an opaque KeyError in set_index().
                error_task_logger.error("'门店编码' 不存在")
                raise ValueError("'门店编码' 不存在")
            temp_jdy_NGV_data.rename(columns={'_widget_1734062123071': 'org_code'}, inplace=True)
            temp_jdy_NGV_data.set_index('org_code', inplace=True)

            # Records present in JianDaoYun but absent from the newest snapshot
            # get marked as deleted. A boolean mask (rather than .loc[labels])
            # avoids multiplying rows when a store code is repeated.
            df1_index = data_NGV_j.set_index('org_code')
            only_in_temp_jdy = temp_jdy_NGV_data[~temp_jdy_NGV_data.index.isin(df1_index.index)]
            only_in_temp_jdy.to_csv(os.path.join(output_dir, 'only_in_temp_jdy.csv'), index_label='org_code')
            mark_list = self._deleted_store_marks(only_in_temp_jdy)
            for mark in mark_list:
                data = {
                    'api_key': Config.SaaS_Tasks_APP_ID,
                    'entry_id': Config.NGV_TASKS_ENTRY_ID,
                    "data_id": mark["data_id"],
                    "data": {"_widget_1754285499851": {"value": "已删除"}}
                }
                print(mark["data_id"])
                api_instance.entry_data_update(data=data, max_retries=20)
            mark_df = pd.DataFrame(mark_list)
            mark_df.to_csv(os.path.join(output_dir, 'mark_list.csv'), index=False)

            # Drop the columns that are excluded from the comparison, keeping
            # the original column order.
            columns_to_remove = {'date_id', 'date_fmt', 'pt', 'etl_time'}
            columns_to_keep_df1 = [c for c in data_NGV_j.columns if c not in columns_to_remove]
            columns_to_keep_df2 = [c for c in data_NGV_j1.columns if c not in columns_to_remove]
            df1_filtered = data_NGV_j[columns_to_keep_df1]
            df2_filtered = data_NGV_j1[columns_to_keep_df2]
            # Index by store code and normalise everything to strings so that
            # missing values compare equal.
            df1_set_index = df1_filtered.set_index('org_code')
            df2_set_index = df2_filtered.set_index('org_code')
            df1_set_index = df1_set_index.astype(str).replace(['nan', 'None'], '', ).fillna("")
            df2_set_index = df2_set_index.astype(str).replace(['nan', 'None'], '', ).fillna("")
            # Restrict both frames to the shared store codes and shared columns.
            common_index = df1_set_index.index.intersection(df2_set_index.index)
            df1_common = df1_set_index.reindex(common_index).fillna('')
            df2_common = df2_set_index.reindex(common_index).fillna('')
            common_columns = df1_common.columns.intersection(df2_common.columns)
            df1_common = df1_common[common_columns]
            df2_common = df2_common[common_columns]
            # Flag each store as identical / different between the snapshots.
            comparison_column = 'match_status'
            matches = (df1_common == df2_common).all(axis=1)
            df1_common[comparison_column] = matches.map({True: '一致', False: '不一致'})

            # Attach the JianDaoYun record id and keep only the changed rows.
            # .copy() makes the result an independent frame so the column
            # assignments below do not trigger SettingWithCopyWarning.
            df1_common = df1_common.join(temp_jdy_NGV_data["_id"], how='left')
            df1_common = df1_common[df1_common['match_status'] == '不一致'].copy()

            # Convert the date columns to ISO-8601 UTC strings in "<col>_date".
            time_columns = ['saas_create_time', 'expiry_time', 'install_create_time', "last_end_date",
                            "renew_date"]
            for col in time_columns:
                # Unparseable values become NaT instead of raising.
                parsed = pd.to_datetime(df1_common[col], errors='coerce', utc=False)
                df1_common[col + '_date'] = (
                    parsed
                    # Interpret the naive timestamps as Asia/Shanghai local time
                    .dt.tz_localize('Asia/Shanghai', ambiguous='infer', nonexistent='NaT')
                    .dt.tz_convert('UTC')
                    .dt.strftime('%Y-%m-%dT%H:%M:%SZ')
                )
            logger.info("日期已转换为UTC格式")

            # Translate staff names into staff ids in "<col>_staff_id";
            # names without a match yield None.
            staff_columns = ['area_manager', 'service_impl_principal', "service_salesmen", "technician"]
            if not staff_id_list:
                logger.warning("未获取到员工ID数据,人员字段将为空")
            staff_lookup = self._build_staff_lookup(staff_id_list)
            for col in staff_columns:
                df1_common[col + "_staff_id"] = [staff_lookup.get(str(name)) for name in df1_common[col]]
            logger.info("人员字段已替换")
            logger.info(f"今日更新数据量为:{len(df1_common)}")

            # NOTE: the per-row write-back below is currently disabled; the job
            # only writes the CSV dumps and the "已删除" marks above.
            # futures = []
            # all_data = []
            # for idx, row in tqdm(df1_common.iterrows(), total=len(df1_common), desc="更新数据"):
            #     result = {}
            #     data_dict = {}
            #
            #     # Replace column names with widget ids according to field_mapping
            #     for col_name, widget_id in self.field_mapping.items():
            #         if col_name in df1_common.columns:
            #             value = row[col_name]
            #             clean_value = None if pd.isna(value) else value
            #             data_dict[widget_id] = {"value": clean_value}
            #
            #     # Handle the _id column separately and convert it to a string
            #     if '_id' in df1_common.columns:
            #         _id_value = str(row['_id']) if not pd.isna(row['_id']) else None
            #         result["_id"] = _id_value
            #
            #     # Assemble the final request
            #     if result["_id"]:
            #         data = {
            #             'api_key': Config.SaaS_Tasks_APP_ID,
            #             'entry_id': Config.NGV_TASKS_ENTRY_ID,
            #             "data_id": result["_id"],
            #             "data": data_dict
            #         }
            #
            #         api_instance.entry_data_update(data=data, max_retries=20)
            #     else:
            #         # continue
            #         data1 = {'api_key': Config.SaaS_Tasks_APP_ID, 'entry_id': Config.NGV_TASKS_ENTRY_ID,
            #                  "data": data_dict}
            #         res = api_instance.data_batch_create(data=data1, max_retries=20)
            #         logger.info(f"补派数据:{res}")
            #         # all_data.append(data_dict)
            #
            # # Collect all results
            # for future in concurrent.futures.as_completed(futures):
            #     try:
            #         result = future.result()
            #         logger.info(f"所有请求结果:{result}")
            #     except Exception as exc:
            #         error_task_logger.error(f"请求发生异常: {exc}")
            #
            # common_module.send_task_status(task_start_time, "NGV更新数据")
            # logger.info("NGV更新数据任务已完成。")
        except Exception as e:
            error_task_logger.error(f"NGV更新数据执行时发生异常: {e}")
            common_module.send_task_error(task_start_time, "NGV更新数据", str(e))

    @staticmethod
    def row_to_dict(row, field_mapping):
        """Convert one row into ``{widget_id: {"value": ...}}``.

        Only columns present in *row* are emitted; missing values (as judged by
        ``pd.isna``) are sent as ``None``.
        """
        result = {}
        for col_name, widget_id in field_mapping.items():
            if col_name in row:
                value = row[col_name]
                clean_value = None if pd.isna(value) else value
                result[widget_id] = {"value": clean_value}
        return result

    def fields(self):
        """Populate ``self.field_mapping`` (NGV column name -> JianDaoYun widget id)."""
        self.field_mapping = dict(date_id='_widget_1734062123065', date_fmt='_widget_1734062123066',
            id_own_group='_widget_1734062123067', group_name='_widget_1734062123068',
            id_own_org='_widget_1734062123069', org_name='_widget_1734062123070',
            org_code='_widget_1734062123071', group_grade='_widget_1734062123072',
            org_type='_widget_1734062123073', org_status='_widget_1734062123074',
            saas_version='_widget_1734062123075', is_wechat='_widget_1734062123076',
            is_mini_app='_widget_1734062123077', is_wx_shop='_widget_1734062123078',
            is_camera_service='_widget_1734062123079',
            is_maintenance_service='_widget_1734062123080',
            saas_create_time='_widget_1734062123081', expiry_time='_widget_1734062123082',
            saas_use_days='_widget_1734062123083', saas_use_year='_widget_1734062123084',
            is_main_org='_widget_1734062123085', license_code='_widget_1734062123086',
            license_name='_widget_1734062123087', org_crm_id='_widget_1734062123088',
            province_id='_widget_1734062123089', province_name='_widget_1734062123090',
            city_id='_widget_1734062123091', city_name='_widget_1734062123092',
            area_id='_widget_1734062123093', area_name='_widget_1734062123094',
            region_name='_widget_1734062123095', region_short_name='_widget_1734062123096',
            branch_name='_widget_1734062123097', carzone_store_id='_widget_1734062123098',
            carzone_store_name='_widget_1734062123099',
            customer_carzone_id='_widget_1734062123100', salesmen='_widget_1734062123101',
            area_manager='_widget_1734062123102', service_salesmen='_widget_1734062123103',
            impl_principal='_widget_1734062123104',
            service_impl_principal='_widget_1734062123105',
            active_user_count='_widget_1734062123106', active_user_type='_widget_1734062123107',
            limit_user_count='_widget_1734062123108', limit_user_type='_widget_1734062123109',
            is_n='_widget_1734062123110', is_g='_widget_1734062123111',
            is_v='_widget_1734062123112', is_visited='_widget_1734062123113',
            is_active='_widget_1734062123114', active_status_fmt='_widget_1734062123115',
            bill_count_last_30_day='_widget_1734062123116',
            bill_day_count_last_30_day='_widget_1734062123117',
            bill_day_count_this_month='_widget_1734062123118',
            bill_count_last_7_day='_widget_1734062123119',
            bill_day_count_last_7_day='_widget_1734062123120', pv_count='_widget_1734062123121',
            uv_count='_widget_1734062123122', bill_count_1d='_widget_1734062123123',
            bill_count_2d='_widget_1734062123124', bill_count_3d='_widget_1734062123125',
            bill_count_4d='_widget_1734062123126', bill_count_5d='_widget_1734062123127',
            bill_count_6d='_widget_1734062123128', bill_count_7d='_widget_1734062123129',
            bill_count_8d='_widget_1734062123130', bill_count_9d='_widget_1734062123131',
            bill_count_10d='_widget_1734062123132', bill_count_11d='_widget_1734062123133',
            bill_count_12d='_widget_1734062123134', bill_count_13d='_widget_1734062123135',
            bill_count_14d='_widget_1734062123136', bill_count_15d='_widget_1734062123137',
            bill_count_16d='_widget_1734062123138', bill_count_17d='_widget_1734062123139',
            bill_count_18d='_widget_1734062123140', bill_count_19d='_widget_1734062123141',
            bill_count_20d='_widget_1734062123142', bill_count_21d='_widget_1734062123143',
            bill_count_22d='_widget_1734062123144', bill_count_23d='_widget_1734062123145',
            bill_count_24d='_widget_1734062123146', bill_count_25d='_widget_1734062123147',
            bill_count_26d='_widget_1734062123148', bill_count_27d='_widget_1734062123149',
            bill_count_28d='_widget_1734062123150', bill_count_29d='_widget_1734062123151',
            bill_count_30d='_widget_1734062123152', bill_count_31d='_widget_1734062123153',
            etl_time='_widget_1734062123154',
            maintain_bill_count_last_30_day='_widget_1734062123155',
            washing_bill_count_last_30_day='_widget_1734062123156',
            maintain_bill_day_count_last_30_day='_widget_1734062123157',
            washing_bill_day_count_last_30_day='_widget_1734062123158',
            retail_bill_count_last_30_day='_widget_1734062123159',
            retail_bill_day_count_last_30_day='_widget_1734062123160',
            purchase_bill_count_last_30_day='_widget_1734062123161',
            purchase_bill_day_count_last_30_day='_widget_1734062123162',
            card_bill_count_last_30_day='_widget_1734062123163',
            card_bill_day_count_last_30_day='_widget_1734062123164',
            gd_sales_bill_count_last_30_day='_widget_1734062123165',
            gd_sales_bill_day_count_last_30_day='_widget_1734062123166',
            g_change_flag='_widget_1734062123167', saas_package='_widget_1734062123168',
            manage_model='_widget_1734062123169', contacts='_widget_1734062123170',
            contact_number='_widget_1734062123171', contact_mobile='_widget_1734062123172',
            g_month_count='_widget_1734062123173', g_month_percentage='_widget_1734062123174',
            is_install_service='_widget_1734062123175',
            install_create_time='_widget_1734062123176', last_end_date='_widget_1734062123177',
            renew_date='_widget_1734062123178', is_chain_owner='_widget_1734062123179',
            group_org_count='_widget_1734062123180',
            recent_bill_warning_days='_widget_1734062123181',
            g_change_flag_d='_widget_1734062123182', g_lost_warning_days='_widget_1734062123183',
            saas_edition_fmt='_widget_1734062123184', g_flag_1m='_widget_1734062123185',
            g_flag_2m='_widget_1734062123186', g_flag_3m='_widget_1734062123187',
            g_flag_4m='_widget_1734062123188', g_flag_5m='_widget_1734062123189',
            g_flag_6m='_widget_1734062123190', g_flag_day_count='_widget_1734062123191',
            add_org_flag='_widget_1734062123192', pt='_widget_1734062123193',
            org_size='_widget_1734062123194', qualification_type_fmt='_widget_1734062123195',
            business_scope_fmt='_widget_1734062123196', store_type_fmt='_widget_1734062123197',
            area='_widget_1734062123198', station_number='_widget_1734062123199',
            header_type_fmt='_widget_1734062123200', org_stage='_widget_1734062123201',
            g_count_this_month='_widget_1734062123202',
            saas_customer_type='_widget_1734062123203', technician='_widget_1734062123204',
            tmall_maintain_service_status_desc='_widget_1734062123205',
            date_fmt_date='_widget_1749000071375',
            area_manager_staff_id='_widget_1748496855779',
            service_impl_principal_staff_id="_widget_1748496855780",
            service_salesmen_staff_id="_widget_1748496855778",
            technician_staff_id="_widget_1751877712235",
            saas_create_time_date="_widget_1749000071377",
            expiry_time_date="_widget_1749000071382",
            install_create_time_date="_widget_1749000071384",
            last_end_date_date="_widget_1749000071389", renew_date_date="_widget_1749000071391")
if __name__ == '__main__':
    # Script entry point: build the job and run it once.
    UpdateAllNGVDataDaily().main()
+393
View File
@@ -0,0 +1,393 @@
import datetime
import os
import time
import requests
from api import API
from back_ground_module import CommonModule
import pandas as pd
from log_config import configure_task_logger, configure_error_task_logger
# Module-level setup shared by the task below: API client, helper module,
# loggers and the output directory.
api_instance = API()
common_module = CommonModule()
# start_time = datetime.datetime.now()
# Obtain the pre-configured regular task logger
logger = configure_task_logger()
# Obtain the pre-configured error-task logger
error_task_logger = configure_error_task_logger()
output_dir = "output" # output directory
os.makedirs(output_dir, exist_ok=True)
class NewExceptionTask:
    """
    SaaS exception follow-up ("SaaS异常回访").

    Reads the exception detail rows, skips stores that already have an open
    to-do or a store-level reason that rules out a follow-up, and creates one
    follow-up to-do per remaining store in the JianDaoYun form.
    """

    # Store-level reasons for which no follow-up to-do is created.
    _SKIP_REASONS = ("门店倒闭", "门店转让", "加盟其他连锁", "切换竞品", "虚拟门店", "重新开户", "已退款", "二套系统")
    # String spellings of "missing" that are blanked before dispatching;
    # 'nan' / 'NaT' / '<NA>' are what astype(str) produces for missing values.
    _MISSING_TEXT = ('NA', 'None', '', 'nan', 'NaT', '<NA>')

    def __init__(self):
        self.exception_service_todo = None
        self.get_feature_usage = None
        self.saas_create_time = None
        self.index = None
        self.date_one = None
        self.data_yichang_S = None
        self.date_list = None
        self.Smart_detection = None
        self.service_remind = None
        self.NGV_data_list = None
        self.permissions_table = None
        self.staff_id_list = None
        self.json_list = []
        self.policy_recognition = None
        self.widget_list = None
        self.private_domain = None
        self.public_domain = None
        self.public_domain_list = None
        self.different_industries = None
        self.different_industries_list = None
        self.groupnotification = None
        # Form label -> widget id of the exception follow-up form.
        self.fields_mapping = {
            "门店名称": "_widget_1748241895830",
            "联系人": "_widget_1748241895831",
            "开户时间": "_widget_1748241895839",
            "门店编码": "_widget_1748241895842",
            "联系方式": "_widget_1748241895832",
            "系统版本": "_widget_1748241895850",
            "公司名称": "_widget_1748241895844",
            "运营顾问": "_widget_1748246808679",
            "区域经理": "_widget_1748246808682",
            "公司等级": "_widget_1748241895846",
            "运营专家": "_widget_1748246808681",
            "操作模式E.L/E.S": "_widget_1748241895853",
            "活跃健康状态变化": "_widget_1748241895829",
            "初始日": "_widget_1748241895833",
            "推进日": "_widget_1748241895834",
            "异常跟进情况描述": "_widget_1748512176640",
            "异常变化原因": "_widget_1748512176641",
            "正常使用": "_widget_1748512176643",
            "门店原因": "_widget_1748512176645",
            "服务原因": "_widget_1748512176647",
            "产品原因": "_widget_1748512176649",
            "未正式切换": "_widget_1748512176651",
            "跟进状态": "_widget_1748512176655",
            "是否可激活": "_widget_1758615839701",
            "是否有续约风险": "_widget_1758615839703",
            "当前跟进人": "_widget_1748246808678",
            "激活策略": "_widget_1758615839717",
            "跟进时间": "_widget_1748512176654",
            "是否跟进完成": "_widget_1751273412737",
            "区域客服": "_widget_1748246808680",
            "大区": "_widget_1748241895847",
            # This key was an empty string; main() fills this widget with
            # province_name, so it is labelled as the province field.
            "省": "_widget_1748241895848",
            "城市": "_widget_1748241895855",
            "门店类型": "_widget_1748241895849",
            "saas客户类型": "_widget_1748241895851",
            "门店阶段": "_widget_1748241895852",
            "提交人": "creator",
            "提交时间": "createTime",
            "更新时间": "updateTime"
        }

    def calculate_date_one(self, start_offset=0):
        """
        Count how many consecutive preceding days are listed in ``self.date_list``.

        :param start_offset: day offset applied to today to get the start date
            (default 0 = today; negative = past, positive = future).
        :return: 0 when the start date itself is in ``date_list``; otherwise
            1 plus the number of consecutive earlier days (at most 9 are
            checked) that are in ``date_list``.
        """
        # date_list may not have been loaded; treat that as "no listed dates".
        date_list = self.date_list if self.date_list is not None else ()
        now_time = datetime.datetime.now() + datetime.timedelta(days=start_offset)
        date_one = 1
        print("当前日期:", now_time.strftime("%Y-%m-%d"))
        if now_time.strftime("%Y-%m-%d") in date_list:
            date_one = 0
            print("开始次数:", date_one)
        else:
            # Walk backwards until the first day that is not listed.
            for i in range(1, 10):
                new_date = now_time + datetime.timedelta(days=-i)
                new_date_str = new_date.strftime("%Y-%m-%d")
                print("遍历日期:", new_date_str)
                if new_date_str in date_list:
                    date_one += 1
                    print("节假日期:", new_date_str)
                else:
                    break
            print("遍历次数:", date_one)
        return date_one

    @staticmethod
    def download_url_content(url, save_path, timeout=30):
        """
        Download the content at *url* and save it to a local file.

        Failures are reported with ``print`` and never raised.

        :param url: URL to download
        :param save_path: path of the file to write
        :param timeout: seconds passed to ``requests.get`` as its timeout
        """
        try:
            # The context manager releases the streamed connection even when
            # writing the file fails.
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()  # raise on an HTTP error status
                # A bare file name has no directory part; os.makedirs('')
                # would raise, so only create the directory when there is one.
                target_dir = os.path.dirname(save_path)
                if target_dir:
                    os.makedirs(target_dir, exist_ok=True)
                with open(save_path, 'wb') as file:
                    # Write in chunks to keep memory usage low
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip empty chunks
                            file.write(chunk)
            print(f"文件已成功保存到 {save_path}")
        except requests.exceptions.RequestException as e:
            print(f"下载失败: {e}")
        except Exception as e:
            print(f"发生错误: {e}")

    def load_all_data(self):
        """Load every table the task needs; a missing "data" key yields an empty list."""
        # Province / city / district -> staff relationship table
        payload = {"api_key": "675b900991ad2491c69389ca", "entry_id": "676512ac3e54dc3159460c0a"}
        json_dict = api_instance.entry_data_list(payload)
        self.json_list = json_dict.get("data") or []
        # JianDaoYun staff ids (the API wraps the records in a "data" key)
        payload = {"api_key": "6694d3c4fcb69ca9a111a6c4",
                   "entry_id": "6769204a1902c9341340a1bc",
                   }
        staff_id = api_instance.entry_data_list(payload)
        self.staff_id_list = staff_id.get("data") or []
        # NGV data
        payload = {"api_key": "675b900991ad2491c69389ca", "entry_id": "675bb02bd2d53c2034c665e4"}
        self.NGV_data_list = api_instance.entry_data_list(payload).get("data") or []
        # Existing exception follow-up to-dos
        payload = {"api_key": "675b900991ad2491c69389ca", "entry_id": "68340de79f116c0b66b6b0cc"}
        self.exception_service_todo = api_instance.entry_data_list(payload).get("data") or []
        # Log the size only; the records themselves are not dumped to stdout.
        logger.info(f"已获取异常服务待办 {len(self.exception_service_todo)} 条")

    @staticmethod
    def build_index(json_list):
        """Index the relationship records by (province, city, district).

        Records lacking one of the three location widgets or the
        customer-service widget are skipped with a printed warning.
        """
        index = {}
        for json_item in json_list or []:
            try:
                key = (json_item['_widget_1734677164861'], json_item['_widget_1734677164862'],
                       json_item['_widget_1734677164863'])  # province, city, district
                if '_widget_1734677164870' not in json_item:  # customer-service widget
                    raise KeyError("缺少 '异常回访客服'")
                index[key] = json_item
            except KeyError as e:
                print(f"警告:{e},跳过该条记录: {json_item}")
                continue
        print('index', index)
        return index

    @staticmethod
    def find_customer_service(province_name, city_name, area_name, index):
        """Return the index record for the location, or a "data missing" message string."""
        key = (province_name, city_name, area_name)
        if key not in index:
            return "数据缺失: 未找到对应的异常回访客服"
        return index[key]

    @staticmethod
    def get_staff_id(row_item, name):
        """Return the staff id of *row_item* when its name matches *name*, else None."""
        if str(row_item["_widget_1734942794144"]) == str(name):
            return row_item["_widget_1734942794145"]
        return None

    def assign_customer_service(self, province_name, city_name, area_name, index):
        """Return the username of the customer service for the location, or None.

        None is returned when the location is not in *index* or the record
        carries no customer-service member.
        """
        customer_service_info = self.find_customer_service(province_name, city_name, area_name, index)
        # find_customer_service() answers with a message string on a miss;
        # calling .get() on it would raise AttributeError.
        if not isinstance(customer_service_info, dict):
            return None
        member = customer_service_info.get('_widget_1734677164870') or {}
        return member.get('username')

    @staticmethod
    def _blank_missing(series):
        """Replace missing values and their string spellings in *series* with ''."""
        return series.apply(
            lambda x: '' if pd.isna(x) or x in NewExceptionTask._MISSING_TEXT else x)

    @staticmethod
    def _epoch_ms(date_text, fmt):
        """Return the local-time epoch milliseconds of *date_text*, or '' when it is blank."""
        if date_text == '':
            return ''
        return int(time.mktime(time.strptime(date_text, fmt)) * 1000)

    @staticmethod
    def _build_staff_lookup(staff_id_list):
        """Map ``str(staff name)`` to the first non-empty staff id found for it."""
        lookup = {}
        for row_item in staff_id_list or []:
            if "_widget_1734942794144" not in row_item:
                continue
            name = str(row_item["_widget_1734942794144"])
            staff_id = row_item.get("_widget_1734942794145")
            if staff_id and name not in lookup:
                lookup[name] = staff_id
        return lookup

    def _dispatch_row(self, row, open_todo_codes, ngv_by_code, staff_lookup):
        """Create the follow-up to-do for one exception row unless it must be skipped."""
        if row['org_code'] in open_todo_codes:
            logger.info(f"已存在待办,跳过该条记录: {row}")
            return
        relationship_manager = staff_lookup.get(str(row['service_impl_principal']))
        area_manager = staff_lookup.get(str(row['area_manager']))
        technician = staff_lookup.get(str(row['technician']))
        transaction_id = time.strftime("%Y%m%d%H%M%S", time.localtime())

        # Per-row locals: a store without an NGV match must not inherit the
        # location of the previously processed store.
        NGV_data_id = province_name = city_name = area_name = reason = None
        ngv_record = ngv_by_code.get(row["org_code"])
        if ngv_record is not None:
            NGV_data_id = ngv_record.get("_id")
            province_name = ngv_record.get("_widget_1734062123090")
            city_name = ngv_record.get("_widget_1734062123092")
            area_name = ngv_record.get("_widget_1734062123094")
            reason = ngv_record.get("_widget_1758617393828")  # store-level reason
            logger.info(f"获取关联数据成功:{NGV_data_id}, {province_name}, {city_name}, {area_name}")
        if reason in self._SKIP_REASONS:
            logger.info(f"门店原因为{reason},跳过派发: {row['org_code']}")
            return
        if not NGV_data_id:
            logger.warning(f"未找到关联数据,请检查门店编码: {row['org_code']}")
        # Pick the customer service responsible for the store's location.
        customer_service = self.assign_customer_service(province_name, city_name, area_name, self.index)
        payload_dict = {
            "_widget_1748241895829": {"value": row["health_warning_info"]},  # health status change
            "_widget_1748241895830": {"value": row["org_name"]},  # store name
            "_widget_1748241895831": {"value": row["contacts"]},  # contact person
            "_widget_1748241895832": {"value": row['contact_mobile']},  # contact number
            "_widget_1748241895833": {"value": self._epoch_ms(row["init_day"], "%Y%m%d")},  # initial day
            "_widget_1748241895834": {"value": self._epoch_ms(row["push_day"], "%Y%m%d")},  # push day
            "_widget_1748246808678": {"value": customer_service},  # current follower
            "_widget_1748246808679": {"value": relationship_manager},  # from service_impl_principal
            "_widget_1748246808680": {"value": customer_service},  # regional customer service
            "_widget_1748241895839": {"value": self._epoch_ms(row["saas_create_time"], "%Y-%m-%d")},  # account opening time
            "_widget_1748246808681": {"value": technician},  # from technician
            "_widget_1748246808682": {"value": area_manager},  # area manager
            "_widget_1748241895842": {"value": row['org_code']},  # store code
            "_widget_1748241895844": {"value": row['group_name']},  # company name
            "_widget_1748241895846": {"value": row['group_grade']},  # company grade
            "_widget_1748241895847": {"value": row['region_name']},  # region
            "_widget_1748241895848": {"value": row['province_name']},  # province
            "_widget_1748241895849": {"value": row['org_type']},  # store type
            "_widget_1748241895850": {"value": row['saas_edition_fmt']},  # system edition
            "_widget_1748241895851": {"value": row['saas_customer_type']},  # saas customer type
            "_widget_1748241895852": {"value": row['org_stage']},  # store stage
            # NOTE(review): this widget is labelled "操作模式E.L/E.S" but receives
            # contact_mobile - looks like a copy-paste slip; confirm the source column.
            "_widget_1748241895853": {"value": row['contact_mobile']},
            "_widget_1748241895855": {"value": row['city_name']},  # city
            "_widget_1748247754304": {"value": NGV_data_id},  # linked NGV record id
            "_widget_1748512176655": {"value": "未处理"},  # follow-up status
        }
        routine_follow_up_payload = {
            "api_key": "675b900991ad2491c69389ca",
            "entry_id": "68340de79f116c0b66b6b0cc",  # exception follow-up to-do form
            "is_start_workflow": "true",
            "data": payload_dict,
            "transaction_id": transaction_id
        }
        res = api_instance.data_batch_create(routine_follow_up_payload)
        logger.info(f"创建结果:{res}")

    def main(self):
        """Dispatch follow-up to-dos for the exception rows of the previous day.

        A failure on a single row is logged and the remaining rows are still
        processed; a failure outside the row loop is reported via
        ``common_module.send_task_error``.
        """
        task_start_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            self.load_all_data()
            # Exception detail rows, converted to strings
            self.data_yichang_S = common_module.get_yichang_details(days_back=1).astype(str)
            self.index = self.build_index(self.json_list)
            logger.info("开始运行SaaS异常回访")
            data_yichang = self.data_yichang_S.copy().apply(self._blank_missing)

            # Lookups built once instead of rescanning the lists for every row.
            open_todo_codes = {
                todo.get('_widget_1748241895842')
                for todo in self.exception_service_todo
                if todo.get('_widget_1748512176655') in ('未处理', '处理中')
            }
            # Later records overwrite earlier ones, i.e. the last match wins.
            ngv_by_code = {rec.get("_widget_1734062123071"): rec for rec in self.NGV_data_list}
            staff_lookup = self._build_staff_lookup(self.staff_id_list)

            for index_num, row in data_yichang.iterrows():
                try:
                    self._dispatch_row(row, open_todo_codes, ngv_by_code, staff_lookup)
                except Exception as e:
                    # Best effort per row, but never silently.
                    error_task_logger.error(f"异常服务待办派发失败,门店编码 {row.get('org_code')}: {e}")
            common_module.send_task_status(task_start_time, "异常服务待办派发")
        except Exception as e:
            error_task_logger.error(f"异常服务待办派发执行时发生异常: {e}")
            common_module.send_task_error(task_start_time, "异常服务待办派发", str(e))
if __name__ == '__main__':
    # Script entry point: build the task and run it once.
    NewExceptionTask().main()
+101
View File
@@ -0,0 +1,101 @@
{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-09-15T06:38:10.678825Z",
"start_time": "2025-09-15T06:38:10.523582Z"
}
},
"source": [
"from datetime import datetime, timezone, timedelta, date, UTC\n",
"import holidays\n",
"from config import Config\n",
"import psycopg2\n",
"import pandas as pd\n",
"import pymysql\n",
"from api import API\n",
"from log_config import configure_task_logger, configure_error_task_logger\n",
"\n",
"\n",
"def get_jcb_details():\n",
" \"\"\"\n",
" 从固定的数据库中获取前几天的NGV明细。\n",
" 参数 `days_back` 表示相对于今天的天数偏移量,默认为1(即前一天)。\n",
" 返回包含NGV明细的pandas DataFrame。\n",
" \"\"\"\n",
" # 保存为CSV文件\n",
" output_dir = \"output\" # 设置输出目录\n",
"\n",
" # 创建输出目录(如果不存在)\n",
" import os\n",
" os.makedirs(output_dir, exist_ok=True)\n",
"\n",
" try:\n",
" # 获得连接并创建游标\n",
" conn = pymysql.connect(\n",
" host=Config.BI_CONN_host,\n",
" database=Config.BI_CONN_INFO_database,\n",
" user=Config.BI_CONN_INFO_user,\n",
" password=Config.BI_CONN_INFO_password,\n",
" # charset='utf8mb4', # 设置字符集以避免编码问题\n",
" # cursorclass=pymysql.cursors.DictCursor # 返回字典形式的结果\n",
" )\n",
" cursor = conn.cursor()\n",
"\n",
" # 获取指定天数前的日期\n",
" # now_time = datetime.now()\n",
" # target_time = now_time + timedelta(days=-days_back)\n",
" target_date_id = \"接车宝\" # 获取目标日期\n",
"\n",
" # SQL 查询语句\n",
" sql = f\"\"\"\n",
" SELECT * FROM jdy_hs_holo_dws_sales_magic_box_ngv_d;\n",
" \"\"\"\n",
"\n",
" # 执行查询并获取结果\n",
" cursor.execute(sql)\n",
" rows = cursor.fetchall() # pymysql 的 DictCursor 会返回字典列表\n",
" print(rows)\n",
" except:\n",
" pass\n",
"\n",
"get_jcb_details()"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"()\n"
]
}
],
"execution_count": 5
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+202
View File
@@ -0,0 +1,202 @@
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "## 外部成员结构",
"id": "a58995e7e8657dce"
},
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-08-27T03:11:49.414114Z",
"start_time": "2025-08-27T03:11:49.016794Z"
}
},
"source": [
"from api import API\n",
"\n",
"api_instance = API()\n",
"\n",
"payload = {\n",
" \"api_key\": \"6694d3c4fcb69ca9a111a6c4\",\n",
" \"entry_id\": \"68ae76ddedae9bffae06a911\",\n",
"}\n",
"df = api_instance.entry_data_list(payload, replace=True)\n",
"print(df)"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001B[92m2025-08-27 11:11:49,228 - api.py - task_logger - INFO - 已获取 1 条数据\u001B[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"进行了替换\n",
"{'data': [{'creator': {'name': 'F6汽车科技', 'username': '#admin', 'status': 1, 'type': 0}, 'updater': {'name': 'F6汽车科技', 'username': '#admin', 'status': 1, 'type': 0}, 'deleter': None, 'createTime': '2025-08-27T03:09:49.522Z', 'updateTime': '2025-08-27T03:09:49.522Z', 'deleteTime': None, '成员单选': {'name': '申晨', 'username': 'R-688c8ba43678deccfcb5c386-jdy-jv71cd380jlj', 'status': 1, 'type': 2, 'departments': [-979913651]}, '成员多选': [{'name': '申晨', 'username': 'R-688c8ba43678deccfcb5c386-jdy-jv71cd380jlj', 'status': 1, 'type': 2, 'departments': [-979913651]}, {'name': '张阳', 'username': '4210192048793363', 'status': 1, 'type': 0, 'departments': [449008196], 'integrate_id': '4210192048793363'}], '_id': '68ae76fd74ad62855e55e195', 'appId': '6694d3c4fcb69ca9a111a6c4', 'entryId': '68ae76ddedae9bffae06a911'}]}\n"
]
}
],
"execution_count": 2
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## 向表格内写入外部成员",
"id": "e311761d3eff6179"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-27T03:13:40.574272Z",
"start_time": "2025-08-27T03:13:40.428525Z"
}
},
"cell_type": "code",
"source": [
"from api import API\n",
"\n",
"api_instance = API()\n",
"payload = {\n",
" \"api_key\": \"6694d3c4fcb69ca9a111a6c4\",\n",
" \"entry_id\": \"68ae76ddedae9bffae06a911\",\n",
" \"data\":{\"_widget_1756264157512\":{\"value\":\"R-688c8ba43678deccfcb5c386-jdy-jv71cd380jlj\"}}\n",
"}\n",
"dict = api_instance.data_batch_create(payload)\n",
"\n",
"print(dict)"
],
"id": "80256c669800af2f",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'data': {'creator': {'name': 'F6汽车科技', 'username': '#admin', 'status': 1, 'type': 0}, 'updater': {'name': 'F6汽车科技', 'username': '#admin', 'status': 1, 'type': 0}, 'deleter': None, 'createTime': '2025-08-27T03:13:40.349Z', 'updateTime': '2025-08-27T03:13:40.349Z', 'deleteTime': None, '_widget_1756264157512': {'name': '申晨', 'username': 'R-688c8ba43678deccfcb5c386-jdy-jv71cd380jlj', 'status': 1, 'type': 2, 'departments': [-979913651]}, '_widget_1756264157513': [], '_id': '68ae77e44b356b9bc83e338d', 'appId': '6694d3c4fcb69ca9a111a6c4', 'entryId': '68ae76ddedae9bffae06a911'}}\n"
]
}
],
"execution_count": 3
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## 获取关联企业",
"id": "714a210b956fe262"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-27T03:16:34.842344Z",
"start_time": "2025-08-27T03:16:34.456948Z"
}
},
"cell_type": "code",
"source": [
"import requests\n",
"import json\n",
"\n",
"url = \"https://api.jiandaoyun.com/api/v5/corp/guest/department/list\"\n",
"\n",
"headers = {\n",
" 'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN',\n",
" 'Content-Type': 'application/json'\n",
"}\n",
"response = requests.post(url, headers=headers)\n",
"print(response.json())\n",
"dept_list = response.json().get(\"dept_list\",[])\n",
"for dept in dept_list:\n",
" print(dept.get(\"name\"))"
],
"id": "d14ea7a6ab8b00dc",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'dept_list': [{'dept_no': -12, 'name': 'CASTROL LIMITED', 'type': 2, 'status': 1}, {'dept_no': -979913650, 'name': '曹伟手机号注册所处公司', 'type': 2, 'status': 1}, {'dept_no': -979913651, 'name': '申晨', 'type': 2, 'status': 1}]}\n"
]
}
],
"execution_count": 5
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## 获取关联企业对接人",
"id": "28355f7a1f5b7a3a"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-27T03:19:09.922520Z",
"start_time": "2025-08-27T03:19:09.839699Z"
}
},
"cell_type": "code",
"source": [
"import requests\n",
"import json\n",
"url = \"https://api.jiandaoyun.com/api/v5/corp/guest/user/list\"\n",
"\n",
"\n",
"headers = {\n",
" 'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN',\n",
" 'Content-Type': 'application/json'\n",
"}\n",
"\n",
"response = requests.post(url, headers=headers)\n",
"all_data = []\n",
"member_list = response.json().get(\"member_list\",[])\n",
"for member in member_list:\n",
" name = member.get(\"name\")\n",
" username = member.get(\"username\") # 用户id\n",
" all_data.append({\"name\":name,\"username\":username})\n"
],
"id": "11fd29cc47185320",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sylvia7203@gmail.com\n",
"金鹏测试\n",
"曹伟手机号注册\n",
"葡萄\n",
"申晨\n"
]
}
],
"execution_count": 7
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+211
View File
@@ -0,0 +1,211 @@
from datetime import datetime
import os
from config import Config
import pandas as pd
from back_ground_module import CommonModule
from api import API
from log_config import configure_task_logger, configure_error_task_logger
import requests
import numpy as np # 确保导入numpy(如果涉及numpy数组)
# Task logger and error-task logger, both built by helpers from log_config.
logger = configure_task_logger()
error_task_logger = configure_error_task_logger()
output_dir = "output"  # output directory
# Created at import time; exist_ok avoids an error when it already exists.
os.makedirs(output_dir, exist_ok=True)
# Shared module-level instances referenced by the code in this file.
common_module = CommonModule()
api_instance = API()
class GDMatchPhoneNumber:
    """Match company records against the AMap (Gaode) keyword search.

    ``main`` runs the whole task: load the source form records, query AMap
    for every record that is not yet flagged as queried, flag each processed
    record, and batch-upload the collected POIs to the result form.

    NOTE(review): several single-character string literals (three keys of
    ``fild_mapping``, the ``row[""]`` lookups and the "queried" flag value)
    appear as empty strings here. They are reproduced unchanged, but they
    look like lost labels (presumably province / city / district and a
    yes-flag) -- confirm against the form definition before relying on them.
    """

    # NOTE(review): credentials and form identifiers are hard-coded; consider
    # moving them into Config.
    FORM_API_KEY = "66f3a68c6e56814df2c6b1af"
    SOURCE_ENTRY_ID = "68d20734a9add4c6126ee9f2"
    RESULT_ENTRY_ID = "68d2148d8bcb4d1716b1c03f"

    # AMap v3 keyword search endpoint (without the "?parameters" placeholder
    # that the documentation prints after the path).
    AMAP_SEARCH_URL = "https://restapi.amap.com/v3/place/text"
    AMAP_PRIMARY_KEY = "273b328f2e85b7e1ad6faa0d4f33ccf2"
    AMAP_FALLBACK_KEY = "f61b09d406ac49f8a034bf585e60c442"
    AMAP_POI_TYPES = "010400|010500|010800|020000|030000"
    AMAP_PAGE_SIZE = "20"
    MAX_PAGES = 2                 # pages requested per record
    KEY_SWITCH_THRESHOLD = 150    # switch to the fallback key above this many records
    MAX_ROWS_PER_RUN = 300        # stop once more than this many records were processed
    REQUEST_TIMEOUT = 10          # seconds

    def __init__(self):
        # Records returned by load_all_data(); None until it has run.
        self.loader_company_data = None
        # Column label -> widget id of the source form.
        # NOTE(review): three entries share the key "" as shown, so only the
        # last one survives in the dict -- see the class docstring.
        self.fild_mapping = {
            "是否已查询": "_widget_1758594869262",
            "": "_widget_1758594869257",
            "": "_widget_1758594869258",
            "": "_widget_1758594869259",
            "公司名称": "_widget_1758594869260",
            "详细地址": "_widget_1758594869261",
        }
        # Result column (source columns plus AMap POI fields) -> widget id of
        # the result form.
        self.upload_fild_mapping = {
            "源文件省": "_widget_1758598285406",
            "源文件市": "_widget_1758598285407",
            "源文件区": "_widget_1758598285408",
            "源文件地址": "_widget_1758598285409",
            "源文件门店店名": "_widget_1758598285410",
            "名称相似度": "_widget_1758598285411",
            "地址相似度": "_widget_1758598285412",
            "综合相似度": "_widget_1758598285413",
            "address": "_widget_1758598285387",
            "pname": "_widget_1758598285389",
            "cityname": "_widget_1758598285393",
            "adname": "_widget_1758598285400",
            "name": "_widget_1758598285401",
            "tel": "_widget_1758598285403",
            "parent": "_widget_1758598285386",
            "distance": "_widget_1758598285388",
            "importance": "_widget_1758598285390",
            "biz_ext": "_widget_1758598285391",
            "biz_type": "_widget_1758598285392",
            "type": "_widget_1758598285394",
            "photos": "_widget_1758598285395",
            "typecode": "_widget_1758598285396",
            "shopinfo": "_widget_1758598285397",
            "poiweight": "_widget_1758598285398",
            "childtype": "_widget_1758598285399",
            "location": "_widget_1758598285402",
            "shopid": "_widget_1758598285404",
            "id": "_widget_1758598285405",
        }

    def load_all_data(self):
        """Fetch the source form records and keep them on the instance.

        Calls ``api_instance.entry_data_list`` for the source entry and stores
        the value under the response's ``"data"`` key in
        ``self.loader_company_data``.
        """
        payload = {
            "api_key": self.FORM_API_KEY,
            "entry_id": self.SOURCE_ENTRY_ID,
        }
        loader_company = api_instance.entry_data_list(payload)
        # The API wrapper returns the records wrapped in a dict under "data".
        self.loader_company_data = loader_company.get("data")

    @staticmethod
    def row_to_dict(row, field_mapping):
        """Convert one row into a ``{widget_id: {"value": ...}}`` dict.

        Args:
            row: Mapping-like row (e.g. a ``pandas.Series`` or a dict) keyed
                by column name.
            field_mapping: Column name -> widget id. Columns missing from
                *row* are skipped.

        Returns:
            dict with one entry per mapped column present in *row*. Empty
            lists/arrays and missing values become ``None``; ``Timestamp``
            values are formatted as ``%Y-%m-%dT%H:%M:%SZ``.
        """
        result = {}
        for col_name, widget_id in field_mapping.items():
            if col_name not in row:
                continue
            value = row[col_name]
            if isinstance(value, (list, np.ndarray)):
                # Checked before pd.isna, which is element-wise on sequences.
                clean_value = value if len(value) > 0 else None
            elif pd.isna(value):
                clean_value = None
            elif isinstance(value, pd.Timestamp):
                # NOTE(review): appends "Z" without converting to UTC --
                # confirm the timestamps are already UTC.
                clean_value = value.strftime('%Y-%m-%dT%H:%M:%SZ')
            else:
                clean_value = value
            result[widget_id] = {"value": clean_value}
        return result

    @staticmethod
    def _strip_region_names(address, region_names):
        """Return *address* without *region_names*, whitespace collapsed.

        Non-string addresses (e.g. NaN for an empty field) yield ``""``;
        non-string or empty region names are ignored.
        """
        if not isinstance(address, str):
            return ""
        cleaned = address
        for region_name in region_names:
            if isinstance(region_name, str) and region_name:
                cleaned = cleaned.replace(region_name, "").strip()
        return " ".join(cleaned.split())

    @classmethod
    def _build_search_params(cls, keywords, city, page, api_key):
        """Build the query parameters for one AMap v3 keyword-search page.

        The v3 endpoint names its paging and city-limit parameters
        ``page`` / ``offset`` / ``citylimit``; the ``page_num`` /
        ``page_size`` / ``city_limit`` names belong to the v5 API.
        """
        return {
            "key": api_key,
            "keywords": keywords,
            "types": cls.AMAP_POI_TYPES,
            "city": city,
            "citylimit": "true",
            "offset": cls.AMAP_PAGE_SIZE,
            "page": str(page),
        }

    def _search_amap(self, keywords, city, page, api_key):
        """Request one result page from AMap and return its ``pois`` list.

        Raises:
            requests.RequestException: On network errors, timeouts or a
                non-2xx HTTP status.
            RuntimeError: When the response body does not report
                ``status == "1"``.
        """
        response = requests.get(
            url=self.AMAP_SEARCH_URL,
            params=self._build_search_params(keywords, city, page, api_key),
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        body = response.json()
        if str(body.get("status")) != "1":
            raise RuntimeError(
                f"AMap search failed: info={body.get('info')}, "
                f"infocode={body.get('infocode')}"
            )
        return body.get("pois") or []

    def match_phone_number(self):
        """Query AMap for every unqueried record and collect the POIs.

        For each record the address is cleaned, up to ``MAX_PAGES`` result
        pages are fetched, every POI is tagged with the source columns, and
        the record is flagged as queried through
        ``api_instance.entry_data_update``.

        If an AMap request fails, the error is logged, the current record is
        left unflagged, and processing stops; POIs of the records flagged so
        far are still returned so they can be uploaded.

        Returns:
            pandas.DataFrame with one row per collected POI (empty when
            nothing was matched).
        """
        # Rename widget-id columns to their labels.
        df = pd.DataFrame(self.loader_company_data)
        widget_to_label = {widget: label for label, widget in self.fild_mapping.items()}
        df.rename(columns=widget_to_label, inplace=True)

        processed = 0  # records handled in this run
        matched_pois = []
        for _, row in df.iterrows():
            # Skip records already flagged (flag literal: see class docstring).
            if row["是否已查询"] == "":
                continue

            # Remove the region names from the detailed address.
            row["详细地址"] = self._strip_region_names(
                row["详细地址"], (row[""], row[""])
            )
            # Special handling for the municipalities.
            if row[""] in ["天津市", "上海市", "重庆市", "北京市"] and row[""] == "市辖区":
                row[""] = row[""]

            keywords = row["公司名称"].replace("(个体工商户)", "").strip()
            city = row[""]
            api_key = (
                self.AMAP_FALLBACK_KEY
                if processed > self.KEY_SWITCH_THRESHOLD
                else self.AMAP_PRIMARY_KEY
            )

            try:
                pois = []
                for page in range(1, self.MAX_PAGES + 1):
                    pois.extend(self._search_amap(keywords, city, page, api_key))
            except (requests.RequestException, ValueError, RuntimeError) as exc:
                # Leave this record unflagged so a later run retries it.
                error_task_logger.error(f"高德接口查询失败,已停止本次后续查询: {exc}")
                break

            source_columns = {
                "源文件省": row[""],
                "源文件市": row[""],
                "源文件区": row[""],
                "源文件地址": row["详细地址"],
                "源文件门店店名": row["公司名称"],
            }
            for poi in pois:
                poi.update(source_columns)
                matched_pois.append(poi)
            processed += 1

            # Flag the record as queried.
            modify_payload = {
                "api_key": self.FORM_API_KEY,
                "entry_id": self.SOURCE_ENTRY_ID,
                "data_id": row["_id"],
                "data": {
                    self.fild_mapping["是否已查询"]: {"value": ""},
                },
            }
            api_instance.entry_data_update(modify_payload)
            if processed > self.MAX_ROWS_PER_RUN:
                break

        return pd.DataFrame(matched_pois)

    def upload_df(self, result_df):
        """Batch-create the matched POIs in the result form.

        Does nothing (apart from logging) when *result_df* is empty, so no
        empty batch is sent.
        """
        if result_df.empty:
            logger.info("没有匹配到数据,跳过上传。")
            return
        records = [
            self.row_to_dict(row, self.upload_fild_mapping)
            for _, row in result_df.iterrows()
        ]
        payload = {
            "api_key": self.FORM_API_KEY,
            "entry_id": self.RESULT_ENTRY_ID,
            "data_list": records,
        }
        api_instance.entry_data_batch_create(payload)

    def main(self):
        """Run load -> match -> upload and report the task status.

        Any exception is logged to the error-task logger and re-raised; the
        status report is only sent when all steps succeed.
        """
        task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            self.load_all_data()
            logger.info("数据加载完成。")

            result_df = self.match_phone_number()
            logger.info("数据匹配完成。")

            self.upload_df(result_df)
            logger.info("数据上传完成。")
        except Exception as e:
            # Include the cause so the failure can be diagnosed from the log.
            error_task_logger.error(f"任务高德匹配手机号执行失败。错误信息: {e}")
            raise
        common_module.send_task_status(task_start_time, "高德匹配手机号")
if __name__ == '__main__':
    # Script entry point: build the task object and run the full pipeline.
    GDMatchPhoneNumber().main()
+32 -17
View File
@@ -32,7 +32,7 @@ class YDAPI:
token = res.json().get('accessToken')
return token
def update_from(self, token, formInstanceId, data_new):
def update_from(self, token, formInstanceId, data_new,delay=2, max_retries=10):
"""
函数功能更新表单内容
@@ -62,10 +62,19 @@ class YDAPI:
"updateFormDataJson": json.dumps(data_new, cls=NpEncoder),
}
res = requests.put(api, headers=headers, json=payload)
return res
attempt = 0
while True:
if attempt >= max_retries:
error_task_logger.error(f"请求失败,已达最大重试次数 {max_retries},无法更新表单数据,跳过本次请求。")
break
try:
res = requests.get(api, headers=headers,timeout=10)
res.raise_for_status() # 如果响应状态码不是2xx,则抛出HTTPError
return res.json()
except (requests.exceptions.RequestException, Exception) as e:
print(f"请求出现异常: {e}, 正在重试({attempt + 1}/{max_retries})...")
time.sleep(delay) # 等待指定的延迟时间后再次尝试
attempt += 1
def processes_instancesInfos(self, token, id, appType="APP_UYZ0KG6L0CCNV80GZ66O",
systemToken="XA966F81JAJOFCVVVKO64E9MIIZV1EWE5SFMKJ2", delay=2, max_retries=10):
@@ -106,7 +115,7 @@ class YDAPI:
attempt += 1
def read_processes(self, token, formUuid, page, n, appType="APP_UYZ0KG6L0CCNV80GZ66O",
systemToken="XA966F81JAJOFCVVVKO64E9MIIZV1EWE5SFMKJ2"):
systemToken="XA966F81JAJOFCVVVKO64E9MIIZV1EWE5SFMKJ2", max_retries=10,delay=2):
"""
函数功能读取普通表单的所有数据
@@ -134,9 +143,21 @@ class YDAPI:
'currentPage': page,
'pageSize': n
}
res = requests.post(api, headers=headers, json=formData)
print(formData)
return res.json()
attempt = 0
while True:
if attempt >= max_retries:
error_task_logger.error(f"请求失败,已达最大重试次数 {max_retries},无法获取普通表单数据,跳过本次请求。")
break
try:
res = requests.post(api, headers=headers, json=formData)
res.raise_for_status() # 如果返回状态码不是2xx,抛出异常
return res.json()
except requests.exceptions.RequestException as e:
logger.warning(f"请求异常: {e},正在尝试第 {attempt + 1} 次重试...")
time.sleep(delay)
attempt += 1
def read_processes_instances(self, token, formUuid, page, n, appType="APP_UYZ0KG6L0CCNV80GZ66O",
systemToken="XA966F81JAJOFCVVVKO64E9MIIZV1EWE5SFMKJ2", instanceStatus="RUNNING",
@@ -220,7 +241,7 @@ class YDAPI:
while True:
if attempt >= max_retries:
error_task_logger.error(f"请求失败,已达最大重试次数 {max_retries},无法获取流程实例数据,跳过本次请求。")
error_task_logger.error(f"请求失败,已达最大重试次数 {max_retries},无法获取审批数据,跳过本次请求。")
break
try:
@@ -256,13 +277,6 @@ class YDAPI:
"x-acs-dingtalk-access-token": token
}
#
# prams = {
# "systemToken": systemToken,
# "appType": appType,
# "userId": userId,
# "language": "zh_CN",
# }
while True:
if attempt >= max_retries:
@@ -325,6 +339,7 @@ class YDAPI:
"language": "zh_CN",
"taskId": int(taskId)
}
res = requests.post(api, headers=headers, json=payload)
return res