213 lines
8.2 KiB
Python
213 lines
8.2 KiB
Python
from datetime import datetime
|
|
import os
|
|
from config import Config
|
|
import pandas as pd
|
|
from back_ground_module import CommonModule
|
|
from api import API
|
|
from log_config import configure_task_logger, configure_error_task_logger
|
|
import requests
|
|
import numpy as np # 确保导入numpy(如果涉及numpy数组)
|
|
|
|
# Task loggers: `logger` receives progress messages, `error_task_logger`
# receives failure messages (see GDMatchPhoneNumber.main).
logger = configure_task_logger()
error_task_logger = configure_error_task_logger()

# Output directory, created at import time if it does not exist yet.
output_dir = "output"
os.makedirs(name=output_dir, exist_ok=True)

# Shared project helpers: task status/error notifications and the form API.
common_module = CommonModule()
api_instance = API()
|
|
|
|
|
|
class GDMatchPhoneNumber:
    """Match company records to Amap (Gaode) POIs to obtain phone numbers.

    Workflow (see ``main``): load the company records from the form API,
    search Amap for every record that is not yet flagged as queried, then
    upload the returned POIs to the result form.
    """

    # NOTE(review): credentials are hard-coded in source; `Config` is imported
    # by this module but unused -- consider loading these from configuration.
    FORM_API_KEY = "66f3a68c6e56814df2c6b1af"
    SOURCE_ENTRY_ID = "68d20734a9add4c6126ee9f2"
    RESULT_ENTRY_ID = "68d2148d8bcb4d1716b1c03f"

    AMAP_SEARCH_URL = "https://restapi.amap.com/v3/place/text"
    AMAP_PRIMARY_KEY = "273b328f2e85b7e1ad6faa0d4f33ccf2"
    AMAP_BACKUP_KEY = "f61b09d406ac49f8a034bf585e60c442"
    AMAP_POI_TYPES = "010400|010500|010800|020000|030000"
    AMAP_PAGE_SIZE = 20
    AMAP_MAX_PAGES = 2  # request at most two result pages per company
    AMAP_TIMEOUT_SECONDS = 10

    # Rows processed in one run before switching to the backup Amap key, and
    # the per-run cap on processed rows.
    BACKUP_KEY_AFTER_ROWS = 150
    MAX_ROWS_PER_RUN = 300

    # Municipalities whose city column holds the placeholder "市辖区".
    MUNICIPALITIES = ("天津市", "上海市", "重庆市", "北京市")
    # Suffixes removed from the company name before searching (half-width
    # and full-width parentheses).
    COMPANY_SUFFIXES = ("(个体工商户)", "（个体工商户）")

    def __init__(self):
        # Raw records returned by the form API; filled by load_all_data().
        self.loader_company_data = None
        # Readable column name -> widget id of the source form.
        self.fild_mapping = {
            "是否已查询": "_widget_1758594869262",
            "省": "_widget_1758594869257",
            "市": "_widget_1758594869258",
            "区": "_widget_1758594869259",
            "公司名称": "_widget_1758594869260",
            "详细地址": "_widget_1758594869261",
        }
        # Result column name -> widget id of the result form.
        self.upload_fild_mapping = {
            "源文件省": "_widget_1758598285406",
            "源文件市": "_widget_1758598285407",
            "源文件区": "_widget_1758598285408",
            "源文件地址": "_widget_1758598285409",
            "源文件门店店名": "_widget_1758598285410",
            "名称相似度": "_widget_1758598285411",
            "地址相似度": "_widget_1758598285412",
            "综合相似度": "_widget_1758598285413",
            "address": "_widget_1758598285387",
            "pname": "_widget_1758598285389",
            "cityname": "_widget_1758598285393",
            "adname": "_widget_1758598285400",
            "name": "_widget_1758598285401",
            "tel": "_widget_1758598285403",
            "parent": "_widget_1758598285386",
            "distance": "_widget_1758598285388",
            "importance": "_widget_1758598285390",
            "biz_ext": "_widget_1758598285391",
            "biz_type": "_widget_1758598285392",
            "type": "_widget_1758598285394",
            "photos": "_widget_1758598285395",
            "typecode": "_widget_1758598285396",
            "shopinfo": "_widget_1758598285397",
            "poiweight": "_widget_1758598285398",
            "childtype": "_widget_1758598285399",
            "location": "_widget_1758598285402",
            "shopid": "_widget_1758598285404",
            "id": "_widget_1758598285405",
        }

    def load_all_data(self):
        """Fetch the source form records and keep them on the instance."""
        payload = {
            "api_key": self.FORM_API_KEY,
            "entry_id": self.SOURCE_ENTRY_ID,
        }
        response = api_instance.entry_data_list(payload)
        # The API response wraps the records in its "data" entry.
        self.loader_company_data = response.get("data")

    @staticmethod
    def row_to_dict(row, field_mapping):
        """Convert one row into the ``{widget_id: {"value": ...}}`` format.

        Args:
            row: Mapping-like row (``dict`` or ``pandas.Series``).
            field_mapping: Column name -> widget id. Columns that are absent
                from ``row`` are skipped.

        Returns:
            dict keyed by widget id. Empty lists/arrays and missing values
            become ``None``; timestamps are formatted as
            ``%Y-%m-%dT%H:%M:%SZ``; numpy arrays and numpy scalars are
            converted to plain Python objects so the payload can be
            JSON-encoded.
        """
        result = {}
        for col_name, widget_id in field_mapping.items():
            if col_name not in row:
                continue
            value = row[col_name]
            if isinstance(value, (list, np.ndarray)):
                if len(value) == 0:
                    clean_value = None  # empty sequence counts as "no value"
                elif isinstance(value, np.ndarray):
                    clean_value = value.tolist()
                else:
                    clean_value = value
            elif pd.isna(value):
                clean_value = None
            elif isinstance(value, pd.Timestamp):
                # NOTE(review): the literal "Z" is appended without any
                # timezone conversion -- confirm the timestamps are UTC.
                clean_value = value.strftime('%Y-%m-%dT%H:%M:%SZ')
            elif isinstance(value, np.generic):
                clean_value = value.item()
            else:
                clean_value = value
            result[widget_id] = {"value": clean_value}
        return result

    @staticmethod
    def _text(value):
        """Return ``value`` as a string, mapping missing values to ``""``."""
        if isinstance(value, str):
            return value
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ""
        return str(value)

    @staticmethod
    def _clean_address(address, city, district):
        """Drop the city and district names from ``address`` and collapse whitespace."""
        cleaned = address.replace(city, "").strip()
        cleaned = cleaned.replace(district, "").strip()
        return " ".join(cleaned.split())

    def _search_amap(self, keywords, region, page_num, use_backup_key=False):
        """Run one Amap keyword search and return the ``pois`` list of the response.

        Raises:
            requests.RequestException: on network failure or timeout.
        """
        # NOTE(review): `page_size` / `page_num` / `city_limit` look like the
        # v5 parameter names, while this is the v3 endpoint (which documents
        # `offset` / `page` / `citylimit`) -- confirm that pagination and the
        # city restriction actually take effect.
        params = {
            "key": self.AMAP_BACKUP_KEY if use_backup_key else self.AMAP_PRIMARY_KEY,
            "keywords": keywords,
            "types": self.AMAP_POI_TYPES,
            "city": region,
            "city_limit": "true",
            "page_size": str(self.AMAP_PAGE_SIZE),
            "page_num": str(page_num),
        }
        # A timeout keeps a stalled connection from blocking the task forever.
        res = requests.get(
            url=self.AMAP_SEARCH_URL,
            params=params,
            timeout=self.AMAP_TIMEOUT_SECONDS,
        )
        # NOTE(review): a response without "pois" (presumably an error
        # payload) is treated as "no results" and the row is still flagged
        # as queried by the caller -- confirm this is intended.
        return res.json().get("pois", [])

    def match_phone_number(self):
        """Search Amap for every not-yet-queried company and collect the POIs.

        Each returned POI is annotated with the source row's province, city,
        district, cleaned address and company name. After a row has been
        searched it is flagged as queried in the source form. Processing
        stops once more than ``MAX_ROWS_PER_RUN`` rows were handled.

        Returns:
            pandas.DataFrame with one row per POI (empty if nothing matched).
        """
        df = pd.DataFrame(self.loader_company_data)
        # Rename widget-id columns to their readable names.
        widget_to_label = {widget: label for label, widget in self.fild_mapping.items()}
        df = df.rename(columns=widget_to_label)

        processed = 0  # rows searched in this run
        collected = []
        for _, row in df.iterrows():
            if row.get("是否已查询") == "是":
                continue

            # Missing cells are coerced to "" so the string handling below
            # cannot fail on NaN/None.
            province = self._text(row.get("省"))
            city = self._text(row.get("市"))
            district = self._text(row.get("区"))
            company = self._text(row.get("公司名称"))
            address = self._clean_address(
                self._text(row.get("详细地址")), city, district
            )

            # Municipalities: search by the municipality name instead of the
            # placeholder city.
            if province in self.MUNICIPALITIES and city == "市辖区":
                city = province

            keywords = company
            for suffix in self.COMPANY_SUFFIXES:
                keywords = keywords.replace(suffix, "")
            keywords = keywords.strip()

            use_backup_key = processed > self.BACKUP_KEY_AFTER_ROWS
            for page_num in range(1, self.AMAP_MAX_PAGES + 1):
                pois = self._search_amap(keywords, city, page_num, use_backup_key)
                if not pois:
                    break  # an empty page means there is nothing further
                for poi in pois:
                    poi.update({
                        "源文件省": province,
                        "源文件市": city,
                        "源文件区": district,
                        "源文件地址": address,
                        "源文件门店店名": company,
                    })
                    collected.append(poi)
            processed += 1

            # Flag the source record as queried.
            # NOTE(review): this happens before the results are uploaded; if
            # a later step fails, flagged rows will not be searched again.
            api_instance.entry_data_update({
                "api_key": self.FORM_API_KEY,
                "entry_id": self.SOURCE_ENTRY_ID,
                "data_id": row["_id"],
                "data": {
                    self.fild_mapping["是否已查询"]: {"value": "是"},
                },
            })

            if processed > self.MAX_ROWS_PER_RUN:
                break
        return pd.DataFrame(collected)

    def upload_df(self, result_df):
        """Batch-create one result-form record per row of ``result_df``.

        Returns without calling the API when there is nothing to upload.
        """
        if result_df is None or result_df.empty:
            return
        records = [
            self.row_to_dict(row, self.upload_fild_mapping)
            for _, row in result_df.iterrows()
        ]
        api_instance.entry_data_batch_create({
            "api_key": self.FORM_API_KEY,
            "entry_id": self.RESULT_ENTRY_ID,
            "data_list": records,
        })

    def main(self):
        """Run load -> match -> upload and report the task outcome.

        Any exception is reported through ``common_module.send_task_error``,
        logged to the error logger and re-raised.
        """
        task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            # Load the source records.
            self.load_all_data()
            logger.info("数据加载完成。")

            # Match phone numbers through the Amap API.
            result_df = self.match_phone_number()
            logger.info("数据匹配完成。")

            # Upload the results to Jiandaoyun.
            self.upload_df(result_df)
            logger.info("数据上传完成。")
        except Exception as e:
            common_module.send_task_error(task_start_time, "高德匹配手机号", str(e))
            error_task_logger.error("任务高德匹配手机号执行失败。")
            raise

        common_module.send_task_status(task_start_time, "高德匹配手机号")
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: run the whole load -> match -> upload task once.
    GDMatchPhoneNumber().main()
|