"""Match company records against Amap (高德) POI search and upload the hits to Jiandaoyun."""
import os
from datetime import datetime

import numpy as np  # Needed for the numpy-array checks in row_to_dict.
import pandas as pd
import requests

from config import Config
from back_ground_module import CommonModule
from api import API
from log_config import configure_task_logger, configure_error_task_logger

logger = configure_task_logger()
error_task_logger = configure_error_task_logger()

output_dir = "output"  # Output directory.
os.makedirs(output_dir, exist_ok=True)

common_module = CommonModule()
api_instance = API()


class GDMatchPhoneNumber:
    """Look up company rows on Amap and store the returned POIs.

    Workflow (see ``main``): load the source rows, search Amap for every row
    that is not yet flagged as queried, flag each searched row, then upload
    the collected POIs to the result form.
    """

    # NOTE(review): credentials and form ids are hard-coded in source. They
    # are only gathered here; consider moving them to configuration.
    JDY_API_KEY = "66f3a68c6e56814df2c6b1af"
    SOURCE_ENTRY_ID = "68d20734a9add4c6126ee9f2"
    RESULT_ENTRY_ID = "68d2148d8bcb4d1716b1c03f"
    QUERIED_FLAG_WIDGET = "_widget_1758594869262"

    # Amap v3 keyword search. The endpoint is used without the "?parameters"
    # placeholder that appears in the vendor documentation.
    AMAP_SEARCH_URL = "https://restapi.amap.com/v3/place/text"
    AMAP_PRIMARY_KEY = "273b328f2e85b7e1ad6faa0d4f33ccf2"
    AMAP_SECONDARY_KEY = "f61b09d406ac49f8a034bf585e60c442"
    AMAP_POI_TYPES = "010400|010500|010800|020000|030000"
    AMAP_PAGE_SIZE = 20
    AMAP_MAX_PAGES = 2  # Request at most two pages per row.
    AMAP_TIMEOUT_SECONDS = 10

    # Once more than this many rows have been processed, switch to the
    # secondary Amap key.
    KEY_SWITCH_THRESHOLD = 150
    # The row loop stops as soon as the processed-row counter exceeds this.
    MAX_ROWS_PER_RUN = 300

    # Provinces whose city column may hold the placeholder "市辖区".
    MUNICIPALITIES = ("天津市", "上海市", "重庆市", "北京市")

    def __init__(self):
        """Initialise the widget-id mappings for the source and result forms."""
        self.loader_company_data = None
        # Column name -> widget id in the source form.
        self.fild_mapping = {
            "是否已查询": "_widget_1758594869262",
            "省": "_widget_1758594869257",
            "市": "_widget_1758594869258",
            "区": "_widget_1758594869259",
            "公司名称": "_widget_1758594869260",
            "详细地址": "_widget_1758594869261",
        }
        # Result column / POI field -> widget id in the result form.
        self.upload_fild_mapping = {
            "源文件省": "_widget_1758598285406",
            "源文件市": "_widget_1758598285407",
            "源文件区": "_widget_1758598285408",
            "源文件地址": "_widget_1758598285409",
            "源文件门店店名": "_widget_1758598285410",
            "名称相似度": "_widget_1758598285411",
            "地址相似度": "_widget_1758598285412",
            "综合相似度": "_widget_1758598285413",
            "address": "_widget_1758598285387",
            "pname": "_widget_1758598285389",
            "cityname": "_widget_1758598285393",
            "adname": "_widget_1758598285400",
            "name": "_widget_1758598285401",
            "tel": "_widget_1758598285403",
            "parent": "_widget_1758598285386",
            "distance": "_widget_1758598285388",
            "importance": "_widget_1758598285390",
            "biz_ext": "_widget_1758598285391",
            "biz_type": "_widget_1758598285392",
            "type": "_widget_1758598285394",
            "photos": "_widget_1758598285395",
            "typecode": "_widget_1758598285396",
            "shopinfo": "_widget_1758598285397",
            "poiweight": "_widget_1758598285398",
            "childtype": "_widget_1758598285399",
            "location": "_widget_1758598285402",
            "shopid": "_widget_1758598285404",
            "id": "_widget_1758598285405",
        }

    def load_all_data(self):
        """Fetch the source rows and keep them in ``self.loader_company_data``."""
        payload = {
            "api_key": self.JDY_API_KEY,
            "entry_id": self.SOURCE_ENTRY_ID,
        }
        loader_company = api_instance.entry_data_list(payload)
        # The API response wraps the rows in a "data" entry.
        self.loader_company_data = (loader_company or {}).get("data")

    @staticmethod
    def row_to_dict(row, field_mapping):
        """Convert one row into the ``{widget_id: {"value": ...}}`` format.

        Args:
            row: Mapping-like row (e.g. a pandas Series) keyed by column name.
            field_mapping: Column name -> widget id. Columns that are not
                present in ``row`` are skipped.

        Returns:
            dict: One ``{"value": cleaned}`` entry per mapped column found.
            Empty lists/arrays and missing values become ``None``;
            ``pd.Timestamp`` values are rendered as ``%Y-%m-%dT%H:%M:%SZ``.
        """
        result = {}
        for col_name, widget_id in field_mapping.items():
            if col_name not in row:
                continue
            value = row[col_name]
            if isinstance(value, (list, np.ndarray)):
                # Checked before pd.isna, which is element-wise on sequences.
                clean_value = value if len(value) else None
            elif pd.isna(value):
                clean_value = None
            elif isinstance(value, pd.Timestamp):
                clean_value = value.strftime('%Y-%m-%dT%H:%M:%SZ')
            else:
                clean_value = value
            result[widget_id] = {"value": clean_value}
        return result

    @staticmethod
    def _as_text(value):
        """Return ``value`` as a string, mapping None/NaN to an empty string."""
        if isinstance(value, str):
            return value
        if value is None or (isinstance(value, float) and pd.isna(value)):
            return ""
        return str(value)

    @staticmethod
    def _clean_address(address, city, district):
        """Strip the city and district names from ``address`` and collapse whitespace."""
        cleaned = address.replace(city, "").strip()
        cleaned = cleaned.replace(district, "").strip()
        return " ".join(cleaned.split())

    def _search_amap(self, keywords, city, page_num, api_key):
        """Run one Amap v3 keyword search and return the list of POIs.

        Raises:
            requests.RequestException: On network failure, timeout or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
            RuntimeError: If Amap reports a failure (``status`` other than
                "1"), e.g. an invalid key or an exhausted quota.
        """
        params = {
            "key": api_key,
            "keywords": keywords,
            "types": self.AMAP_POI_TYPES,
            "city": city,
            # The v3 endpoint names these citylimit / offset / page; the
            # city_limit / page_size / page_num names belong to the v5 API.
            "citylimit": "true",
            "offset": str(self.AMAP_PAGE_SIZE),
            "page": str(page_num),
        }
        res = requests.get(
            url=self.AMAP_SEARCH_URL,
            params=params,
            timeout=self.AMAP_TIMEOUT_SECONDS,
        )
        res.raise_for_status()
        body = res.json()
        if str(body.get("status")) != "1":
            raise RuntimeError(
                f"Amap search failed: info={body.get('info')}, "
                f"infocode={body.get('infocode')}"
            )
        return body.get("pois") or []

    def _collect_pois(self, keywords, city, api_key):
        """Collect the POIs of up to ``AMAP_MAX_PAGES`` result pages for one row."""
        collected = []
        for page_num in range(1, self.AMAP_MAX_PAGES + 1):
            pois = self._search_amap(keywords, city, page_num, api_key)
            if not pois:
                # An empty page means there is nothing further to fetch.
                break
            collected.extend(pois)
        return collected

    def match_phone_number(self):
        """Search Amap for every not-yet-queried row and flag the row as queried.

        A row is flagged only after all of its pages were fetched. If a search
        fails, the run stops early: the failing row stays unflagged and the
        POIs gathered so far are still returned, because their source rows
        have already been flagged.

        Returns:
            pandas.DataFrame: One row per POI, extended with the "源文件*"
            columns that describe the source row.
        """
        # Rename the widget-id columns to their readable names.
        df = pd.DataFrame(self.loader_company_data)
        widget_to_column = {widget: column for column, widget in self.fild_mapping.items()}
        df = df.rename(columns=widget_to_column)

        processed = 0  # Number of rows searched during this run.
        all_data = []
        for _, row in df.iterrows():
            if row.get("是否已查询") == "是":
                continue

            province = row.get("省")
            city = self._as_text(row.get("市"))
            district = self._as_text(row.get("区"))
            company_name = self._as_text(row.get("公司名称"))

            # The address is cleaned with the original city value, before the
            # municipality substitution below.
            address = self._clean_address(
                self._as_text(row.get("详细地址")), city, district
            )

            # Municipalities: search by the province name instead of "市辖区".
            if province in self.MUNICIPALITIES and city == "市辖区":
                city = province

            keywords = company_name.replace("(个体工商户)", "").strip()
            if processed > self.KEY_SWITCH_THRESHOLD:
                api_key = self.AMAP_SECONDARY_KEY
            else:
                api_key = self.AMAP_PRIMARY_KEY

            try:
                row_pois = self._collect_pois(keywords, city, api_key)
            except (requests.RequestException, ValueError, RuntimeError) as exc:
                error_task_logger.error(
                    f"高德搜索失败,本次匹配提前结束。公司名称: {company_name},错误: {exc}"
                )
                break

            for poi in row_pois:
                poi.update({
                    "源文件省": province,
                    "源文件市": city,
                    "源文件区": district,
                    "源文件地址": address,
                    "源文件门店店名": company_name,
                })
            all_data.extend(row_pois)
            processed += 1

            # Flag the source row as queried.
            modify_payload = {
                "api_key": self.JDY_API_KEY,
                "entry_id": self.SOURCE_ENTRY_ID,
                "data_id": row["_id"],
                "data": {
                    self.QUERIED_FLAG_WIDGET: {"value": "是"}
                },
            }
            api_instance.entry_data_update(modify_payload)

            if processed > self.MAX_ROWS_PER_RUN:
                break

        return pd.DataFrame(all_data)

    def upload_df(self, result_df, batch_size=100):
        """Upload the matched POIs to the result form.

        Args:
            result_df: DataFrame returned by ``match_phone_number``.
            batch_size: Maximum number of records per batch-create call.
                NOTE(review): 100 is assumed to be the Jiandaoyun limit for
                one batch-create request - confirm, and check whether
                ``API.entry_data_batch_create`` already chunks internally.
        """
        if result_df is None or result_df.empty:
            logger.info("没有需要上传的数据。")
            return

        all_data = [
            self.row_to_dict(row, self.upload_fild_mapping)
            for _, row in result_df.iterrows()
        ]
        step = max(1, int(batch_size))
        for start in range(0, len(all_data), step):
            payload = {
                "api_key": self.JDY_API_KEY,
                "entry_id": self.RESULT_ENTRY_ID,
                "data_list": all_data[start:start + step],
            }
            api_instance.entry_data_batch_create(payload)

    def main(self):
        """Run load -> match -> upload, then report the task status.

        Any exception is logged to the error task logger and re-raised.
        """
        task_start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            # Load the source rows.
            self.load_all_data()
            logger.info("数据加载完成。")

            # Match the rows against the Amap API.
            result_df = self.match_phone_number()
            logger.info("数据匹配完成。")

            # Upload the results to Jiandaoyun.
            self.upload_df(result_df)
            logger.info("数据上传完成。")
        except Exception:
            error_task_logger.error("任务高德匹配手机号执行失败。")
            raise

        common_module.send_task_status(task_start_time, "高德匹配手机号")


if __name__ == '__main__':
    gd_match_phone_number = GDMatchPhoneNumber()
    gd_match_phone_number.main()