Files
F6--/张阳脚本/高德地图api.ipynb
2026-01-30 11:28:35 +08:00

235 lines
17 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "",
"id": "1581ab9b41bf8ced"
},
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-05-07T09:59:05.954334Z",
"start_time": "2025-05-07T09:58:58.593181Z"
}
},
"source": [
"import requests\n",
"import pandas as pd\n",
"from difflib import SequenceMatcher\n",
"\n",
"\n",
"# 相似度计算函数\n",
"def similarity(a, b):\n",
" return SequenceMatcher(None, a, b).ratio()\n",
"\n",
"\n",
"# 地址是否匹配\n",
"def is_address_match(detail, addr):\n",
" if len(detail) < 4:\n",
" return detail in addr\n",
" else:\n",
" return similarity(detail, addr) >= 0.6\n",
"\n",
"\n",
"# 店名是否匹配\n",
"def is_name_match(store_name, poi_name):\n",
" if len(store_name) < 4:\n",
" return store_name in poi_name\n",
" else:\n",
" return similarity(store_name, poi_name) >= 0.6\n",
"\n",
"\n",
"# 读取 Excel 数据\n",
"df = pd.read_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\地址.xlsx\", sheet_name=\"Sheet2\")\n",
"url = \"https://restapi.amap.com/v5/place/text\"\n",
"all_data = []\n",
"\n",
"for index, row in df.iterrows():\n",
"\n",
" # 处理详细地址\n",
" cleaned = row['地址'].replace(row['市'], '').strip()\n",
" cleaned = cleaned.replace(row['区/县'], '').strip()\n",
" cleaned = ' '.join(cleaned.split())\n",
" row[\"详细地址\"] = cleaned\n",
"\n",
" # 特殊处理直辖市\n",
" if row[\"省\"] in [\"天津市\", \"上海市\", \"重庆市\", \"北京市\"] and row[\"市\"] == \"市辖区\":\n",
" row[\"市\"] = row[\"省\"]\n",
"\n",
" key_words = row[\"门店店名\"]\n",
" region = row[\"市\"]\n",
" detail_address = row[\"详细地址\"]\n",
"\n",
" def search_amap(keywords, region, page_num):\n",
" params = {\n",
" \"key\": \"f61b09d406ac49f8a034bf585e60c442\",\n",
" \"keywords\": keywords,\n",
" \"types\": \"010400|010500|010800|020000|030000\",\n",
" \"region\": region,\n",
" \"city_limit\": \"true\",\n",
" \"page_size\": \"20\",\n",
" \"page_num\": str(page_num)\n",
" }\n",
" res = requests.get(url=url, params=params)\n",
" return res.json().get(\"pois\", [])\n",
"\n",
" # 初始搜索关键词\n",
" current_keywords = key_words\n",
" max_pages = 3 # 最多请求3页\n",
" found = False\n",
"\n",
" for page_num in range(1, max_pages + 1):\n",
" pois = search_amap(current_keywords, region, page_num)\n",
"\n",
" for poi in pois:\n",
" addr = poi.get(\"address\", \"\")\n",
" name = poi.get(\"name\", \"\")\n",
"\n",
" if is_address_match(detail_address, addr) and is_name_match(key_words, name):\n",
" poi.update({\"搜索门店\": row[\"门店店名\"]})\n",
" all_data.append(poi)\n",
" found = True\n",
" break # 找到就停止当前页面遍历\n",
"\n",
" if found:\n",
" break # 停止分页循环\n",
"\n",
" if not found:\n",
" # 如果仍未找到,尝试使用 更详细的地址 + 店名 重新搜索多个页面\n",
" current_keywords = detail_address + \" \" + key_words\n",
" for page_num in range(1, max_pages + 1):\n",
" pois = search_amap(current_keywords, region, page_num)\n",
"\n",
" for poi in pois:\n",
" addr = poi.get(\"address\", \"\")\n",
" name = poi.get(\"name\", \"\")\n",
"\n",
" if is_address_match(detail_address, addr) and is_name_match(key_words, name):\n",
" poi.update({\"搜索门店\": row[\"门店店名\"]})\n",
" all_data.append(poi)\n",
" found = True\n",
" break # 找到就停止当前页面遍历\n",
"\n",
" if found:\n",
" break # 停止分页循环\n",
"\n",
" if not found:\n",
" print(f\"{row['门店店名']} 没有找到符合要求的地址或店名\")\n",
"\n",
"# 保存结果到 Excel\n",
"df2 = pd.DataFrame(all_data)\n",
"df2.to_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\返回结果地址.xlsx\", index=False)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"上海车杰汽车服务 没有找到符合要求的地址或店名\n",
"SunRise Garage汽美工作室 没有找到符合要求的地址或店名\n",
"上海 11区洗美工作室 没有找到符合要求的地址或店名\n",
"上海龙腾汽车用品批发 没有找到符合要求的地址或店名\n",
"膜幻视佳量子膜上海中心店 没有找到符合要求的地址或店名\n",
"科奇装具 没有找到符合要求的地址或店名\n",
"博士汽车音响改装 没有找到符合要求的地址或店名\n"
]
},
{
"ename": "PermissionError",
"evalue": "[Errno 13] Permission denied: 'C:\\\\Users\\\\Administrator.DESKTOP-7IC2USJ\\\\Desktop\\\\返回结果地址.xlsx'",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mPermissionError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[1;32mIn[14], line 106\u001B[0m\n\u001B[0;32m 104\u001B[0m \u001B[38;5;66;03m# 保存结果到 Excel\u001B[39;00m\n\u001B[0;32m 105\u001B[0m df2 \u001B[38;5;241m=\u001B[39m pd\u001B[38;5;241m.\u001B[39mDataFrame(all_data)\n\u001B[1;32m--> 106\u001B[0m df2\u001B[38;5;241m.\u001B[39mto_excel(\u001B[38;5;124mr\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mC:\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mUsers\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mAdministrator.DESKTOP-7IC2USJ\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mDesktop\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124m返回结果地址.xlsx\u001B[39m\u001B[38;5;124m\"\u001B[39m, index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\util\\_decorators.py:333\u001B[0m, in \u001B[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001B[1;34m(*args, **kwargs)\u001B[0m\n\u001B[0;32m 327\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(args) \u001B[38;5;241m>\u001B[39m num_allow_args:\n\u001B[0;32m 328\u001B[0m warnings\u001B[38;5;241m.\u001B[39mwarn(\n\u001B[0;32m 329\u001B[0m msg\u001B[38;5;241m.\u001B[39mformat(arguments\u001B[38;5;241m=\u001B[39m_format_argument_list(allow_args)),\n\u001B[0;32m 330\u001B[0m \u001B[38;5;167;01mFutureWarning\u001B[39;00m,\n\u001B[0;32m 331\u001B[0m stacklevel\u001B[38;5;241m=\u001B[39mfind_stack_level(),\n\u001B[0;32m 332\u001B[0m )\n\u001B[1;32m--> 333\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m func(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\core\\generic.py:2417\u001B[0m, in \u001B[0;36mNDFrame.to_excel\u001B[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, inf_rep, freeze_panes, storage_options, engine_kwargs)\u001B[0m\n\u001B[0;32m 2404\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mpandas\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mformats\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mexcel\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m ExcelFormatter\n\u001B[0;32m 2406\u001B[0m formatter \u001B[38;5;241m=\u001B[39m ExcelFormatter(\n\u001B[0;32m 2407\u001B[0m df,\n\u001B[0;32m 2408\u001B[0m na_rep\u001B[38;5;241m=\u001B[39mna_rep,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 2415\u001B[0m inf_rep\u001B[38;5;241m=\u001B[39minf_rep,\n\u001B[0;32m 2416\u001B[0m )\n\u001B[1;32m-> 2417\u001B[0m formatter\u001B[38;5;241m.\u001B[39mwrite(\n\u001B[0;32m 2418\u001B[0m excel_writer,\n\u001B[0;32m 2419\u001B[0m sheet_name\u001B[38;5;241m=\u001B[39msheet_name,\n\u001B[0;32m 2420\u001B[0m startrow\u001B[38;5;241m=\u001B[39mstartrow,\n\u001B[0;32m 2421\u001B[0m startcol\u001B[38;5;241m=\u001B[39mstartcol,\n\u001B[0;32m 2422\u001B[0m freeze_panes\u001B[38;5;241m=\u001B[39mfreeze_panes,\n\u001B[0;32m 2423\u001B[0m engine\u001B[38;5;241m=\u001B[39mengine,\n\u001B[0;32m 2424\u001B[0m storage_options\u001B[38;5;241m=\u001B[39mstorage_options,\n\u001B[0;32m 2425\u001B[0m engine_kwargs\u001B[38;5;241m=\u001B[39mengine_kwargs,\n\u001B[0;32m 2426\u001B[0m )\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\formats\\excel.py:943\u001B[0m, in \u001B[0;36mExcelFormatter.write\u001B[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options, engine_kwargs)\u001B[0m\n\u001B[0;32m 941\u001B[0m need_save \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[0;32m 942\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 943\u001B[0m writer \u001B[38;5;241m=\u001B[39m ExcelWriter(\n\u001B[0;32m 944\u001B[0m writer,\n\u001B[0;32m 945\u001B[0m engine\u001B[38;5;241m=\u001B[39mengine,\n\u001B[0;32m 946\u001B[0m storage_options\u001B[38;5;241m=\u001B[39mstorage_options,\n\u001B[0;32m 947\u001B[0m engine_kwargs\u001B[38;5;241m=\u001B[39mengine_kwargs,\n\u001B[0;32m 948\u001B[0m )\n\u001B[0;32m 949\u001B[0m need_save \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[0;32m 951\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\excel\\_xlsxwriter.py:204\u001B[0m, in \u001B[0;36mXlsxWriter.__init__\u001B[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001B[0m\n\u001B[0;32m 201\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m mode \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124ma\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[0;32m 202\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAppend mode is not supported with xlsxwriter!\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m--> 204\u001B[0m \u001B[38;5;28msuper\u001B[39m()\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__init__\u001B[39m(\n\u001B[0;32m 205\u001B[0m path,\n\u001B[0;32m 206\u001B[0m engine\u001B[38;5;241m=\u001B[39mengine,\n\u001B[0;32m 207\u001B[0m date_format\u001B[38;5;241m=\u001B[39mdate_format,\n\u001B[0;32m 208\u001B[0m datetime_format\u001B[38;5;241m=\u001B[39mdatetime_format,\n\u001B[0;32m 209\u001B[0m mode\u001B[38;5;241m=\u001B[39mmode,\n\u001B[0;32m 210\u001B[0m storage_options\u001B[38;5;241m=\u001B[39mstorage_options,\n\u001B[0;32m 211\u001B[0m if_sheet_exists\u001B[38;5;241m=\u001B[39mif_sheet_exists,\n\u001B[0;32m 212\u001B[0m engine_kwargs\u001B[38;5;241m=\u001B[39mengine_kwargs,\n\u001B[0;32m 213\u001B[0m )\n\u001B[0;32m 215\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 216\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_book \u001B[38;5;241m=\u001B[39m Workbook(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_handles\u001B[38;5;241m.\u001B[39mhandle, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mengine_kwargs)\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\excel\\_base.py:1246\u001B[0m, in \u001B[0;36mExcelWriter.__init__\u001B[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs)\u001B[0m\n\u001B[0;32m 1242\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_handles \u001B[38;5;241m=\u001B[39m IOHandles(\n\u001B[0;32m 1243\u001B[0m cast(IO[\u001B[38;5;28mbytes\u001B[39m], path), compression\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcompression\u001B[39m\u001B[38;5;124m\"\u001B[39m: \u001B[38;5;28;01mNone\u001B[39;00m}\n\u001B[0;32m 1244\u001B[0m )\n\u001B[0;32m 1245\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(path, ExcelWriter):\n\u001B[1;32m-> 1246\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_handles \u001B[38;5;241m=\u001B[39m get_handle(\n\u001B[0;32m 1247\u001B[0m path, mode, storage_options\u001B[38;5;241m=\u001B[39mstorage_options, is_text\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[0;32m 1248\u001B[0m )\n\u001B[0;32m 1249\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_cur_sheet \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m 1251\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m date_format \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\common.py:882\u001B[0m, in \u001B[0;36mget_handle\u001B[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001B[0m\n\u001B[0;32m 873\u001B[0m handle \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mopen\u001B[39m(\n\u001B[0;32m 874\u001B[0m handle,\n\u001B[0;32m 875\u001B[0m ioargs\u001B[38;5;241m.\u001B[39mmode,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 878\u001B[0m newline\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m 879\u001B[0m )\n\u001B[0;32m 880\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m 881\u001B[0m \u001B[38;5;66;03m# Binary mode\u001B[39;00m\n\u001B[1;32m--> 882\u001B[0m handle \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mopen\u001B[39m(handle, ioargs\u001B[38;5;241m.\u001B[39mmode)\n\u001B[0;32m 883\u001B[0m handles\u001B[38;5;241m.\u001B[39mappend(handle)\n\u001B[0;32m 885\u001B[0m \u001B[38;5;66;03m# Convert BytesIO or file objects passed with an encoding\u001B[39;00m\n",
"\u001B[1;31mPermissionError\u001B[0m: [Errno 13] Permission denied: 'C:\\\\Users\\\\Administrator.DESKTOP-7IC2USJ\\\\Desktop\\\\返回结果地址.xlsx'"
]
}
],
"execution_count": 14
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T09:59:23.174447Z",
"start_time": "2025-05-07T09:59:23.134858Z"
}
},
"cell_type": "code",
"source": [
"df2 = pd.DataFrame(all_data)\n",
"df2.to_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\返回结果地址.xlsx\", index=False)"
],
"id": "ffd7630ac0346d03",
"outputs": [],
"execution_count": 15
},
{
"metadata": {},
"cell_type": "markdown",
"source": "处理数据",
"id": "f148b0790bde7067"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T09:37:22.583854Z",
"start_time": "2025-05-07T09:37:22.343548Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"\n",
"df3 = pd.read_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\返回结果地址.xlsx\")\n",
"df = pd.read_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\地址.xlsx\", sheet_name=\"Sheet2\")\n",
"\n",
"for index, row in df.iterrows():\n",
" cleaned = row['地址'].replace(row['市'], '').strip()\n",
" # 再删除 c\n",
" cleaned = cleaned.replace(row['区/县'], '').strip()\n",
" # 删除多余空格(如 \"apple banana\" → \"apple banana\"\n",
" cleaned = ' '.join(cleaned.split())\n",
" # 更新到新列\n",
" df.at[index, '详细地址'] = cleaned\n",
" \n",
"\n"
],
"id": "8c61c7db7bda3982",
"outputs": [],
"execution_count": 9
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}