{ "cells": [ { "metadata": {}, "cell_type": "markdown", "source": "", "id": "1581ab9b41bf8ced" }, { "cell_type": "code", "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2025-05-07T09:59:05.954334Z", "start_time": "2025-05-07T09:58:58.593181Z" } }, "source": [ "import requests\n", "import pandas as pd\n", "from difflib import SequenceMatcher\n", "\n", "\n", "# 相似度计算函数\n", "def similarity(a, b):\n", " return SequenceMatcher(None, a, b).ratio()\n", "\n", "\n", "# 地址是否匹配\n", "def is_address_match(detail, addr):\n", " if len(detail) < 4:\n", " return detail in addr\n", " else:\n", " return similarity(detail, addr) >= 0.6\n", "\n", "\n", "# 店名是否匹配\n", "def is_name_match(store_name, poi_name):\n", " if len(store_name) < 4:\n", " return store_name in poi_name\n", " else:\n", " return similarity(store_name, poi_name) >= 0.6\n", "\n", "\n", "# 读取 Excel 数据\n", "df = pd.read_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\地址.xlsx\", sheet_name=\"Sheet2\")\n", "url = \"https://restapi.amap.com/v5/place/text\"\n", "all_data = []\n", "\n", "for index, row in df.iterrows():\n", "\n", " # 处理详细地址\n", " cleaned = row['地址'].replace(row['市'], '').strip()\n", " cleaned = cleaned.replace(row['区/县'], '').strip()\n", " cleaned = ' '.join(cleaned.split())\n", " row[\"详细地址\"] = cleaned\n", "\n", " # 特殊处理直辖市\n", " if row[\"省\"] in [\"天津市\", \"上海市\", \"重庆市\", \"北京市\"] and row[\"市\"] == \"市辖区\":\n", " row[\"市\"] = row[\"省\"]\n", "\n", " key_words = row[\"门店店名\"]\n", " region = row[\"市\"]\n", " detail_address = row[\"详细地址\"]\n", "\n", " def search_amap(keywords, region, page_num):\n", " params = {\n", " \"key\": \"f61b09d406ac49f8a034bf585e60c442\",\n", " \"keywords\": keywords,\n", " \"types\": \"010400|010500|010800|020000|030000\",\n", " \"region\": region,\n", " \"city_limit\": \"true\",\n", " \"page_size\": \"20\",\n", " \"page_num\": str(page_num)\n", " }\n", " res = requests.get(url=url, params=params)\n", " return res.json().get(\"pois\", [])\n", "\n", " # 初始搜索关键词\n", " current_keywords = key_words\n", " max_pages = 3 # 最多请求3页\n", " found = False\n", "\n", " for page_num in range(1, max_pages + 1):\n", " pois = search_amap(current_keywords, region, page_num)\n", "\n", " for poi in pois:\n", " addr = poi.get(\"address\", \"\")\n", " name = poi.get(\"name\", \"\")\n", "\n", " if is_address_match(detail_address, addr) and is_name_match(key_words, name):\n", " poi.update({\"搜索门店\": row[\"门店店名\"]})\n", " all_data.append(poi)\n", " found = True\n", " break # 找到就停止当前页面遍历\n", "\n", " if found:\n", " break # 停止分页循环\n", "\n", " if not found:\n", " # 如果仍未找到,尝试使用 更详细的地址 + 店名 重新搜索多个页面\n", " current_keywords = detail_address + \" \" + key_words\n", " for page_num in range(1, max_pages + 1):\n", " pois = search_amap(current_keywords, region, page_num)\n", "\n", " for poi in pois:\n", " addr = poi.get(\"address\", \"\")\n", " name = poi.get(\"name\", \"\")\n", "\n", " if is_address_match(detail_address, addr) and is_name_match(key_words, name):\n", " poi.update({\"搜索门店\": row[\"门店店名\"]})\n", " all_data.append(poi)\n", " found = True\n", " break # 找到就停止当前页面遍历\n", "\n", " if found:\n", " break # 停止分页循环\n", "\n", " if not found:\n", " print(f\"{row['门店店名']} 没有找到符合要求的地址或店名\")\n", "\n", "# 保存结果到 Excel\n", "df2 = pd.DataFrame(all_data)\n", "df2.to_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\返回结果地址.xlsx\", index=False)" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "上海车杰汽车服务 没有找到符合要求的地址或店名\n", "SunRise Garage汽美工作室 没有找到符合要求的地址或店名\n", "上海 11区洗美工作室 没有找到符合要求的地址或店名\n", "上海龙腾汽车用品批发 没有找到符合要求的地址或店名\n", "膜幻视佳量子膜上海中心店 没有找到符合要求的地址或店名\n", "科奇装具 没有找到符合要求的地址或店名\n", "博士汽车音响改装 没有找到符合要求的地址或店名\n" ] }, { "ename": "PermissionError", "evalue": "[Errno 13] Permission denied: 'C:\\\\Users\\\\Administrator.DESKTOP-7IC2USJ\\\\Desktop\\\\返回结果地址.xlsx'", "output_type": "error", "traceback": [ "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[1;31mPermissionError\u001B[0m Traceback (most recent call last)", "Cell \u001B[1;32mIn[14], line 106\u001B[0m\n\u001B[0;32m 104\u001B[0m \u001B[38;5;66;03m# 保存结果到 Excel\u001B[39;00m\n\u001B[0;32m 105\u001B[0m df2 \u001B[38;5;241m=\u001B[39m pd\u001B[38;5;241m.\u001B[39mDataFrame(all_data)\n\u001B[1;32m--> 106\u001B[0m df2\u001B[38;5;241m.\u001B[39mto_excel(\u001B[38;5;124mr\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mC:\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mUsers\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mAdministrator.DESKTOP-7IC2USJ\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mDesktop\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124m返回结果地址.xlsx\u001B[39m\u001B[38;5;124m\"\u001B[39m, index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\n", "File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\util\\_decorators.py:333\u001B[0m, in \u001B[0;36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001B[1;34m(*args, **kwargs)\u001B[0m\n\u001B[0;32m 327\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(args) \u001B[38;5;241m>\u001B[39m num_allow_args:\n\u001B[0;32m 328\u001B[0m warnings\u001B[38;5;241m.\u001B[39mwarn(\n\u001B[0;32m 329\u001B[0m msg\u001B[38;5;241m.\u001B[39mformat(arguments\u001B[38;5;241m=\u001B[39m_format_argument_list(allow_args)),\n\u001B[0;32m 330\u001B[0m \u001B[38;5;167;01mFutureWarning\u001B[39;00m,\n\u001B[0;32m 331\u001B[0m stacklevel\u001B[38;5;241m=\u001B[39mfind_stack_level(),\n\u001B[0;32m 332\u001B[0m )\n\u001B[1;32m--> 333\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m func(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", "File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\core\\generic.py:2417\u001B[0m, in \u001B[0;36mNDFrame.to_excel\u001B[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, inf_rep, freeze_panes, storage_options, engine_kwargs)\u001B[0m\n\u001B[0;32m 2404\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mpandas\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mformats\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mexcel\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m ExcelFormatter\n\u001B[0;32m 2406\u001B[0m formatter \u001B[38;5;241m=\u001B[39m ExcelFormatter(\n\u001B[0;32m 2407\u001B[0m df,\n\u001B[0;32m 2408\u001B[0m na_rep\u001B[38;5;241m=\u001B[39mna_rep,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 2415\u001B[0m inf_rep\u001B[38;5;241m=\u001B[39minf_rep,\n\u001B[0;32m 2416\u001B[0m )\n\u001B[1;32m-> 2417\u001B[0m formatter\u001B[38;5;241m.\u001B[39mwrite(\n\u001B[0;32m 2418\u001B[0m excel_writer,\n\u001B[0;32m 2419\u001B[0m sheet_name\u001B[38;5;241m=\u001B[39msheet_name,\n\u001B[0;32m 2420\u001B[0m startrow\u001B[38;5;241m=\u001B[39mstartrow,\n\u001B[0;32m 2421\u001B[0m startcol\u001B[38;5;241m=\u001B[39mstartcol,\n\u001B[0;32m 2422\u001B[0m freeze_panes\u001B[38;5;241m=\u001B[39mfreeze_panes,\n\u001B[0;32m 2423\u001B[0m engine\u001B[38;5;241m=\u001B[39mengine,\n\u001B[0;32m 2424\u001B[0m storage_options\u001B[38;5;241m=\u001B[39mstorage_options,\n\u001B[0;32m 2425\u001B[0m engine_kwargs\u001B[38;5;241m=\u001B[39mengine_kwargs,\n\u001B[0;32m 2426\u001B[0m )\n", "File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\formats\\excel.py:943\u001B[0m, in \u001B[0;36mExcelFormatter.write\u001B[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options, engine_kwargs)\u001B[0m\n\u001B[0;32m 941\u001B[0m need_save \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[0;32m 942\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 943\u001B[0m writer \u001B[38;5;241m=\u001B[39m ExcelWriter(\n\u001B[0;32m 944\u001B[0m writer,\n\u001B[0;32m 945\u001B[0m engine\u001B[38;5;241m=\u001B[39mengine,\n\u001B[0;32m 946\u001B[0m storage_options\u001B[38;5;241m=\u001B[39mstorage_options,\n\u001B[0;32m 947\u001B[0m engine_kwargs\u001B[38;5;241m=\u001B[39mengine_kwargs,\n\u001B[0;32m 948\u001B[0m )\n\u001B[0;32m 949\u001B[0m need_save \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[0;32m 951\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n", "File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\excel\\_xlsxwriter.py:204\u001B[0m, in \u001B[0;36mXlsxWriter.__init__\u001B[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001B[0m\n\u001B[0;32m 201\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m mode \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124ma\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[0;32m 202\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAppend mode is not supported with xlsxwriter!\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m--> 204\u001B[0m \u001B[38;5;28msuper\u001B[39m()\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__init__\u001B[39m(\n\u001B[0;32m 205\u001B[0m path,\n\u001B[0;32m 206\u001B[0m engine\u001B[38;5;241m=\u001B[39mengine,\n\u001B[0;32m 207\u001B[0m date_format\u001B[38;5;241m=\u001B[39mdate_format,\n\u001B[0;32m 208\u001B[0m datetime_format\u001B[38;5;241m=\u001B[39mdatetime_format,\n\u001B[0;32m 209\u001B[0m mode\u001B[38;5;241m=\u001B[39mmode,\n\u001B[0;32m 210\u001B[0m storage_options\u001B[38;5;241m=\u001B[39mstorage_options,\n\u001B[0;32m 211\u001B[0m if_sheet_exists\u001B[38;5;241m=\u001B[39mif_sheet_exists,\n\u001B[0;32m 212\u001B[0m engine_kwargs\u001B[38;5;241m=\u001B[39mengine_kwargs,\n\u001B[0;32m 213\u001B[0m )\n\u001B[0;32m 215\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 216\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_book \u001B[38;5;241m=\u001B[39m Workbook(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_handles\u001B[38;5;241m.\u001B[39mhandle, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mengine_kwargs)\n", "File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\excel\\_base.py:1246\u001B[0m, in \u001B[0;36mExcelWriter.__init__\u001B[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs)\u001B[0m\n\u001B[0;32m 1242\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_handles \u001B[38;5;241m=\u001B[39m IOHandles(\n\u001B[0;32m 1243\u001B[0m cast(IO[\u001B[38;5;28mbytes\u001B[39m], path), compression\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcompression\u001B[39m\u001B[38;5;124m\"\u001B[39m: \u001B[38;5;28;01mNone\u001B[39;00m}\n\u001B[0;32m 1244\u001B[0m )\n\u001B[0;32m 1245\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(path, ExcelWriter):\n\u001B[1;32m-> 1246\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_handles \u001B[38;5;241m=\u001B[39m get_handle(\n\u001B[0;32m 1247\u001B[0m path, mode, storage_options\u001B[38;5;241m=\u001B[39mstorage_options, is_text\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[0;32m 1248\u001B[0m )\n\u001B[0;32m 1249\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_cur_sheet \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m 1251\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m date_format \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n", "File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\common.py:882\u001B[0m, in \u001B[0;36mget_handle\u001B[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001B[0m\n\u001B[0;32m 873\u001B[0m handle \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mopen\u001B[39m(\n\u001B[0;32m 874\u001B[0m handle,\n\u001B[0;32m 875\u001B[0m ioargs\u001B[38;5;241m.\u001B[39mmode,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 878\u001B[0m newline\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m 879\u001B[0m )\n\u001B[0;32m 880\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m 881\u001B[0m \u001B[38;5;66;03m# Binary mode\u001B[39;00m\n\u001B[1;32m--> 882\u001B[0m handle \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mopen\u001B[39m(handle, ioargs\u001B[38;5;241m.\u001B[39mmode)\n\u001B[0;32m 883\u001B[0m handles\u001B[38;5;241m.\u001B[39mappend(handle)\n\u001B[0;32m 885\u001B[0m \u001B[38;5;66;03m# Convert BytesIO or file objects passed with an encoding\u001B[39;00m\n", "\u001B[1;31mPermissionError\u001B[0m: [Errno 13] Permission denied: 'C:\\\\Users\\\\Administrator.DESKTOP-7IC2USJ\\\\Desktop\\\\返回结果地址.xlsx'" ] } ], "execution_count": 14 }, { "metadata": { "ExecuteTime": { "end_time": "2025-05-07T09:59:23.174447Z", "start_time": "2025-05-07T09:59:23.134858Z" } }, "cell_type": "code", "source": [ "df2 = pd.DataFrame(all_data)\n", "df2.to_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\返回结果地址.xlsx\", index=False)" ], "id": "ffd7630ac0346d03", "outputs": [], "execution_count": 15 }, { "metadata": {}, "cell_type": "markdown", "source": "处理数据", "id": "f148b0790bde7067" }, { "metadata": { "ExecuteTime": { "end_time": "2025-05-07T09:37:22.583854Z", "start_time": "2025-05-07T09:37:22.343548Z" } }, "cell_type": "code", "source": [ "import pandas as pd\n", "\n", "df3 = pd.read_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\返回结果地址.xlsx\")\n", "df = pd.read_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\地址.xlsx\", sheet_name=\"Sheet2\")\n", "\n", "for index, row in df.iterrows():\n", " cleaned = row['地址'].replace(row['市'], '').strip()\n", " # 再删除 c\n", " cleaned = cleaned.replace(row['区/县'], '').strip()\n", " # 删除多余空格(如 \"apple banana\" → \"apple banana\")\n", " cleaned = ' '.join(cleaned.split())\n", " # 更新到新列\n", " df.at[index, '详细地址'] = cleaned\n", " \n", "\n" ], "id": "8c61c7db7bda3982", "outputs": [], "execution_count": 9 } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }