This commit is contained in:
2026-01-30 11:28:35 +08:00
commit f1831c31b4
399 changed files with 860978 additions and 0 deletions
+234
View File
@@ -0,0 +1,234 @@
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "",
"id": "1581ab9b41bf8ced"
},
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-05-07T09:59:05.954334Z",
"start_time": "2025-05-07T09:58:58.593181Z"
}
},
"source": [
"import requests\n",
"import pandas as pd\n",
"from difflib import SequenceMatcher\n",
"\n",
"\n",
"# 相似度计算函数\n",
"def similarity(a, b):\n",
" return SequenceMatcher(None, a, b).ratio()\n",
"\n",
"\n",
"# 地址是否匹配\n",
"def is_address_match(detail, addr):\n",
" if len(detail) < 4:\n",
" return detail in addr\n",
" else:\n",
" return similarity(detail, addr) >= 0.6\n",
"\n",
"\n",
"# 店名是否匹配\n",
"def is_name_match(store_name, poi_name):\n",
" if len(store_name) < 4:\n",
" return store_name in poi_name\n",
" else:\n",
" return similarity(store_name, poi_name) >= 0.6\n",
"\n",
"\n",
"# 读取 Excel 数据\n",
"df = pd.read_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\地址.xlsx\", sheet_name=\"Sheet2\")\n",
"url = \"https://restapi.amap.com/v5/place/text\"\n",
"all_data = []\n",
"\n",
"for index, row in df.iterrows():\n",
"\n",
" # 处理详细地址\n",
" cleaned = row['地址'].replace(row['市'], '').strip()\n",
" cleaned = cleaned.replace(row['区/县'], '').strip()\n",
" cleaned = ' '.join(cleaned.split())\n",
" row[\"详细地址\"] = cleaned\n",
"\n",
" # 特殊处理直辖市\n",
" if row[\"省\"] in [\"天津市\", \"上海市\", \"重庆市\", \"北京市\"] and row[\"市\"] == \"市辖区\":\n",
" row[\"市\"] = row[\"省\"]\n",
"\n",
" key_words = row[\"门店店名\"]\n",
" region = row[\"市\"]\n",
" detail_address = row[\"详细地址\"]\n",
"\n",
" def search_amap(keywords, region, page_num):\n",
" params = {\n",
" \"key\": \"f61b09d406ac49f8a034bf585e60c442\",\n",
" \"keywords\": keywords,\n",
" \"types\": \"010400|010500|010800|020000|030000\",\n",
" \"region\": region,\n",
" \"city_limit\": \"true\",\n",
" \"page_size\": \"20\",\n",
" \"page_num\": str(page_num)\n",
" }\n",
" res = requests.get(url=url, params=params)\n",
" return res.json().get(\"pois\", [])\n",
"\n",
" # 初始搜索关键词\n",
" current_keywords = key_words\n",
" max_pages = 3 # 最多请求3页\n",
" found = False\n",
"\n",
" for page_num in range(1, max_pages + 1):\n",
" pois = search_amap(current_keywords, region, page_num)\n",
"\n",
" for poi in pois:\n",
" addr = poi.get(\"address\", \"\")\n",
" name = poi.get(\"name\", \"\")\n",
"\n",
" if is_address_match(detail_address, addr) and is_name_match(key_words, name):\n",
" poi.update({\"搜索门店\": row[\"门店店名\"]})\n",
" all_data.append(poi)\n",
" found = True\n",
" break # 找到就停止当前页面遍历\n",
"\n",
" if found:\n",
" break # 停止分页循环\n",
"\n",
" if not found:\n",
" # 如果仍未找到,尝试使用 更详细的地址 + 店名 重新搜索多个页面\n",
" current_keywords = detail_address + \" \" + key_words\n",
" for page_num in range(1, max_pages + 1):\n",
" pois = search_amap(current_keywords, region, page_num)\n",
"\n",
" for poi in pois:\n",
" addr = poi.get(\"address\", \"\")\n",
" name = poi.get(\"name\", \"\")\n",
"\n",
" if is_address_match(detail_address, addr) and is_name_match(key_words, name):\n",
" poi.update({\"搜索门店\": row[\"门店店名\"]})\n",
" all_data.append(poi)\n",
" found = True\n",
" break # 找到就停止当前页面遍历\n",
"\n",
" if found:\n",
" break # 停止分页循环\n",
"\n",
" if not found:\n",
" print(f\"{row['门店店名']} 没有找到符合要求的地址或店名\")\n",
"\n",
"# 保存结果到 Excel\n",
"df2 = pd.DataFrame(all_data)\n",
"df2.to_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\返回结果地址.xlsx\", index=False)"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"上海车杰汽车服务 没有找到符合要求的地址或店名\n",
"SunRise Garage汽美工作室 没有找到符合要求的地址或店名\n",
"上海 11区洗美工作室 没有找到符合要求的地址或店名\n",
"上海龙腾汽车用品批发 没有找到符合要求的地址或店名\n",
"膜幻视佳量子膜上海中心店 没有找到符合要求的地址或店名\n",
"科奇装具 没有找到符合要求的地址或店名\n",
"博士汽车音响改装 没有找到符合要求的地址或店名\n"
]
},
{
"ename": "PermissionError",
"evalue": "[Errno 13] Permission denied: 'C:\\\\Users\\\\Administrator.DESKTOP-7IC2USJ\\\\Desktop\\\\返回结果地址.xlsx'",
"output_type": "error",
"traceback": [
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[1;31mPermissionError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[1;32mIn[14], line 106\u001B[0m\n\u001B[0;32m 104\u001B[0m \u001B[38;5;66;03m# 保存结果到 Excel\u001B[39;00m\n\u001B[0;32m 105\u001B[0m df2 \u001B[38;5;241m=\u001B[39m pd\u001B[38;5;241m.\u001B[39mDataFrame(all_data)\n\u001B[1;32m--> 106\u001B[0m df2\u001B[38;5;241m.\u001B[39mto_excel(\u001B[38;5;124mr\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mC:\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mUsers\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mAdministrator.DESKTOP-7IC2USJ\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124mDesktop\u001B[39m\u001B[38;5;124m\\\u001B[39m\u001B[38;5;124m返回结果地址.xlsx\u001B[39m\u001B[38;5;124m\"\u001B[39m, index\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\util\\_decorators.py:333\u001B[0m, in \u001B[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001B[1;34m(*args, **kwargs)\u001B[0m\n\u001B[0;32m 327\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(args) \u001B[38;5;241m>\u001B[39m num_allow_args:\n\u001B[0;32m 328\u001B[0m warnings\u001B[38;5;241m.\u001B[39mwarn(\n\u001B[0;32m 329\u001B[0m msg\u001B[38;5;241m.\u001B[39mformat(arguments\u001B[38;5;241m=\u001B[39m_format_argument_list(allow_args)),\n\u001B[0;32m 330\u001B[0m \u001B[38;5;167;01mFutureWarning\u001B[39;00m,\n\u001B[0;32m 331\u001B[0m stacklevel\u001B[38;5;241m=\u001B[39mfind_stack_level(),\n\u001B[0;32m 332\u001B[0m )\n\u001B[1;32m--> 333\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m func(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\core\\generic.py:2417\u001B[0m, in \u001B[0;36mNDFrame.to_excel\u001B[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, inf_rep, freeze_panes, storage_options, engine_kwargs)\u001B[0m\n\u001B[0;32m 2404\u001B[0m \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01mpandas\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mio\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mformats\u001B[39;00m\u001B[38;5;21;01m.\u001B[39;00m\u001B[38;5;21;01mexcel\u001B[39;00m \u001B[38;5;28;01mimport\u001B[39;00m ExcelFormatter\n\u001B[0;32m 2406\u001B[0m formatter \u001B[38;5;241m=\u001B[39m ExcelFormatter(\n\u001B[0;32m 2407\u001B[0m df,\n\u001B[0;32m 2408\u001B[0m na_rep\u001B[38;5;241m=\u001B[39mna_rep,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 2415\u001B[0m inf_rep\u001B[38;5;241m=\u001B[39minf_rep,\n\u001B[0;32m 2416\u001B[0m )\n\u001B[1;32m-> 2417\u001B[0m formatter\u001B[38;5;241m.\u001B[39mwrite(\n\u001B[0;32m 2418\u001B[0m excel_writer,\n\u001B[0;32m 2419\u001B[0m sheet_name\u001B[38;5;241m=\u001B[39msheet_name,\n\u001B[0;32m 2420\u001B[0m startrow\u001B[38;5;241m=\u001B[39mstartrow,\n\u001B[0;32m 2421\u001B[0m startcol\u001B[38;5;241m=\u001B[39mstartcol,\n\u001B[0;32m 2422\u001B[0m freeze_panes\u001B[38;5;241m=\u001B[39mfreeze_panes,\n\u001B[0;32m 2423\u001B[0m engine\u001B[38;5;241m=\u001B[39mengine,\n\u001B[0;32m 2424\u001B[0m storage_options\u001B[38;5;241m=\u001B[39mstorage_options,\n\u001B[0;32m 2425\u001B[0m engine_kwargs\u001B[38;5;241m=\u001B[39mengine_kwargs,\n\u001B[0;32m 2426\u001B[0m )\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\formats\\excel.py:943\u001B[0m, in \u001B[0;36mExcelFormatter.write\u001B[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options, engine_kwargs)\u001B[0m\n\u001B[0;32m 941\u001B[0m need_save \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[0;32m 942\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 943\u001B[0m writer \u001B[38;5;241m=\u001B[39m ExcelWriter(\n\u001B[0;32m 944\u001B[0m writer,\n\u001B[0;32m 945\u001B[0m engine\u001B[38;5;241m=\u001B[39mengine,\n\u001B[0;32m 946\u001B[0m storage_options\u001B[38;5;241m=\u001B[39mstorage_options,\n\u001B[0;32m 947\u001B[0m engine_kwargs\u001B[38;5;241m=\u001B[39mengine_kwargs,\n\u001B[0;32m 948\u001B[0m )\n\u001B[0;32m 949\u001B[0m need_save \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[0;32m 951\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\excel\\_xlsxwriter.py:204\u001B[0m, in \u001B[0;36mXlsxWriter.__init__\u001B[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001B[0m\n\u001B[0;32m 201\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m mode \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124ma\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[0;32m 202\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAppend mode is not supported with xlsxwriter!\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m--> 204\u001B[0m \u001B[38;5;28msuper\u001B[39m()\u001B[38;5;241m.\u001B[39m\u001B[38;5;21m__init__\u001B[39m(\n\u001B[0;32m 205\u001B[0m path,\n\u001B[0;32m 206\u001B[0m engine\u001B[38;5;241m=\u001B[39mengine,\n\u001B[0;32m 207\u001B[0m date_format\u001B[38;5;241m=\u001B[39mdate_format,\n\u001B[0;32m 208\u001B[0m datetime_format\u001B[38;5;241m=\u001B[39mdatetime_format,\n\u001B[0;32m 209\u001B[0m mode\u001B[38;5;241m=\u001B[39mmode,\n\u001B[0;32m 210\u001B[0m storage_options\u001B[38;5;241m=\u001B[39mstorage_options,\n\u001B[0;32m 211\u001B[0m if_sheet_exists\u001B[38;5;241m=\u001B[39mif_sheet_exists,\n\u001B[0;32m 212\u001B[0m engine_kwargs\u001B[38;5;241m=\u001B[39mengine_kwargs,\n\u001B[0;32m 213\u001B[0m )\n\u001B[0;32m 215\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 216\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_book \u001B[38;5;241m=\u001B[39m Workbook(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_handles\u001B[38;5;241m.\u001B[39mhandle, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mengine_kwargs)\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\excel\\_base.py:1246\u001B[0m, in \u001B[0;36mExcelWriter.__init__\u001B[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs)\u001B[0m\n\u001B[0;32m 1242\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_handles \u001B[38;5;241m=\u001B[39m IOHandles(\n\u001B[0;32m 1243\u001B[0m cast(IO[\u001B[38;5;28mbytes\u001B[39m], path), compression\u001B[38;5;241m=\u001B[39m{\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcompression\u001B[39m\u001B[38;5;124m\"\u001B[39m: \u001B[38;5;28;01mNone\u001B[39;00m}\n\u001B[0;32m 1244\u001B[0m )\n\u001B[0;32m 1245\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(path, ExcelWriter):\n\u001B[1;32m-> 1246\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_handles \u001B[38;5;241m=\u001B[39m get_handle(\n\u001B[0;32m 1247\u001B[0m path, mode, storage_options\u001B[38;5;241m=\u001B[39mstorage_options, is_text\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m\n\u001B[0;32m 1248\u001B[0m )\n\u001B[0;32m 1249\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_cur_sheet \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[0;32m 1251\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m date_format \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n",
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\site-packages\\pandas\\io\\common.py:882\u001B[0m, in \u001B[0;36mget_handle\u001B[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001B[0m\n\u001B[0;32m 873\u001B[0m handle \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mopen\u001B[39m(\n\u001B[0;32m 874\u001B[0m handle,\n\u001B[0;32m 875\u001B[0m ioargs\u001B[38;5;241m.\u001B[39mmode,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 878\u001B[0m newline\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m\"\u001B[39m,\n\u001B[0;32m 879\u001B[0m )\n\u001B[0;32m 880\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m 881\u001B[0m \u001B[38;5;66;03m# Binary mode\u001B[39;00m\n\u001B[1;32m--> 882\u001B[0m handle \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mopen\u001B[39m(handle, ioargs\u001B[38;5;241m.\u001B[39mmode)\n\u001B[0;32m 883\u001B[0m handles\u001B[38;5;241m.\u001B[39mappend(handle)\n\u001B[0;32m 885\u001B[0m \u001B[38;5;66;03m# Convert BytesIO or file objects passed with an encoding\u001B[39;00m\n",
"\u001B[1;31mPermissionError\u001B[0m: [Errno 13] Permission denied: 'C:\\\\Users\\\\Administrator.DESKTOP-7IC2USJ\\\\Desktop\\\\返回结果地址.xlsx'"
]
}
],
"execution_count": 14
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T09:59:23.174447Z",
"start_time": "2025-05-07T09:59:23.134858Z"
}
},
"cell_type": "code",
"source": [
"df2 = pd.DataFrame(all_data)\n",
"df2.to_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\返回结果地址.xlsx\", index=False)"
],
"id": "ffd7630ac0346d03",
"outputs": [],
"execution_count": 15
},
{
"metadata": {},
"cell_type": "markdown",
"source": "处理数据",
"id": "f148b0790bde7067"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T09:37:22.583854Z",
"start_time": "2025-05-07T09:37:22.343548Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"\n",
"df3 = pd.read_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\返回结果地址.xlsx\")\n",
"df = pd.read_excel(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\地址.xlsx\", sheet_name=\"Sheet2\")\n",
"\n",
"for index, row in df.iterrows():\n",
" cleaned = row['地址'].replace(row['市'], '').strip()\n",
" # 再删除 c\n",
" cleaned = cleaned.replace(row['区/县'], '').strip()\n",
" # 删除多余空格(如 \"apple banana\" → \"apple banana\"\n",
" cleaned = ' '.join(cleaned.split())\n",
" # 更新到新列\n",
" df.at[index, '详细地址'] = cleaned\n",
" \n",
"\n"
],
"id": "8c61c7db7bda3982",
"outputs": [],
"execution_count": 9
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}