732 lines
31 KiB
Plaintext
732 lines
31 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"id": "b461eaf2",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 字符集库\n",
|
||
"+ zhon 它提供了中文文本处理中常用的常量。"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "b6dc3dc0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"〇一-鿿㐀-䶿豈-𠀀-𪛟𪜀-𫝀-丽-\n",
|
||
""#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·.!?。。\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from zhon import hanzi\n",
|
||
"\n",
|
||
"# 返回的是中文正则\n",
|
||
"print(hanzi.characters)\n",
|
||
"\n",
|
||
"# 返回所有中文标点符号\n",
|
||
"print(hanzi.punctuation)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "66e07ee5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'你好¥%@中文'"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import re\n",
|
||
"\n",
|
||
"re.sub(f'[{hanzi.punctuation}]','','你好¥%@!中文')"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"id": "53a28e98",
|
||
"metadata": {},
|
||
"source": [
|
||
"+ string库 提供了常用 英文字符集"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "8507180e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\n",
|
||
"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_`{|}~ \\t\\n\\r\\x0b\\x0c'"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import string\n",
|
||
"\n",
|
||
"print(string.ascii_letters) # 所有的字母\n",
|
||
"print(string.punctuation) # 返回所有英文标点\n",
|
||
"string.printable # 所有可打印字符包含 数字,英文大小写,英文符号,空格,换行符等"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "ac6ebde7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['你', '好', '他', '中', '文']"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import re\n",
|
||
"\n",
|
||
"re.findall(f'[{hanzi.characters}]','你好xxx hello,他中文')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "473d1970-1321-4323-9c98-78cac2d88c68",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import re\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import emoji\n",
|
||
"from zhon import hanzi\n",
|
||
"import string"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "78520850",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<>:3: SyntaxWarning: invalid escape sequence '\\S'\n",
|
||
"<>:3: SyntaxWarning: invalid escape sequence '\\S'\n",
|
||
"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\AppData\\Local\\Temp\\ipykernel_36048\\1735783563.py:3: SyntaxWarning: invalid escape sequence '\\S'\n",
|
||
" text = re.sub(':\\S+?:', '', text)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"def filter_emoji(text):\n",
|
||
" # 先过滤 emoji 表情\n",
|
||
" text = re.sub(r':\\S+?:', '', text)\n",
|
||
" # 再过滤其他表情,只保留 数字,中英文,中英文符号,空格\n",
|
||
" return re.sub(r'[^{}{}{}]'.format(hanzi.characters,hanzi.punctuation,re.escape(string.printable)),'',text)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"id": "2551fd66",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"',{106} 晋JBT717丰田陈平 #19《67》{56} 海湾280晋A513LR长城哈弗H6'"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# 测试\n",
|
||
"s = \"😃,{106} ★♚♚晋JBT717丰田陈平 #19《67》{56} ★️海湾280晋A513LR长城哈弗H6\"\n",
|
||
"filter_emoji(s)"
|
||
]
|
||
},
|
||
{
|
||
"attachments": {},
|
||
"cell_type": "markdown",
|
||
"id": "dbf763b4",
|
||
"metadata": {},
|
||
"source": [
|
||
"# 处理 Excel 文件"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "b8efc024",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "SyntaxError",
|
||
"evalue": "invalid syntax (332018563.py, line 1)",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[1;36m Cell \u001b[1;32mIn[11], line 1\u001b[1;36m\u001b[0m\n\u001b[1;33m curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "282388e6",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Requirement already satisfied: pip in d:\\programtools\\anaconda\\lib\\site-packages (24.3.1)\n",
|
||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pip install --upgrade pip"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "70e97daf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Requirement already satisfied: emoji in d:\\programtools\\anaconda\\lib\\site-packages (2.14.0)\n",
|
||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pip install emoji"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "e0d0887b-dbb0-4776-bf46-1ae8787c455c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "ValueError",
|
||
"evalue": "No engine for filetype: 'xls'",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||
"\u001b[1;31mOptionError\u001b[0m Traceback (most recent call last)",
|
||
"\u001b[1;32mc:\\Users\\admin\\.conda\\envs\\F6processing\\lib\\site-packages\\pandas\\io\\excel\\_base.py\u001b[0m in \u001b[0;36m__new__\u001b[1;34m(cls, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs)\u001b[0m\n\u001b[0;32m 1110\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1111\u001b[1;33m \u001b[0mengine\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconfig\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_option\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"io.excel.{ext}.writer\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msilent\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1112\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"auto\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||
"\u001b[1;32mc:\\Users\\admin\\.conda\\envs\\F6processing\\lib\\site-packages\\pandas\\_config\\config.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 260\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[0mT\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 261\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__func__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 262\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
|
||
"\u001b[1;32mc:\\Users\\admin\\.conda\\envs\\F6processing\\lib\\site-packages\\pandas\\_config\\config.py\u001b[0m in \u001b[0;36m_get_option\u001b[1;34m(pat, silent)\u001b[0m\n\u001b[0;32m 134\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_get_option\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpat\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msilent\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mbool\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[0mAny\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 135\u001b[1;33m \u001b[0mkey\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_get_single_key\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpat\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msilent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 136\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
|
||
"\u001b[1;32mc:\\Users\\admin\\.conda\\envs\\F6processing\\lib\\site-packages\\pandas\\_config\\config.py\u001b[0m in \u001b[0;36m_get_single_key\u001b[1;34m(pat, silent)\u001b[0m\n\u001b[0;32m 120\u001b[0m \u001b[0m_warn_if_deprecated\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpat\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 121\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mOptionError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"No such keys(s): {repr(pat)}\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 122\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkeys\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||
"\u001b[1;31mOptionError\u001b[0m: \"No such keys(s): 'io.excel.xls.writer'\"",
|
||
"\nThe above exception was the direct cause of the following exception:\n",
|
||
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
||
"\u001b[1;32m<ipython-input-5-4582534eeafe>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mcolumn\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 17\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mfilter_emoji\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 18\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_excel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mr\"C:\\Users\\admin\\Desktop\\大唛养车总部_Sheet1_6_去除后.xls\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
|
||
"\u001b[1;32mc:\\Users\\admin\\.conda\\envs\\F6processing\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mto_excel\u001b[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, inf_rep, freeze_panes, storage_options)\u001b[0m\n\u001b[0;32m 2250\u001b[0m \u001b[0minf_rep\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minf_rep\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2251\u001b[0m )\n\u001b[1;32m-> 2252\u001b[1;33m formatter.write(\n\u001b[0m\u001b[0;32m 2253\u001b[0m \u001b[0mexcel_writer\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2254\u001b[0m \u001b[0msheet_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msheet_name\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||
"\u001b[1;32mc:\\Users\\admin\\.conda\\envs\\F6processing\\lib\\site-packages\\pandas\\io\\formats\\excel.py\u001b[0m in \u001b[0;36mwrite\u001b[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options)\u001b[0m\n\u001b[0;32m 932\u001b[0m \u001b[1;31m# error: Cannot instantiate abstract class 'ExcelWriter' with abstract\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 933\u001b[0m \u001b[1;31m# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 934\u001b[1;33m writer = ExcelWriter( # type: ignore[abstract]\n\u001b[0m\u001b[0;32m 935\u001b[0m \u001b[0mwriter\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstorage_options\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstorage_options\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 936\u001b[0m )\n",
|
||
"\u001b[1;32mc:\\Users\\admin\\.conda\\envs\\F6processing\\lib\\site-packages\\pandas\\io\\excel\\_base.py\u001b[0m in \u001b[0;36m__new__\u001b[1;34m(cls, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs)\u001b[0m\n\u001b[0;32m 1113\u001b[0m \u001b[0mengine\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_default_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mext\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"writer\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1114\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1115\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"No engine for filetype: '{ext}'\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1116\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1117\u001b[0m \u001b[1;31m# for mypy\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||
"\u001b[1;31mValueError\u001b[0m: No engine for filetype: 'xls'"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import re\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import emoji\n",
|
||
"from zhon import hanzi\n",
|
||
"import string\n",
|
||
" \n",
|
||
"def filter_emoji(text):\n",
|
||
" # 先过滤 emoji 表情\n",
|
||
" text = re.sub(r':\\S+?:', '', text)\n",
|
||
" # 再过滤其他表情,只保留 数字,中英文,中英文符号,空格\n",
|
||
" return re.sub(r'[^{}{}{}]'.format(hanzi.characters,hanzi.punctuation,re.escape(string.printable)),'',text)\n",
|
||
"\n",
|
||
"\n",
|
||
"df = pd.read_excel(r\"C:\\Users\\admin\\Desktop\\大唛养车总部_Sheet1_6.xls\", sheet_name='Sheet1', dtype='string')\n",
|
||
"for column in df.columns:\n",
|
||
" df[column]=df[column].apply(lambda x: filter_emoji(x) if isinstance(x, str) else x)\n",
|
||
"df.to_excel(r\"C:\\Users\\admin\\Desktop\\大唛养车总部_Sheet1_6_去除后.xlsx\",index=False)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "ff220721",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"old = df['客户姓名']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "802e9a09",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 替换\n",
|
||
"\n",
|
||
"df['客户姓名'] = df['客户姓名'].apply(filter_emoji)\n",
|
||
"df.to_excel('output_remove_emoji.xlsx',index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "55471334",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"new = df['客户姓名']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "c13b60a8",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"(old != new).sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "fee4d084",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Series([], Name: 客户姓名, dtype: object)"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"old.loc[old != new]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "3dfa1b3d-dbd1-4128-95d4-ad54e43bd7ee",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>客户姓名</th>\n",
|
||
" <th>性别</th>\n",
|
||
" <th>车牌号</th>\n",
|
||
" <th>手机号码</th>\n",
|
||
" <th>标签</th>\n",
|
||
" <th>VIN码</th>\n",
|
||
" <th>车身颜色</th>\n",
|
||
" <th>备注</th>\n",
|
||
" <th>车辆所有人</th>\n",
|
||
" <th>发动机号</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>会员号</th>\n",
|
||
" <th>可用积分</th>\n",
|
||
" <th>累计获取积分</th>\n",
|
||
" <th>专属门店</th>\n",
|
||
" <th>专属顾问</th>\n",
|
||
" <th>客户推广员工</th>\n",
|
||
" <th>注册日期</th>\n",
|
||
" <th>发证日期</th>\n",
|
||
" <th>车辆来源</th>\n",
|
||
" <th>车辆推广员工</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>杨义林</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>宁E5E155</td>\n",
|
||
" <td>18395213630</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>LSVCG6C43LN102469</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>...</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>仁合鹏</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>宁DU0512</td>\n",
|
||
" <td>15209679692</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>...</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>苏琴</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>宁DP0726</td>\n",
|
||
" <td>18409548289</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>LSVUD60N2MN010961</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>...</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>苏彦林</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>宁D5E765</td>\n",
|
||
" <td>13995443053</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>...</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>马成</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>宁DBL138</td>\n",
|
||
" <td>14795049132</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td>...</td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" <td><NA></td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 37 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" 客户姓名 性别 车牌号 手机号码 标签 VIN码 车身颜色 备注 车辆所有人 \\\n",
|
||
"0 杨义林 <NA> 宁E5E155 18395213630 <NA> LSVCG6C43LN102469 <NA> <NA> <NA> \n",
|
||
"1 仁合鹏 <NA> 宁DU0512 15209679692 <NA> <NA> <NA> <NA> <NA> \n",
|
||
"2 苏琴 <NA> 宁DP0726 18409548289 <NA> LSVUD60N2MN010961 <NA> <NA> <NA> \n",
|
||
"3 苏彦林 <NA> 宁D5E765 13995443053 <NA> <NA> <NA> <NA> <NA> \n",
|
||
"4 马成 <NA> 宁DBL138 14795049132 <NA> <NA> <NA> <NA> <NA> \n",
|
||
"\n",
|
||
" 发动机号 ... 会员号 可用积分 累计获取积分 专属门店 专属顾问 客户推广员工 注册日期 发证日期 车辆来源 车辆推广员工 \n",
|
||
"0 <NA> ... <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> \n",
|
||
"1 <NA> ... <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> \n",
|
||
"2 <NA> ... <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> \n",
|
||
"3 <NA> ... <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> \n",
|
||
"4 <NA> ... <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> \n",
|
||
"\n",
|
||
"[5 rows x 37 columns]"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "17e9c41c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
|
||
"Collecting cowsay\n",
|
||
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/6b/b8/9f497fd045d74fe21d91cbe8debae0b451229989e35b539d218547d79fc6/cowsay-5.0.tar.gz (25 kB)\n",
|
||
" Preparing metadata (setup.py): started\n",
|
||
" Preparing metadata (setup.py): finished with status 'done'\n",
|
||
"Building wheels for collected packages: cowsay\n",
|
||
" Building wheel for cowsay (setup.py): started\n",
|
||
" Building wheel for cowsay (setup.py): finished with status 'done'\n",
|
||
" Created wheel for cowsay: filename=cowsay-5.0-py2.py3-none-any.whl size=25707 sha256=6f5c2b68adcfbcf789f1904146a473ad977272737279c74167e6726335c6e89a\n",
|
||
" Stored in directory: c:\\users\\杨国栋\\appdata\\local\\pip\\cache\\wheels\\26\\07\\b6\\ce0395a0094046669a36aa4995af91f543476d725b9b2baccc\n",
|
||
"Successfully built cowsay\n",
|
||
"Installing collected packages: cowsay\n",
|
||
"Successfully installed cowsay-5.0\n",
|
||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pip install cowsay"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "b84d6cb7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "ModuleNotFoundError",
|
||
"evalue": "No module named 'cowsay'",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||
"\u001b[1;32m<ipython-input-1-cde26d015784>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mcowsay\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mcowsay\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdaemon\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'哈哈哈'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'cowsay'"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import cowsay\n",
|
||
"cowsay.daemon('哈哈哈111')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "c89a5d5b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
|
||
"Collecting freegames\n",
|
||
" Downloading https://pypi.tuna.tsinghua.edu.cn/packages/b9/22/3da53ac5d408c88ce53c589eb5a7a0e18cccdd6a8b0be40616635db18ca3/freegames-2.5.3-py3-none-any.whl (112 kB)\n",
|
||
" -------------------------------------- 112.8/112.8 kB 1.6 MB/s eta 0:00:00\n",
|
||
"Installing collected packages: freegames\n",
|
||
"Successfully installed freegames-2.5.3\n",
|
||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pip install freegames"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "1df1bc35",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import os\n",
|
||
"# 查看所有游戏名称\n",
|
||
"os.system('python -m freegames list')\n",
|
||
"# 运行指定游戏\n",
|
||
"os.system('python -m freegames.snake')\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "9634389d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"ename": "ValueError",
|
||
"evalue": "invalid literal for int() with base 16: 'ð\\xa0\\x93¾'",
|
||
"output_type": "error",
|
||
"traceback": [
|
||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
||
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_10260/1880065888.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mencoded_str\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'\\xF0\\xA0\\x93\\xBE'\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mdecoded_str\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mchr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mencoded_str\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m16\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdecoded_str\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
||
"\u001b[1;31mValueError\u001b[0m: invalid literal for int() with base 16: 'ð\\xa0\\x93¾'"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"encoded_str = '\\xF0\\xA0\\x93\\xBE'\n",
|
||
"decoded_str = encoded_str.encode('latin-1').decode('utf-8')\n",
|
||
"print(decoded_str)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.4"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|