脚本
This commit is contained in:
@@ -0,0 +1,18 @@
|
||||
import re
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import emoji
|
||||
from zhon import hanzi
|
||||
import string
|
||||
|
||||
def filter_emoji(text: str) -> str:
    """Strip emoji aliases and unsupported characters from *text*.

    Two passes are applied:

    1. ``:shortcode:``-style emoji aliases (for example ``:smile:`` or
       ``:+1:``) are removed.
    2. Every character that is not covered by ``hanzi.characters``,
       ``hanzi.punctuation`` or ``string.printable`` is removed, which
       leaves digits, Chinese/English text and punctuation, and whitespace.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # An alias must be ASCII and contain at least one letter (or be +1 / -1).
    # The earlier pattern ``:\S+?:`` also matched the middle of timestamps
    # and URLs, so "10:30:45" was silently turned into "1045".
    text = re.sub(
        r':(?:[+-]1|[0-9_+-]*[A-Za-z][A-Za-z0-9_+-]*):', '', text
    )

    # string.printable contains "\", "]", "^" and "-", which are special
    # inside a character class.  Unescaped, its "\]" sequence was read as an
    # escaped "]" and the backslash itself was dropped from the allowed set,
    # so backslashes were stripped from the data.
    # The hanzi constants are spliced in unescaped, as in the original;
    # presumably zhon provides them in character-class form -- confirm
    # against the zhon documentation.
    allowed = '{}{}{}'.format(
        hanzi.characters,
        hanzi.punctuation,
        re.escape(string.printable),
    )
    return re.sub('[^{}]'.format(allowed), '', text)
|
||||
|
||||
|
||||
# Input workbook and the location the cleaned copy is written to.
source_path = r"C:\Users\admin\Desktop\路普卡车客户新消息 (1).xls"
cleaned_path = r"C:\Users\admin\Desktop\路普卡车客户新消息 (1)_去除后.xls"


def _clean_cell(value):
    """Pass str cells through filter_emoji; return anything else unchanged."""
    if isinstance(value, str):
        return filter_emoji(value)
    return value


# Load the sheet with every column read as pandas' "string" dtype.
df = pd.read_excel(source_path, sheet_name='2018年1月至2024年7月', dtype='string')

# Clean the workbook one column at a time, cell by cell.
for col_name in df.columns:
    df[col_name] = df[col_name].apply(_clean_cell)

# NOTE(review): the target is a legacy .xls file; newer pandas releases may
# no longer provide a writer for that format -- confirm against the installed
# pandas version before relying on this call.
df.to_excel(cleaned_path, index=False)
|
||||
Reference in New Issue
Block a user