# F6--/张阳脚本/工具/脚本文件/emoj清洗.py

import xlrd
import re
from zhon import hanzi
import string

def filter_emoji(text):
    """Strip emoji and other unsupported characters from *text*.

    Two passes are applied:

    1. Remove ``:shortcode:``-style emoji aliases, i.e. any run of
       non-whitespace characters enclosed by a pair of colons.
    2. Remove every character that is not listed in ``hanzi.characters``,
       ``hanzi.punctuation`` or ``string.printable`` (digits, ASCII letters,
       ASCII punctuation and whitespace).

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # Pass 1: drop colon-delimited emoji shortcodes such as ":smile:".
    # NOTE(review): this also matches ordinary text such as the ":30:" in
    # "12:30:45" -- confirm that is acceptable for the data being cleaned.
    text = re.sub(r':\S+?:', '', text)

    # Pass 2: keep only the allowed characters.
    # string.printable contains "\", "]", "^" and "-", all of which are
    # special inside a character class. Left unescaped, its "\]" pair is read
    # as an escaped "]", so the backslash itself was not part of the allowed
    # set and got stripped from the input. re.escape keeps every printable
    # ASCII character literal. string.printable already includes the space
    # character, so no extra space is needed in the class.
    # The hanzi strings are interpolated raw, exactly as before, because
    # escaping them would alter any range syntax they contain.
    allowed = hanzi.characters + hanzi.punctuation + re.escape(string.printable)
    return re.sub('[^{}]'.format(allowed), '', text)

# Path of the Excel workbook to clean
file_path = r"D:\其他文件\emoji.xls"

# Open the workbook
workbook = xlrd.open_workbook(file_path)

# List every sheet so the user can pick one
sheet_names = workbook.sheet_names()
print("可用的工作表名称:", sheet_names)

# Ask which sheet to process
sheet_name = input("请输入要处理的工作表名称: ")
if sheet_name not in sheet_names:
    # sheet_by_name raises on an unknown name; exit with a readable message
    # instead of a traceback, mirroring the invalid-column handling below.
    raise SystemExit(f"无效的工作表名称 '{sheet_name}'，请选择正确的工作表名称。")
worksheet = workbook.sheet_by_name(sheet_name)

# A sheet without any rows has no header row, so row_values(0) would fail.
if worksheet.nrows == 0:
    raise SystemExit(f"工作表 '{sheet_name}' 为空，没有可处理的数据。")

# The first row holds the column names
first_row = worksheet.row_values(0)
print("列名:", first_row)

# Ask which column to process
column_choice = input("请选择要处理的列名(列明为中文): ")
if column_choice not in first_row:
    print(f"无效的列名 '{column_choice}'，请选择正确的列名。")
else:
    # Index of the chosen column (the first match if names repeat)
    column_index = first_row.index(column_choice)

    # Walk the chosen column and apply filter_emoji to every text cell.
    # Start at row 1 because row 0 is the header.
    for row_idx in range(1, worksheet.nrows):
        cell_value = worksheet.cell(row_idx, column_index).value
        # Only string cells are cleaned; other cell values are skipped.
        if isinstance(cell_value, str):
            filtered_value = filter_emoji(cell_value)
            print(f"处理前: {cell_value} -> 处理后: {filtered_value}")
            # The cleaned value is only printed here; collect it into a list
            # or write it to a file if it needs to be kept.