A new multilingual sentiment analysis module has been added.

This commit is contained in:
戒酒的李白
2025-08-04 19:49:59 +08:00
parent 645242a552
commit bd60e2ed1b
7 changed files with 329 additions and 2 deletions
@@ -7,6 +7,8 @@ def preprocess_text(text):
# Strip @mentions: "@" plus the shortest run of characters up to a space or end of string.
text = re.sub(r"@.+?( |$)", " ", text) # remove @xxx
text = re.sub(r"【.+?】", " ", text) # remove bracketed 【xx】 segments
text = re.sub(r"\u200b", " ", text) # replace zero-width spaces (U+200B) with a plain space
# Remove emoji and pictographic symbols.
# NOTE(review): the last range in this character class, \U000024c2-\U0001f251,
# covers every code point from U+24C2 through U+1F251. That span includes CJK
# ideographs (U+4E00-U+9FFF), kana and Hangul, so this substitution also deletes
# Chinese/Japanese/Korean text, not just emoji. Confirm whether that is intended;
# if not, narrow the range to the specific emoji blocks.
text = re.sub(r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF\U00002600-\U000027BF\U0001f900-\U0001f9ff\U0001f018-\U0001f270\U0000231a-\U0000231b\U0000238d-\U0000238d\U000024c2-\U0001f251]+', '', text)
text = re.sub(r"\s+", " ", text) # collapse runs of whitespace into a single space
# Trim leading/trailing whitespace left over from the substitutions above.
return text.strip()