【大修bug】添加csv表格原始数据，修改词频统计函数bug

2024-07-03 15:45:08 +08:00
parent d93da880cf
commit f98d111c32
6 changed files with 2773 additions and 49 deletions
@@ -1,31 +0,0 @@
-import jieba
-import re
-
-def main():
-    reader = open('./cutComments.txt','r',encoding='utf8')
-    strs = reader.read()
-    result = open('./cipingTotal.csv','w',encoding='utf8')
-
-    # 分词，去重，列表
-    word_list = jieba.cut(strs,cut_all=True)
-
-    new_words = []
-    for i in word_list:
-        m = re.search("\d+",i)
-        n = re.search("\W+",i)
-        if not m and not n and len(i) > 1:
-            new_words.append(i)
-
-    # 统计词频
-    word_count = {}
-    for i in set(new_words):
-        word_count[i] = new_words.count(i)
-
-    # 格式整理
-    list_count = sorted(word_count.items(),key=lambda x:x[1],reverse=True)
-
-    for i in range(100):
-        print(list_count[i],file=result)
-
-if __name__ == '__main__':
-    main()