bug修改
This commit is contained in:
Generated
+1
@@ -0,0 +1 @@
|
||||
app.py
|
||||
+1
-1
@@ -5,7 +5,7 @@
|
||||
</component>
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.9" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="TemplatesService">
|
||||
|
||||
Generated
+4
-1
@@ -3,5 +3,8 @@
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.12 (Desktop)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (Desktop)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
</project>
|
||||
@@ -1,100 +0,0 @@
|
||||
('哈哈', 1236)
|
||||
('哈哈哈', 537)
|
||||
('哈哈哈哈', 157)
|
||||
('真的', 154)
|
||||
('期待', 89)
|
||||
('喜欢', 89)
|
||||
('doge', 88)
|
||||
('宝宝', 87)
|
||||
('可爱', 79)
|
||||
('第一', 73)
|
||||
('演唱', 71)
|
||||
('亲亲', 71)
|
||||
('苦涩', 70)
|
||||
('啊啊啊', 68)
|
||||
('抱抱', 64)
|
||||
('cry', 64)
|
||||
('宝贝', 62)
|
||||
('姐姐', 51)
|
||||
('花花', 50)
|
||||
('送花', 48)
|
||||
('开心', 47)
|
||||
('加油', 47)
|
||||
('老师', 46)
|
||||
('call', 45)
|
||||
('特别', 42)
|
||||
('一个', 42)
|
||||
('抓狂', 40)
|
||||
('嘻嘻', 39)
|
||||
('心心', 38)
|
||||
('悲伤', 38)
|
||||
('世界', 37)
|
||||
('感觉', 35)
|
||||
('孩子', 35)
|
||||
('朋友', 34)
|
||||
('鲜花', 34)
|
||||
('开学', 34)
|
||||
('好好', 34)
|
||||
('演唱会', 33)
|
||||
('感谢', 32)
|
||||
('憧憬', 31)
|
||||
('学季', 31)
|
||||
('快乐', 30)
|
||||
('漂亮', 30)
|
||||
('中国', 30)
|
||||
('音乐', 29)
|
||||
('电影', 28)
|
||||
('莲花', 28)
|
||||
('骄阳', 28)
|
||||
('视频', 27)
|
||||
('老公', 27)
|
||||
('老婆', 27)
|
||||
('值得', 26)
|
||||
('好看', 26)
|
||||
('消失', 26)
|
||||
('希望', 25)
|
||||
('呜呜', 25)
|
||||
('少年', 25)
|
||||
('东西', 25)
|
||||
('实力', 24)
|
||||
('评论', 24)
|
||||
('舞台', 24)
|
||||
('生活', 24)
|
||||
('单身', 24)
|
||||
('努力', 23)
|
||||
('唯一', 23)
|
||||
('幸福', 23)
|
||||
('时间', 23)
|
||||
('超级', 23)
|
||||
('辈子', 22)
|
||||
('童年', 22)
|
||||
('时代', 22)
|
||||
('可怜', 21)
|
||||
('不见', 21)
|
||||
('工作', 21)
|
||||
('有人', 21)
|
||||
('终于', 21)
|
||||
('粉丝', 21)
|
||||
('国家', 21)
|
||||
('callcallcall', 21)
|
||||
('永远', 21)
|
||||
('太阳', 20)
|
||||
('直播', 20)
|
||||
('小时', 20)
|
||||
('星期', 20)
|
||||
('安全', 20)
|
||||
('代言', 19)
|
||||
('支持', 19)
|
||||
('彩虹', 19)
|
||||
('妈妈', 18)
|
||||
('华为', 18)
|
||||
('优秀', 18)
|
||||
('好像', 18)
|
||||
('越来', 18)
|
||||
('大人', 18)
|
||||
('父母', 18)
|
||||
('害怕', 18)
|
||||
('安哥', 18)
|
||||
('加班', 18)
|
||||
('一点', 18)
|
||||
('一场', 17)
|
||||
|
@@ -1,29 +0,0 @@
|
||||
from utils.getPublicData import getAllCommentsData
|
||||
import jieba
|
||||
targetTxt = 'cutComments.txt'
|
||||
|
||||
def stopWordList():
    """Load the stop words stored one per line in ``./stopWords.txt``.

    Returns:
        list[str]: every line of the file with surrounding whitespace
        stripped, in file order (blank lines become empty strings).

    Raises:
        OSError: if the file cannot be opened.
    """
    # The context manager guarantees the handle is closed; the previous
    # version left it open until garbage collection.
    with open('./stopWords.txt', encoding='utf8') as stop_file:
        return [line.strip() for line in stop_file]
|
||||
|
||||
def seg_depart(sentence):
    """Segment the comment texts with jieba and drop stop words.

    Args:
        sentence: iterable of rows; element ``[4]`` of every row is used as
            the text (presumably the comment body - confirm against
            ``getAllCommentsData``).

    Returns:
        str: the kept tokens concatenated without any separator.
    """
    joined_text = " ".join([x[4] for x in sentence]).strip()
    # A set gives O(1) membership tests; the list version rescanned every
    # stop word for every token.
    stop_words = set(stopWordList())
    # Build the result in one join instead of repeated string +=.
    return ''.join(
        word for word in jieba.cut(joined_text)
        if word not in stop_words and word != '\t'
    )
|
||||
|
||||
def writer_comments_cuts():
    """Append one line of full-mode jieba tokens for all comments to ``targetTxt``.

    The comments come from ``getAllCommentsData()``, are filtered by
    ``seg_depart`` and then re-cut with ``cut_all=True``; the tokens are
    written space-separated, followed by a newline.
    """
    with open(targetTxt, 'a+', encoding='utf-8') as out_file:
        filtered_text = seg_depart(getAllCommentsData())
        tokens = jieba.cut(filtered_text, cut_all=True)
        out_file.write(' '.join(tokens))
        out_file.write('\n')
    print('写入成功')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
writer_comments_cuts()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,100 @@
|
||||
('宝宝', 142)
|
||||
('祝福', 80)
|
||||
('期待', 77)
|
||||
('喜欢', 73)
|
||||
('恭喜', 73)
|
||||
('接接', 71)
|
||||
('真的', 62)
|
||||
('第一', 50)
|
||||
('快乐', 49)
|
||||
('祖国', 34)
|
||||
('舞台', 33)
|
||||
('朋友', 33)
|
||||
('老公', 32)
|
||||
('毕业', 32)
|
||||
('谢谢', 28)
|
||||
('好好', 27)
|
||||
('开心', 27)
|
||||
('维维', 26)
|
||||
('加油', 25)
|
||||
('哥哥', 25)
|
||||
('视频', 24)
|
||||
('世界', 24)
|
||||
('永远', 23)
|
||||
('好听', 23)
|
||||
('香港', 23)
|
||||
('希望', 22)
|
||||
('孩子', 21)
|
||||
('七月', 20)
|
||||
('朋友圈', 19)
|
||||
('敦豪', 19)
|
||||
('生活', 18)
|
||||
('宝贝', 18)
|
||||
('合作', 18)
|
||||
('day', 18)
|
||||
('好看', 18)
|
||||
('可爱', 17)
|
||||
('老师', 17)
|
||||
('涂山', 17)
|
||||
('致敬', 17)
|
||||
('中国', 17)
|
||||
('感觉', 16)
|
||||
('生日', 16)
|
||||
('幸福', 16)
|
||||
('记得', 16)
|
||||
('追风', 16)
|
||||
('蟑螂', 16)
|
||||
('终于', 16)
|
||||
('评论', 15)
|
||||
('厉害', 15)
|
||||
('下次', 15)
|
||||
('一点', 15)
|
||||
('双人', 15)
|
||||
('见面', 15)
|
||||
('关注', 15)
|
||||
('实至名归', 14)
|
||||
('妹妹', 14)
|
||||
('打开', 14)
|
||||
('热巴', 14)
|
||||
('流水', 14)
|
||||
('任何', 13)
|
||||
('手机', 13)
|
||||
('活动', 13)
|
||||
('呜呜', 13)
|
||||
('何人', 13)
|
||||
('电影', 13)
|
||||
('你好', 13)
|
||||
('任何人', 13)
|
||||
('北京', 13)
|
||||
('粉丝', 13)
|
||||
('顺利', 13)
|
||||
('太棒', 12)
|
||||
('支持', 12)
|
||||
('奥运', 12)
|
||||
('人气', 12)
|
||||
('by', 12)
|
||||
('漂亮', 12)
|
||||
('大哥', 12)
|
||||
('生日快乐', 12)
|
||||
('老婆', 12)
|
||||
('精彩', 12)
|
||||
('工作', 12)
|
||||
('照顾', 12)
|
||||
('迢迢', 12)
|
||||
('时间', 12)
|
||||
('初心', 12)
|
||||
('更好', 11)
|
||||
('早安', 11)
|
||||
('未来', 11)
|
||||
('美好', 11)
|
||||
('造型', 11)
|
||||
('晚上', 11)
|
||||
('满满', 11)
|
||||
('火炬', 10)
|
||||
('明天', 10)
|
||||
('魅力', 10)
|
||||
('实况', 10)
|
||||
('爷爷', 10)
|
||||
('骄傲', 10)
|
||||
('有没有', 10)
|
||||
('火炬手', 10)
|
||||
|
@@ -4,7 +4,7 @@ import re
|
||||
def main():
|
||||
reader = open('./cutComments.txt','r',encoding='utf8')
|
||||
strs = reader.read()
|
||||
result = open('./cipingTotal.csv','w',encoding='utf8')
|
||||
result = open('cipingTotal.csv', 'w', encoding='utf8')
|
||||
|
||||
# 分词,去重,列表
|
||||
word_list = jieba.cut(strs,cut_all=True)
|
||||
@@ -0,0 +1,44 @@
|
||||
from utils.getPublicData import getAllCommentsData
|
||||
import jieba
|
||||
import re
|
||||
targetTxt = 'cutComments.txt'
|
||||
|
||||
def stopWordList():
    """Load the stop words stored one per line in ``./stopWords.txt``.

    Returns:
        list[str]: every line of the file with surrounding whitespace
        stripped, in file order (blank lines become empty strings).

    Raises:
        OSError: if the file cannot be opened.
    """
    # The context manager guarantees the handle is closed; the previous
    # version left it open until garbage collection.
    with open('./stopWords.txt', encoding='utf8') as stop_file:
        return [line.strip() for line in stop_file]
|
||||
|
||||
def seg_depart(sentence):
    """Clean, segment and stop-word-filter the comment texts.

    Args:
        sentence: iterable of rows; element ``[4]`` of every row is passed
            through ``clean`` and used as the text (presumably the comment
            body - confirm against ``getAllCommentsData``).

    Returns:
        str: the kept jieba tokens concatenated without any separator.
    """
    joined_text = " ".join([clean(x[4]) for x in sentence]).strip()
    # A set gives O(1) membership tests; the list version rescanned every
    # stop word for every token.
    stop_words = set(stopWordList())
    # Build the result in one join instead of repeated string +=.
    return ''.join(
        word for word in jieba.cut(joined_text)
        if word not in stop_words and word != '\t'
    )
|
||||
|
||||
def writer_comments_cuts():
    """Segment all comments and overwrite ``targetTxt`` with the tokens.

    The comments come from ``getAllCommentsData()``, are cleaned and filtered
    by ``seg_depart`` and then re-cut with jieba; the tokens are written
    space-separated on a single line followed by a newline.
    """
    # Do all fallible work first: opening in write mode truncates the file,
    # so a failure while fetching or segmenting must not wipe the old output.
    seg = jieba.cut(seg_depart(getAllCommentsData()))
    output = ' '.join(seg)
    # Plain 'w' is enough - the file is only written, never read back.
    with open(targetTxt, 'w', encoding='utf-8') as targetFile:
        targetFile.write(output)
        targetFile.write('\n')
    print('写入成功')
|
||||
|
||||
# Patterns are compiled once at import time; clean() is called once per comment.
# "@user" mentions, optionally prefixed by 回复 (reply) or // (repost marker).
_MENTION_RE = re.compile(r"(回复)?(//)?\s*@\S*?\s*(:| |$)")
# Bracketed emoticons such as [doge]. The character class stops at the first
# closing bracket so text between two emoticons is no longer swallowed.
_BRACKET_EMOTICON_RE = re.compile(r"\[[^\[\]\s]+\]")
# Every code point outside the Basic Multilingual Plane (covers emoji).
_ASTRAL_CHAR_RE = re.compile('[\U00010000-\U0010ffff]')
_URL_RE = re.compile(
    r'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))',
    re.IGNORECASE)
_WHITESPACE_RE = re.compile(r"\s+")


def clean(text):
    """Strip mentions, emoticons, emoji, URLs and filler from a comment.

    Hashtag topics (``#...#``) are deliberately left in place.

    Args:
        text: the raw comment text. ``None`` or an empty string yields ``""``
            (NOTE(review): assumes the data source may contain NULL texts -
            confirm against the caller).

    Returns:
        str: the cleaned text with whitespace runs collapsed to single spaces
        and leading/trailing whitespace removed.
    """
    if not text:
        return ""
    text = _MENTION_RE.sub(" ", text)  # user names in mentions, replies and reposts
    text = _BRACKET_EMOTICON_RE.sub("", text)  # bracketed emoticons
    text = _ASTRAL_CHAR_RE.sub('', text)  # emoji
    text = _URL_RE.sub("", text)  # URLs
    text = text.replace("转发微博", "")  # boilerplate "repost" phrase
    text = _WHITESPACE_RE.sub(" ", text)  # collapse repeated whitespace
    return text.strip()
|
||||
|
||||
if __name__ == '__main__':
|
||||
writer_comments_cuts()
|
||||
# print(clean("想到一次我也看到了这样的,我把我的外套(喷了淡茉莉香水的)递过去了,我当时觉得她可能是因为地铁空调有点冷一直环抱着,我借给她说冷的话可以披一下,我坐到终点站的,然后她说不用了,我好尴尬哇"))
|
||||
File diff suppressed because one or more lines are too long
@@ -1,4 +1,3 @@
|
||||
from flask import render_template
|
||||
|
||||
def errorResponse(errorMsg):
    """Render ``error.html``, passing *errorMsg* as template variable ``errorMsg``."""
    rendered_page = render_template('error.html', errorMsg=errorMsg)
    return rendered_page
|
||||
@@ -79,7 +79,7 @@ def getAllArticleData():
|
||||
|
||||
def getAllHotWords():
|
||||
data = []
|
||||
df = pd.read_csv('./model/cipingTotal.csv',encoding='utf8')
|
||||
df = pd.read_csv('./utils/cipingTotal.csv',encoding='utf8')
|
||||
for i in df.values:
|
||||
try:
|
||||
data.append([
|
||||
|
||||
+1
-1
@@ -1,5 +1,5 @@
|
||||
from pymysql import *
|
||||
conn = connect(host='10.92.35.13',port=3306,user='XiaoXueQi',password='XiaoXueQi',database='Weibo_PublicOpinion_AnalysisSystem')
|
||||
conn = connect(host='localhost',port=3306,user='root',password='123456',database='weiboarticles')
|
||||
cursor = conn.cursor()
|
||||
def query(sql,params,type="no_select"):
|
||||
params = tuple(params)
|
||||
|
||||
@@ -719,6 +719,8 @@ sup
|
||||
哇
|
||||
哈
|
||||
哈哈
|
||||
哈哈哈
|
||||
哈哈哈哈
|
||||
哉
|
||||
哎
|
||||
哎呀
|
||||
@@ -742,7 +744,13 @@ sup
|
||||
哼唷
|
||||
唉
|
||||
唯有
|
||||
特别
|
||||
超级
|
||||
越来
|
||||
越来越
|
||||
啊
|
||||
啊啊
|
||||
啊啊啊
|
||||
啊呀
|
||||
啊哈
|
||||
啊哟
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user