Merge branch 'main' of github.com:666ghj/Weibo_PublicOpinion_AnalysisSystem

This commit is contained in:
YYL469
2024-07-03 19:50:05 +08:00
15 changed files with 200 additions and 151 deletions
+2 -1
View File
@@ -1 +1,2 @@
.conda
.conda
*__pycache__/
Generated
+1
View File
@@ -0,0 +1 @@
app.py
+1 -1
View File
@@ -5,7 +5,7 @@
</component>
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="jdk" jdkName="Python 3.9" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TemplatesService">
+4 -1
View File
@@ -3,5 +3,8 @@
<component name="Black">
<option name="sdkName" value="Python 3.12 (Desktop)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (Desktop)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>
</project>
-100
View File
@@ -1,100 +0,0 @@
('哈哈', 1236)
('哈哈哈', 537)
('哈哈哈哈', 157)
('真的', 154)
('期待', 89)
('喜欢', 89)
('doge', 88)
('宝宝', 87)
('可爱', 79)
('第一', 73)
('演唱', 71)
('亲亲', 71)
('苦涩', 70)
('啊啊啊', 68)
('抱抱', 64)
('cry', 64)
('宝贝', 62)
('姐姐', 51)
('花花', 50)
('送花', 48)
('开心', 47)
('加油', 47)
('老师', 46)
('call', 45)
('特别', 42)
('一个', 42)
('抓狂', 40)
('嘻嘻', 39)
('心心', 38)
('悲伤', 38)
('世界', 37)
('感觉', 35)
('孩子', 35)
('朋友', 34)
('鲜花', 34)
('开学', 34)
('好好', 34)
('演唱会', 33)
('感谢', 32)
('憧憬', 31)
('学季', 31)
('快乐', 30)
('漂亮', 30)
('中国', 30)
('音乐', 29)
('电影', 28)
('莲花', 28)
('骄阳', 28)
('视频', 27)
('老公', 27)
('老婆', 27)
('值得', 26)
('好看', 26)
('消失', 26)
('希望', 25)
('呜呜', 25)
('少年', 25)
('东西', 25)
('实力', 24)
('评论', 24)
('舞台', 24)
('生活', 24)
('单身', 24)
('努力', 23)
('唯一', 23)
('幸福', 23)
('时间', 23)
('超级', 23)
('辈子', 22)
('童年', 22)
('时代', 22)
('可怜', 21)
('不见', 21)
('工作', 21)
('有人', 21)
('终于', 21)
('粉丝', 21)
('国家', 21)
('callcallcall', 21)
('永远', 21)
('太阳', 20)
('直播', 20)
('小时', 20)
('星期', 20)
('安全', 20)
('代言', 19)
('支持', 19)
('彩虹', 19)
('妈妈', 18)
('华为', 18)
('优秀', 18)
('好像', 18)
('越来', 18)
('大人', 18)
('父母', 18)
('害怕', 18)
('安哥', 18)
('加班', 18)
('一点', 18)
('一场', 17)
1 ('哈哈' 1236)
2 ('哈哈哈' 537)
3 ('哈哈哈哈' 157)
4 ('真的' 154)
5 ('期待' 89)
6 ('喜欢' 89)
7 ('doge' 88)
8 ('宝宝' 87)
9 ('可爱' 79)
10 ('第一' 73)
11 ('演唱' 71)
12 ('亲亲' 71)
13 ('苦涩' 70)
14 ('啊啊啊' 68)
15 ('抱抱' 64)
16 ('cry' 64)
17 ('宝贝' 62)
18 ('姐姐' 51)
19 ('花花' 50)
20 ('送花' 48)
21 ('开心' 47)
22 ('加油' 47)
23 ('老师' 46)
24 ('call' 45)
25 ('特别' 42)
26 ('一个' 42)
27 ('抓狂' 40)
28 ('嘻嘻' 39)
29 ('心心' 38)
30 ('悲伤' 38)
31 ('世界' 37)
32 ('感觉' 35)
33 ('孩子' 35)
34 ('朋友' 34)
35 ('鲜花' 34)
36 ('开学' 34)
37 ('好好' 34)
38 ('演唱会' 33)
39 ('感谢' 32)
40 ('憧憬' 31)
41 ('学季' 31)
42 ('快乐' 30)
43 ('漂亮' 30)
44 ('中国' 30)
45 ('音乐' 29)
46 ('电影' 28)
47 ('莲花' 28)
48 ('骄阳' 28)
49 ('视频' 27)
50 ('老公' 27)
51 ('老婆' 27)
52 ('值得' 26)
53 ('好看' 26)
54 ('消失' 26)
55 ('希望' 25)
56 ('呜呜' 25)
57 ('少年' 25)
58 ('东西' 25)
59 ('实力' 24)
60 ('评论' 24)
61 ('舞台' 24)
62 ('生活' 24)
63 ('单身' 24)
64 ('努力' 23)
65 ('唯一' 23)
66 ('幸福' 23)
67 ('时间' 23)
68 ('超级' 23)
69 ('辈子' 22)
70 ('童年' 22)
71 ('时代' 22)
72 ('可怜' 21)
73 ('不见' 21)
74 ('工作' 21)
75 ('有人' 21)
76 ('终于' 21)
77 ('粉丝' 21)
78 ('国家' 21)
79 ('callcallcall' 21)
80 ('永远' 21)
81 ('太阳' 20)
82 ('直播' 20)
83 ('小时' 20)
84 ('星期' 20)
85 ('安全' 20)
86 ('代言' 19)
87 ('支持' 19)
88 ('彩虹' 19)
89 ('妈妈' 18)
90 ('华为' 18)
91 ('优秀' 18)
92 ('好像' 18)
93 ('越来' 18)
94 ('大人' 18)
95 ('父母' 18)
96 ('害怕' 18)
97 ('安哥' 18)
98 ('加班' 18)
99 ('一点' 18)
100 ('一场' 17)
-29
View File
@@ -1,29 +0,0 @@
from utils.getPublicData import getAllCommentsData
import jieba
targetTxt = 'cutComments.txt'
def stopWordList(path='./stopWords.txt'):
    """Load the stop-word list, one entry per line.

    Args:
        path: Location of the UTF-8 stop-word file. Defaults to
            ``./stopWords.txt`` (resolved against the current working
            directory), which is the path the original code hard-coded.

    Returns:
        A list with every line of the file, stripped of surrounding
        whitespace. Blank lines are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager guarantees the handle is closed; the previous
    # one-liner left it open until garbage collection.
    with open(path, encoding='utf8') as stop_file:
        return [line.strip() for line in stop_file]
def seg_depart(sentence):
    """Segment all comment texts and drop stop words.

    Args:
        sentence: Iterable of indexable records; element ``[4]`` of each
            record is used as the text (presumably the comment body --
            confirm against ``getAllCommentsData``).

    Returns:
        A single string made of every surviving token concatenated with no
        separator, exactly as the original implementation produced.
    """
    joined_text = " ".join([row[4] for row in sentence]).strip()
    # A set gives O(1) membership tests; the stop-word list has hundreds of
    # entries and is probed once per token.
    stop_words = set(stopWordList())
    kept_tokens = [
        token for token in jieba.cut(joined_text)
        if token not in stop_words and token != '\t'
    ]
    # join() avoids the quadratic cost of repeated string concatenation.
    return ''.join(kept_tokens)
def writer_comments_cuts():
    """Append the segmented comment corpus to ``targetTxt``.

    The stop-word-filtered text from ``seg_depart`` is re-segmented with
    jieba in full mode (``cut_all=True``), joined with single spaces and
    appended as one line. A confirmation message is printed afterwards.
    """
    with open(targetTxt, 'a+', encoding='utf-8') as targetFile:
        filtered_text = seg_depart(getAllCommentsData())
        tokens = jieba.cut(filtered_text, cut_all=True)
        targetFile.write(' '.join(tokens))
        targetFile.write('\n')
        print('写入成功')
# Script entry point: write the segmented comments when run directly.
if __name__ == '__main__':
    writer_comments_cuts()
+100
View File
@@ -0,0 +1,100 @@
('宝宝', 142)
('祝福', 80)
('期待', 77)
('喜欢', 73)
('恭喜', 73)
('接接', 71)
('真的', 62)
('第一', 50)
('快乐', 49)
('祖国', 34)
('舞台', 33)
('朋友', 33)
('老公', 32)
('毕业', 32)
('谢谢', 28)
('好好', 27)
('开心', 27)
('维维', 26)
('加油', 25)
('哥哥', 25)
('视频', 24)
('世界', 24)
('永远', 23)
('好听', 23)
('香港', 23)
('希望', 22)
('孩子', 21)
('七月', 20)
('朋友圈', 19)
('敦豪', 19)
('生活', 18)
('宝贝', 18)
('合作', 18)
('day', 18)
('好看', 18)
('可爱', 17)
('老师', 17)
('涂山', 17)
('致敬', 17)
('中国', 17)
('感觉', 16)
('生日', 16)
('幸福', 16)
('记得', 16)
('追风', 16)
('蟑螂', 16)
('终于', 16)
('评论', 15)
('厉害', 15)
('下次', 15)
('一点', 15)
('双人', 15)
('见面', 15)
('关注', 15)
('实至名归', 14)
('妹妹', 14)
('打开', 14)
('热巴', 14)
('流水', 14)
('任何', 13)
('手机', 13)
('活动', 13)
('呜呜', 13)
('何人', 13)
('电影', 13)
('你好', 13)
('任何人', 13)
('北京', 13)
('粉丝', 13)
('顺利', 13)
('太棒', 12)
('支持', 12)
('奥运', 12)
('人气', 12)
('by', 12)
('漂亮', 12)
('大哥', 12)
('生日快乐', 12)
('老婆', 12)
('精彩', 12)
('工作', 12)
('照顾', 12)
('迢迢', 12)
('时间', 12)
('初心', 12)
('更好', 11)
('早安', 11)
('未来', 11)
('美好', 11)
('造型', 11)
('晚上', 11)
('满满', 11)
('火炬', 10)
('明天', 10)
('魅力', 10)
('实况', 10)
('爷爷', 10)
('骄傲', 10)
('有没有', 10)
('火炬手', 10)
1 ('宝宝' 142)
2 ('祝福' 80)
3 ('期待' 77)
4 ('喜欢' 73)
5 ('恭喜' 73)
6 ('接接' 71)
7 ('真的' 62)
8 ('第一' 50)
9 ('快乐' 49)
10 ('祖国' 34)
11 ('舞台' 33)
12 ('朋友' 33)
13 ('老公' 32)
14 ('毕业' 32)
15 ('谢谢' 28)
16 ('好好' 27)
17 ('开心' 27)
18 ('维维' 26)
19 ('加油' 25)
20 ('哥哥' 25)
21 ('视频' 24)
22 ('世界' 24)
23 ('永远' 23)
24 ('好听' 23)
25 ('香港' 23)
26 ('希望' 22)
27 ('孩子' 21)
28 ('七月' 20)
29 ('朋友圈' 19)
30 ('敦豪' 19)
31 ('生活' 18)
32 ('宝贝' 18)
33 ('合作' 18)
34 ('day' 18)
35 ('好看' 18)
36 ('可爱' 17)
37 ('老师' 17)
38 ('涂山' 17)
39 ('致敬' 17)
40 ('中国' 17)
41 ('感觉' 16)
42 ('生日' 16)
43 ('幸福' 16)
44 ('记得' 16)
45 ('追风' 16)
46 ('蟑螂' 16)
47 ('终于' 16)
48 ('评论' 15)
49 ('厉害' 15)
50 ('下次' 15)
51 ('一点' 15)
52 ('双人' 15)
53 ('见面' 15)
54 ('关注' 15)
55 ('实至名归' 14)
56 ('妹妹' 14)
57 ('打开' 14)
58 ('热巴' 14)
59 ('流水' 14)
60 ('任何' 13)
61 ('手机' 13)
62 ('活动' 13)
63 ('呜呜' 13)
64 ('何人' 13)
65 ('电影' 13)
66 ('你好' 13)
67 ('任何人' 13)
68 ('北京' 13)
69 ('粉丝' 13)
70 ('顺利' 13)
71 ('太棒' 12)
72 ('支持' 12)
73 ('奥运' 12)
74 ('人气' 12)
75 ('by' 12)
76 ('漂亮' 12)
77 ('大哥' 12)
78 ('生日快乐' 12)
79 ('老婆' 12)
80 ('精彩' 12)
81 ('工作' 12)
82 ('照顾' 12)
83 ('迢迢' 12)
84 ('时间' 12)
85 ('初心' 12)
86 ('更好' 11)
87 ('早安' 11)
88 ('未来' 11)
89 ('美好' 11)
90 ('造型' 11)
91 ('晚上' 11)
92 ('满满' 11)
93 ('火炬' 10)
94 ('明天' 10)
95 ('魅力' 10)
96 ('实况' 10)
97 ('爷爷' 10)
98 ('骄傲' 10)
99 ('有没有' 10)
100 ('火炬手' 10)
@@ -4,7 +4,7 @@ import re
def main():
reader = open('./cutComments.txt','r',encoding='utf8')
strs = reader.read()
result = open('./cipingTotal.csv','w',encoding='utf8')
result = open('cipingTotal.csv', 'w', encoding='utf8')
# 分词,去重,列表
word_list = jieba.cut(strs,cut_all=True)
+44
View File
@@ -0,0 +1,44 @@
from utils.getPublicData import getAllCommentsData
import jieba
import re
targetTxt = 'cutComments.txt'
def stopWordList(path='./stopWords.txt'):
    """Load the stop-word list, one entry per line.

    Args:
        path: Location of the UTF-8 stop-word file. Defaults to
            ``./stopWords.txt`` (resolved against the current working
            directory), which is the path the original code hard-coded.

    Returns:
        A list with every line of the file, stripped of surrounding
        whitespace. Blank lines are kept as empty strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager guarantees the handle is closed; the previous
    # one-liner left it open until garbage collection.
    with open(path, encoding='utf8') as stop_file:
        return [line.strip() for line in stop_file]
def seg_depart(sentence):
    """Clean and segment all comment texts, then drop stop words.

    Args:
        sentence: Iterable of indexable records; element ``[4]`` of each
            record is passed through ``clean`` and used as the text
            (presumably the comment body -- confirm against
            ``getAllCommentsData``).

    Returns:
        A single string made of every surviving token concatenated with no
        separator, exactly as the original implementation produced.
    """
    joined_text = " ".join([clean(row[4]) for row in sentence]).strip()
    # A set gives O(1) membership tests; the stop-word list has hundreds of
    # entries and is probed once per token.
    stop_words = set(stopWordList())
    kept_tokens = [
        token for token in jieba.cut(joined_text)
        if token not in stop_words and token != '\t'
    ]
    # join() avoids the quadratic cost of repeated string concatenation.
    return ''.join(kept_tokens)
def writer_comments_cuts():
    """Rewrite ``targetTxt`` with the segmented comment corpus.

    The file is opened in ``'w+'`` mode, so any previous content is
    discarded. The stop-word-filtered text from ``seg_depart`` is
    re-segmented with jieba, joined with single spaces and written as one
    line. A confirmation message is printed afterwards.
    """
    with open(targetTxt, 'w+', encoding='utf-8') as targetFile:
        filtered_text = seg_depart(getAllCommentsData())
        tokens = jieba.cut(filtered_text)
        targetFile.write(' '.join(tokens))
        targetFile.write('\n')
        print('写入成功')
# Patterns are compiled once at import time; clean() runs once per comment.

# "@user" mentions, optionally preceded by "回复" (reply) and/or "//" (repost),
# up to the following ":" / space / end of text.
_MENTION_RE = re.compile(r"(回复)?(//)?\s*@\S*?\s*(:| |$)")
# Bracketed emoticon codes such as "[doge]". The character class excludes
# brackets so that "[doge]text[doge]" removes only the two codes; the former
# greedy r"\[\S+\]" also swallowed the text in between.
_BRACKET_EMOTICON_RE = re.compile(r"\[[^\[\]\s]+\]")
# Every code point outside the Basic Multilingual Plane (covers emoji).
_ASTRAL_CHAR_RE = re.compile('[\U00010000-\U0010ffff]')
# URL matcher; case-insensitivity comes from the inline (?i) flag, so the
# redundant re.IGNORECASE argument is not needed.
_URL_RE = re.compile(
    r'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))')
_WHITESPACE_RE = re.compile(r"\s+")


def clean(text):
    """Strip Weibo-specific noise from a piece of text.

    Removes, in order: ``@user`` mentions (with optional reply / repost
    prefixes), bracketed emoticon codes, non-BMP characters such as emoji,
    URLs, and the boilerplate phrase "转发微博". Runs of whitespace are then
    collapsed to one space. Hashtag topics (``#...#``) are intentionally
    kept.

    Args:
        text: The raw text. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned text with leading and trailing whitespace removed.
    """
    if not text:
        # Nothing to clean; also avoids a TypeError from re on None.
        return ""
    text = _MENTION_RE.sub(" ", text)
    text = _BRACKET_EMOTICON_RE.sub("", text)
    text = _ASTRAL_CHAR_RE.sub('', text)
    text = _URL_RE.sub("", text)
    text = text.replace("转发微博", "")  # boilerplate auto-text, carries no meaning
    text = _WHITESPACE_RE.sub(" ", text)
    return text.strip()
# Script entry point: regenerate the segmented-comments file when run directly.
if __name__ == '__main__':
    writer_comments_cuts()
    # Manual smoke test for clean(): uncomment to print the cleaned form of a
    # sample comment.
    # print(clean("想到一次我也看到了这样的,我把我的外套(喷了淡茉莉香水的)递过去了,我当时觉得她可能是因为地铁空调有点冷一直环抱着,我借给她说冷的话可以披一下,我坐到终点站的,然后她说不用了,我好尴尬哇"))
File diff suppressed because one or more lines are too long
-1
View File
@@ -1,4 +1,3 @@
from flask import render_template
def errorResponse(errorMsg):
    """Render the ``error.html`` template for the given message.

    Args:
        errorMsg: Value exposed to the template under the name ``errorMsg``.

    Returns:
        Whatever ``render_template`` returns for ``error.html``.
    """
    rendered_page = render_template('error.html', errorMsg=errorMsg)
    return rendered_page
+1 -1
View File
@@ -79,7 +79,7 @@ def getAllArticleData():
def getAllHotWords():
data = []
df = pd.read_csv('./model/cipingTotal.csv',encoding='utf8')
df = pd.read_csv('./utils/cipingTotal.csv',encoding='utf8')
for i in df.values:
try:
data.append([
+8
View File
@@ -719,6 +719,8 @@ sup
哈哈
哈哈哈
哈哈哈哈
哎呀
@@ -742,7 +744,13 @@ sup
哼唷
唯有
特别
超级
越来
越来越
啊啊
啊啊啊
啊呀
啊哈
啊哟
Binary file not shown.
+37 -16
View File
@@ -1,48 +1,69 @@
import time
from flask import Blueprint, redirect, render_template, request,Flask, session
import hashlib
from flask import Blueprint, redirect, render_template, request, Flask, session
from utils.query import query
from utils.errorResponse import errorResponse
ub = Blueprint('user',
__name__,
url_prefix='/user',
template_folder='templates')
ub = Blueprint('user',__name__,url_prefix='/user',template_folder='templates')
@ub.route('/login',methods=['GET','POST'])
@ub.route('/login', methods=['GET', 'POST'])
def login():
if request.method == 'GET':
return render_template('login.html')
else:
def filter_fn(user):
return request.form['username'] in user and request.form['password'] in user
hash_with_salt = hashlib.sha256('XiaoXueQi2024'.encode('utf-8'))
hash_with_salt.update(request.form['password'].encode('utf-8'))
return request.form[
'username'] in user and hash_with_salt.hexdigest() in user
users = query('select * from user', [], 'select')
login_success = list(filter(filter_fn,users))
if not len(login_success):return errorResponse('账号或密码错误')
login_success = list(filter(filter_fn, users))
if not len(login_success): return errorResponse('账号或密码错误')
session['username'] = request.form['username']
return redirect('/page/home')
@ub.route('/register',methods=['GET','POST'])
@ub.route('/register', methods=['GET', 'POST'])
def register():
if request.method == 'GET':
return render_template('register.html')
else:
if request.form['password'] != request.form['checkPassword']:return errorResponse('两次密码不符合')
if request.form['password'] != request.form['checkPassword']:
return errorResponse('两次密码不符合')
def filter_fn(user):
return request.form['username'] in user
users = query('select * from user',[],'select')
filter_list = list(filter(filter_fn,users))
users = query('select * from user', [], 'select')
filter_list = list(filter(filter_fn, users))
if len(filter_list):
return errorResponse('该用户名已被注册')
else:
time_tuple = time.localtime(time.time())
query('''
hash_with_salt = hashlib.sha256('XiaoXueQi2024'.encode('utf-8'))
hash_with_salt.update(request.form['password'].encode('utf-8'))
query(
'''
insert into user(username,password,createTime) values(%s,%s,%s)
''',[request.form['username'],request.form['password'],str(time_tuple[0]) + '-' + str(time_tuple[1]) + '-' + str(time_tuple[2])])
''', [
request.form['username'],
hash_with_salt.hexdigest(),
str(time_tuple[0]) + '-' + str(time_tuple[1]) + '-' +
str(time_tuple[2])
])
return redirect('/user/login')
@ub.route('/logOut')
def logOut():
session.clear()
return redirect('/user/login')
session.clear()
return redirect('/user/login')