【getHowWordPageData.py】提取热词和相关函数定义
This commit is contained in:
@@ -0,0 +1,94 @@
|
|||||||
|
from utils.getPublicData import getAllCommentsData,getAllArticleData
|
||||||
|
from datetime import datetime
|
||||||
|
import jieba
|
||||||
|
from wordcloud import WordCloud
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
# Module-level cache: whatever getAllCommentsData() returns, fetched once when this module is imported.
# NOTE(review): presumably a list of comment rows indexed by column position - confirm against utils.getPublicData.
commentsList = getAllCommentsData()
|
||||||
|
# Module-level cache: whatever getAllArticleData() returns, fetched once when this module is imported.
# The functions below read it by column position (e.g. article[1], article[4], article[7]).
articleList = getAllArticleData()
|
||||||
|
|
||||||
|
def getHomeTagsData():
    """Summarise the article table for the home page.

    Reads the module-level ``articleList`` and returns a 3-tuple:

    * the number of articles;
    * column 11 (the author name, per the original comment) of the article
      whose column 1 (like count) is highest, or ``''`` when no article has
      a like count above zero;
    * the most frequent value of column 4 (the city, per the original
      comment), ignoring the placeholder ``'无'``, or ``''`` when no article
      carries a usable city.

    Raises:
        ValueError: if a like count cannot be converted with ``int``.
    """
    likeCountMax = 0
    likeCountMaxAuthorName = ''
    cityDic = {}
    for article in articleList:
        likeCount = int(article[1])
        # Strict comparison keeps the first article seen among equal like counts.
        if likeCount > likeCountMax:
            likeCountMax = likeCount
            likeCountMaxAuthorName = article[11]
        city = article[4]
        if city != '无':
            cityDic[city] = cityDic.get(city, 0) + 1
    # max() yields the first maximal key in insertion order, which matches the
    # former stable descending sort; the guard replaces the IndexError that
    # indexing an empty sorted list used to raise.
    topCity = max(cityDic, key=cityDic.get) if cityDic else ''
    return len(articleList), likeCountMaxAuthorName, topCity
|
||||||
|
|
||||||
|
def getHomeCommentsLikeCountTopFore():
    """Return the four most-liked comments, highest like count first.

    Comments come from the module-level ``commentsList`` and are ranked by
    ``int(comment[2])``. Fewer than four rows are returned when fewer exist.
    """
    byLikesDescending = sorted(
        commentsList,
        key=lambda row: int(row[2]),
        reverse=True,
    )
    return byLikesDescending[:4]
|
||||||
|
|
||||||
|
def getHomeArticleCreatedAtChart():
    """Count the articles per creation date.

    Column 7 of every row in the module-level ``articleList`` is treated as a
    ``YYYY-MM-DD`` date string.

    Returns:
        A pair ``(xData, yData)``: ``xData`` holds the distinct date strings
        ordered from newest to oldest, and ``yData[i]`` is the number of
        articles whose date equals ``xData[i]``.

    Raises:
        ValueError: if a date string does not match ``%Y-%m-%d``.
    """
    # One pass over the articles instead of rescanning every date per article.
    countByDate = {}
    for article in articleList:
        createdAt = article[7]
        countByDate[createdAt] = countByDate.get(createdAt, 0) + 1
    # datetime objects order chronologically on their own, so there is no need
    # to go through a local-time epoch timestamp.
    xData = sorted(
        countByDate,
        key=lambda day: datetime.strptime(day, '%Y-%m-%d'),
        reverse=True,
    )
    yData = [countByDate[day] for day in xData]
    return xData, yData
|
||||||
|
|
||||||
|
def getHomeTypeChart():
    """Count the articles per type (column 8 of each ``articleList`` row).

    Returns:
        A list of ``{'name': <type>, 'value': <count>}`` dicts, one per
        distinct type, in order of first appearance.
    """
    countByType = {}
    for row in articleList:
        articleType = row[8]
        countByType[articleType] = countByType.get(articleType, 0) + 1
    return [
        {'name': articleType, 'value': total}
        for articleType, total in countByType.items()
    ]
|
||||||
|
|
||||||
|
def getHomeCommentCreatedChart():
    """Count the comments per day (column 1 of each ``commentsList`` row).

    Returns:
        A list of ``{'name': <date>, 'value': <count>}`` dicts, one per
        distinct date value, in order of first appearance.
    """
    tally = {}
    for row in commentsList:
        day = row[1]
        tally[day] = tally.get(day, 0) + 1

    chartItems = []
    for day, total in tally.items():
        chartItems.append({'name': day, 'value': total})
    return chartItems
|
||||||
|
|
||||||
|
def stopWordList():
    """Load the stop words from ``./stopWords.txt``.

    The path is relative to the current working directory and the file is
    read as UTF-8.

    Returns:
        A list with one entry per line of the file, each stripped of leading
        and trailing whitespace (blank lines become empty strings).

    Raises:
        OSError: if the file cannot be opened.
    """
    # The context manager closes the handle, which the bare open() never did.
    with open('./stopWords.txt', encoding='utf8') as stopFile:
        return [line.strip() for line in stopFile]
|
||||||
|
|
||||||
|
def getUserNameWordCloud():
    """Render a word cloud of comment user names and save it as an image.

    Column 5 of every row in the module-level ``commentsList`` (the user
    name, per the original comment) is concatenated, segmented with
    ``jieba.cut`` and filtered against ``stopWordList()``. The remaining
    words are rendered with ``WordCloud`` and written to
    ``./static/authorNameCloud.jpg``.

    Returns:
        None. The only result is the saved image file.
    """
    # A set gives constant-time stop-word checks for every token.
    stopWords = set(stopWordList())
    text = ''.join(comment[5] for comment in commentsList)
    keptWords = [word for word in jieba.cut(text) if word not in stopWords]

    wc = WordCloud(
        width=1000,
        height=600,
        background_color='#fff',
        colormap='Blues',
        font_path='STHUPO.TTF'
    )
    wc.generate_from_text(' '.join(keptWords))

    fig = plt.figure(1)
    try:
        plt.imshow(wc)
        plt.axis('off')
        plt.savefig('./static/authorNameCloud.jpg', dpi=500)
    finally:
        # Release figure 1 so a later call starts from a clean canvas instead
        # of drawing on top of the previous image and keeping it in memory.
        plt.close(fig)
|
||||||
|
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
from utils.getPublicData import *
|
||||||
|
|
||||||
|
def getHotWordLen(hotWord):
    """Count the comments whose column 4 contains ``hotWord``.

    Args:
        hotWord: Substring searched for with ``str.find`` in column 4 of
            every row returned by ``getAllCommentsData()``.

    Returns:
        The number of matching rows.
    """
    return sum(
        1
        for row in getAllCommentsData()
        if row[4].find(hotWord) != -1
    )
|
||||||
|
|
||||||
|
def getHotWordPageCreatedAtCharData(hotWord):
    """Count, per date, the comments whose column 4 contains ``hotWord``.

    Args:
        hotWord: Substring searched for with ``str.find`` in column 4 of
            every row returned by ``getAllCommentsData()``.

    Returns:
        A pair of parallel lists: the distinct column-1 values (dates) of
        the matching comments in order of first appearance, and the number
        of matching comments for each of them.
    """
    countByDate = {}
    for row in getAllCommentsData():
        if row[4].find(hotWord) == -1:
            continue  # comment does not mention the hot word
        day = row[1]
        countByDate[day] = countByDate.get(day, 0) + 1
    return list(countByDate), list(countByDate.values())
|
||||||
|
|
||||||
|
def getCommentFilterData(hotWord):
    """Return the comments whose column 4 contains ``hotWord``.

    Args:
        hotWord: Substring searched for with ``str.find`` in column 4 of
            every row returned by ``getAllCommentsData()``.

    Returns:
        A list of the matching rows, in their original order.
    """
    return [
        row
        for row in getAllCommentsData()
        if row[4].find(hotWord) != -1
    ]
|
||||||
@@ -1,4 +1,7 @@
|
|||||||
from utils.query import query
|
from utils.query import query
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import pandas as pd
|
||||||
sys.path.append('model')
|
sys.path.append('model')
|
||||||
|
|
||||||
cityList = [
|
cityList = [
|
||||||
@@ -73,3 +76,16 @@ def getAllCommentsData():
|
|||||||
def getAllArticleData():
    """Return the result of running ``select * from article`` through ``query``."""
    return query('select * from article', [], 'select')
|
||||||
|
|
||||||
|
def getAllHotWords():
    """Load the hot words and their counts from ``./model/cipingTotal.csv``.

    The CSV is read with ``pandas.read_csv`` (so its first line is taken as
    the header). For every remaining row, the first run of CJK characters
    (U+4E00-U+9FA5) and the first run of digits found in the row's string
    form are extracted.

    Returns:
        A list of ``[word, count]`` pairs, both as strings. Rows lacking
        either a CJK run or a digit run are skipped.
    """
    # Raw-string patterns, compiled once instead of being re-parsed per row.
    wordPattern = re.compile(r'[\u4e00-\u9fa5]+')
    countPattern = re.compile(r'\d+')

    data = []
    df = pd.read_csv('./model/cipingTotal.csv', encoding='utf8')
    for row in df.values:
        rowText = str(row)
        wordMatch = wordPattern.search(rowText)
        countMatch = countPattern.search(rowText)
        # An explicit check replaces the bare except, which hid every error
        # rather than only the "no match" case it was meant to skip.
        if wordMatch is None or countMatch is None:
            continue
        data.append([wordMatch.group(), countMatch.group()])
    return data
|
||||||
+1893
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user