【getHowWordPageData.py】提取热词和相关函数定义

2024-07-02 18:54:21 +08:00
parent d5f19e9d73
commit 2af147fcb2
6 changed files with 2032 additions and 1 deletions
@@ -0,0 +1,94 @@
+from utils.getPublicData import getAllCommentsData,getAllArticleData
+from datetime import datetime
+import jieba
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+# Both tables are fetched once, when this module is imported; the chart
+# helpers below read these module-level lists rather than querying again.
+commentsList = getAllCommentsData()
+articleList = getAllArticleData()
+
+def getHomeTagsData():
+    """Summarise the article table for the home-page tags.
+
+    Returns:
+        A 3-tuple ``(articleCount, topAuthor, topCity)``:
+
+        * ``articleCount`` -- number of rows in ``articleList``.
+        * ``topAuthor`` -- column 11 of the row whose column 1 (like count,
+          per the original comment) is largest; the first such row wins on
+          ties, and '' is returned when no row has a positive count.
+        * ``topCity`` -- the most frequent column-4 value, ignoring the
+          '无' placeholder; '' when no row names a city (the previous
+          implementation raised IndexError in that case).
+    """
+    topLikes = 0
+    topAuthor = ''
+    cityCounts = {}
+    for row in articleList:
+        likes = int(row[1])
+        if likes > topLikes:
+            topLikes = likes
+            topAuthor = row[11]
+        city = row[4]
+        if city != '无':
+            cityCounts[city] = cityCounts.get(city, 0) + 1
+    # max() returns the first-inserted key among equal counts, which is the
+    # same winner a stable descending sort would put at index 0.
+    topCity = max(cityCounts, key=cityCounts.get) if cityCounts else ''
+    return len(articleList), topAuthor, topCity
+
+def getHomeCommentsLikeCountTopFore():
+    """Return the four comments with the largest integer value in column 2.
+
+    The result is ordered from highest to lowest; rows with equal values
+    keep their relative order from ``commentsList``.
+    """
+    def likeCount(comment):
+        return int(comment[2])
+
+    ranked = sorted(commentsList, key=likeCount, reverse=True)
+    return ranked[:4]
+
+def getHomeArticleCreatedAtChart():
+    """Count articles per creation date for the home-page chart.
+
+    Column 7 of each article row is expected to be a ``YYYY-MM-DD`` string.
+
+    Returns:
+        ``(xData, yData)`` where ``xData`` lists the distinct dates from
+        newest to oldest and ``yData[i]`` is the number of articles dated
+        ``xData[i]``.
+
+    Raises:
+        ValueError: if a date string does not match ``%Y-%m-%d``.
+    """
+    # One pass over the rows instead of rescanning every date per article.
+    perDay = {}
+    for row in articleList:
+        perDay[row[7]] = perDay.get(row[7], 0) + 1
+    # Compare the parsed datetimes directly: converting a naive datetime
+    # with .timestamp() goes through the platform's local-time conversion
+    # and can fail for dates it cannot represent.
+    xData = sorted(
+        perDay,
+        key=lambda day: datetime.strptime(day, '%Y-%m-%d'),
+        reverse=True,
+    )
+    yData = [perDay[day] for day in xData]
+    return xData, yData
+
+def getHomeTypeChart():
+    """Count articles per type (column 8 of each article row).
+
+    Returns:
+        A list of ``{'name': <type>, 'value': <count>}`` dicts, in the
+        order each type first appears in ``articleList``.
+    """
+    tally = {}
+    for row in articleList:
+        kind = row[8]
+        tally[kind] = tally.get(kind, 0) + 1
+    return [{'name': kind, 'value': total} for kind, total in tally.items()]
+
+def getHomeCommentCreatedChart():
+    """Count comments per creation date (column 1 of each comment row).
+
+    Returns:
+        A list of ``{'name': <date>, 'value': <count>}`` dicts, in the
+        order each date first appears in ``commentsList``.
+    """
+    perDate = {}
+    for row in commentsList:
+        day = row[1]
+        if day not in perDate:
+            perDate[day] = 0
+        perDate[day] += 1
+    chart = []
+    for day in perDate:
+        chart.append({'name': day, 'value': perDate[day]})
+    return chart
+
+def stopWordList():
+    """Load the stop words from ``./stopWords.txt`` (UTF-8, one per line).
+
+    The path is relative to the current working directory.
+
+    Returns:
+        A list with one entry per line of the file, each stripped of
+        leading and trailing whitespace (blank lines become '').
+
+    Raises:
+        OSError: if the file cannot be opened.
+    """
+    # The context manager closes the handle, which the bare open() never did.
+    with open('./stopWords.txt', encoding='utf8') as stopFile:
+        return [line.strip() for line in stopFile]
+
+def getUserNameWordCloud():
+    """Render a word cloud from column 5 of every comment and save it.
+
+    Column 5 is presumably the commenter's user name (per the original
+    comment) -- verify against the comments table schema. The combined
+    text is segmented with jieba, stop words from ``stopWordList()`` are
+    dropped, and the image is written to ``./static/authorNameCloud.jpg``.
+
+    Returns:
+        None. The only result is the image file on disk.
+    """
+    # A set gives O(1) membership tests for every segmented word.
+    stopWords = set(stopWordList())
+    text = ''.join(comment[5] for comment in commentsList)
+    keptWords = [word for word in jieba.cut(text) if word not in stopWords]
+    wc = WordCloud(
+        width=1000,
+        height=600,
+        background_color='#fff',
+        colormap='Blues',
+        font_path='STHUPO.TTF'
+    )
+    wc.generate_from_text(' '.join(keptWords))
+    fig = plt.figure(1)
+    try:
+        plt.imshow(wc)
+        plt.axis('off')
+        plt.savefig('./static/authorNameCloud.jpg', dpi=500)
+    finally:
+        # Without this, figure 1 stays alive between calls and each call
+        # stacks another image on the same axes.
+        plt.close(fig)
+
@@ -0,0 +1,28 @@
+from utils.getPublicData import *
+
+def getHotWordLen(hotWord):
+    """Count the comments whose column 4 contains ``hotWord``.
+
+    Args:
+        hotWord: substring to look for in column 4 of each comment row.
+
+    Returns:
+        The number of rows from ``getAllCommentsData()`` that match.
+    """
+    rows = getAllCommentsData()
+    return sum(1 for row in rows if row[4].find(hotWord) != -1)
+
+def getHotWordPageCreatedAtCharData(hotWord):
+    """Count, per date, the comments whose column 4 contains ``hotWord``.
+
+    Args:
+        hotWord: substring to look for in column 4 of each comment row.
+
+    Returns:
+        ``(dates, counts)`` -- two parallel lists. ``dates`` holds the
+        distinct column-1 values of the matching rows in first-seen order
+        and ``counts[i]`` is the number of matching rows dated ``dates[i]``.
+    """
+    perDate = {}
+    for row in getAllCommentsData():
+        if row[4].find(hotWord) == -1:
+            continue
+        day = row[1]
+        perDate[day] = perDate.get(day, 0) + 1
+    return list(perDate), list(perDate.values())
+
+def getCommentFilterData(hotWord):
+    """Return every comment row whose column 4 contains ``hotWord``.
+
+    Args:
+        hotWord: substring to look for in column 4 of each comment row.
+
+    Returns:
+        A new list of the matching rows, in the order
+        ``getAllCommentsData()`` returned them.
+    """
+    return [row for row in getAllCommentsData() if row[4].find(hotWord) != -1]
@@ -1,4 +1,7 @@
 from utils.query import query
+import re
+import sys
+import pandas as pd
 sys.path.append('model')

 cityList = [
@@ -72,4 +75,17 @@ def getAllCommentsData():

 def getAllArticleData():
+    # Runs 'select * from article' through the shared query helper and
+    # returns whatever that helper hands back.
     articleList = query('select * from article',[],'select')
-    return articleList
+    return articleList
+
+def getAllHotWords():
+    """Read the hot-word table from ``./model/cipingTotal.csv``.
+
+    Each CSV row is rendered with ``str()`` and searched for its first run
+    of CJK characters (the word) and its first run of digits (presumably
+    the word's frequency -- confirm against the CSV producer). Rows that
+    lack either part are skipped.
+
+    Returns:
+        A list of ``[word, digits]`` pairs, both strings, in file order.
+
+    Raises:
+        OSError: if the CSV file cannot be read.
+    """
+    # Compiled once instead of per row; r'\d+' avoids the invalid-escape
+    # warning that the non-raw '\d+' literal triggers on newer Pythons.
+    wordPattern = re.compile('[\u4e00-\u9fa5]+')
+    countPattern = re.compile(r'\d+')
+    df = pd.read_csv('./model/cipingTotal.csv',encoding='utf8')
+    data = []
+    for row in df.values:
+        rowText = str(row)
+        wordMatch = wordPattern.search(rowText)
+        countMatch = countPattern.search(rowText)
+        # An explicit check replaces the bare except, which hid every error,
+        # not just the missing-match case.
+        if wordMatch is None or countMatch is None:
+            continue
+        data.append([wordMatch.group(), countMatch.group()])
+    return data