Merge pull request #15 from zhaisang111/main
Optimized the getEchartsData.py script, improving code efficiency and…
This commit is contained in:
+101
-155
@@ -1,209 +1,155 @@
|
||||
from collections import Counter  # Counting occurrences (regions, genders, sentiments)

from utils.getPublicData import *  # Data-retrieval helpers (getAllArticleData, getAllCommentsData, getAllHotWords)
from utils.mynlp import SnowNLP  # Sentiment scoring

# Loaded once at import time; the chart helpers in this module read these
# module-level lists instead of fetching the data again on every call.
articleList = getAllArticleData()
commentList = getAllCommentsData()
|
||||
|
||||
def getTypeList():
    """Return the distinct article types found in ``articleList``.

    The type is read from column 8 of every article row. Duplicates are
    removed through a set, so the order of the returned list is not
    guaranteed.
    """
    # Use the module-level list rather than fetching all articles again.
    return list({article[8] for article in articleList})
|
||||
|
||||
def getArticleByType(type):
    """Return the rows of ``articleList`` whose type column equals ``type``.

    Args:
        type: Value compared against column 8 of each article row. The
            name shadows the builtin but is kept so existing callers that
            pass it by keyword keep working.

    Returns:
        A new list with the matching rows, in their original order.
    """
    return [article for article in articleList if article[8] == type]
|
||||
|
||||
def getArticleLikeCount(type):
    """Count the articles of one type per like-count bucket.

    Args:
        type: Article type, forwarded to ``getArticleByType``.

    Returns:
        ``(X, Y)`` where ``X`` holds the bucket labels and ``Y`` the number
        of articles whose like count (column 1, converted with ``int``)
        falls into the bucket at the same position.
    """
    articles = getArticleByType(type)
    # Half-open [lower, upper) ranges; the last one has no upper bound.
    intervals = [(0, 100), (100, 1000), (1000, 5000), (5000, 15000),
                 (15000, 30000), (30000, 50000), (50000, float('inf'))]
    X = ['0-100', '100-1000', '1000-5000', '5000-15000', '15000-30000',
         '30000-50000', '50000-~']
    Y = [0] * len(intervals)
    for article in articles:
        likeCount = int(article[1])
        # A negative value matches no range and is left uncounted.
        for i, (lower, upper) in enumerate(intervals):
            if lower <= likeCount < upper:
                Y[i] += 1
                break  # Ranges do not overlap, so the first match is final.
    return X, Y
|
||||
|
||||
def getArticleCommentsLen(type):
    """Count the articles of one type per comment-length bucket.

    Args:
        type: Article type, forwarded to ``getArticleByType``.

    Returns:
        ``(X, Y)`` where ``X`` holds the bucket labels and ``Y`` the number
        of articles whose column 2 value (converted with ``int``) falls
        into the bucket at the same position.
    """
    articles = getArticleByType(type)
    # Half-open [lower, upper) ranges in ascending order. Keeping them in a
    # table avoids the mis-ordered thresholds a hand-written ladder invites.
    intervals = [(0, 100), (100, 500), (500, 1000), (1000, 1500),
                 (1500, 3000), (3000, 5000), (5000, 10000),
                 (10000, 15000), (15000, float('inf'))]
    X = ['0-100', '100-500', '500-1000', '1000-1500', '1500-3000',
         '3000-5000', '5000-10000', '10000-15000', '15000-~']
    Y = [0] * len(intervals)
    for article in articles:
        commentLen = int(article[2])
        # A negative value matches no range and is left uncounted.
        for i, (lower, upper) in enumerate(intervals):
            if lower <= commentLen < upper:
                Y[i] += 1
                break  # Ranges do not overlap, so the first match is final.
    return X, Y
|
||||
|
||||
def getArticleRepotsLen(type):
    """Count the articles of one type per repost-count bucket.

    Args:
        type: Article type, forwarded to ``getArticleByType``.

    Returns:
        ``(X, Y)`` where ``X`` holds the bucket labels and ``Y`` the number
        of articles whose repost count (column 3, converted with ``int``)
        falls into the bucket at the same position.
    """
    articles = getArticleByType(type)
    # Half-open [lower, upper) ranges, one per label in X. Every label has
    # its own range, so no bucket is skipped or shifted.
    intervals = [(0, 100), (100, 300), (300, 500), (500, 1000),
                 (1000, 2000), (2000, 3000), (3000, 4000),
                 (4000, 5000), (5000, 10000), (10000, 15000),
                 (15000, 30000), (30000, 70000), (70000, float('inf'))]
    X = ['0-100', '100-300', '300-500', '500-1000', '1000-2000', '2000-3000',
         '3000-4000', '4000-5000', '5000-10000', '10000-15000', '15000-30000',
         '30000-70000', '70000-~']
    Y = [0] * len(intervals)
    for article in articles:
        repostsCount = int(article[3])
        # A negative value matches no range and is left uncounted.
        for i, (lower, upper) in enumerate(intervals):
            if lower <= repostsCount < upper:
                Y[i] += 1
                break  # Ranges do not overlap, so the first match is final.
    return X, Y
|
||||
|
||||
def getIPByArticleRegion():
    """Count the articles per region.

    The region is read from column 4 of every row in ``articleList``; rows
    whose region is the placeholder '无' are skipped.

    Returns:
        A list of ``{'name': region, 'value': count}`` dicts, one per
        region, in order of first appearance.
    """
    region_counts = Counter(
        article[4] for article in articleList if article[4] != '无'
    )
    return [{'name': region, 'value': count}
            for region, count in region_counts.items()]
|
||||
|
||||
def getIPByCommentsRegion():
    """Count the comments per region.

    The region is read from column 3 of every row in ``commentList``; rows
    whose region is the placeholder '无' are skipped.

    Returns:
        A list of ``{'name': region, 'value': count}`` dicts, one per
        region, in order of first appearance.
    """
    region_counts = Counter(
        comment[3] for comment in commentList if comment[3] != '无'
    )
    return [{'name': region, 'value': count}
            for region, count in region_counts.items()]
|
||||
|
||||
def getCommentDataOne():
    """Build a histogram of the numeric value in column 2 of every comment.

    The range 0-2000 is split into 100 buckets of width 20. Each comment is
    counted in exactly one bucket; values below 0 or at/above 2000 are not
    counted.
    NOTE(review): what column 2 represents is not visible here — confirm
    against the comment data schema.

    Returns:
        ``(X, Y)`` where ``X`` holds the labels ('0-20', '20-40', ...) and
        ``Y`` the number of comments in the bucket at the same position.
    """
    rangeNum = 20
    bucketCount = 100
    X = [f"{rangeNum * i}-{rangeNum * (i + 1)}" for i in range(bucketCount)]
    Y = [0] * bucketCount
    for comment in commentList:
        # Floor division maps the value straight to its bucket index, so no
        # scan over all buckets is needed.
        index = int(comment[2]) // rangeNum
        if 0 <= index < bucketCount:
            Y[index] += 1
    return X, Y
|
||||
|
||||
def getCommentDataTwo():
    """Count the comments per gender.

    The gender is read from column 6 of every row in ``commentList``.

    Returns:
        A list of ``{'name': gender, 'value': count}`` dicts, one per
        distinct value, in order of first appearance.
    """
    gender_counts = Counter(comment[6] for comment in commentList)
    return [{'name': gender, 'value': count}
            for gender, count in gender_counts.items()]
|
||||
|
||||
def getYuQingCharDataOne():
    """Classify every hot word by sentiment and count each class.

    Each word (column 0 of the rows returned by ``getAllHotWords``) is
    scored with ``SnowNLP(...).sentiments``. A score above 0.4 counts as
    '正面', a score below 0.2 as '负面', anything else as '中性'.

    Returns:
        ``(X, Y, biedata)`` where ``X`` is the list of class labels, ``Y``
        the count per label in the same order, and ``biedata`` the same
        counts as ``{'name': label, 'value': count}`` dicts.
    """
    hotWordList = getAllHotWords()
    sentiments = []
    for word in hotWordList:
        emotionValue = SnowNLP(word[0]).sentiments
        if emotionValue > 0.4:
            sentiments.append('正面')
        elif emotionValue < 0.2:
            sentiments.append('负面')
        else:
            sentiments.append('中性')
    counts = Counter(sentiments)
    X = ['正面', '中性', '负面']
    # .get keeps a label at 0 when no word fell into that class.
    Y = [counts.get(sentiment, 0) for sentiment in X]
    biedata = [{'name': x, 'value': y} for x, y in zip(X, Y)]
    return X, Y, biedata
|
||||
|
||||
def _sentimentPieData(texts):
    """Score each text with SnowNLP and return per-class counts as pie data."""
    sentiments = []
    for text in texts:
        emotionValue = SnowNLP(text).sentiments
        if emotionValue > 0.4:
            sentiments.append('正面')
        elif emotionValue < 0.2:
            sentiments.append('负面')
        else:
            sentiments.append('中性')
    counts = Counter(sentiments)
    # Fixed label order; .get keeps a label at 0 when no text fell into it.
    return [{'name': label, 'value': counts.get(label, 0)}
            for label in ['正面', '中性', '负面']]


def getYuQingCharDataTwo():
    """Return sentiment pie data for all comments and for all articles.

    Comment text is read from column 4 of ``commentList`` rows and article
    text from column 5 of ``articleList`` rows. A score above 0.4 counts as
    '正面', a score below 0.2 as '负面', anything else as '中性'.

    Returns:
        ``(biedata1, biedata2)``: the comment counts and the article
        counts, each a list of ``{'name': label, 'value': count}`` dicts.
    """
    biedata1 = _sentimentPieData(comment[4] for comment in commentList)
    biedata2 = _sentimentPieData(article[5] for article in articleList)
    return biedata1, biedata2
|
||||
|
||||
def getYuQingCharDataThree():
    """Return the words and counts of the first ten hot-word rows.

    NOTE(review): this yields the "top 10" only if ``getAllHotWords``
    returns its rows already sorted by count — confirm.

    Returns:
        ``(x1Data, y1Data)``: the words (column 0) and their counts
        (column 1, converted with ``int``), in the same order.
    """
    topWords = getAllHotWords()[:10]
    x1Data = [word[0] for word in topWords]
    y1Data = [int(word[1]) for word in topWords]
    return x1Data, y1Data
|
||||
Reference in New Issue
Block a user