爬虫结束自动打标注

This commit is contained in:
juanboy
2024-07-04 19:22:17 +08:00
parent 59b18fa5a2
commit d139169e09
5 changed files with 108 additions and 108 deletions
+71 -71
View File
@@ -13,85 +13,85 @@ def getArticleByType(type):
articles.append(i)
return articles
def getArticleLikeCount(type):
    """Bucket the articles of one type by their like count.

    ``type`` (the name shadows the builtin, but it is part of the existing
    signature) is handed straight to ``getArticleByType``.  The like count
    is read from index 1 of every returned row and converted with ``int``.

    Returns a ``(labels, counts)`` pair of equal-length lists in which
    ``counts[i]`` is the number of articles that fall into ``labels[i]``.
    """
    rows = getArticleByType(type)
    labels = ['0-100','100-1000','1000-5000','5000-15000','15000-30000','30000-50000','50000-~']
    # Exclusive upper bound of every bucket except the open-ended last one.
    upper_bounds = (100, 1000, 5000, 15000, 30000, 50000)
    counts = [0] * len(labels)
    for row in rows:
        likes = int(row[1])
        # Default to the open-ended bucket; overwritten by the first bound
        # that the value stays below.
        slot = len(upper_bounds)
        for position, bound in enumerate(upper_bounds):
            if likes < bound:
                slot = position
                break
        counts[slot] += 1
    return labels, counts
def getArticleCommentsLen(type):
    """Bucket the articles of one type by their comment count.

    ``type`` (the name shadows the builtin, but it is part of the existing
    signature) is handed straight to ``getArticleByType``.  The comment
    count is read from index 2 of every returned row and converted with
    ``int``.

    Returns a ``(labels, counts)`` pair of equal-length lists in which
    ``counts[i]`` is the number of articles that fall into ``labels[i]``.

    The previous if/elif chain tested ``< 5000`` where ``< 1000`` was meant,
    which shifted every later bucket by one, made some branches unreachable
    and silently dropped values between 10000 and 14999.  The bounds below
    are derived directly from the labels so the two cannot drift apart.
    """
    from bisect import bisect_right  # local import: keeps the block self-contained

    rows = getArticleByType(type)
    labels = ['0-100','100-500','500-1000','1000-1500','1500-3000','3000-5000','5000-10000','10000-15000','15000-~']
    # Exclusive upper bound of every bucket except the open-ended last one.
    upper_bounds = (100, 500, 1000, 1500, 3000, 5000, 10000, 15000)
    counts = [0] * len(labels)
    for row in rows:
        # bisect_right returns how many bounds are <= the value, which is
        # exactly the index of the bucket the value belongs to (a value
        # equal to a bound lands in the bucket that starts at that bound).
        counts[bisect_right(upper_bounds, int(row[2]))] += 1
    return labels, counts
def getArticleRepotsLen(type):
    """Bucket the articles of one type by their repost count.

    ``type`` (the name shadows the builtin, but it is part of the existing
    signature) is handed straight to ``getArticleByType``.  The repost
    count is read from index 3 of every returned row and converted with
    ``int``.

    Returns a ``(labels, counts)`` pair of equal-length lists in which
    ``counts[i]`` is the number of articles that fall into ``labels[i]``.

    The previous if/elif chain had no ``< 2000`` branch, so every bucket
    from '1000-2000' onwards was shifted by one and the final '70000-~'
    bucket was never incremented.  The bounds below are derived directly
    from the labels so the two cannot drift apart.
    """
    from bisect import bisect_right  # local import: keeps the block self-contained

    rows = getArticleByType(type)
    labels = ['0-100','100-300','300-500','500-1000','1000-2000','2000-3000','3000-4000','4000-5000','5000-10000','10000-15000','15000-30000','30000-70000','70000-~']
    # Exclusive upper bound of every bucket except the open-ended last one.
    upper_bounds = (100, 300, 500, 1000, 2000, 3000, 4000, 5000, 10000, 15000, 30000, 70000)
    counts = [0] * len(labels)
    for row in rows:
        # bisect_right returns how many bounds are <= the value, which is
        # exactly the index of the bucket the value belongs to.
        counts[bisect_right(upper_bounds, int(row[3]))] += 1
    return labels, counts
def getIPCharByArticleRegion():
articleRegionDic = {}
@@ -125,26 +125,26 @@ def getIPCharByCommentsRegion():
})
return resultData
def getCommentDataOne():
    """Build a 100-bucket histogram over index 2 of every comment row.

    Buckets are 20 wide and labelled '0-20', '20-40', ... '1980-2000'.
    Rows are taken from the module-level ``commentList`` and the value is
    converted with ``int``.  Values of 2000 or more match no bucket and are
    not counted; values below 20 (including negatives) land in the first
    bucket.

    Returns a ``(labels, counts)`` pair of equal-length lists.
    """
    width = 20
    bucket_total = 100
    labels = [f"{width * step}-{width * (step + 1)}" for step in range(bucket_total)]
    counts = [0] * bucket_total
    for row in commentList:
        value = int(row[2])
        # Floor division picks the first bucket whose upper edge exceeds the
        # value; negatives are clamped into bucket 0, matching a linear scan
        # for the first edge greater than the value.
        slot = max(value // width, 0)
        if slot < bucket_total:
            counts[slot] += 1
    return labels, counts
def getCommentCharDataTwo():
def getCommentDataTwo():
genderDic = {}
for i in commentList:
if genderDic.get(i[6],-1) == -1:
genderDic[i[6]] = 1
else:
if i[6] in genderDic.keys():
genderDic[i[6]] += 1
else:
genderDic[i[6]] = 1
resultData = [{
'name':x[0],
'value':x[1]
@@ -153,50 +153,50 @@ def getCommentCharDataTwo():
def getYuQingCharDataOne():
    """Count hot words per sentiment class.

    Every entry returned by ``getAllHotWords`` has its first element scored
    with ``SnowNLP(...).sentiments``.  A score above 0.4 counts as the first
    label, a score below 0.2 as the third label, and anything else as the
    middle label.

    Returns ``(labels, counts, pairs)`` where ``pairs`` is a list of
    ``{'name': label, 'value': count}`` dicts in label order.
    """
    words = getAllHotWords()
    labels = ['正面','中性','负面']
    counts = [0] * 3
    for entry in words:
        score = SnowNLP(entry[0]).sentiments
        if score > 0.4:
            slot = 0
        elif score < 0.2:
            slot = 2
        else:
            slot = 1
        counts[slot] += 1
    pairs = []
    for label, count in zip(labels, counts):
        pairs.append({'name': label, 'value': count})
    return labels, counts, pairs
def getYuQingCharDataTwo():
    """Count comments and articles per sentiment class.

    Index 4 of every row in the module-level ``commentList`` and index 5 of
    every row in the module-level ``articleList`` is scored with
    ``SnowNLP(...).sentiments``.  A score above 0.4 counts as the first
    label, a score below 0.2 as the third label, and anything else as the
    middle label.

    Returns ``(comment_pairs, article_pairs)``; each is a list of
    ``{'name': label, 'value': count}`` dicts in label order.
    """
    labels = ['正面', '中性', '负面']

    def tally(rows, text_index):
        # Score one column of ``rows`` and return the per-label summary.
        counts = [0, 0, 0]
        for row in rows:
            score = SnowNLP(row[text_index]).sentiments
            if score > 0.4:
                counts[0] += 1
            elif score < 0.2:
                counts[2] += 1
            else:
                counts[1] += 1
        return [{'name': label, 'value': count} for label, count in zip(labels, counts)]

    # Comments are processed before articles, as in the original loops.
    comment_pairs = tally(commentList, 4)
    article_pairs = tally(articleList, 5)
    return comment_pairs, article_pairs
def getYuQingCharDataThree():
hotWordList = getAllHotWords()