爬虫结束自动打标注
This commit is contained in:
+71
-71
@@ -13,85 +13,85 @@ def getArticleByType(type):
|
||||
articles.append(i)
|
||||
return articles
|
||||
|
||||
def getArticleCharLikeCount(type):
|
||||
def getArticleLikeCount(type):
|
||||
articles = getArticleByType(type)
|
||||
xData = ['0-100','100-1000','1000-5000','5000-15000','15000-30000','30000-50000','50000-~']
|
||||
yData = [0 for x in range(len(xData))]
|
||||
X = ['0-100','100-1000','1000-5000','5000-15000','15000-30000','30000-50000','50000-~']
|
||||
Y = [0 for x in range(len(X))]
|
||||
for article in articles:
|
||||
likeCount = int(article[1])
|
||||
if likeCount < 100:
|
||||
yData[0] += 1
|
||||
Y[0] += 1
|
||||
elif likeCount < 1000:
|
||||
yData[1] += 1
|
||||
Y[1] += 1
|
||||
elif likeCount < 5000:
|
||||
yData[2] += 1
|
||||
Y[2] += 1
|
||||
elif likeCount < 15000:
|
||||
yData[3] += 1
|
||||
Y[3] += 1
|
||||
elif likeCount < 30000:
|
||||
yData[4] += 1
|
||||
Y[4] += 1
|
||||
elif likeCount < 50000:
|
||||
yData[5] += 1
|
||||
Y[5] += 1
|
||||
elif likeCount >= 50000:
|
||||
yData[6] += 1
|
||||
return xData,yData
|
||||
Y[6] += 1
|
||||
return X,Y
|
||||
|
||||
def getArticleCharCommentsLen(type):
|
||||
def getArticleCommentsLen(type):
|
||||
articles = getArticleByType(type)
|
||||
xData = ['0-100','100-500','500-1000','1000-1500','1500-3000','3000-5000','5000-10000','10000-15000','15000-~']
|
||||
yData = [0 for x in range(len(xData))]
|
||||
X = ['0-100','100-500','500-1000','1000-1500','1500-3000','3000-5000','5000-10000','10000-15000','15000-~']
|
||||
Y = [0 for x in range(len(X))]
|
||||
for article in articles:
|
||||
commentLen = int(article[2])
|
||||
if commentLen < 100:
|
||||
yData[0] += 1
|
||||
Y[0] += 1
|
||||
elif commentLen < 500:
|
||||
yData[1] += 1
|
||||
Y[1] += 1
|
||||
elif commentLen < 5000:
|
||||
yData[2] += 1
|
||||
Y[2] += 1
|
||||
elif commentLen < 1000:
|
||||
yData[3] += 1
|
||||
Y[3] += 1
|
||||
elif commentLen < 1500:
|
||||
yData[4] += 1
|
||||
Y[4] += 1
|
||||
elif commentLen < 3000:
|
||||
yData[5] += 1
|
||||
Y[5] += 1
|
||||
elif commentLen < 5000:
|
||||
yData[6] += 1
|
||||
Y[6] += 1
|
||||
elif commentLen < 10000:
|
||||
yData[7] += 1
|
||||
Y[7] += 1
|
||||
elif commentLen >= 15000:
|
||||
yData[8] += 1
|
||||
return xData,yData
|
||||
Y[8] += 1
|
||||
return X,Y
|
||||
|
||||
def getArticleCharRepotsLen(type):
|
||||
def getArticleRepotsLen(type):
|
||||
articles = getArticleByType(type)
|
||||
xData = ['0-100','100-300','300-500','500-1000','1000-2000','2000-3000','3000-4000','4000-5000','5000-10000','10000-15000','15000-30000','30000-70000','70000-~']
|
||||
yData = [0 for x in range(len(xData))]
|
||||
X = ['0-100','100-300','300-500','500-1000','1000-2000','2000-3000','3000-4000','4000-5000','5000-10000','10000-15000','15000-30000','30000-70000','70000-~']
|
||||
Y = [0 for x in range(len(X))]
|
||||
for article in articles:
|
||||
repostsCount = int(article[3])
|
||||
if repostsCount < 100:
|
||||
yData[0] += 1
|
||||
Y[0] += 1
|
||||
elif repostsCount < 300:
|
||||
yData[1] += 1
|
||||
Y[1] += 1
|
||||
elif repostsCount < 500:
|
||||
yData[2] += 1
|
||||
Y[2] += 1
|
||||
elif repostsCount < 1000:
|
||||
yData[3] += 1
|
||||
Y[3] += 1
|
||||
elif repostsCount < 3000:
|
||||
yData[4] += 1
|
||||
Y[4] += 1
|
||||
elif repostsCount < 4000:
|
||||
yData[5] += 1
|
||||
Y[5] += 1
|
||||
elif repostsCount < 5000:
|
||||
yData[6] += 1
|
||||
Y[6] += 1
|
||||
elif repostsCount < 10000:
|
||||
yData[7] += 1
|
||||
Y[7] += 1
|
||||
elif repostsCount < 15000:
|
||||
yData[8] += 1
|
||||
Y[8] += 1
|
||||
elif repostsCount < 30000:
|
||||
yData[9] += 1
|
||||
Y[9] += 1
|
||||
elif repostsCount < 70000:
|
||||
yData[10] += 1
|
||||
Y[10] += 1
|
||||
elif repostsCount >= 70000:
|
||||
yData[11] += 1
|
||||
return xData,yData
|
||||
Y[11] += 1
|
||||
return X,Y
|
||||
|
||||
def getIPCharByArticleRegion():
|
||||
articleRegionDic = {}
|
||||
@@ -125,26 +125,26 @@ def getIPCharByCommentsRegion():
|
||||
})
|
||||
return resultData
|
||||
|
||||
def getCommentCharDataOne():
|
||||
xData = []
|
||||
def getCommentDataOne():
|
||||
X = []
|
||||
rangeNum = 20
|
||||
for item in range(100):
|
||||
xData.append(str(rangeNum * item) + '-' + str(rangeNum * (item + 1)))
|
||||
yData = [0 for x in range(len(xData))]
|
||||
X.append(str(rangeNum * item) + '-' + str(rangeNum * (item + 1)))
|
||||
Y = [0 for x in range(len(X))]
|
||||
for comment in commentList:
|
||||
for item in range(100):
|
||||
if int(comment[2]) < rangeNum * (item + 1):
|
||||
yData[item] += 1
|
||||
Y[item] += 1
|
||||
break
|
||||
return xData,yData
|
||||
return X,Y
|
||||
|
||||
def getCommentCharDataTwo():
|
||||
def getCommentDataTwo():
|
||||
genderDic = {}
|
||||
for i in commentList:
|
||||
if genderDic.get(i[6],-1) == -1:
|
||||
genderDic[i[6]] = 1
|
||||
else:
|
||||
if i[6] in genderDic.keys():
|
||||
genderDic[i[6]] += 1
|
||||
else:
|
||||
genderDic[i[6]] = 1
|
||||
resultData = [{
|
||||
'name':x[0],
|
||||
'value':x[1]
|
||||
@@ -153,50 +153,50 @@ def getCommentCharDataTwo():
|
||||
|
||||
def getYuQingCharDataOne():
|
||||
hotWordList = getAllHotWords()
|
||||
xData = ['正面','中性','负面']
|
||||
yData = [0,0,0]
|
||||
X = ['正面','中性','负面']
|
||||
Y = [0,0,0]
|
||||
for word in hotWordList:
|
||||
emotionValue = SnowNLP(word[0]).sentiments
|
||||
if emotionValue > 0.4:
|
||||
yData[0] += 1
|
||||
Y[0] += 1
|
||||
elif emotionValue < 0.2:
|
||||
yData[2] += 1
|
||||
Y[2] += 1
|
||||
else:
|
||||
yData[1] += 1
|
||||
bieData = [{
|
||||
Y[1] += 1
|
||||
finaldata = [{
|
||||
'name':x,
|
||||
'value':yData[index]
|
||||
} for index,x in enumerate(xData)]
|
||||
return xData,yData,bieData
|
||||
'value':Y[index]
|
||||
} for index,x in enumerate(X)]
|
||||
return X,Y,finaldata
|
||||
|
||||
def getYuQingCharDataTwo():
|
||||
xData = ['正面', '中性', '负面']
|
||||
bieData1 = [{
|
||||
X = ['正面', '中性', '负面']
|
||||
finaldata1 = [{
|
||||
'name':x,
|
||||
'value':0
|
||||
} for x in xData]
|
||||
bieData2 = [{
|
||||
} for x in X]
|
||||
finaldata2 = [{
|
||||
'name': x,
|
||||
'value': 0
|
||||
} for x in xData]
|
||||
} for x in X]
|
||||
|
||||
for comment in commentList:
|
||||
emotionValue = SnowNLP(comment[4]).sentiments
|
||||
if emotionValue > 0.4:
|
||||
bieData1[0]['value'] += 1
|
||||
finaldata1[0]['value'] += 1
|
||||
elif emotionValue < 0.2:
|
||||
bieData1[2]['value'] += 1
|
||||
finaldata1[2]['value'] += 1
|
||||
else:
|
||||
bieData1[1]['value'] += 1
|
||||
finaldata1[1]['value'] += 1
|
||||
for artile in articleList:
|
||||
emotionValue = SnowNLP(artile[5]).sentiments
|
||||
if emotionValue > 0.4:
|
||||
bieData2[0]['value'] += 1
|
||||
finaldata2[0]['value'] += 1
|
||||
elif emotionValue < 0.2:
|
||||
bieData2[2]['value'] += 1
|
||||
finaldata2[2]['value'] += 1
|
||||
else:
|
||||
bieData2[1]['value'] += 1
|
||||
return bieData1,bieData2
|
||||
finaldata2[1]['value'] += 1
|
||||
return finaldata1,finaldata2
|
||||
|
||||
def getYuQingCharDataThree():
|
||||
hotWordList = getAllHotWords()
|
||||
|
||||
@@ -27,14 +27,14 @@ def getHomeCommentsLikeCountTopFore():# 获取评论中点赞最高的前四条
|
||||
return list(sorted(commentsList,key=lambda x:int(x[2]),reverse=True))[:4]
|
||||
|
||||
def getHomeArticleCreatedAtChart():# 根据日期分别计算该日期的文章数
|
||||
xData = list(set([x[7] for x in articleList]))
|
||||
xData = list(sorted(xData,key=lambda x:datetime.strptime(x,'%Y-%m-%d').timestamp(),reverse=True))
|
||||
yData = [0 for x in range(len(xData))]
|
||||
X = list(set([x[7] for x in articleList]))
|
||||
X = list(sorted(X,key=lambda x:datetime.strptime(x,'%Y-%m-%d').timestamp(),reverse=True))
|
||||
Y = [0 for x in range(len(X))]
|
||||
for article in articleList:
|
||||
for index,j in enumerate(xData):# 返回索引和值
|
||||
for index,j in enumerate(X):# 返回索引和值
|
||||
if article[7] == j:
|
||||
yData[index] += 1
|
||||
return xData,yData
|
||||
Y[index] += 1
|
||||
return X,Y
|
||||
|
||||
def getHomeTypeChart():# 统计每种类型的文章数量
|
||||
typeDic = {}
|
||||
|
||||
@@ -50,9 +50,9 @@ def getTopicData():
|
||||
# 读取合并文件 merge.csv # 取前十个话题
|
||||
top_10_topics = pd.read_csv('./merged_topics.csv').head(10)
|
||||
# 获取话题名称和对应的值
|
||||
xData = top_10_topics['name'].tolist()
|
||||
yData = top_10_topics['value'].tolist()
|
||||
return xData, yData
|
||||
X = top_10_topics['name'].tolist()
|
||||
Y = top_10_topics['value'].tolist()
|
||||
return X, Y
|
||||
|
||||
def getTopicCreatedAtandpredictData(topic):# 统计特定话题的评论在每个日期的数量,并返回日期和对应的评论数量
|
||||
createdAt = {}
|
||||
|
||||
+25
-25
@@ -18,7 +18,7 @@ def home():
|
||||
username = session.get('username')
|
||||
articleLenMax, likeCountMaxAuthorName, cityMax = getHomeTagsData()
|
||||
commentsLikeCountTopFore = getHomeCommentsLikeCountTopFore()
|
||||
xData, yData = getHomeArticleCreatedAtChart()
|
||||
X, Y = getHomeArticleCreatedAtChart()
|
||||
typeChart = getHomeTypeChart()
|
||||
createAtChart = getHomeCommentCreatedChart()
|
||||
# getUserNameWordCloud()
|
||||
@@ -28,8 +28,8 @@ def home():
|
||||
likeCountMaxAuthorName=likeCountMaxAuthorName,
|
||||
cityMax=cityMax,
|
||||
commentsLikeCountTopFore=commentsLikeCountTopFore,
|
||||
xData=xData,
|
||||
yData=yData,
|
||||
X=X,
|
||||
Y=Y,
|
||||
typeChart=typeChart,
|
||||
createAtChart=createAtChart)
|
||||
|
||||
@@ -42,7 +42,7 @@ def hotWord():
|
||||
if request.args.get('hotWord'):
|
||||
defaultHotWord = request.args.get('hotWord')
|
||||
hotWordLen = getHotWordLen(defaultHotWord)
|
||||
xData, yData = getHotWordPageCreatedAtCharData(defaultHotWord)
|
||||
X, Y = getHotWordPageCreatedAtCharData(defaultHotWord)
|
||||
sentences = ''
|
||||
value = SnowNLP(defaultHotWord).sentiments
|
||||
if value == 0.5:
|
||||
@@ -59,8 +59,8 @@ def hotWord():
|
||||
defaultHotWord=defaultHotWord,
|
||||
hotWordLen=hotWordLen,
|
||||
sentences=sentences,
|
||||
xData=xData,
|
||||
yData=yData,
|
||||
X=X,
|
||||
Y=Y,
|
||||
comments=comments)
|
||||
|
||||
|
||||
@@ -72,7 +72,7 @@ def hotTopic():
|
||||
if request.args.get('topic'):
|
||||
defaultTopic = request.args.get('topic')
|
||||
topicLen = getTopicLen(defaultTopic)
|
||||
xData, yData = getTopicPageCreatedAtCharData()
|
||||
X, Y = getTopicPageCreatedAtCharData()
|
||||
sentences = ''
|
||||
|
||||
# ... 这里要嵌入 topic 相关内容(热度?)来填充 sentences
|
||||
@@ -84,8 +84,8 @@ def hotTopic():
|
||||
defaultTopic=defaultTopic,
|
||||
topicLen=topicLen,
|
||||
sentences=sentences,
|
||||
xData=xData,
|
||||
yData=yData,
|
||||
X=X,
|
||||
Y=Y,
|
||||
comments=comments)
|
||||
|
||||
|
||||
@@ -107,15 +107,15 @@ def articleChar():
|
||||
typeList = getTypeList()
|
||||
defaultType = typeList[0]
|
||||
if request.args.get('type'): defaultType = request.args.get('type')
|
||||
xData, yData = getArticleCharLikeCount(defaultType)
|
||||
x1Data, y1Data = getArticleCharCommentsLen(defaultType)
|
||||
x2Data, y2Data = getArticleCharRepotsLen(defaultType)
|
||||
X, Y = getArticleLikeCount(defaultType)
|
||||
x1Data, y1Data = getArticleCommentsLen(defaultType)
|
||||
x2Data, y2Data = getArticleRepotsLen(defaultType)
|
||||
return render_template('articleChar.html',
|
||||
username=username,
|
||||
typeList=typeList,
|
||||
defaultType=defaultType,
|
||||
xData=xData,
|
||||
yData=yData,
|
||||
X=X,
|
||||
Y=Y,
|
||||
x1Data=x1Data,
|
||||
y1Data=y1Data,
|
||||
x2Data=x2Data,
|
||||
@@ -136,28 +136,28 @@ def ipChar():
|
||||
@pb.route('/commentChar')
|
||||
def commentChar():
|
||||
username = session.get('username')
|
||||
xData, yData = getCommentCharDataOne()
|
||||
genderPieData = getCommentCharDataTwo()
|
||||
X, Y = getCommentDataOne()
|
||||
genderPieData = getCommentDataTwo()
|
||||
return render_template('commentChar.html',
|
||||
username=username,
|
||||
xData=xData,
|
||||
yData=yData,
|
||||
X=X,
|
||||
Y=Y,
|
||||
genderPieData=genderPieData)
|
||||
|
||||
|
||||
@pb.route('/yuqingChar')
|
||||
def yuqingChar():
|
||||
username = session.get('username')
|
||||
xData, yData, bieData = getYuQingCharDataOne()
|
||||
bieData1, bieData2 = getYuQingCharDataTwo()
|
||||
X, Y, finaldata = getYuQingCharDataOne()
|
||||
finaldata1, finaldata2 = getYuQingCharDataTwo()
|
||||
x1Data, y1Data = getYuQingCharDataThree()
|
||||
return render_template('yuqingChar.html',
|
||||
username=username,
|
||||
xData=xData,
|
||||
yData=yData,
|
||||
bieData=bieData,
|
||||
bieData1=bieData1,
|
||||
bieData2=bieData2,
|
||||
X=X,
|
||||
Y=Y,
|
||||
finaldata=finaldata,
|
||||
finaldata1=finaldata1,
|
||||
finaldata2=finaldata2,
|
||||
x1Data=x1Data,
|
||||
y1Data=y1Data)
|
||||
|
||||
|
||||
@@ -234,7 +234,7 @@
|
||||
series: [
|
||||
{
|
||||
type: 'treemap',
|
||||
data: {{ bieData | tojson }}
|
||||
data: {{ finaldata | tojson }}
|
||||
}
|
||||
]
|
||||
};
|
||||
@@ -272,7 +272,7 @@
|
||||
labelLine: {
|
||||
show: false
|
||||
},
|
||||
data: {{ bieData1 | tojson }}
|
||||
data: {{ finaldata1 | tojson }}
|
||||
},
|
||||
{
|
||||
name: '文章舆情结果',
|
||||
@@ -313,7 +313,7 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
data: {{ bieData2 | tojson }}
|
||||
data: {{ finaldata2 | tojson }}
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user