This commit is contained in:
juanboy
2024-07-03 19:02:09 +08:00
17 changed files with 112 additions and 42 deletions
+2 -1
View File
@@ -1 +1,2 @@
.conda
.conda
*__pycache__/
Binary file not shown.
Binary file not shown.
+1
View File
@@ -0,0 +1 @@
id,likeNum,commentsLen,reposts_count,region,content,contentLen,created_at,type,detailUrl,authorAvatar,authorName,authorDetail,isVip
1 id likeNum commentsLen reposts_count region content contentLen created_at type detailUrl authorAvatar authorName authorDetail isVip
+9 -7
View File
@@ -1,13 +1,15 @@
from spiderData import spiderData
from spiderContent import start as spiderContentStart
from spiderComments import start as spiderCommentsStart
from saveData import save_to_sql as saveData
def main():
try:
spiderData()
saveData()
print("爬取数据更新")
except:
print("爬取数据失败")
print('正在爬取文章数据')
spiderContentStart(1,1)
print('正在爬取文章评论数据')
spiderCommentsStart()
print('正在存储数据')
saveData()
print("爬取数据更新")
if __name__ == '__main__':
main()
+60
View File
@@ -0,0 +1,60 @@
typeName,gid,containerid
热门,102803,102803
同城,1028032222,102803_2222
榜单,102803600169,102803_ctg1_600169_-_ctg1_600169
男篮,102803600279,102803_ctg1_600279_-_ctg1_600279
明星,1028034288,102803_ctg1_4288_-_ctg1_4288
车展,1028035188,102803_ctg1_5188_-_ctg1_5188
搞笑,1028034388,102803_ctg1_4388_-_ctg1_4388
情感,1028031988,102803_ctg1_1988_-_ctg1_1988
周末,102803600195,102803_ctg1_600195_-_ctg1_600195
电影,1028033288,102803_ctg1_3288_-_ctg1_3288
社会,1028034188,102803_ctg1_4188_-_ctg1_4188
电视剧,1028032488,102803_ctg1_2488_-_ctg1_2488
美食,1028032688,102803_ctg1_2688_-_ctg1_2688
俄乌局势,102803600267,102803_ctg1_600267_-_ctg1_600267
国际,1028036288,102803_ctg1_6288_-_ctg1_6288
深度,102803600155,102803_ctg1_600155_-_ctg1_600155
财经,1028036388,102803_ctg1_6388_-_ctg1_6388
读书,1028034588,102803_ctg1_4588_-_ctg1_4588
摄影,1028034988,102803_ctg1_4988_-_ctg1_4988
颜值,102803600165,102803_ctg1_600165_-_ctg1_600165
体育,1028031388,102803_ctg1_1388_-_ctg1_1388
数码,1028035088,102803_ctg1_5088_-_ctg1_5088
综艺,1028034688,102803_ctg1_4688_-_ctg1_4688
时尚,1028034488,102803_ctg1_4488_-_ctg1_4488
星座,1028031688,102803_ctg1_1688_-_ctg1_1688
军事,1028036688,102803_ctg1_6688_-_ctg1_6688
股市,1028031288,102803_ctg1_1288_-_ctg1_1288
房产,1028035588,102803_ctg1_5588_-_ctg1_5588
家居,1028035888,102803_ctg1_5888_-_ctg1_5888
萌宠,1028032788,102803_ctg1_2788_-_ctg1_2788
科技,1028032088,102803_ctg1_2088_-_ctg1_2088
科普,1028035988,102803_ctg1_5988_-_ctg1_5988
动漫,1028032388,102803_ctg1_2388_-_ctg1_2388
运动健身,1028034788,102803_ctg1_4788_-_ctg1_4788
旅游,1028032588,102803_ctg1_2588_-_ctg1_2588
瘦身,1028036488,102803_ctg1_6488_-_ctg1_6488
好物,102803600094,102803_ctg1_600094_-_ctg1_600094
历史,1028036788,102803_ctg1_6788_-_ctg1_6788
艺术,1028035488,102803_ctg1_5488_-_ctg1_5488
美妆,1028031588,102803_ctg1_1588_-_ctg1_1588
法律,1028037388,102803_ctg1_7388_-_ctg1_7388
设计,1028035388,102803_ctg1_5388_-_ctg1_5388
健康,1028032188,102803_ctg1_2188_-_ctg1_2188
音乐,1028035288,102803_ctg1_5288_-_ctg1_5288
游戏,1028034888,102803_ctg1_4888_-_ctg1_4888
新时代,1028037968,102803_ctg1_7968_-_ctg1_7968
校园,102803600177,102803_ctg1_600177_-_ctg1_600177
收藏,1028038189,102803_ctg1_8189_-_ctg1_8189
政务,1028035788,102803_ctg1_5788_-_ctg1_5788
养生,1028036588,102803_ctg1_6588_-_ctg1_6588
育儿,1028033188,102803_ctg1_3188_-_ctg1_3188
抽奖,102803600037,102803_ctg1_600037_-_ctg1_600037
教育,102803600080,102803_ctg1_600080_-_ctg1_600080
婚恋,1028031788,102803_ctg1_1788_-_ctg1_1788
舞蹈,1028038788,102803_ctg1_8788_-_ctg1_8788
辟谣,1028036988,102803_ctg1_6988_-_ctg1_6988
公益,102803600057,102803_ctg1_600057_-_ctg1_600057
问答,1028037977,102803_ctg1_7977_-_ctg1_7977
三农,1028037188,102803_ctg1_7188_-_ctg1_7188
1 typeName gid containerid
2 热门 102803 102803
3 同城 1028032222 102803_2222
4 榜单 102803600169 102803_ctg1_600169_-_ctg1_600169
5 男篮 102803600279 102803_ctg1_600279_-_ctg1_600279
6 明星 1028034288 102803_ctg1_4288_-_ctg1_4288
7 车展 1028035188 102803_ctg1_5188_-_ctg1_5188
8 搞笑 1028034388 102803_ctg1_4388_-_ctg1_4388
9 情感 1028031988 102803_ctg1_1988_-_ctg1_1988
10 周末 102803600195 102803_ctg1_600195_-_ctg1_600195
11 电影 1028033288 102803_ctg1_3288_-_ctg1_3288
12 社会 1028034188 102803_ctg1_4188_-_ctg1_4188
13 电视剧 1028032488 102803_ctg1_2488_-_ctg1_2488
14 美食 1028032688 102803_ctg1_2688_-_ctg1_2688
15 俄乌局势 102803600267 102803_ctg1_600267_-_ctg1_600267
16 国际 1028036288 102803_ctg1_6288_-_ctg1_6288
17 深度 102803600155 102803_ctg1_600155_-_ctg1_600155
18 财经 1028036388 102803_ctg1_6388_-_ctg1_6388
19 读书 1028034588 102803_ctg1_4588_-_ctg1_4588
20 摄影 1028034988 102803_ctg1_4988_-_ctg1_4988
21 颜值 102803600165 102803_ctg1_600165_-_ctg1_600165
22 体育 1028031388 102803_ctg1_1388_-_ctg1_1388
23 数码 1028035088 102803_ctg1_5088_-_ctg1_5088
24 综艺 1028034688 102803_ctg1_4688_-_ctg1_4688
25 时尚 1028034488 102803_ctg1_4488_-_ctg1_4488
26 星座 1028031688 102803_ctg1_1688_-_ctg1_1688
27 军事 1028036688 102803_ctg1_6688_-_ctg1_6688
28 股市 1028031288 102803_ctg1_1288_-_ctg1_1288
29 房产 1028035588 102803_ctg1_5588_-_ctg1_5588
30 家居 1028035888 102803_ctg1_5888_-_ctg1_5888
31 萌宠 1028032788 102803_ctg1_2788_-_ctg1_2788
32 科技 1028032088 102803_ctg1_2088_-_ctg1_2088
33 科普 1028035988 102803_ctg1_5988_-_ctg1_5988
34 动漫 1028032388 102803_ctg1_2388_-_ctg1_2388
35 运动健身 1028034788 102803_ctg1_4788_-_ctg1_4788
36 旅游 1028032588 102803_ctg1_2588_-_ctg1_2588
37 瘦身 1028036488 102803_ctg1_6488_-_ctg1_6488
38 好物 102803600094 102803_ctg1_600094_-_ctg1_600094
39 历史 1028036788 102803_ctg1_6788_-_ctg1_6788
40 艺术 1028035488 102803_ctg1_5488_-_ctg1_5488
41 美妆 1028031588 102803_ctg1_1588_-_ctg1_1588
42 法律 1028037388 102803_ctg1_7388_-_ctg1_7388
43 设计 1028035388 102803_ctg1_5388_-_ctg1_5388
44 健康 1028032188 102803_ctg1_2188_-_ctg1_2188
45 音乐 1028035288 102803_ctg1_5288_-_ctg1_5288
46 游戏 1028034888 102803_ctg1_4888_-_ctg1_4888
47 新时代 1028037968 102803_ctg1_7968_-_ctg1_7968
48 校园 102803600177 102803_ctg1_600177_-_ctg1_600177
49 收藏 1028038189 102803_ctg1_8189_-_ctg1_8189
50 政务 1028035788 102803_ctg1_5788_-_ctg1_5788
51 养生 1028036588 102803_ctg1_6588_-_ctg1_6588
52 育儿 1028033188 102803_ctg1_3188_-_ctg1_3188
53 抽奖 102803600037 102803_ctg1_600037_-_ctg1_600037
54 教育 102803600080 102803_ctg1_600080_-_ctg1_600080
55 婚恋 1028031788 102803_ctg1_1788_-_ctg1_1788
56 舞蹈 1028038788 102803_ctg1_8788_-_ctg1_8788
57 辟谣 1028036988 102803_ctg1_6988_-_ctg1_6988
58 公益 102803600057 102803_ctg1_600057_-_ctg1_600057
59 问答 1028037977 102803_ctg1_7977_-_ctg1_7977
60 三农 1028037188 102803_ctg1_7188_-_ctg1_7188
-13
View File
@@ -1,13 +0,0 @@
from spiderDataPack.spiderNav import start as spiderNavStart
from spiderDataPack.spiderContent import start as spiderContentStart
from spiderDataPack.spiderComments import start as spiderCommentsStart
import os
def spiderData():
if not os.path.exists('./nav.csv'):
spiderNavStart()
spiderContentStart(1,1)
spiderCommentsStart()
if __name__ == '__main__':
spiderData()
View File
@@ -45,11 +45,9 @@ def parse_json(response):
containerid
])
def start():
if __name__ == '__main__':
init()
url = 'https://weibo.com/ajax/feed/allGroups'
response = get_data(url)
parse_json(response)
if __name__ == '__main__':
start()
parse_json(response)
Binary file not shown.
+37 -16
View File
@@ -1,48 +1,69 @@
import time
from flask import Blueprint, redirect, render_template, request,Flask, session
import hashlib
from flask import Blueprint, redirect, render_template, request, Flask, session
from utils.query import query
from utils.errorResponse import errorResponse
ub = Blueprint('user',
__name__,
url_prefix='/user',
template_folder='templates')
ub = Blueprint('user',__name__,url_prefix='/user',template_folder='templates')
@ub.route('/login',methods=['GET','POST'])
@ub.route('/login', methods=['GET', 'POST'])
def login():
if request.method == 'GET':
return render_template('login.html')
else:
def filter_fn(user):
return request.form['username'] in user and request.form['password'] in user
hash_with_salt = hashlib.sha256('XiaoXueQi2024'.encode('utf-8'))
hash_with_salt.update(request.form['password'].encode('utf-8'))
return request.form[
'username'] in user and hash_with_salt.hexdigest() in user
users = query('select * from user', [], 'select')
login_success = list(filter(filter_fn,users))
if not len(login_success):return errorResponse('账号或密码错误')
login_success = list(filter(filter_fn, users))
if not len(login_success): return errorResponse('账号或密码错误')
session['username'] = request.form['username']
return redirect('/page/home')
@ub.route('/register',methods=['GET','POST'])
@ub.route('/register', methods=['GET', 'POST'])
def register():
if request.method == 'GET':
return render_template('register.html')
else:
if request.form['password'] != request.form['checkPassword']:return errorResponse('两次密码不符合')
if request.form['password'] != request.form['checkPassword']:
return errorResponse('两次密码不符合')
def filter_fn(user):
return request.form['username'] in user
users = query('select * from user',[],'select')
filter_list = list(filter(filter_fn,users))
users = query('select * from user', [], 'select')
filter_list = list(filter(filter_fn, users))
if len(filter_list):
return errorResponse('该用户名已被注册')
else:
time_tuple = time.localtime(time.time())
query('''
hash_with_salt = hashlib.sha256('XiaoXueQi2024'.encode('utf-8'))
hash_with_salt.update(request.form['password'].encode('utf-8'))
query(
'''
insert into user(username,password,createTime) values(%s,%s,%s)
''',[request.form['username'],request.form['password'],str(time_tuple[0]) + '-' + str(time_tuple[1]) + '-' + str(time_tuple[2])])
''', [
request.form['username'],
hash_with_salt.hexdigest(),
str(time_tuple[0]) + '-' + str(time_tuple[1]) + '-' +
str(time_tuple[2])
])
return redirect('/user/login')
@ub.route('/logOut')
def logOut():
session.clear()
return redirect('/user/login')
session.clear()
return redirect('/user/login')