修复爬虫bug
This commit is contained in:
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
id,likeNum,commentsLen,reposts_count,region,content,contentLen,created_at,type,detailUrl,authorAvatar,authorName,authorDetail,isVip
|
||||||
|
+9
-7
@@ -1,13 +1,15 @@
|
|||||||
from spiderData import spiderData
|
from spiderContent import start as spiderContentStart
|
||||||
|
from spiderComments import start as spiderCommentsStart
|
||||||
from saveData import save_to_sql as saveData
|
from saveData import save_to_sql as saveData
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
try:
|
print('正在爬取文章数据')
|
||||||
spiderData()
|
spiderContentStart(1,1)
|
||||||
saveData()
|
print('正在爬取文章评论数据')
|
||||||
print("爬取数据更新")
|
spiderCommentsStart()
|
||||||
except:
|
print('正在存储数据')
|
||||||
print("爬取数据失败")
|
saveData()
|
||||||
|
print("爬取数据更新")
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
@@ -0,0 +1,60 @@
|
|||||||
|
typeName,gid,containerid
|
||||||
|
热门,102803,102803
|
||||||
|
同城,1028032222,102803_2222
|
||||||
|
榜单,102803600169,102803_ctg1_600169_-_ctg1_600169
|
||||||
|
男篮,102803600279,102803_ctg1_600279_-_ctg1_600279
|
||||||
|
明星,1028034288,102803_ctg1_4288_-_ctg1_4288
|
||||||
|
车展,1028035188,102803_ctg1_5188_-_ctg1_5188
|
||||||
|
搞笑,1028034388,102803_ctg1_4388_-_ctg1_4388
|
||||||
|
情感,1028031988,102803_ctg1_1988_-_ctg1_1988
|
||||||
|
周末,102803600195,102803_ctg1_600195_-_ctg1_600195
|
||||||
|
电影,1028033288,102803_ctg1_3288_-_ctg1_3288
|
||||||
|
社会,1028034188,102803_ctg1_4188_-_ctg1_4188
|
||||||
|
电视剧,1028032488,102803_ctg1_2488_-_ctg1_2488
|
||||||
|
美食,1028032688,102803_ctg1_2688_-_ctg1_2688
|
||||||
|
俄乌局势,102803600267,102803_ctg1_600267_-_ctg1_600267
|
||||||
|
国际,1028036288,102803_ctg1_6288_-_ctg1_6288
|
||||||
|
深度,102803600155,102803_ctg1_600155_-_ctg1_600155
|
||||||
|
财经,1028036388,102803_ctg1_6388_-_ctg1_6388
|
||||||
|
读书,1028034588,102803_ctg1_4588_-_ctg1_4588
|
||||||
|
摄影,1028034988,102803_ctg1_4988_-_ctg1_4988
|
||||||
|
颜值,102803600165,102803_ctg1_600165_-_ctg1_600165
|
||||||
|
体育,1028031388,102803_ctg1_1388_-_ctg1_1388
|
||||||
|
数码,1028035088,102803_ctg1_5088_-_ctg1_5088
|
||||||
|
综艺,1028034688,102803_ctg1_4688_-_ctg1_4688
|
||||||
|
时尚,1028034488,102803_ctg1_4488_-_ctg1_4488
|
||||||
|
星座,1028031688,102803_ctg1_1688_-_ctg1_1688
|
||||||
|
军事,1028036688,102803_ctg1_6688_-_ctg1_6688
|
||||||
|
股市,1028031288,102803_ctg1_1288_-_ctg1_1288
|
||||||
|
房产,1028035588,102803_ctg1_5588_-_ctg1_5588
|
||||||
|
家居,1028035888,102803_ctg1_5888_-_ctg1_5888
|
||||||
|
萌宠,1028032788,102803_ctg1_2788_-_ctg1_2788
|
||||||
|
科技,1028032088,102803_ctg1_2088_-_ctg1_2088
|
||||||
|
科普,1028035988,102803_ctg1_5988_-_ctg1_5988
|
||||||
|
动漫,1028032388,102803_ctg1_2388_-_ctg1_2388
|
||||||
|
运动健身,1028034788,102803_ctg1_4788_-_ctg1_4788
|
||||||
|
旅游,1028032588,102803_ctg1_2588_-_ctg1_2588
|
||||||
|
瘦身,1028036488,102803_ctg1_6488_-_ctg1_6488
|
||||||
|
好物,102803600094,102803_ctg1_600094_-_ctg1_600094
|
||||||
|
历史,1028036788,102803_ctg1_6788_-_ctg1_6788
|
||||||
|
艺术,1028035488,102803_ctg1_5488_-_ctg1_5488
|
||||||
|
美妆,1028031588,102803_ctg1_1588_-_ctg1_1588
|
||||||
|
法律,1028037388,102803_ctg1_7388_-_ctg1_7388
|
||||||
|
设计,1028035388,102803_ctg1_5388_-_ctg1_5388
|
||||||
|
健康,1028032188,102803_ctg1_2188_-_ctg1_2188
|
||||||
|
音乐,1028035288,102803_ctg1_5288_-_ctg1_5288
|
||||||
|
游戏,1028034888,102803_ctg1_4888_-_ctg1_4888
|
||||||
|
新时代,1028037968,102803_ctg1_7968_-_ctg1_7968
|
||||||
|
校园,102803600177,102803_ctg1_600177_-_ctg1_600177
|
||||||
|
收藏,1028038189,102803_ctg1_8189_-_ctg1_8189
|
||||||
|
政务,1028035788,102803_ctg1_5788_-_ctg1_5788
|
||||||
|
养生,1028036588,102803_ctg1_6588_-_ctg1_6588
|
||||||
|
育儿,1028033188,102803_ctg1_3188_-_ctg1_3188
|
||||||
|
抽奖,102803600037,102803_ctg1_600037_-_ctg1_600037
|
||||||
|
教育,102803600080,102803_ctg1_600080_-_ctg1_600080
|
||||||
|
婚恋,1028031788,102803_ctg1_1788_-_ctg1_1788
|
||||||
|
舞蹈,1028038788,102803_ctg1_8788_-_ctg1_8788
|
||||||
|
辟谣,1028036988,102803_ctg1_6988_-_ctg1_6988
|
||||||
|
公益,102803600057,102803_ctg1_600057_-_ctg1_600057
|
||||||
|
问答,1028037977,102803_ctg1_7977_-_ctg1_7977
|
||||||
|
三农,1028037188,102803_ctg1_7188_-_ctg1_7188
|
||||||
|
@@ -1,13 +0,0 @@
|
|||||||
from spiderDataPack.spiderNav import start as spiderNavStart
|
|
||||||
from spiderDataPack.spiderContent import start as spiderContentStart
|
|
||||||
from spiderDataPack.spiderComments import start as spiderCommentsStart
|
|
||||||
import os
|
|
||||||
|
|
||||||
def spiderData():
|
|
||||||
if not os.path.exists('./nav.csv'):
|
|
||||||
spiderNavStart()
|
|
||||||
spiderContentStart(1,1)
|
|
||||||
spiderCommentsStart()
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
spiderData()
|
|
||||||
Binary file not shown.
Binary file not shown.
@@ -45,11 +45,9 @@ def parse_json(response):
|
|||||||
containerid
|
containerid
|
||||||
])
|
])
|
||||||
|
|
||||||
def start():
|
|
||||||
|
if __name__ == '__main__':
|
||||||
init()
|
init()
|
||||||
url = 'https://weibo.com/ajax/feed/allGroups'
|
url = 'https://weibo.com/ajax/feed/allGroups'
|
||||||
response = get_data(url)
|
response = get_data(url)
|
||||||
parse_json(response)
|
parse_json(response)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
start()
|
|
||||||
Reference in New Issue
Block a user