优化爬虫代码
This commit is contained in:
@@ -36,7 +36,7 @@ def run_spider_script():
|
||||
|
||||
if __name__ == '__main__':
|
||||
scheduler = BackgroundScheduler(timezone=utc)
|
||||
scheduler.add_job(run_spider_script, 'interval', hours=5)
|
||||
scheduler.add_job(run_spider_script, 'interval', minutes=1)
|
||||
scheduler.start()
|
||||
|
||||
try:
|
||||
|
||||
+3
-6
@@ -1,12 +1,9 @@
|
||||
from spiderContent import start as spiderContent
|
||||
from spiderComments import start as spiderComments
|
||||
from spiderData import spiderData
|
||||
from saveData import save_to_sql as saveData
|
||||
|
||||
def main():
    """Run one crawl-and-store cycle.

    All crawling is delegated to ``spiderData()``. The article and comment
    spiders are deliberately not called here as well, because the
    ``spiderData()`` entry point already runs them; calling both would
    scrape the same data twice per cycle. Once crawling is finished,
    ``saveData()`` (the ``save_to_sql`` import) is called to store the result.
    """
    print('正在爬取数据')
    spiderData()
    print('正在存储数据')
    saveData()
    print("爬取数据更新")
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
from spiderDataPackage.spiderNav import start as spiderNav
|
||||
from spiderDataPackage.spiderContent import start as spiderContent
|
||||
from spiderDataPackage.spiderComments import start as spiderComments
|
||||
import os
|
||||
|
||||
def spiderData():
    """Run the crawl steps in order: navigation (if needed), articles, comments.

    The navigation spider only runs when ``./nav.csv`` does not exist yet;
    the article and comment spiders run on every call.
    """
    # NOTE(review): this literal duplicates the nav CSV path that the spider
    # modules read from ``settings.navAddr`` -- confirm they stay in sync.
    if not os.path.exists('./nav.csv'):
        print('正在爬取导航栏数据')
        spiderNav()

    print('正在爬取文章数据')
    # NOTE(review): meaning of the two positional arguments is not visible
    # here -- check spiderContent.start for their definition.
    spiderContent(1, 1)

    print('正在爬取文章评论数据')
    spiderComments()
|
||||
|
||||
if __name__ == '__main__':
|
||||
spiderData()
|
||||
@@ -0,0 +1,3 @@
|
||||
# Shared CSV locations used by the spider modules.
# The paths are relative, so they resolve against the current working
# directory of the process that imports this module.
navAddr = "./nav.csv"            # navigation/category rows
articleAddr = "./article.csv"    # article rows
commentsAddr = "./comments.csv"  # comment rows
|
||||
@@ -3,10 +3,11 @@ import requests
|
||||
import csv
|
||||
import os
|
||||
from datetime import datetime
|
||||
from settings import articleAddr,commentsAddr
|
||||
|
||||
def init():
|
||||
if not os.path.exists('./comments.csv'):
|
||||
with open('./comments.csv','w',encoding='utf-8',newline='') as csvFile:
|
||||
if not os.path.exists(commentsAddr):
|
||||
with open(commentsAddr,'w',encoding='utf-8',newline='') as csvFile:
|
||||
writer = csv.writer(csvFile)
|
||||
writer.writerow([
|
||||
'articleId',
|
||||
@@ -21,7 +22,7 @@ def init():
|
||||
])
|
||||
|
||||
def write(row):
    """Append a single row to the comments CSV file.

    The target path comes from ``commentsAddr`` (imported from ``settings``)
    rather than a hard-coded literal, so it matches the path ``init()`` uses.

    Args:
        row: Iterable of field values handed to ``csv.writer.writerow``.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(commentsAddr, 'a', encoding='utf-8', newline='') as csvFile:
        csv.writer(csvFile).writerow(row)
|
||||
|
||||
@@ -38,7 +39,7 @@ def fetchData(url,params):
|
||||
|
||||
def getArticleList():
|
||||
articleList = []
|
||||
with open('./article.csv','r',encoding='utf-8') as reader:
|
||||
with open(articleAddr,'r',encoding='utf-8') as reader:
|
||||
readerCsv = csv.reader(reader)
|
||||
next(reader)
|
||||
for nav in readerCsv:
|
||||
@@ -3,10 +3,11 @@ import requests
|
||||
import csv
|
||||
import os
|
||||
from datetime import datetime
|
||||
from settings import navAddr,articleAddr
|
||||
|
||||
def init():
|
||||
if not os.path.exists('./article.csv'):
|
||||
with open('./article.csv','w',encoding='utf-8',newline='') as csvFile:
|
||||
if not os.path.exists(articleAddr):
|
||||
with open(articleAddr,'w',encoding='utf-8',newline='') as csvFile:
|
||||
writer = csv.writer(csvFile)
|
||||
writer.writerow([
|
||||
'id',
|
||||
@@ -26,7 +27,7 @@ def init():
|
||||
])
|
||||
|
||||
def write(row):
    """Append a single row to the article CSV file.

    The target path comes from ``articleAddr`` (imported from ``settings``)
    rather than a hard-coded literal, so it matches the path ``init()`` uses.

    Args:
        row: Iterable of field values handed to ``csv.writer.writerow``.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(articleAddr, 'a', encoding='utf-8', newline='') as csvFile:
        csv.writer(csvFile).writerow(row)
|
||||
|
||||
@@ -43,7 +44,7 @@ def fetchData(url,params):
|
||||
|
||||
def getTypeList():
|
||||
typeList = []
|
||||
with open('./nav.csv','r',encoding='utf-8') as reader:
|
||||
with open(navAddr,'r',encoding='utf-8') as reader:
|
||||
readerCsv = csv.reader(reader)
|
||||
next(reader)
|
||||
for nav in readerCsv:
|
||||
@@ -2,10 +2,10 @@ import requests
|
||||
import csv
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
from settings import navAddr
|
||||
def init():
|
||||
if not os.path.exists('./nav.csv'):
|
||||
with open('./nav.csv','w',encoding='utf-8',newline='') as csvFile:
|
||||
if not os.path.exists(navAddr):
|
||||
with open(navAddr,'w',encoding='utf-8',newline='') as csvFile:
|
||||
writer = csv.writer(csvFile)
|
||||
writer.writerow([
|
||||
'typeName',
|
||||
@@ -14,7 +14,7 @@ def init():
|
||||
])
|
||||
|
||||
def write(row):
    """Append a single row to the navigation CSV file.

    The target path comes from ``navAddr`` (imported from ``settings``)
    rather than a hard-coded literal, so it matches the path ``init()`` uses.

    Args:
        row: Iterable of field values handed to ``csv.writer.writerow``.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(navAddr, 'a', encoding='utf-8', newline='') as csvFile:
        csv.writer(csvFile).writerow(row)
|
||||
|
||||
@@ -45,9 +45,11 @@ def readJson(response):
|
||||
containerid
|
||||
])
|
||||
|
||||
|
||||
def start():
    """Run the navigation crawl once.

    Calls ``init()`` first, then requests the Weibo ``allGroups`` feed
    endpoint through ``fetchData`` and passes the response to ``readJson``.
    Exposed as a function so other modules can import and call it instead
    of relying on this file being executed as a script.
    """
    init()
    url = 'https://weibo.com/ajax/feed/allGroups'
    response = fetchData(url)
    # Process the response exactly once.
    readJson(response)


if __name__ == '__main__':
    start()
|
||||
Reference in New Issue
Block a user