修改爬虫bug
This commit is contained in:
@@ -36,7 +36,7 @@ def run_spider_script():
|
||||
|
||||
if __name__ == '__main__':
|
||||
scheduler = BackgroundScheduler(timezone=utc)
|
||||
scheduler.add_job(run_spider_script, 'interval', minutes=1)
|
||||
scheduler.add_job(run_spider_script, 'interval', hours=5)
|
||||
scheduler.start()
|
||||
|
||||
try:
|
||||
|
||||
+1
-1
@@ -1,5 +1,5 @@
|
||||
from spiderData import spiderData
|
||||
from saveData import save_to_sql as saveData
|
||||
from saveData import saveData
|
||||
|
||||
def main():
|
||||
print('正在爬取数据')
|
||||
|
||||
+2
-2
@@ -4,7 +4,7 @@ import pandas as pd
|
||||
|
||||
engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4')
|
||||
|
||||
def save_to_sql():
|
||||
def saveData():
|
||||
try:
|
||||
oldArticle = pd.read_sql('select * from article',engine)
|
||||
newArticle = pd.read_csv('article.csv')
|
||||
@@ -29,4 +29,4 @@ def save_to_sql():
|
||||
os.remove('./comments.csv')
|
||||
|
||||
if __name__ == '__main__':
|
||||
save_to_sql()
|
||||
saveData()
|
||||
@@ -1,10 +1,11 @@
|
||||
from spiderDataPackage.spiderNav import start as spiderNav
|
||||
from spiderDataPackage.spiderContent import start as spiderContent
|
||||
from spiderDataPackage.spiderComments import start as spiderComments
|
||||
from spiderDataPackage.settings import navAddr
|
||||
import os
|
||||
|
||||
def spiderData():
|
||||
if not os.path.exists('./nav.csv'):
|
||||
if not os.path.exists(navAddr):
|
||||
print('正在爬取导航栏数据')
|
||||
spiderNav()
|
||||
print('正在爬取文章数据')
|
||||
|
||||
@@ -3,7 +3,7 @@ import requests
|
||||
import csv
|
||||
import os
|
||||
from datetime import datetime
|
||||
from settings import articleAddr,commentsAddr
|
||||
from .settings import articleAddr,commentsAddr
|
||||
|
||||
def init():
|
||||
if not os.path.exists(commentsAddr):
|
||||
|
||||
@@ -3,7 +3,7 @@ import requests
|
||||
import csv
|
||||
import os
|
||||
from datetime import datetime
|
||||
from settings import navAddr,articleAddr
|
||||
from .settings import navAddr,articleAddr
|
||||
|
||||
def init():
|
||||
if not os.path.exists(articleAddr):
|
||||
|
||||
@@ -2,7 +2,7 @@ import requests
|
||||
import csv
|
||||
import numpy as np
|
||||
import os
|
||||
from settings import navAddr
|
||||
from .settings import navAddr
|
||||
def init():
|
||||
if not os.path.exists(navAddr):
|
||||
with open(navAddr,'w',encoding='utf-8',newline='') as csvFile:
|
||||
|
||||
Reference in New Issue
Block a user