diff --git a/app.py b/app.py index 0606542..3c1c876 100644 --- a/app.py +++ b/app.py @@ -36,7 +36,7 @@ def run_spider_script(): if __name__ == '__main__': scheduler = BackgroundScheduler(timezone=utc) - scheduler.add_job(run_spider_script, 'interval', minutes=1) + scheduler.add_job(run_spider_script, 'interval', hours=5) scheduler.start() try: diff --git a/spider/main.py b/spider/main.py index 5aeb4d2..e65f35d 100644 --- a/spider/main.py +++ b/spider/main.py @@ -1,5 +1,5 @@ from spiderData import spiderData -from saveData import save_to_sql as saveData +from saveData import saveData def main(): print('正在爬取数据') diff --git a/spider/saveData.py b/spider/saveData.py index 1ef0b2f..7c73dd4 100644 --- a/spider/saveData.py +++ b/spider/saveData.py @@ -4,7 +4,7 @@ import pandas as pd engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4') -def save_to_sql(): +def saveData(): try: oldArticle = pd.read_sql('select * from article',engine) newArticle = pd.read_csv('article.csv') @@ -29,4 +29,4 @@ def save_to_sql(): os.remove('./comments.csv') if __name__ == '__main__': - save_to_sql() \ No newline at end of file + saveData() \ No newline at end of file diff --git a/spider/spiderData.py b/spider/spiderData.py index 97498a9..57a235c 100644 --- a/spider/spiderData.py +++ b/spider/spiderData.py @@ -1,10 +1,11 @@ from spiderDataPackage.spiderNav import start as spiderNav from spiderDataPackage.spiderContent import start as spiderContent from spiderDataPackage.spiderComments import start as spiderComments +from spiderDataPackage.settings import navAddr import os def spiderData(): - if not os.path.exists('./nav.csv'): + if not os.path.exists(navAddr): print('正在爬取导航栏数据') spiderNav() print('正在爬取文章数据') diff --git a/spider/spiderDataPackage/spiderComments.py b/spider/spiderDataPackage/spiderComments.py index 6bfc121..d30d9b0 100644 --- a/spider/spiderDataPackage/spiderComments.py +++ b/spider/spiderDataPackage/spiderComments.py @@ -3,7 +3,7 @@ import requests import csv import os from datetime import datetime -from settings import articleAddr,commentsAddr +from .settings import articleAddr,commentsAddr def init(): if not os.path.exists(commentsAddr): diff --git a/spider/spiderDataPackage/spiderContent.py b/spider/spiderDataPackage/spiderContent.py index f0afd84..a941215 100644 --- a/spider/spiderDataPackage/spiderContent.py +++ b/spider/spiderDataPackage/spiderContent.py @@ -3,7 +3,7 @@ import requests import csv import os from datetime import datetime -from settings import navAddr,articleAddr +from .settings import navAddr,articleAddr def init(): if not os.path.exists(articleAddr): diff --git a/spider/spiderDataPackage/spiderNav.py b/spider/spiderDataPackage/spiderNav.py index 7322034..4aa17f2 100644 --- a/spider/spiderDataPackage/spiderNav.py +++ b/spider/spiderDataPackage/spiderNav.py @@ -2,7 +2,7 @@ import requests import csv import numpy as np import os -from settings import navAddr +from .settings import navAddr def init(): if not os.path.exists(navAddr): with open(navAddr,'w',encoding='utf-8',newline='') as csvFile: