修改爬虫bug

This commit is contained in:
YYL469
2024-07-04 13:19:34 +08:00
parent 579cb8179e
commit b9558ad943
7 changed files with 9 additions and 8 deletions
+1 -1
View File
@@ -36,7 +36,7 @@ def run_spider_script():
if __name__ == '__main__':
scheduler = BackgroundScheduler(timezone=utc)
-scheduler.add_job(run_spider_script, 'interval', minutes=1)
+scheduler.add_job(run_spider_script, 'interval', hours=5)
scheduler.start()
try:
+1 -1
View File
@@ -1,5 +1,5 @@
from spiderData import spiderData
-from saveData import save_to_sql as saveData
+from saveData import saveData
def main():
print('正在爬取数据')
+2 -2
View File
@@ -4,7 +4,7 @@ import pandas as pd
engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@47.92.235.6/Weibo_PublicOpinion_AnalysisSystem?charset=utf8mb4')
-def save_to_sql():
+def saveData():
try:
oldArticle = pd.read_sql('select * from article',engine)
newArticle = pd.read_csv('article.csv')
@@ -29,4 +29,4 @@ def save_to_sql():
os.remove('./comments.csv')
if __name__ == '__main__':
-save_to_sql()
+saveData()
+2 -1
View File
@@ -1,10 +1,11 @@
from spiderDataPackage.spiderNav import start as spiderNav
from spiderDataPackage.spiderContent import start as spiderContent
from spiderDataPackage.spiderComments import start as spiderComments
+from spiderDataPackage.settings import navAddr
import os
def spiderData():
-if not os.path.exists('./nav.csv'):
+if not os.path.exists(navAddr):
print('正在爬取导航栏数据')
spiderNav()
print('正在爬取文章数据')
+1 -1
View File
@@ -3,7 +3,7 @@ import requests
import csv
import os
from datetime import datetime
-from settings import articleAddr,commentsAddr
+from .settings import articleAddr,commentsAddr
def init():
if not os.path.exists(commentsAddr):
+1 -1
View File
@@ -3,7 +3,7 @@ import requests
import csv
import os
from datetime import datetime
-from settings import navAddr,articleAddr
+from .settings import navAddr,articleAddr
def init():
if not os.path.exists(articleAddr):
+1 -1
View File
@@ -2,7 +2,7 @@ import requests
import csv
import numpy as np
import os
-from settings import navAddr
+from .settings import navAddr
def init():
if not os.path.exists(navAddr):
with open(navAddr,'w',encoding='utf-8',newline='') as csvFile: