增加spider脚本,每5小时爬取一次数据
This commit is contained in:
+14
-14
@@ -6,24 +6,24 @@ engine = create_engine('mysql+pymysql://XiaoXueQi:XiaoXueQi@10.92.35.13/Weibo_Pu
|
||||
|
||||
def save_to_sql():
    """Merge the scraped CSV data into the database, then delete the CSVs.

    Reads ``article.csv`` and ``comments.csv`` from the working directory,
    combines each with the current contents of the ``article`` /
    ``comments`` tables, drops duplicate rows (articles by ``id``,
    comments by ``content``) and rewrites both tables.  If the merge
    fails for any reason, the tables are replaced with the CSV contents
    alone.  Both CSV files are removed once the tables have been written.

    Raises:
        Whatever ``pd.read_csv`` raises when a CSV file is missing or
        unreadable, and whatever the fallback ``to_sql`` raises when the
        database cannot be written at all.
    """
    import logging

    # Read the scraped data once; the merge path and the fallback both use it.
    newArticle = pd.read_csv('article.csv')
    newComment = pd.read_csv('comments.csv')

    try:
        oldArticle = pd.read_sql('select * from article', engine)
        oldComment = pd.read_sql('select * from comments', engine)

        # join='inner' keeps only the columns present in both frames.
        mergeArticle = pd.concat([newArticle, oldArticle], join='inner')
        mergeComment = pd.concat([newComment, oldComment], join='inner')

        # NOTE(review): the stored rows come last in the concat, so
        # keep='last' retains the row already in the database when a
        # duplicate exists -- confirm that this is the intended precedence.
        mergeArticle.drop_duplicates(subset='id', keep='last', inplace=True)
        mergeComment.drop_duplicates(subset='content', keep='last', inplace=True)

        mergeArticle.to_sql('article', con=engine, if_exists='replace', index=False)
        mergeComment.to_sql('comments', con=engine, if_exists='replace', index=False)
    except Exception:
        # Presumably this covers the first run, when the tables do not
        # exist yet.  Catch Exception rather than using a bare except so
        # KeyboardInterrupt/SystemExit still propagate, and log the cause
        # because this path overwrites the tables with the new data only.
        logging.getLogger(__name__).warning(
            'merge with existing tables failed; '
            'replacing tables with CSV data only',
            exc_info=True,
        )
        newArticle.to_sql('article', con=engine, if_exists='replace', index=False)
        newComment.to_sql('comments', con=engine, if_exists='replace', index=False)

    # Only reached when one of the write paths above succeeded.
    os.remove('./article.csv')
    os.remove('./comments.csv')
|
||||
|
||||
Reference in New Issue
Block a user