From b3f0ea0c5fd325b138ff144b003225bc004e987f Mon Sep 17 00:00:00 2001 From: YYL469 <2049360881@qq.com> Date: Wed, 3 Jul 2024 17:38:16 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9spider=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=A4=B9=E7=BB=93=E6=9E=84=EF=BC=8C=E4=BC=98=E5=8C=96=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E7=88=AC=E5=8F=96=E7=BB=93=E6=9E=84=EF=BC=8C=E5=AE=9E?= =?UTF-8?q?=E7=8E=B0=E6=A8=A1=E5=9D=97=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- spider/main.py | 15 ++--- spider/navData.csv | 60 ------------------ spider/spiderData.py | 13 ++++ spider/spiderDataPack/__init__.py | 0 .../__pycache__/__init__.cpython-38.pyc | Bin 0 -> 206 bytes .../__pycache__/spiderComments.cpython-38.pyc | Bin 0 -> 3450 bytes .../__pycache__/spiderContent.cpython-38.pyc | Bin 0 -> 3953 bytes .../__pycache__/spiderNav.cpython-38.pyc | Bin 0 -> 2583 bytes spider/{ => spiderDataPack}/spiderComments.py | 8 +-- spider/{ => spiderDataPack}/spiderContent.py | 8 +-- spider/{ => spiderDataPack}/spiderNav.py | 14 ++-- 11 files changed, 36 insertions(+), 82 deletions(-) delete mode 100644 spider/navData.csv create mode 100644 spider/spiderData.py create mode 100644 spider/spiderDataPack/__init__.py create mode 100644 spider/spiderDataPack/__pycache__/__init__.cpython-38.pyc create mode 100644 spider/spiderDataPack/__pycache__/spiderComments.cpython-38.pyc create mode 100644 spider/spiderDataPack/__pycache__/spiderContent.cpython-38.pyc create mode 100644 spider/spiderDataPack/__pycache__/spiderNav.cpython-38.pyc rename spider/{ => spiderDataPack}/spiderComments.py (91%) rename spider/{ => spiderDataPack}/spiderContent.py (93%) rename spider/{ => spiderDataPack}/spiderNav.py (89%) diff --git a/spider/main.py b/spider/main.py index d164331..898fad6 100644 --- a/spider/main.py +++ b/spider/main.py @@ -1,14 +1,13 @@ -from spiderContent import start as spiderContentStart -from spiderComments import start as spiderCommentsStart +from spiderData import spiderData from saveData import save_to_sql as saveData def main(): - print('正在爬取文章数据') - spiderContentStart(1,1) - print('正在爬取文章评论数据') - spiderCommentsStart() - print('正在存储数据') - saveData() + try: + spiderData() + saveData() + print("爬取数据更新") + except: + print("爬取数据失败") if __name__ == '__main__': main() \ No newline at end of file diff --git a/spider/navData.csv b/spider/navData.csv deleted file mode 100644 index b61708e..0000000 --- a/spider/navData.csv +++ /dev/null @@ -1,60 +0,0 @@ -typeName,gid,containerid -热门,102803,102803 -同城,1028032222,102803_2222 -榜单,102803600169,102803_ctg1_600169_-_ctg1_600169 -男篮,102803600279,102803_ctg1_600279_-_ctg1_600279 -明星,1028034288,102803_ctg1_4288_-_ctg1_4288 -车展,1028035188,102803_ctg1_5188_-_ctg1_5188 -搞笑,1028034388,102803_ctg1_4388_-_ctg1_4388 -情感,1028031988,102803_ctg1_1988_-_ctg1_1988 -周末,102803600195,102803_ctg1_600195_-_ctg1_600195 -电影,1028033288,102803_ctg1_3288_-_ctg1_3288 -社会,1028034188,102803_ctg1_4188_-_ctg1_4188 -电视剧,1028032488,102803_ctg1_2488_-_ctg1_2488 -美食,1028032688,102803_ctg1_2688_-_ctg1_2688 -俄乌局势,102803600267,102803_ctg1_600267_-_ctg1_600267 -国际,1028036288,102803_ctg1_6288_-_ctg1_6288 -深度,102803600155,102803_ctg1_600155_-_ctg1_600155 -财经,1028036388,102803_ctg1_6388_-_ctg1_6388 -读书,1028034588,102803_ctg1_4588_-_ctg1_4588 -摄影,1028034988,102803_ctg1_4988_-_ctg1_4988 -颜值,102803600165,102803_ctg1_600165_-_ctg1_600165 -体育,1028031388,102803_ctg1_1388_-_ctg1_1388 -数码,1028035088,102803_ctg1_5088_-_ctg1_5088 -综艺,1028034688,102803_ctg1_4688_-_ctg1_4688 -时尚,1028034488,102803_ctg1_4488_-_ctg1_4488 -星座,1028031688,102803_ctg1_1688_-_ctg1_1688 -军事,1028036688,102803_ctg1_6688_-_ctg1_6688 -股市,1028031288,102803_ctg1_1288_-_ctg1_1288 -房产,1028035588,102803_ctg1_5588_-_ctg1_5588 -家居,1028035888,102803_ctg1_5888_-_ctg1_5888 -萌宠,1028032788,102803_ctg1_2788_-_ctg1_2788 -科技,1028032088,102803_ctg1_2088_-_ctg1_2088 -科普,1028035988,102803_ctg1_5988_-_ctg1_5988 -动漫,1028032388,102803_ctg1_2388_-_ctg1_2388 -运动健身,1028034788,102803_ctg1_4788_-_ctg1_4788 -旅游,1028032588,102803_ctg1_2588_-_ctg1_2588 -瘦身,1028036488,102803_ctg1_6488_-_ctg1_6488 -好物,102803600094,102803_ctg1_600094_-_ctg1_600094 -历史,1028036788,102803_ctg1_6788_-_ctg1_6788 -艺术,1028035488,102803_ctg1_5488_-_ctg1_5488 -美妆,1028031588,102803_ctg1_1588_-_ctg1_1588 -法律,1028037388,102803_ctg1_7388_-_ctg1_7388 -设计,1028035388,102803_ctg1_5388_-_ctg1_5388 -健康,1028032188,102803_ctg1_2188_-_ctg1_2188 -音乐,1028035288,102803_ctg1_5288_-_ctg1_5288 -游戏,1028034888,102803_ctg1_4888_-_ctg1_4888 -新时代,1028037968,102803_ctg1_7968_-_ctg1_7968 -校园,102803600177,102803_ctg1_600177_-_ctg1_600177 -收藏,1028038189,102803_ctg1_8189_-_ctg1_8189 -政务,1028035788,102803_ctg1_5788_-_ctg1_5788 -养生,1028036588,102803_ctg1_6588_-_ctg1_6588 -育儿,1028033188,102803_ctg1_3188_-_ctg1_3188 -抽奖,102803600037,102803_ctg1_600037_-_ctg1_600037 -教育,102803600080,102803_ctg1_600080_-_ctg1_600080 -婚恋,1028031788,102803_ctg1_1788_-_ctg1_1788 -舞蹈,1028038788,102803_ctg1_8788_-_ctg1_8788 -辟谣,1028036988,102803_ctg1_6988_-_ctg1_6988 -公益,102803600057,102803_ctg1_600057_-_ctg1_600057 -问答,1028037977,102803_ctg1_7977_-_ctg1_7977 -三农,1028037188,102803_ctg1_7188_-_ctg1_7188 diff --git a/spider/spiderData.py b/spider/spiderData.py new file mode 100644 index 0000000..131c87a --- /dev/null +++ b/spider/spiderData.py @@ -0,0 +1,13 @@ +from spiderDataPack.spiderNav import start as spiderNavStart +from spiderDataPack.spiderContent import start as spiderContentStart +from spiderDataPack.spiderComments import start as spiderCommentsStart +import os + +def spiderData(): + if not os.path.exists('./nav.csv'): + spiderNavStart() + spiderContentStart(1,1) + spiderCommentsStart() + +if __name__ == '__main__': + spiderData() \ No newline at end of file diff --git a/spider/spiderDataPack/__init__.py b/spider/spiderDataPack/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/spider/spiderDataPack/__pycache__/__init__.cpython-38.pyc b/spider/spiderDataPack/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5d539c2131a6117e5331b1f94b6f05d09c779a6 GIT binary patch literal 206 zcmWIL<>g`k0(HUGG!Xq5L?8o3AjbiSi&=m~3PUi1CZpde zj>#%b%riDIj&VsX&MwI>hujqcdtN+EieJ|E6e7bJl zi|t!up6*-sw0G8vj*FkI{s6`S>q%X+afHgbyw&H={244fIj zMp&x4X%eYgrBbC-TBVV#rfSm5=Ba8FReA2;Ft3ewoj;(8l!vO{8N8d#=Anb6 zp(|7bmO(rcUSldA&M032)sz zWo+ThGhxozQ+C3hd66p-m4-?}rD)Ecvn*3)ROSv-WcEQzkm0$)7uUjzb+g;WhHZuG z*3e-`Uw8&~Bb0EM4>(7}M%_fZ(R5}EJnHBMc4jqV>-7%aM2@dcFo=$-n(fSY^ftDt zb#nkGIbMRBx@kBwbQ+^#A2oq;vI!hx&uw^}dm>z1jMuIaA zXIhTX)9jYx#mBk@XA5Qz8xBu}^^S;c!x`dqG)F-WK4}ft^bRK91e<-pkZ-~8pCJ&m zbXc-5u?~+0nh}Xc4l~$l+h*_Z(~o}p=|}f3Km6qI;=Mmzy!-1%@BjMZ01=hz3Ecfj1bMhT{UT5iBR~HZCEZlAkM1ip(cAoP4E2pdtF)@Ld zIOQhX8BZhNYhLyi2tCMQHR6GhcqyS01qJdFeFU$V@o#I+(_jdMHA+=}c_J{hx&Zd> ztxeS{?&sTOxqua;wHaCOU`4&Nm6a;V8+a?8lAz`Js540peXv;D%dzr3=&b8BZO zm9dppEnU*~8|}RwIXWTf%5F7R)G|5CI9?C6H*5G{WUSQG+ofzpA2s$!wM6O%jq;$s z9*S1>GI@0amm(Yc?MxwCwOfs11MMW!W(<#aBhpc(Z4E=KJ#DYs*-iJ?OB+X9@$G~5 zhN`V*qZ_&7Zm};%D_W;}r!X2f%K2?AdLr!=in+l7YQ}T26-}OW3;n7(REGo097`nH z)?$SruBFob?pE21$mZIfs@q1f zSuT|1&}ie%ZZ5XnZLj1Sk!EJl@1cHrv!RungUoK8;MncsqlQ`K27(udJCuiPmC`6w^ysv@QWu^Rf}sixv}imdc8J2 z0fCzs+t{+Jw%Nu;Dkdp)6vYuOj*$!zo{$oagoYYe))Xn4sKHlrYmh365>@0xEHU2B zn`6Dx(H7OP0Z$Z$zk;$bQ1U&r;fxu{681b?!B zO((cv9xqOxa*lV^G}}5J`)CyorJJBJE!QoqK^=vb(bI_5wLbe7R0&6bmZ${+Z(`e-vuw>C&@O|pBOF<#0q^hw(jni2UF5rTD{QfPrUBo0rfi9DLY1CNiJec5;?wDEchj z@G*4GpiuGA{T-Zxu1LbL$(fzLxC)(-H?-li26!F}knVKS)Ba7j{Q!1NEddRQT0w;0 zODu`N`d8}s94ye=t>f>(DsvRRJV7s(IUsdBpq=gE@BeuD#-H7uIe`AdJpA36Bii7ID$z!cFn~CNumm84 zjj93Yrs9Rbrrr`eW?i!>2%6ViFjMVlq=~_^v_U!|IITxPpUX8|Zie7qN2_DEj|S(+ zkKx6NIQ25XEdWz4P;-?c6dRHSN@ysP=P0>O$@7%FK*=)+;DZdaaFwQw1dlBmGmPtS z5wOUMtk(^Ic|a)i_ceUNa-Wk|VaHcegqnz^i5nO{2L`b;MJUR(*}DuyqYx+=icSy* zPUCF|kqQ42L)~QaX95yvg4mu3e;0rN@&GgtY%c%<;Vy)WJp<0=?{eVM7hRWn3*%7B zwtLpn;^GMFHIsJZMeRsCUYynjY%SIXdIvp*V1(kSK>*0Zzy0Cjo%bKT`PYlL-qz8@ zFaG)IM;`~QNB3WQ`1Tu*{&IHt;cEcyKBLZovc&1Aa$UEoR?8e!YlBV)0J;$rU5MtU zhvVrH9e}Oidd_2YFzz`#thi;wHB``B@m;yBL^_qZt8}M{SDCq*5c&_ww0co*KkG6N zC`If|cw$j}#z2~S;YZVSp7n8Fw|x0natoe0Vzt`Ubfa1g&XW~bAm69N1$W<|993u(#!5x+Y=MLjGliXHdK7GVKY<;U>qO(29Le4UeeB`@)IU62I3bVv>; zg}Srhl$<v1ZL&(LEiEb*`S8fpe1a1`Ut?ls<|0>^&KDvx5u9dgg@o=EyF>0BH%iR z>$dYw@`y8>CoYpa<&SuiKXDU#O}HLh3mL?9kYQNQ2zjnDbjtJGCYRuDajV=GXbh_v z&3`c%9+R1&lQcF;X<*6$*nr}C( z{*u;cP}R_LRJ8^fZECuq7fV`OH7qxy6-84mPf1e^=wtcL8SLvTF$(IFqHI`BjVW@lfFNz)G7Nm?C^&i^F#5EHV`ttQ_v~XclO${8jHtv zvQ)1bT65>Y{a-w|fA{h4KHE8a|2Jpvz4hpWx6VGj^Z4FpkMEtH-FxrxA3xbS`|RG? z?YocPeB<%GPtV?baZM4>#*-$wEB^=$@hS>`g+^Gs&#Onj^a35 zJr)42g!caQaRP&L+6ttB=`H_V;YdI)FgX(zPn@?MM-J1mClD9-R>X9iI8Hg@z6SP; zHf`IrCoh*li?xOP)Lic7^i*y#nn)xvaWNfDM#OknjHG}hQ^|BhOeK@aXkx-&$ZbrD zDKV3dX5xuVG?f@nj*H{b@pv*B$z+nTNFtNYgfn6~-0PJBf$2b98%AzxU)f$;(9?B|rSxuGnLAV}xe5PT zVS2I{or)%M)%`*yOINFNbap;DyRa2ni!C2!qYLv}8(OHcC$j8DdZnOyR;PNRuii%StHlUa-ztX0fQYI1vNF=WeMnT$kYu}!wN)mk|yFQ=l3O+B;M zsw~gK_Czx)nQUfrGq${+-bmbNH*f9kE>*+(vb4}CBo>=niKT9}P|1a7wdFXCBue5E zP0>g+N)~h4LNk>+SX$1BOVhdOB^CHqPOPfAY3M6fyYRMF=~62{;oqEIS-Y{eHkmb| z)!nIryj8AkG}+!Eo08Uxxm7uv)768yP;FtCZgnc#5^s;=C^9u{8G`VmRHMUEi*)s`5S3Iybe0a&95inVf`;o|&DTYBd%&wV0^QZX^^# zU9D{93u34UYigk^Z&upb^<_qrHxKs8i9%OT#TTclQ5MOP z4N+edE6GNvvzW-=3QzcpdJ*B29ZtfTPBrCHl}`R28=dg4%oKa(m&zm(a!TG6%M*UN z*{sn+Sa5|>gX%`n&}vkjOh%$55~ndaMTrO&o{pr;X_=I%C`Tjl^e)Vrn*~>iM&nU2 zolJLEmbI=@ughbJaK!)erlJz9qxWVzRd0M)n?r|CdYZoTqlq&6{9yYk(>@R%y^_o6hj!8yn?FH912eTmE%c5#a= zr%DeX2*VjumEgu;l+8hKWKkF*_4T^VK!9|B)c;g{2vnb{*VoU-T!$gO4S=YG%6s8= zg=Z;f{|bZ;Lj%4xgpWXUGPx}GHvgI%BM#xOaiF9?1Sp+QI-qnx>4ehV9R<#HjXMD_ zC!SN`$g|0*!#pHTkUDvfJlmWqY{6EMKIrX*(g$Uq9nl2Re+?48Q;2C~5TcvUgoLR3 zA{jDp7MS&zzsiwIS0P=3`f#s))c+Ov{TK2tgRBvdJpg@?e-8LTGHQ>-`nkNLq4V$0 z!}m*u%XFAd(`C9%kLfl0Ojv{2Zw{D)=8$=*GK9z6a@xV^N}gecDqWHdLowQfo@Y$b z(9>+HmqD>X?A6~(qlzlzKW!X$cIW43_m3ZZ`ZK>Bo7)iQxQD+wk-P>Rc#+8+$%hskd;J}3AWlLllvCo9%vR-sbA4p|&k?WJ``zb>FXCb{>E&3ao8x-Ac6 zhjqC`8Af~yT$VL4`L(&S%{idWP6~pLP7$*=!-zObIV5S7Wh%#ePQXCi@j*q2Ov8e zh4+8tX1wq@dljTT0sH@|dmuKT$J*}kJ}?AQF@wX~fx#bxa}WU>!vSdU1~`V(hd)8;{P#cK4e0&{_dmv#k3I_M zPfkC1_|8us{r2SHFMt2=_C59^l!2z+y-XCnr0r8?!u6wwlLcLpx5Bh`vj|2B*AEpg z4KS@jBzHqTz($vS{gAxi19p%IdhJx%#wxgLpv}4TI;Bm^#bmVtwI)*_o+6mr#}@39 zJP4L>!I3;oxneWE^Gg!kPZ(+@4V-tG!OKq1RN)E&UV)*hA?rO8_M=dn*hc4qaKsF} z*6A33I6zZ3{G5oV99r8D{{rt5u+6rwf4V(Bz*~l)#PSx44N$gN3<~xZ*(5l?vhb0; zg2YbEUqB8|pri_dt5AG_Eq_5QG-k#?QFiU+ErOmqtT4G7-n Qy)JLRx6k{M?~0fIH-hsCY5)KL literal 0 HcmV?d00001 diff --git a/spider/spiderDataPack/__pycache__/spiderNav.cpython-38.pyc b/spider/spiderDataPack/__pycache__/spiderNav.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..935a5330566cf7d495493cf3e8f048366cc33156 GIT binary patch literal 2583 zcmai0+ixP*8J`Pa?A^_#=|$aCZIPN(p$d3rV1ThpRBRU8FklSk;*qP-z#L$>%)pt! zT!f#tjUq*>RO(w)t!$}Mn-}+~YLth4?%%*`K@$-+^75WHQ;NyUtb6A*EjSBz}L}+{;4p^Ql6U(Eol!H{6NZ zg=S}fI8>LuV08MDws#y>r-?qI>$&HXz&Yi3F1!osl0K&w^o6Ti_qW+gnx<-$NF7ln zstz{?UF(If*29~ob{1~hXG?nWm3Qp4LWw2y8ME{wvuWvu*;dv}H0c{AQvA+rhzi;O zE!noAFCR25)6rl88+veU$yoN%Gt`{U(ubyo?SNzrk-22a*}e%j9E6wt6}KiqX1N9v z8KaNzW3cEm3cm&HpCcgp)@j8?*gox!H8T>8oTic8bFAU%m!JRk%g;Z(`P0kO>)-$3 z`o+ip_~XadUwm?N_44NG^7`t<&0qd}dj0b1`uV3fAANXp^~Lo^AO8KtZ%*x@p(A|9 z(wd_cwPx>5EP(C9+0tW}hT{X(_M1Q``XT)my+M0uhW!48zXM}Un#|mNEb#k)L_u&M zc&`NE?Q`0J04*Ru=gfk+U`aUs$NjznCXrpz8ukFkUP8z%f*$;c^xq(acn6bf1zCl~ z)&zg+ew293bsPRR^p@)@=a#GqS#Fb)DAy4DJ?MSI0HTkmb2I7#=&Y{)JLa5*Jc3Mu z9z(KhK`rP7qx+t-=j?**GWr_iYiz+@u$PcQB#Yck@)xugIHi10NbSjoJ1IFSh@vP( z`GgST_-L5t;y_~YSc2o@u~Zx1NCw#6FNT3JuM`w zj$A)8PRa*%qHp22J&ziDGovkU2g;S5q$;EYQSO{oq%2C|0tK9x<;}&-ZG40SRX3AhOp-f_Rk8af&2mBMu&^1;vb=v7q#77bYGqhN+Uk1X#K%aVKQ&k@S@Hsg8iTv|i=y zEZ>d|LX)gmI0|nER9khB>EKxsVwxIi%??WbpB&u|6nE8C`o$$t)Z$u$Z*2#(;jo8h zaN#yW4zeB9v3kf%#yFv=M^QwJBb|o~PjHD=LepD_*90z_XnE;fx>Zs=fWHW+%RL&?ZA`}e7404@h}$*?4f4Q+7vjRgFily zF)(Ub)6LZ^qxg0$t7oA327%sCstp$3637cO${U8QLJd%D)E>aFvfe^Yv!iw_hoE=| zX#lVG(l^wwHn4C1N;Y|xevC$Au+fHMfgI(5?P$)}R-vXK#nZJd6V|X}+{fRC_4o-8 zvc(=+Cg^_+*!*yUssnO=7na!sjVT6z`QJXTOFeG_So<0LV;KJi$`J&-N4Yu(4s`*D zrqfR-oq@(aq3$HlO1cF6ThM9hmp>!0t$UwPfO>#_z&*g%fO{Qg!7e-tZ<~DvkUo0v z4*wKi8(ZU{eM@SC;&p{v0<%l6V>nRd{LsgwXaw0MV-A--$OdHUE;a{!_(x>h8q{JL zYNx_N$CXIA{d()`j@ba8+ejtud6===zvtQ?WZiP< z2E$bwWbhB+eAmF&@9%}Izzzq~HgOL{`=IFhY5Y^@Fa4@Ifais(1|Q%bz<^s06|XrS z3}^T!WPrTbR@z8``&#AweiOW>s;_8u6U!}@{+|ylecVSMkR*2