From fcc847e0965daaf1814f77c451a9b24b86a17d5a Mon Sep 17 00:00:00 2001 From: YYL469 <2049360881@qq.com> Date: Wed, 3 Jul 2024 18:19:54 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E7=88=AC=E8=99=ABbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- spider/__pycache__/saveData.cpython-38.pyc | Bin 0 -> 1214 bytes .../__pycache__/spiderComments.cpython-38.pyc | Bin 3450 -> 3435 bytes .../__pycache__/spiderContent.cpython-38.pyc | Bin 3953 -> 3938 bytes spider/__pycache__/spiderData.cpython-38.pyc | Bin 0 -> 510 bytes spider/article.csv | 1 + spider/main.py | 16 +++-- spider/nav.csv | 60 ++++++++++++++++++ spider/{spiderDataPack => }/spiderComments.py | 0 spider/{spiderDataPack => }/spiderContent.py | 0 spider/spiderData.py | 13 ---- spider/spiderDataPack/__init__.py | 0 .../__pycache__/__init__.cpython-38.pyc | Bin 206 -> 0 bytes .../__pycache__/spiderNav.cpython-38.pyc | Bin 2583 -> 0 bytes spider/{spiderDataPack => }/spiderNav.py | 8 +-- 14 files changed, 73 insertions(+), 25 deletions(-) create mode 100644 spider/__pycache__/saveData.cpython-38.pyc rename spider/{spiderDataPack => }/__pycache__/spiderComments.cpython-38.pyc (94%) rename spider/{spiderDataPack => }/__pycache__/spiderContent.cpython-38.pyc (57%) create mode 100644 spider/__pycache__/spiderData.cpython-38.pyc create mode 100644 spider/article.csv create mode 100644 spider/nav.csv rename spider/{spiderDataPack => }/spiderComments.py (100%) rename spider/{spiderDataPack => }/spiderContent.py (100%) delete mode 100644 spider/spiderData.py delete mode 100644 spider/spiderDataPack/__init__.py delete mode 100644 spider/spiderDataPack/__pycache__/__init__.cpython-38.pyc delete mode 100644 spider/spiderDataPack/__pycache__/spiderNav.cpython-38.pyc rename spider/{spiderDataPack => }/spiderNav.py (97%) diff --git a/spider/__pycache__/saveData.cpython-38.pyc b/spider/__pycache__/saveData.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97fee395ffa2d2c795ec85c1a14fea17cf1b9c19 GIT binary patch literal 1214 zcmZWp&u`R56rLG-ZD*6P`OzSbIdIul+CT#mjamvK>M;ZXBDGesX8k7MjXicfvnjjU zs0R>2LQyOAQgMigREhSooDhgZ{t)+Aw%z^#?U`q<6K&|M=FR)Qd4BKn%=6xzovjia z=l^)T{X>P2KXWiS7=mj!)m1b=fQH1)R~FKUxr~lE!!-p1;}LZ$U_#|FaV=qkwZqai zVLc)kMQ@Y3{SW3K8+Dq^`l;YrctYL}r0BJxuKF(gEa?{O+RDmSz~ilsxD~9Oc-Jmo zXngT`WA$?5;_Avh5wv1&v(pL#|7H@%AeP>B$-}M+)a|a)BKq2Yz*8mkR~@~5C2C#r z^U}w|LT3u+4V>yfG`(Lmc|n0{cF6V!`UbE=x_x zEEuEDPMAvHY*)Yr2d1F9Z&1>&AfBGUX4`@p&0rSJKyBYFA}pAjWbP5UPVw;QX+;JU zD*E()84P4jW%!mzwneo4!h!iiig|KxA@_jbg)DCLmXrvEua`ev-cI9anWsANL(!|1 z&N|n;;j7)=+0$`;97RHE)mtnBZJCI4peUaIIEH!4#>@s0yr=r;Urfy^g?z&UKwkIA2N~^p8h=-YTB4fU>^0DHj z6KYaheJW!KxDNIoc-VBAHSWQZ$<bP{YbY*yu3IxZezvC6DeMDebu zTkdpTI21QS*aUZ`RIwqx&rzwb+$+&|uQsl7eSF{;JU1qe#csmyBwfVb{FkGFwdP$V zQq^pCxLm!o+FTcEN5@I?{o9}4zkNP>b<`aG`f_;i?C;;64d4AZ8XS!Vhr_|a=*{cq z@MtjHe?EHpWHfj;eEQ_CgP)oz2|%Px#dk$v+1dX`$qRp|mf$Eq}4!nn-wyod+V^Xg7IkAz#r!{wp>Kt$a%{}XYogiGKmJ%b>v n<>vT7$s^%;#mi2ZPcS5NzKmk%gyJeiq-M}KD8^y# zmH&VsSBT_aIJo9|!O~J}EbJ_tw+j-d_~yMg@B7}5d8*!5%>&0VC**;~jk(L1hllLe znUjI537D7H^AT<+K3*vuwtx=`G|QAcOF&srk>IMH3ehQ4-=f6K=upGoYLzXH{;0DB zss3+r`>SWg2~X14??rv#6@J7~=oUORg%fBHuP)(E2N4gVFc!H?CRoEAy}_0-(0^%p z8~CDss;$a=&j7*u0=n_gQ$s#{$J#PhSaTE_EyfJIG#kvu|7Mq!uw-3V7A4y)flHOA zNlD|79QOnm(!pD+&Svn%`e==hRJVJqH9EI97+c4`&XOGL(fO<`kVX^B#d&R+%oc7I z>tEJL)J}d5#9kV7qclwNI)wrMEeUdf4s&;v3=H&)H~hx1AZL(zN^4G$r89W zw8>r~$bRDkSqeec28{(-7frHS1RV(^7xF;({@9B0?Pjm-AnK4cH4Prpd z?fM25{sAUbMU}erPv~6f5DR;U4&8gtX;Hc1^S<|f-Fv#*%CCy^&hwlheY);v=QU#r zci9hbPCG7#z}Mz?hMNYrcSI`Qip~L|@scgzwE^`R9fer}r9(v{FqH}MG0fk2eNs89 zcxqNyt$$(86!bG+hhIEXr6d(;mhev8m8$R|iKC#P%nS};nd+7_lnpZJ#Ze*$ZT|Lp zYS&mD_w0|>U<~|eKk`>~e!B=#bS1QsBV|V7=w8)Y@iA-kd9lHmgXd0-dHBs~u@PjY ztLd7q4j#f2ZD5o-Spw4ij>PY!#bAWIV}xk!8>{(OViTQlpQ6HEO7kUcwwI;Lza0Wul?brlkOs!TBx(0r~{@ zL0#Zj0fB;6UBbg4hugf{B$SmqzM#ISDZOxK#d#iZJSJZeOT$DQe$+;CY}Ywu+`jC& ze(9)WS150ZPS;*c9~RTGe4E?R({nT#eZP#N*_{mLGI)%FiTrrCqj?bBj^%J5gIP2j zd`<4{WaVF~xWX-VuflA?DyYLst^&eY%uL9k={TO-^c|;K$XI*p)^oSfDy~k5&tmGw tnVPot|LIFkWrk{8sr#-+9cO)aQf1=ZCtip2+VoU?BUb}CoYPF*#J>)jk&FNU literal 0 HcmV?d00001 diff --git a/spider/article.csv b/spider/article.csv new file mode 100644 index 0000000..a481b83 --- /dev/null +++ b/spider/article.csv @@ -0,0 +1 @@ +id,likeNum,commentsLen,reposts_count,region,content,contentLen,created_at,type,detailUrl,authorAvatar,authorName,authorDetail,isVip diff --git a/spider/main.py b/spider/main.py index 898fad6..132b661 100644 --- a/spider/main.py +++ b/spider/main.py @@ -1,13 +1,15 @@ -from spiderData import spiderData +from spiderContent import start as spiderContentStart +from spiderComments import start as spiderCommentsStart from saveData import save_to_sql as saveData def main(): - try: - spiderData() - saveData() - print("爬取数据更新") - except: - print("爬取数据失败") + print('正在爬取文章数据') + spiderContentStart(1,1) + print('正在爬取文章评论数据') + spiderCommentsStart() + print('正在存储数据') + saveData() + print("爬取数据更新") if __name__ == '__main__': main() \ No newline at end of file diff --git a/spider/nav.csv b/spider/nav.csv new file mode 100644 index 0000000..b61708e --- /dev/null +++ b/spider/nav.csv @@ -0,0 +1,60 @@ +typeName,gid,containerid +热门,102803,102803 +同城,1028032222,102803_2222 +榜单,102803600169,102803_ctg1_600169_-_ctg1_600169 +男篮,102803600279,102803_ctg1_600279_-_ctg1_600279 +明星,1028034288,102803_ctg1_4288_-_ctg1_4288 +车展,1028035188,102803_ctg1_5188_-_ctg1_5188 +搞笑,1028034388,102803_ctg1_4388_-_ctg1_4388 +情感,1028031988,102803_ctg1_1988_-_ctg1_1988 +周末,102803600195,102803_ctg1_600195_-_ctg1_600195 +电影,1028033288,102803_ctg1_3288_-_ctg1_3288 +社会,1028034188,102803_ctg1_4188_-_ctg1_4188 +电视剧,1028032488,102803_ctg1_2488_-_ctg1_2488 +美食,1028032688,102803_ctg1_2688_-_ctg1_2688 +俄乌局势,102803600267,102803_ctg1_600267_-_ctg1_600267 +国际,1028036288,102803_ctg1_6288_-_ctg1_6288 +深度,102803600155,102803_ctg1_600155_-_ctg1_600155 +财经,1028036388,102803_ctg1_6388_-_ctg1_6388 +读书,1028034588,102803_ctg1_4588_-_ctg1_4588 +摄影,1028034988,102803_ctg1_4988_-_ctg1_4988 +颜值,102803600165,102803_ctg1_600165_-_ctg1_600165 +体育,1028031388,102803_ctg1_1388_-_ctg1_1388 +数码,1028035088,102803_ctg1_5088_-_ctg1_5088 +综艺,1028034688,102803_ctg1_4688_-_ctg1_4688 +时尚,1028034488,102803_ctg1_4488_-_ctg1_4488 +星座,1028031688,102803_ctg1_1688_-_ctg1_1688 +军事,1028036688,102803_ctg1_6688_-_ctg1_6688 +股市,1028031288,102803_ctg1_1288_-_ctg1_1288 +房产,1028035588,102803_ctg1_5588_-_ctg1_5588 +家居,1028035888,102803_ctg1_5888_-_ctg1_5888 +萌宠,1028032788,102803_ctg1_2788_-_ctg1_2788 +科技,1028032088,102803_ctg1_2088_-_ctg1_2088 +科普,1028035988,102803_ctg1_5988_-_ctg1_5988 +动漫,1028032388,102803_ctg1_2388_-_ctg1_2388 +运动健身,1028034788,102803_ctg1_4788_-_ctg1_4788 +旅游,1028032588,102803_ctg1_2588_-_ctg1_2588 +瘦身,1028036488,102803_ctg1_6488_-_ctg1_6488 +好物,102803600094,102803_ctg1_600094_-_ctg1_600094 +历史,1028036788,102803_ctg1_6788_-_ctg1_6788 +艺术,1028035488,102803_ctg1_5488_-_ctg1_5488 +美妆,1028031588,102803_ctg1_1588_-_ctg1_1588 +法律,1028037388,102803_ctg1_7388_-_ctg1_7388 +设计,1028035388,102803_ctg1_5388_-_ctg1_5388 +健康,1028032188,102803_ctg1_2188_-_ctg1_2188 +音乐,1028035288,102803_ctg1_5288_-_ctg1_5288 +游戏,1028034888,102803_ctg1_4888_-_ctg1_4888 +新时代,1028037968,102803_ctg1_7968_-_ctg1_7968 +校园,102803600177,102803_ctg1_600177_-_ctg1_600177 +收藏,1028038189,102803_ctg1_8189_-_ctg1_8189 +政务,1028035788,102803_ctg1_5788_-_ctg1_5788 +养生,1028036588,102803_ctg1_6588_-_ctg1_6588 +育儿,1028033188,102803_ctg1_3188_-_ctg1_3188 +抽奖,102803600037,102803_ctg1_600037_-_ctg1_600037 +教育,102803600080,102803_ctg1_600080_-_ctg1_600080 +婚恋,1028031788,102803_ctg1_1788_-_ctg1_1788 +舞蹈,1028038788,102803_ctg1_8788_-_ctg1_8788 +辟谣,1028036988,102803_ctg1_6988_-_ctg1_6988 +公益,102803600057,102803_ctg1_600057_-_ctg1_600057 +问答,1028037977,102803_ctg1_7977_-_ctg1_7977 +三农,1028037188,102803_ctg1_7188_-_ctg1_7188 diff --git a/spider/spiderDataPack/spiderComments.py b/spider/spiderComments.py similarity index 100% rename from spider/spiderDataPack/spiderComments.py rename to spider/spiderComments.py diff --git a/spider/spiderDataPack/spiderContent.py b/spider/spiderContent.py similarity index 100% rename from spider/spiderDataPack/spiderContent.py rename to spider/spiderContent.py diff --git a/spider/spiderData.py b/spider/spiderData.py deleted file mode 100644 index 131c87a..0000000 --- a/spider/spiderData.py +++ /dev/null @@ -1,13 +0,0 @@ -from spiderDataPack.spiderNav import start as spiderNavStart -from spiderDataPack.spiderContent import start as spiderContentStart -from spiderDataPack.spiderComments import start as spiderCommentsStart -import os - -def spiderData(): - if not os.path.exists('./nav.csv'): - spiderNavStart() - spiderContentStart(1,1) - spiderCommentsStart() - -if __name__ == '__main__': - spiderData() \ No newline at end of file diff --git a/spider/spiderDataPack/__init__.py b/spider/spiderDataPack/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/spider/spiderDataPack/__pycache__/__init__.cpython-38.pyc b/spider/spiderDataPack/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index a5d539c2131a6117e5331b1f94b6f05d09c779a6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 206 zcmWIL<>g`k0(HUGG!Xq5L?8o3AjbiSi&=m~3PUi1CZpde zj>#%b%riDIj&VsX&MwI>hujqcdtN+EieJ|E6e7bJl zi|t!up6*-sw0G8vj%)pt! zT!f#tjUq*>RO(w)t!$}Mn-}+~YLth4?%%*`K@$-+^75WHQ;NyUtb6A*EjSBz}L}+{;4p^Ql6U(Eol!H{6NZ zg=S}fI8>LuV08MDws#y>r-?qI>$&HXz&Yi3F1!osl0K&w^o6Ti_qW+gnx<-$NF7ln zstz{?UF(If*29~ob{1~hXG?nWm3Qp4LWw2y8ME{wvuWvu*;dv}H0c{AQvA+rhzi;O zE!noAFCR25)6rl88+veU$yoN%Gt`{U(ubyo?SNzrk-22a*}e%j9E6wt6}KiqX1N9v z8KaNzW3cEm3cm&HpCcgp)@j8?*gox!H8T>8oTic8bFAU%m!JRk%g;Z(`P0kO>)-$3 z`o+ip_~XadUwm?N_44NG^7`t<&0qd}dj0b1`uV3fAANXp^~Lo^AO8KtZ%*x@p(A|9 z(wd_cwPx>5EP(C9+0tW}hT{X(_M1Q``XT)my+M0uhW!48zXM}Un#|mNEb#k)L_u&M zc&`NE?Q`0J04*Ru=gfk+U`aUs$NjznCXrpz8ukFkUP8z%f*$;c^xq(acn6bf1zCl~ z)&zg+ew293bsPRR^p@)@=a#GqS#Fb)DAy4DJ?MSI0HTkmb2I7#=&Y{)JLa5*Jc3Mu z9z(KhK`rP7qx+t-=j?**GWr_iYiz+@u$PcQB#Yck@)xugIHi10NbSjoJ1IFSh@vP( z`GgST_-L5t;y_~YSc2o@u~Zx1NCw#6FNT3JuM`w zj$A)8PRa*%qHp22J&ziDGovkU2g;S5q$;EYQSO{oq%2C|0tK9x<;}&-ZG40SRX3AhOp-f_Rk8af&2mBMu&^1;vb=v7q#77bYGqhN+Uk1X#K%aVKQ&k@S@Hsg8iTv|i=y zEZ>d|LX)gmI0|nER9khB>EKxsVwxIi%??WbpB&u|6nE8C`o$$t)Z$u$Z*2#(;jo8h zaN#yW4zeB9v3kf%#yFv=M^QwJBb|o~PjHD=LepD_*90z_XnE;fx>Zs=fWHW+%RL&?ZA`}e7404@h}$*?4f4Q+7vjRgFily zF)(Ub)6LZ^qxg0$t7oA327%sCstp$3637cO${U8QLJd%D)E>aFvfe^Yv!iw_hoE=| zX#lVG(l^wwHn4C1N;Y|xevC$Au+fHMfgI(5?P$)}R-vXK#nZJd6V|X}+{fRC_4o-8 zvc(=+Cg^_+*!*yUssnO=7na!sjVT6z`QJXTOFeG_So<0LV;KJi$`J&-N4Yu(4s`*D zrqfR-oq@(aq3$HlO1cF6ThM9hmp>!0t$UwPfO>#_z&*g%fO{Qg!7e-tZ<~DvkUo0v z4*wKi8(ZU{eM@SC;&p{v0<%l6V>nRd{LsgwXaw0MV-A--$OdHUE;a{!_(x>h8q{JL zYNx_N$CXIA{d()`j@ba8+ejtud6===zvtQ?WZiP< z2E$bwWbhB+eAmF&@9%}Izzzq~HgOL{`=IFhY5Y^@Fa4@Ifais(1|Q%bz<^s06|XrS z3}^T!WPrTbR@z8``&#AweiOW>s;_8u6U!}@{+|ylecVSMkR*2