"""Walk through the basic BeautifulSoup element types on a small demo page.

The script downloads the demo HTML page, parses it, and prints examples of
tags, tag names, tag attributes, navigable strings and comments.
"""
from bs4 import BeautifulSoup
import requests

url = "https://python123.io/ws/demo.html"
try:
    r = requests.get(url, timeout=20)
    r.raise_for_status()
except requests.RequestException:
    # Without a successful response there is nothing to parse, so stop here
    # instead of falling through to code that needs `r`.
    print("出现异常")
    raise SystemExit(1)
r.encoding = r.apparent_encoding
demo = r.text
soup = BeautifulSoup(demo, "html.parser")

# --- Tag ---
print(soup.title)  # the <title> tag of the page

# --- Tag name ---
tag = soup.a
print(tag)  # the first <a> tag in the document
print(soup.a.name)
print(soup.a.parent.name)  # name of the <a> tag's parent
print(soup.a.parent.parent.name)  # name of the <p> tag's parent

# --- Tag attributes ---
print(tag.attrs)  # all attributes of the tag
# Result: {'href': 'http://www.icourse163.org/course/BIT-268001', 'class': ['py1'], 'id': 'link1'}
print(tag.attrs['class'])  # value of the class attribute
# Result: ['py1']
print(type(tag.attrs))
# <class 'dict'>
print(type(tag))
# <class 'bs4.element.Tag'>

# --- Non-attribute string inside a tag ---
print(soup.a)  # the whole <a> element; its text is "Basic Python"
print(soup.a.string)  # the non-attribute string inside the <a> tag
# Basic Python
print(soup.b)  # the whole <b> element
print(soup.b.string)
# The demo python introduces several python courses.
# NOTE(review): the original remark here was partially lost; it appears to say
# that the string carries no tag markup, so a NavigableString can reach
# across nested tags -- confirm against the course notes.
print(type(soup.a.string))
# <class 'bs4.element.NavigableString'>

# --- Comment ---
# NOTE(review): the markup of this literal was stripped from the file; it is
# reconstructed as a <b> holding an HTML comment and a <p> holding plain text,
# which is what the prints below rely on -- confirm the exact original text.
newsoup = BeautifulSoup(
    "<b><!--This is a comment--></b><p>This is not a comment</p>",
    "html.parser",
)
print(newsoup.b.string)
print(newsoup.p.string)
# The printed output does not reveal which string came from a comment, so
# type() is needed to tell a Comment from an ordinary NavigableString.
print(type(newsoup.b.string))
print(type(newsoup.p.string))