This commit is contained in:
2025-08-05 09:19:34 +08:00
commit 584548d006
1696 changed files with 53855 additions and 0 deletions
+50
View File
@@ -0,0 +1,50 @@
from bs4 import BeautifulSoup
import requests
# Fetch the demo page whose HTML the rest of this script parses.
url = "https://python123.io/ws/demo.html"
try:
    r = requests.get(url, timeout=20)
    # Turn 4xx/5xx responses into an exception instead of parsing an error page.
    r.raise_for_status()
    # Decode the body with the encoding detected from its content.
    r.encoding = r.apparent_encoding
except requests.RequestException:
    # Only network/HTTP failures are handled here (a bare ``except`` would also
    # swallow KeyboardInterrupt).  Without a usable response there is nothing
    # to parse, and falling through would hit ``r.text`` with ``r`` possibly
    # unbound (NameError), so report the failure and stop.
    print("出现异常")
    raise SystemExit(1)
demo = r.text
# Parse the fetched HTML with the built-in "html.parser" backend.
soup = BeautifulSoup(demo, "html.parser")

# --- Tags ---
# Attribute access on the soup returns the first tag with that name.
print(soup.title)  # the <title> tag

# --- Tag names ---
tag = soup.a
print(tag)  # the first <a> tag in the document
print(tag.name)
link_parent = tag.parent
print(link_parent.name)  # name of the <a> tag's parent
print(link_parent.parent.name)  # name of that parent's own parent

# --- Tag attributes ---
link_attrs = tag.attrs
print(link_attrs)  # all attributes of the tag
# Recorded result:
# {'href': 'http://www.icourse163.org/course/BIT-268001', 'class': ['py1'], 'id': 'link1'}
print(link_attrs['class'])  # value of the class attribute
# Recorded result: ['py1']
print(type(link_attrs))  # <class 'dict'>
print(type(tag))  # <class 'bs4.element.Tag'>

# --- Non-attribute string inside a tag ---
print(tag)  # the <a> tag again, markup included
# <a class="py1" href="http://www.icourse163.org/course/BIT-268001" id="link1">Basic Python</a>
link_text = tag.string
print(link_text)  # only the text inside the <a> tag
# Basic Python
bold = soup.b
print(bold)  # <b>The demo python introduces several python courses.</b>
print(bold.string)  # The demo python introduces several python courses.
# The .string value carries no surrounding <b></b> markup.
# NOTE(review): the original note concludes that a NavigableString can span
# several levels of tags - confirm against the Beautiful Soup docs.
print(type(link_text))  # <class 'bs4.element.NavigableString'>
# --- Comments inside tags ---
# A tiny document: <b> holds only an HTML comment, <p> holds plain text.
newsoup = BeautifulSoup("<b><!-- This is a comment--></b><p>This is not a comment</p>", "html.parser")
samples = (newsoup.b.string, newsoup.p.string)
# Per the original note, the printed text gives no indication that the first
# value came from a comment ...
for node in samples:
    print(node)
# ... so the type of each value is printed to tell them apart.
for node in samples:
    print(type(node))