# File-viewer header (extraction residue): 52 lines, 1.4 KiB, Python
"""Fetch a small demo HTML page and parse it with BeautifulSoup."""

import requests
from bs4 import BeautifulSoup

url = "https://python123.io/ws/demo.html"

# Download the page. Only the network call and the status check live inside
# the try block so that unrelated errors are not swallowed.
try:
    # A timeout keeps the script from hanging forever on an unresponsive host.
    r = requests.get(url, timeout=30)
    # Turn 4xx/5xx responses into an exception instead of parsing an error page.
    r.raise_for_status()
except requests.RequestException as exc:
    print("出现异常")
    # Stop here: without a successful response there is nothing to parse, and
    # continuing would fail later on an undefined or unusable `r`.
    raise SystemExit(1) from exc

# Use the encoding detected from the response body rather than the one
# guessed from the HTTP headers, so that r.text decodes correctly.
r.encoding = r.apparent_encoding

demo = r.text
soup = BeautifulSoup(demo, "html.parser")
# Downward traversal: inspect the <head> and <body> tags and their direct
# children.
head = soup.head
print(head)  # original note: <head><title>This is a python demo page</title></head>
print(head.contents)  # original note: [<title>This is a python demo page</title>]

body = soup.body
print(body)

# .contents is the list of the tag's direct child nodes.
body_nodes = body.contents
print(body_nodes)
print(len(body_nodes))  # original note: the body tag has 5 child nodes on the demo page
# The original note calls index 1 "the first child of body"; presumably
# index 0 is a whitespace-only text node -- verify against the page.
print(body_nodes[1])
body = soup.body

# Iterate over the direct children of <body>.
for node in body.children:
    print(node)

# Iterate over every descendant of <body> (children, grandchildren, ...).
for node in body.descendants:
    print(node)
# Upward traversal: walk from the first <a> tag towards the document root.
first_link = soup.a
print(first_link.parent)  # the parent node of the <a> tag

# .parents yields each ancestor in turn and stops once there is no further
# parent, so it never produces None; the former `is None` branch could not
# run and has been removed. Each ancestor's name is printed.
for ancestor in first_link.parents:
    print(ancestor.name)
# Sibling traversal: look at the nodes next to the first <a> tag.
first_link = soup.a

# Original note: the result is " and " -- sibling traversal also reaches
# NavigableString (plain text) nodes, not only tags.
print(first_link.next_sibling)
print(first_link.previous_sibling)

# Every sibling that follows the <a> tag.
for following in first_link.next_siblings:
    print(following)
# Output recorded by the original author:
#  and
# <a class="py2" href="http://www.icourse163.org/course/BIT-1001870001" id="link2">Advanced Python</a>
# .

# Every sibling that precedes the <a> tag.
for preceding in first_link.previous_siblings:
    print(preceding)
# Output recorded by the original author:
# Python is a wonderful general-purpose programming language. You can learn Python from novice to professional by tracking the following courses: