Improve the Security of Regular Expression Matching
This commit is contained in:
@@ -51,9 +51,37 @@ class TemplateSection:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
heading_pattern = re.compile(r"^(#{1,6})\s+(.*)$")
|
# The parsing expressions intentionally avoid `.*` to keep matching deterministic and
|
||||||
bullet_pattern = re.compile(r"^[-*+]\s+(.*)$")
|
# eliminate easy Regular-Expression-DoS gadgets on untrusted template text.
|
||||||
number_pattern = re.compile(r"^(?P<num>\d+(?:\.\d+)*)(?:[\s、::.-]+(?P<label>.*))?$")
|
heading_pattern = re.compile(
|
||||||
|
r"""
|
||||||
|
(?P<marker>\#{1,6}) # Markdown heading markers
|
||||||
|
[ \t]+ # required whitespace
|
||||||
|
(?P<title>[^\r\n]+) # heading text without newline characters
|
||||||
|
""",
|
||||||
|
re.VERBOSE,
|
||||||
|
)
|
||||||
|
bullet_pattern = re.compile(
|
||||||
|
r"""
|
||||||
|
(?P<marker>[-*+]) # list bullet symbol
|
||||||
|
[ \t]+
|
||||||
|
(?P<title>[^\r\n]+)
|
||||||
|
""",
|
||||||
|
re.VERBOSE,
|
||||||
|
)
|
||||||
|
# Matches an outline number such as "1", "2.3" or "1.2.10", optionally followed
# by one or more separator characters and a free-text label on the same line.
#
# Named groups:
#   num   -- the dotted number; each component is "0" or has no leading zero.
#   label -- the text after the separators (None when nothing follows the number).
#
# The separator group consumes exactly ONE character per iteration. Writing it as
# ``(?:[...]+|\.(?!\d))+`` nests an unbounded quantifier inside another one, so a
# run of N separators can be split in ~2**N ways; whenever the overall match then
# fails (e.g. ``fullmatch`` hits an embedded line break) the backtracking engine
# explores all of them. The single-character form accepts exactly the same
# strings and yields the same captures without that ambiguity.
number_pattern = re.compile(
    r"""
    (?P<num>
        (?:0|[1-9]\d*)
        (?:\.(?:0|[1-9]\d*))*
    )
    (?:
        (?:[ \t\u00A0\u3000、::-]|\.(?!\d))+
        (?P<label>[^\r\n]*)
    )?
    """,
    re.VERBOSE,
)
|
||||||
|
|
||||||
|
|
||||||
def parse_template_sections(template_md: str) -> List[TemplateSection]:
|
def parse_template_sections(template_md: str) -> List[TemplateSection]:
|
||||||
@@ -128,10 +156,10 @@ def _classify_line(stripped: str, indent: int) -> Optional[dict]:
|
|||||||
dict | None: 识别后的元数据;无法识别时返回None。
|
dict | None: 识别后的元数据;无法识别时返回None。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
heading_match = heading_pattern.match(stripped)
|
heading_match = heading_pattern.fullmatch(stripped)
|
||||||
if heading_match:
|
if heading_match:
|
||||||
level = len(heading_match.group(1))
|
level = len(heading_match.group("marker"))
|
||||||
payload = _strip_markup(heading_match.group(2).strip())
|
payload = _strip_markup(heading_match.group("title").strip())
|
||||||
title_info = _split_number(payload)
|
title_info = _split_number(payload)
|
||||||
slug = _build_slug(title_info["number"], title_info["title"])
|
slug = _build_slug(title_info["number"], title_info["title"])
|
||||||
return {
|
return {
|
||||||
@@ -143,9 +171,9 @@ def _classify_line(stripped: str, indent: int) -> Optional[dict]:
|
|||||||
"slug": slug,
|
"slug": slug,
|
||||||
}
|
}
|
||||||
|
|
||||||
bullet_match = bullet_pattern.match(stripped)
|
bullet_match = bullet_pattern.fullmatch(stripped)
|
||||||
if bullet_match:
|
if bullet_match:
|
||||||
payload = _strip_markup(bullet_match.group(1).strip())
|
payload = _strip_markup(bullet_match.group("title").strip())
|
||||||
title_info = _split_number(payload)
|
title_info = _split_number(payload)
|
||||||
slug = _build_slug(title_info["number"], title_info["title"])
|
slug = _build_slug(title_info["number"], title_info["title"])
|
||||||
is_section = indent <= 1
|
is_section = indent <= 1
|
||||||
@@ -160,7 +188,7 @@ def _classify_line(stripped: str, indent: int) -> Optional[dict]:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# 兼容“1.1 ...”没有前缀符号的行
|
# 兼容“1.1 ...”没有前缀符号的行
|
||||||
number_match = number_pattern.match(stripped)
|
number_match = number_pattern.fullmatch(stripped)
|
||||||
if number_match and number_match.group("label"):
|
if number_match and number_match.group("label"):
|
||||||
payload = stripped
|
payload = stripped
|
||||||
title = number_match.group("label").strip()
|
title = number_match.group("label").strip()
|
||||||
@@ -201,7 +229,7 @@ def _split_number(payload: str) -> dict:
|
|||||||
返回:
|
返回:
|
||||||
dict: 包含 number/title/display。
|
dict: 包含 number/title/display。
|
||||||
"""
|
"""
|
||||||
match = number_pattern.match(payload)
|
match = number_pattern.fullmatch(payload)
|
||||||
number = match.group("num") if match else ""
|
number = match.group("num") if match else ""
|
||||||
label = match.group("label") if match else payload
|
label = match.group("label") if match else payload
|
||||||
label = (label or "").strip()
|
label = (label or "").strip()
|
||||||
|
|||||||
Reference in New Issue
Block a user