Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -1227,6 +1227,13 @@ def test_parse_qs_encoding(self):
errors="ignore")
self.assertEqual(result, {'key': ['\u0141-']})

def test_qsl_strict_parsing_raises(self):
    # With strict_parsing=True, parse_qsl must raise ValueError for a field
    # holding a character outside the RFC 3986 query set; exercised once
    # with str input and once with bytes input.
    invalid_queries = ("foo=^", b"foo=`")
    for query in invalid_queries:
        with self.assertRaises(ValueError):
            urllib.parse.parse_qsl(query, strict_parsing=True)

def test_parse_qsl_encoding(self):
result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
self.assertEqual(result, [('key', '\u0141\xE9')])
Expand Down
19 changes: 19 additions & 0 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@
# Unsafe bytes to be removed per WHATWG spec
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']

# Non-alphanumeric characters that RFC 3986 permits in a query component:
# the unreserved punctuation "-._~", the sub-delims "!$&'()*+,;=", the
# extra pchar/query characters ":@/?", and "%" (start of a percent-escape).
# ASCII letters and digits are also valid but are not listed here;
# _is_valid_rfc3986_query() accepts them separately via str.isalnum().
_VALID_RFC3986_QUERY_CHARS = "-._~!$&'()*+,;=:@/?%"

def clear_cache():
"""Clear internal performance caches. Undocumented; some tests want it."""
urlsplit.cache_clear()
Expand Down Expand Up @@ -778,6 +781,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
parsed_result[name] = [value]
return parsed_result

def _is_valid_rfc3986_query(chars):
    """Tell whether every character of *chars* is allowed by RFC 3986.

    A character is accepted when it is ASCII and is either alphanumeric or
    one of the characters in _VALID_RFC3986_QUERY_CHARS.  Returns False as
    soon as an unacceptable character is met; an empty input yields True.
    """
    return all(
        c.isascii() and (c.isalnum() or c in _VALID_RFC3986_QUERY_CHARS)
        for c in chars
    )

def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace', max_num_fields=None, separator='&', *, _stacklevel=1):
Expand Down Expand Up @@ -854,6 +866,13 @@ def _unquote(s):
name, has_eq, value = name_value.partition(eq)
if not has_eq and strict_parsing:
raise ValueError("bad query field: %r" % (name_value,))
if strict_parsing:
# Validate RFC3986 characters
to_check = _unquote(name_value)
if isinstance(to_check, (bytes, bytearray)):
to_check = to_check.decode(encoding, errors)
if not _is_valid_rfc3986_query(to_check):
raise ValueError(f"Invalid characters in query string per RFC 3986: {name_value!r}")
if value or keep_blank_values:
name = _unquote(name)
value = _unquote(value)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:mod:`urllib.parse`: when *strict_parsing* is true, :func:`~urllib.parse.parse_qsl` now raises a :exc:`ValueError` if a query field contains characters that :rfc:`3986` does not allow in a query component.
Loading