Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions src/validators/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# standard
from functools import lru_cache
import re
from urllib.parse import unquote, urlsplit
from urllib.parse import parse_qs, unquote, urlsplit

# local
from .hostname import hostname
Expand Down Expand Up @@ -34,11 +34,6 @@ def _path_regex():
)


@lru_cache
def _query_regex():
return re.compile(r"&?(\w+=?[^\s&]*)", re.IGNORECASE)


def _validate_scheme(value: str):
"""Validate scheme."""
# More schemes will be considered later.
Expand Down Expand Up @@ -108,16 +103,16 @@ def _validate_netloc(
) and _validate_auth_segment(basic_auth)


def _validate_optionals(path: str, query: str, fragment: str):
def _validate_optionals(path: str, query: str, fragment: str, strict_query: bool):
"""Validate path query and fragments."""
optional_segments = True
if path:
optional_segments &= bool(_path_regex().match(path))
if query:
optional_segments &= bool(_query_regex().match(query))
if query and parse_qs(query, strict_parsing=strict_query):
optional_segments &= True
if fragment:
fragment = fragment.lstrip("/") if fragment.startswith("/") else fragment
optional_segments &= all(char_to_avoid not in fragment for char_to_avoid in ("/", "?"))
optional_segments &= all(char_to_avoid not in fragment for char_to_avoid in ("?",))
return optional_segments


Expand All @@ -130,6 +125,7 @@ def url(
skip_ipv4_addr: bool = False,
may_have_port: bool = True,
simple_host: bool = False,
strict_query: bool = True,
rfc_1034: bool = False,
rfc_2782: bool = False,
):
Expand Down Expand Up @@ -167,6 +163,8 @@ def url(
URL string may contain port number.
simple_host:
URL string may contain only hyphens and alpha-numerals.
strict_query:
Fail validation on query string parsing error.
rfc_1034:
Allow trailing dot in domain/host name.
Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
Expand Down Expand Up @@ -214,5 +212,5 @@ def url(
rfc_1034,
rfc_2782,
)
and _validate_optionals(path, query, fragment)
and _validate_optionals(path, query, fragment, strict_query)
)
11 changes: 10 additions & 1 deletion tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"http://foo.com/blah_blah_(wikipedia)",
"http://foo.com/blah_blah_(wikipedia)_(again)",
"http://www.example.com/wpstyle/?p=364",
"https://www.example.com/foo/?bar=baz&inga=42&quux",
"https://www.example.com?bar=baz",
"http://✪df.ws/123",
"http://userid:[email protected]:8080",
Expand Down Expand Up @@ -85,12 +84,18 @@
"http://:::::::::::::@exmp.com",
"http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com",
"https://exchange.jetswap.finance/#/swap",
"https://www.foo.com/bar#/baz/test",
# when simple_host=True
# "http://localhost",
# "http://localhost:8000",
# "http://pc:8081/",
# "http://3628126748",
# "http://foobar",
# when strict_query=False
# "https://www.example.com/foo/?bar=baz&inga=42&quux",
# "https://foo.bar.net/baz.php?-/inga/test-lenient-query/",
# "https://foo.com/img/bar/baz.jpg?-62169987208",
# "https://example.com/foo/?bar#!baz/inga/8SA-M3as7A8",
],
)
def test_returns_true_on_valid_url(value: str):
Expand Down Expand Up @@ -144,6 +149,10 @@ def test_returns_true_on_valid_url(value: str):
"http://[2010:836B:4179::836B:4179",
"http://2010:836B:4179::836B:4179",
"http://2010:836B:4179::836B:4179:80/index.html",
"https://www.example.com/foo/?bar=baz&inga=42&quux",
"https://foo.com/img/bar/baz.jpg?-62169987208",
"https://foo.bar.net/baz.php?-/inga/test-lenient-query/",
"https://example.com/foo/?bar#!baz/inga/8SA-M3as7A8",
"http://0.00.00.00.00.00.00.00.00.00.00.00.00.00.00."
+ "00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00."
+ "00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00."
Expand Down