diff --git a/src/validators/url.py b/src/validators/url.py
index 16698b1f..00df3d63 100644
--- a/src/validators/url.py
+++ b/src/validators/url.py
@@ -3,7 +3,7 @@
 # standard
 from functools import lru_cache
 import re
-from urllib.parse import unquote, urlsplit
+from urllib.parse import parse_qs, unquote, urlsplit
 
 # local
 from .hostname import hostname
@@ -34,11 +34,6 @@ def _path_regex():
     )
 
 
-@lru_cache
-def _query_regex():
-    return re.compile(r"&?(\w+=?[^\s&]*)", re.IGNORECASE)
-
-
 def _validate_scheme(value: str):
     """Validate scheme."""
     # More schemes will be considered later.
@@ -108,16 +103,16 @@ def _validate_netloc(
     ) and _validate_auth_segment(basic_auth)
 
 
-def _validate_optionals(path: str, query: str, fragment: str):
+def _validate_optionals(path: str, query: str, fragment: str, strict_query: bool):
     """Validate path query and fragments."""
     optional_segments = True
     if path:
         optional_segments &= bool(_path_regex().match(path))
-    if query:
-        optional_segments &= bool(_query_regex().match(query))
+    if query and parse_qs(query, strict_parsing=strict_query):
+        optional_segments &= True
     if fragment:
         fragment = fragment.lstrip("/") if fragment.startswith("/") else fragment
-        optional_segments &= all(char_to_avoid not in fragment for char_to_avoid in ("/", "?"))
+        optional_segments &= all(char_to_avoid not in fragment for char_to_avoid in ("?",))
     return optional_segments
 
 
@@ -130,6 +125,7 @@ def url(
     skip_ipv4_addr: bool = False,
     may_have_port: bool = True,
     simple_host: bool = False,
+    strict_query: bool = True,
     rfc_1034: bool = False,
     rfc_2782: bool = False,
 ):
@@ -167,6 +163,8 @@ def url(
             URL string may contain port number.
         simple_host:
             URL string maybe only hyphens and alpha-numerals.
+        strict_query:
+            Fail validation on query string parsing error.
         rfc_1034:
             Allow trailing dot in domain/host name.
             Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
@@ -214,5 +212,5 @@ def url(
             rfc_1034,
             rfc_2782,
         )
-        and _validate_optionals(path, query, fragment)
+        and _validate_optionals(path, query, fragment, strict_query)
     )
diff --git a/tests/test_url.py b/tests/test_url.py
index 62332f5a..558d50ce 100644
--- a/tests/test_url.py
+++ b/tests/test_url.py
@@ -19,7 +19,6 @@
         "http://foo.com/blah_blah_(wikipedia)",
         "http://foo.com/blah_blah_(wikipedia)_(again)",
         "http://www.example.com/wpstyle/?p=364",
-        "https://www.example.com/foo/?bar=baz&inga=42&quux",
         "https://www.example.com?bar=baz",
         "http://✪df.ws/123",
         "http://userid:password@example.com:8080",
@@ -85,12 +84,18 @@
         "http://:::::::::::::@exmp.com",
         "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com",
         "https://exchange.jetswap.finance/#/swap",
+        "https://www.foo.com/bar#/baz/test",
         # when simple_host=True
         # "http://localhost",
         # "http://localhost:8000",
         # "http://pc:8081/",
         # "http://3628126748",
         # "http://foobar",
+        # when strict_query=False
+        # "https://www.example.com/foo/?bar=baz&inga=42&quux",
+        # "https://foo.bar.net/baz.php?-/inga/test-lenient-query/",
+        # "https://foo.com/img/bar/baz.jpg?-62169987208",
+        # "https://example.com/foo/?bar#!baz/inga/8SA-M3as7A8",
     ],
 )
 def test_returns_true_on_valid_url(value: str):
@@ -144,6 +149,10 @@ def test_returns_true_on_valid_url(value: str):
         "http://[2010:836B:4179::836B:4179",
         "http://2010:836B:4179::836B:4179",
         "http://2010:836B:4179::836B:4179:80/index.html",
+        "https://www.example.com/foo/?bar=baz&inga=42&quux",
+        "https://foo.com/img/bar/baz.jpg?-62169987208",
+        "https://foo.bar.net/baz.php?-/inga/test-lenient-query/",
+        "https://example.com/foo/?bar#!baz/inga/8SA-M3as7A8",
         "http://0.00.00.00.00.00.00.00.00.00.00.00.00.00.00."
         + "00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00."
         + "00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00."