From 0435142bc86267245c09f9e07f8db6abbdd42b7f Mon Sep 17 00:00:00 2001 From: danherbriley Date: Tue, 12 Sep 2023 14:18:40 +0200 Subject: [PATCH 1/3] fix: query string and fragment validations --- src/validators/url.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/validators/url.py b/src/validators/url.py index 16698b1f..e19dc6f7 100644 --- a/src/validators/url.py +++ b/src/validators/url.py @@ -3,7 +3,7 @@ # standard from functools import lru_cache import re -from urllib.parse import unquote, urlsplit +from urllib.parse import parse_qs, unquote, urlsplit # local from .hostname import hostname @@ -34,11 +34,6 @@ def _path_regex(): ) -@lru_cache -def _query_regex(): - return re.compile(r"&?(\w+=?[^\s&]*)", re.IGNORECASE) - - def _validate_scheme(value: str): """Validate scheme.""" # More schemes will be considered later. @@ -108,16 +103,21 @@ def _validate_netloc( ) and _validate_auth_segment(basic_auth) -def _validate_optionals(path: str, query: str, fragment: str): +def _validate_optionals( + path: str, + query: str, + fragment: str, + strict_query: bool = False, +): """Validate path query and fragments.""" optional_segments = True if path: optional_segments &= bool(_path_regex().match(path)) - if query: - optional_segments &= bool(_query_regex().match(query)) + if query and parse_qs(query, strict_parsing=strict_query): + optional_segments &= True if fragment: fragment = fragment.lstrip("/") if fragment.startswith("/") else fragment - optional_segments &= all(char_to_avoid not in fragment for char_to_avoid in ("/", "?")) + optional_segments &= all(char_to_avoid not in fragment for char_to_avoid in ("?",)) return optional_segments @@ -130,6 +130,7 @@ def url( skip_ipv4_addr: bool = False, may_have_port: bool = True, simple_host: bool = False, + strict_query: bool = True, rfc_1034: bool = False, rfc_2782: bool = False, ): @@ -167,6 +168,8 @@ def url( URL string may contain port number. simple_host: URL string maybe only hyphens and alpha-numerals. + strict_query: + Fail validation on query string parsing error. rfc_1034: Allow trailing dot in domain/host name. Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034). @@ -214,5 +217,5 @@ def url( rfc_1034, rfc_2782, ) - and _validate_optionals(path, query, fragment) + and _validate_optionals(path, query, fragment, strict_query) ) From 1e7afe9c7f81a2e903905aa0fb3863f2caa88f6d Mon Sep 17 00:00:00 2001 From: danherbriley Date: Tue, 12 Sep 2023 14:27:57 +0200 Subject: [PATCH 2/3] Add tests for query and fragment parts of url --- tests/test_url.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_url.py b/tests/test_url.py index 62332f5a..558d50ce 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -19,7 +19,6 @@ "http://foo.com/blah_blah_(wikipedia)", "http://foo.com/blah_blah_(wikipedia)_(again)", "http://www.example.com/wpstyle/?p=364", - "https://www.example.com/foo/?bar=baz&inga=42&quux", "https://www.example.com?bar=baz", "http://✪df.ws/123", "http://userid:password@example.com:8080", @@ -85,12 +84,18 @@ "http://:::::::::::::@exmp.com", "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com", "https://exchange.jetswap.finance/#/swap", + "https://www.foo.com/bar#/baz/test", # when simple_host=True # "http://localhost", # "http://localhost:8000", # "http://pc:8081/", # "http://3628126748", # "http://foobar", + # when strict_query=False + # "https://www.example.com/foo/?bar=baz&inga=42&quux", + # "https://foo.bar.net/baz.php?-/inga/test-lenient-query/", + # "https://foo.com/img/bar/baz.jpg?-62169987208", + # "https://example.com/foo/?bar#!baz/inga/8SA-M3as7A8", ], ) def test_returns_true_on_valid_url(value: str): @@ -144,6 +149,10 @@ def test_returns_true_on_valid_url(value: str): "http://[2010:836B:4179::836B:4179", "http://2010:836B:4179::836B:4179", "http://2010:836B:4179::836B:4179:80/index.html", + "https://www.example.com/foo/?bar=baz&inga=42&quux", + "https://foo.com/img/bar/baz.jpg?-62169987208", + "https://foo.bar.net/baz.php?-/inga/test-lenient-query/", + "https://example.com/foo/?bar#!baz/inga/8SA-M3as7A8", "http://0.00.00.00.00.00.00.00.00.00.00.00.00.00.00." + "00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00." + "00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00." From b8dcac783013a896abb89e453d424d8780098f02 Mon Sep 17 00:00:00 2001 From: Jovial Joe Jayarson Date: Sun, 17 Sep 2023 13:51:52 +0530 Subject: [PATCH 3/3] fix: formatting & default value --- src/validators/url.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/validators/url.py b/src/validators/url.py index e19dc6f7..00df3d63 100644 --- a/src/validators/url.py +++ b/src/validators/url.py @@ -103,12 +103,7 @@ def _validate_netloc( ) and _validate_auth_segment(basic_auth) -def _validate_optionals( - path: str, - query: str, - fragment: str, - strict_query: bool = False, -): +def _validate_optionals(path: str, query: str, fragment: str, strict_query: bool): """Validate path query and fragments.""" optional_segments = True if path: