From 297f42a2f7b8c8706334c3976f71ddbf95ecbe3c Mon Sep 17 00:00:00 2001 From: Matthew Wilson <22526682+automationator@users.noreply.github.com> Date: Sat, 11 Jul 2020 14:58:43 -0400 Subject: [PATCH 1/6] Adds support for emojis and more IDNA URLs --- tests/test_url.py | 4 ++++ validators/url.py | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_url.py b/tests/test_url.py index 3ef921af..29ce7ea3 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -42,6 +42,10 @@ u'http://مثال.إختبار', u'http://例子.测试', u'http://उदाहरण.परीक्षा', + u'http://www.😉.com', + u'http://😉.com/😁', + u'http://উদাহরণ.বাংলা', + u'http://xn--d5b6ci4b4b3a.xn--54b7fta0cc', u'http://-.~_!$&\'()*+,;=:%40:80%2f::::::@example.com', u'http://1337.net', u'http://a.b-c.de', diff --git a/validators/url.py b/validators/url.py index 80dc59dc..7db8bd65 100644 --- a/validators/url.py +++ b/validators/url.py @@ -69,16 +69,16 @@ r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])" r")\]|" # host name - r"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)" + r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)" # domain name - r"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*" + r"(?:\.(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)*" # TLD identifier - r"(?:\.(?:[a-z\u00a1-\uffff]{2,}))" + r"(?:\.(?:(?:xn--[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]{2,})|[a-z\u00a1-\uffff\U00010000-\U0010ffff]{2,}))" r")" # port number r"(?::\d{2,5})?" # resource path - u"(?:/[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:@/]*)?" + u"(?:/[-a-z\u00a1-\uffff\U00010000-\U0010ffff0-9._~%!$&'()*+,;=:@/]*)?" # query string r"(?:\?\S*)?" # fragment From 42ea2fd7007cfb8013a42ddedbad6ea57be51a39 Mon Sep 17 00:00:00 2001 From: Matthew Wilson <22526682+automationator@users.noreply.github.com> Date: Sat, 11 Jul 2020 15:20:17 -0400 Subject: [PATCH 2/6] Adds setup_requires parameter --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index b4d1bddd..798eb954 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,7 @@ def get_version(): install_requires=install_requires, build_requires=install_requires, extras_require=extras_require, + setup_requires=['wheel'], classifiers=[ 'Environment :: Web Environment', 'Intended Audience :: Developers', From 4fd2b5711864bd1789b6f289573362d3f8cdc7e1 Mon Sep 17 00:00:00 2001 From: Matthew Wilson <22526682+automationator@users.noreply.github.com> Date: Sat, 11 Jul 2020 15:29:45 -0400 Subject: [PATCH 3/6] Validates URLs with IPs ending in .0 or .255 --- tests/test_url.py | 4 ++-- validators/url.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_url.py b/tests/test_url.py index 29ce7ea3..8fcc67be 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -50,8 +50,10 @@ u'http://1337.net', u'http://a.b-c.de', u'http://223.255.255.254', + u'http://10.1.1.0', u'http://10.1.1.1', u'http://10.1.1.254', + u'http://10.1.1.255', u'http://127.0.0.1:8080', u'http://127.0.10.150', u'http://localhost', @@ -118,8 +120,6 @@ def test_returns_true_on_valid_public_url(address, public): 'http://-a.b.co', 'http://a.b-.co', 'http://0.0.0.0', - 'http://10.1.1.0', - 'http://10.1.1.255', 'http://224.1.1.1', 'http://1.1.1.1.1', 'http://123.123.123', diff --git a/validators/url.py b/validators/url.py index 7db8bd65..6a1a5700 100644 --- a/validators/url.py +++ b/validators/url.py @@ -3,7 +3,7 @@ from .utils import validator ip_middle_octet = r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5]))" -ip_last_octet = r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +ip_last_octet = r"(?:\.(?:0|[1-9]\d?|1\d\d|2[0-4]\d|25[0-5]))" regex = re.compile( # noqa: W605 r"^" From 80dc2265e67de278488d30ed0db582b4b0ca0a14 Mon Sep 17 00:00:00 2001 From: Matthew Wilson <22526682+automationator@users.noreply.github.com> Date: Mon, 13 Jul 2020 07:51:32 -0400 Subject: [PATCH 4/6] Fixed flake8 errors --- validators/url.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/validators/url.py b/validators/url.py index 6a1a5700..751f4cb3 100644 --- a/validators/url.py +++ b/validators/url.py @@ -69,11 +69,14 @@ r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])" r")\]|" # host name - r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)" + r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" + r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)" # domain name - r"(?:\.(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)*" + r"(?:\.(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" + r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)*" # TLD identifier - r"(?:\.(?:(?:xn--[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]{2,})|[a-z\u00a1-\uffff\U00010000-\U0010ffff]{2,}))" + r"(?:\.(?:(?:xn--[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]{2,})|" + r"[a-z\u00a1-\uffff\U00010000-\U0010ffff]{2,}))" r")" # port number r"(?::\d{2,5})?" From d8b2633c4e3d5c44a2f18c9959c473a8737d0ec9 Mon Sep 17 00:00:00 2001 From: Matthew Wilson <22526682+automationator@users.noreply.github.com> Date: Mon, 13 Jul 2020 09:15:14 -0400 Subject: [PATCH 5/6] Revert "Adds setup_requires parameter" This reverts commit 42ea2fd7007cfb8013a42ddedbad6ea57be51a39. --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 798eb954..b4d1bddd 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,6 @@ def get_version(): install_requires=install_requires, build_requires=install_requires, extras_require=extras_require, - setup_requires=['wheel'], classifiers=[ 'Environment :: Web Environment', 'Intended Audience :: Developers', From 759fac9e050576d1e86d8510eec938e24f5eda17 Mon Sep 17 00:00:00 2001 From: Matthew Wilson Date: Wed, 30 Dec 2020 09:50:34 -0500 Subject: [PATCH 6/6] Fixes international domains with more than 2 hyphens --- tests/test_url.py | 2 ++ validators/url.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_url.py b/tests/test_url.py index 8fcc67be..2252f24d 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -46,6 +46,8 @@ u'http://😉.com/😁', u'http://উদাহরণ.বাংলা', u'http://xn--d5b6ci4b4b3a.xn--54b7fta0cc', + u'http://дом-м.рф/1/asdf', + u'http://xn----gtbybh.xn--p1ai/1/asdf', u'http://-.~_!$&\'()*+,;=:%40:80%2f::::::@example.com', u'http://1337.net', u'http://a.b-c.de', diff --git a/validators/url.py b/validators/url.py index fe2cbf53..37d946cb 100644 --- a/validators/url.py +++ b/validators/url.py @@ -69,13 +69,13 @@ r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])" r")\]|" # host name - r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" + r"(?:(?:(?:xn--[-]{0,2})|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)" # domain name - r"(?:\.(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" + r"(?:\.(?:(?:xn--[-]{0,2})|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)*" # TLD identifier - r"(?:\.(?:(?:xn--[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]{2,})|" + r"(?:\.(?:(?:xn--[-]{0,2}[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]{2,})|" r"[a-z\u00a1-\uffff\U00010000-\U0010ffff]{2,}))" r")" # port number