From fa62d727d4044e4f7f1de24a1b58f24c9751ca37 Mon Sep 17 00:00:00 2001 From: K3VRAL Date: Mon, 9 May 2022 15:24:50 +0200 Subject: [PATCH 1/3] Minor fix for hostname with underscores --- validators/url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/url.py b/validators/url.py index fe2cbf53..636df326 100644 --- a/validators/url.py +++ b/validators/url.py @@ -69,7 +69,7 @@ r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])" r")\]|" # host name - r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" + r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?_?)*" r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)" # domain name r"(?:\.(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" From 430a746c170cd26fb3da945859fd5fda3fb9f13c Mon Sep 17 00:00:00 2001 From: K3VRAL <38153680+K3VRAL@users.noreply.github.com> Date: Tue, 10 May 2022 14:08:13 +0000 Subject: [PATCH 2/3] Underscore condition in the wrong place fixed After more testing and reading up on which parts of a URL may legally contain underscores (see https://stackoverflow.com/questions/2180465/can-domain-name-subdomains-have-an-underscore-in-it), I found that an underscore can be legal at the beginning of a subdomain, which my code did not reflect correctly. 
--- validators/url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/url.py b/validators/url.py index 636df326..5c1f64a3 100644 --- a/validators/url.py +++ b/validators/url.py @@ -69,7 +69,7 @@ r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])" r")\]|" # host name - r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?_?)*" + r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9_?]-?)*" r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)" # domain name r"(?:\.(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" From 14520991f02c47417cfca3b26fb31eabb451df8e Mon Sep 17 00:00:00 2001 From: K3VRAL <38153680+K3VRAL@users.noreply.github.com> Date: Tue, 10 May 2022 14:17:53 +0000 Subject: [PATCH 3/3] Whoops forgot to remove ? I forgot to remove the "?" quantifier when I moved the underscore inside the character class, where "?" is matched as a literal character. --- validators/url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/url.py b/validators/url.py index 5c1f64a3..4db869b9 100644 --- a/validators/url.py +++ b/validators/url.py @@ -69,7 +69,7 @@ r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])" r")\]|" # host name - r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9_?]-?)*" + r"(?:(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9_]-?)*" r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)" # domain name r"(?:\.(?:(?:xn--)|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*"