Skip to content

Commit 7dcfe3c

Browse files
jmeridth authored and kvesteri committed
Fix domain validation (#133)
- Updated regex to disallow numeric-only TLDs (examples in tests)
- Allow IDNA-encoded domains and test for them (examples in tests)

Fixes #47
Fixes #123
1 parent 669129a commit 7dcfe3c

File tree

2 files changed

+29
-4
lines changed

2 files changed

+29
-4
lines changed

tests/test_domain.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,9 @@
1313
'3.cn',
1414
'a.cn',
1515
'sub1.sub2.sample.co.uk',
16-
'somerandomexample.xn--fiqs8s'
16+
'somerandomexample.xn--fiqs8s',
17+
'kräuter.com',
18+
'über.com'
1719
])
1820
def test_returns_true_on_valid_domain(value):
1921
assert domain(value)
@@ -29,7 +31,11 @@ def test_returns_true_on_valid_domain(value):
2931
'_example.com',
3032
'example_.com',
3133
'example',
32-
'a......b.com'
34+
'a......b.com',
35+
'a.123',
36+
'123.123',
37+
'123.123.123',
38+
'123.123.123.123'
3339
])
3440
def test_returns_failed_validation_on_invalid_domain(value):
3541
assert isinstance(domain(value), ValidationFailure)

validators/domain.py

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,31 @@
11
import re
22

3+
import six
4+
35
from .utils import validator
46

7+
if six.PY3:
8+
text_type = str
9+
unicode = str
10+
else:
11+
text_type = unicode
12+
513
pattern = re.compile(
614
r'^(?:[a-z0-9]' # First character of the domain
715
r'(?:[a-z0-9-_]{0,61}[a-z0-9])?\.)' # Sub domain + hostname
816
r'+[a-z0-9][a-z0-9-_]{0,61}' # First 61 characters of the gTLD
9-
r'[a-z0-9]$' # Last character of the gTLD
17+
r'[a-z]$' # Last character of the gTLD
1018
)
1119

1220

21+
def to_unicode(obj, charset='utf-8', errors='strict'):
22+
if obj is None:
23+
return None
24+
if not isinstance(obj, bytes):
25+
return text_type(obj)
26+
return obj.decode(charset, errors)
27+
28+
1329
@validator
1430
def domain(value):
1531
"""
@@ -40,4 +56,7 @@ def domain(value):
4056
4157
:param value: domain string to validate
4258
"""
43-
return pattern.match(value)
59+
try:
60+
return pattern.match(to_unicode(value).encode('idna').decode('ascii'))
61+
except UnicodeError:
62+
return False

0 commit comments

Comments
 (0)