Skip to content

gh-125926: Fix urllib.parse.urljoin() for base URI with undefined authority #125989

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,78 @@ def test_urljoins(self):
self.checkJoin(RFC1808_BASE, 'https:;', 'https:;')
self.checkJoin(RFC1808_BASE, 'https:;x', 'https:;x')

def test_urljoins_relative_base(self):
    # RFC 3986, Section 5.1 requires the base URI to match the
    # absolute-URI rule of Section 4.3.  urljoin() has no surrounding
    # context from which the missing components of a relative base could
    # be recovered, yet for backwards compatibility it must still produce
    # a sensible answer.  The expectations below are not derived from any
    # standard: they merely record what the current implementation does.
    join = self.checkJoin

    # Every base is joined with the same seven references, in this order.
    references = ('', '//', '//v', '//v/w', '/w', '///w', 'w')

    # Empty base: the reference comes back unchanged.  Two of the
    # references are checked with the relative round trip disabled.
    skip_roundtrip = ('//', '///w')
    for ref in references:
        if ref in skip_roundtrip:
            join('', ref, ref, relroundtrip=False)
        else:
            join('', ref, ref)

    # Bases that carry no scheme but do have an authority marker.
    authority_only = (
        ('//', ('//', '//', '//v', '//v/w', '///w', '///w', '///w')),
        ('//a', ('//a', '//a', '//v', '//v/w', '//a/w', '//a/w', '//a/w')),
    )
    for base, results in authority_only:
        for ref, expected in zip(references, results):
            join(base, ref, expected)

    # Bases with a scheme; each reference is tried both bare and with
    # the base's own scheme prepended.
    with_scheme = (
        ('http:',
         ('http:', 'http:', 'http://v', 'http://v/w',
          'http:/w', 'http:/w', 'http:/w')),
        ('http://',
         ('http://', 'http://', 'http://v', 'http://v/w',
          'http:///w', 'http:///w', 'http:///w')),
        ('http://a',
         ('http://a', 'http://a', 'http://v', 'http://v/w',
          'http://a/w', 'http://a/w', 'http://a/w')),
    )
    for scheme in ('', 'http:'):
        for base, results in with_scheme:
            for ref, expected in zip(references, results):
                join(base, scheme + ref, expected)

    # Path-only bases, without and with an empty authority.
    path_bases = (
        ('/b/c',
         ('/b/c', '/b/c', '//v', '//v/w', '/w', '/w', '/b/w')),
        ('///b/c',
         ('///b/c', '///b/c', '//v', '//v/w', '///w', '///w', '///b/w')),
    )
    for base, results in path_bases:
        for ref, expected in zip(references, results):
            join(base, ref, expected)


def test_RFC2732(self):
str_cases = [
('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
Expand Down
4 changes: 2 additions & 2 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,9 +577,9 @@ def urljoin(base, url, allow_fragments=True):

if scheme is None:
scheme = bscheme
if scheme != bscheme or scheme not in uses_relative:
if scheme != bscheme or (scheme and scheme not in uses_relative):
return _coerce_result(url)
if scheme in uses_netloc:
if not scheme or scheme in uses_netloc:
if netloc:
return _coerce_result(_urlunsplit(scheme, netloc, path,
query, fragment))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix :func:`urllib.parse.urljoin` for base URI with undefined authority.
Although :rfc:`3986` only specifies reference resolution for an absolute base
URI, :func:`!urljoin` should continue to return a sensible result for a relative
base URI.
Loading