[3.7] bpo-36216: Add check for characters in netloc that normalize to separators (GH-12201) by zooba · Pull Request #12213 · python/cpython
Expand Up
@@ -391,6 +391,21 @@ def _splitnetloc(url, start=0):
delim = min(delim, wdelim) # use earliest delim position
return url[start:delim], url[delim:] # return (domain, rest)
def _checknetloc(netloc): if not netloc or netloc.isascii(): return # looking for characters like \u2100 that expand to 'a/c' # IDNA uses NFKC equivalence, so normalize for this check import unicodedata netloc2 = unicodedata.normalize('NFKC', netloc) if netloc == netloc2: return _, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay for c in '/?#@:': if c in netloc2: raise ValueError("netloc '" + netloc2 + "' contains invalid " + "characters under NFKC normalization")
def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Expand Down Expand Up @@ -419,6 +434,7 @@ def urlsplit(url, scheme='', allow_fragments=True): url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) _checknetloc(netloc) v = SplitResult('http', netloc, url, query, fragment) _parse_cache[key] = v return _coerce_result(v) Expand All @@ -442,6 +458,7 @@ def urlsplit(url, scheme='', allow_fragments=True): url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) _checknetloc(netloc) v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v return _coerce_result(v) Expand Down
def _checknetloc(netloc): if not netloc or netloc.isascii(): return # looking for characters like \u2100 that expand to 'a/c' # IDNA uses NFKC equivalence, so normalize for this check import unicodedata netloc2 = unicodedata.normalize('NFKC', netloc) if netloc == netloc2: return _, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay for c in '/?#@:': if c in netloc2: raise ValueError("netloc '" + netloc2 + "' contains invalid " + "characters under NFKC normalization")
def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Expand Down Expand Up @@ -419,6 +434,7 @@ def urlsplit(url, scheme='', allow_fragments=True): url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) _checknetloc(netloc) v = SplitResult('http', netloc, url, query, fragment) _parse_cache[key] = v return _coerce_result(v) Expand All @@ -442,6 +458,7 @@ def urlsplit(url, scheme='', allow_fragments=True): url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) _checknetloc(netloc) v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v return _coerce_result(v) Expand Down