Issue 34038: urllib2.urlopen fails if http(s)_proxy is set to a socks5 proxy
Changing the urlopen call to a curl command invocation works. $ export http_proxy=socks5://127.0.0.1:8888 https_proxy=socks5://127.0.0.1:8888 # this will raise an exception whose string representation is a blank string # at least for url: https://s3.amazonaws.com/mozilla-games/emscripten/packages/llvm/tag/linux_64bit/emscripten-llvm-e1.37.35.tar.gz from urllib2 import urlopen, HTTPError u = urlopen(url) mkdir_p(os.path.dirname(file_name)) with open(file_name, 'wb') as f: file_size = get_content_length(u) if file_size > 0: print("Downloading: %s from %s, %s Bytes" % (file_name, url, file_size)) else: print("Downloading: %s from %s" % (file_name, url)) file_size_dl = 0 block_sz = 8192 while True: buffer = u.read(block_sz) if not buffer: break file_size_dl += len(buffer) f.write(buffer) # this alternative way works import commands status, output = commands.getstatusoutput("curl -L --output " + file_name + " " + url)
I think this happens because urllib does not support socks5 proxies. $ cat ~/issue34038.py from urllib.request import urlopen, HTTPError url = 'http://icanhazip.com' u = urlopen(url) $ ./python.exe ~/issue34038.py Traceback (most recent call last): File "/Users/dwallace/issue34038.py", line 4, in <module> u = urlopen(url) File "/Users/dwallace/workspace/cpython/Lib/urllib/request.py", line 222, in urlopen return opener.open(url, data, timeout) File "/Users/dwallace/workspace/cpython/Lib/urllib/request.py", line 524, in open response = self._open(req, data) File "/Users/dwallace/workspace/cpython/Lib/urllib/request.py", line 541, in _open result = self._call_chain(self.handle_open, protocol, protocol + File "/Users/dwallace/workspace/cpython/Lib/urllib/request.py", line 502, in _call_chain result = func(*args) File "/Users/dwallace/workspace/cpython/Lib/urllib/request.py", line 804, in <lambda> meth(r, proxy, type)) File "/Users/dwallace/workspace/cpython/Lib/urllib/request.py", line 832, in proxy_open return self.parent.open(req, timeout=req.timeout) File "/Users/dwallace/workspace/cpython/Lib/urllib/request.py", line 524, in open response = self._open(req, data) File "/Users/dwallace/workspace/cpython/Lib/urllib/request.py", line 546, in _open return self._call_chain(self.handle_open, 'unknown', File "/Users/dwallace/workspace/cpython/Lib/urllib/request.py", line 502, in _call_chain result = func(*args) File "/Users/dwallace/workspace/cpython/Lib/urllib/request.py", line 1386, in unknown_open raise URLError('unknown url type: %s' % type) urllib.error.URLError: <urlopen error unknown url type: socks5> The error message could be better, though. You can work around this by replacing the default socket class with a SOCKS-aware one (the socks module below comes from the third-party PySocks package, not the standard library). import urllib.request import socket import socks url = 'http://icanhazip.com' socks.set_default_proxy(socks.SOCKS5, "localhost",port=8888) socket.socket = socks.socksocket print(urllib.request.urlopen(url).read())