Update wrapper.py

This commit is contained in:
Akash
2020-07-19 19:53:03 +05:30
committed by GitHub
parent 0ac4689944
commit b4e8d8d023

View File

@@ -93,24 +93,29 @@ class Url():
def get(self, url=None, user_agent=None, encoding=None):
    """Return the decoded source of the supplied URL.

    Args:
        url: Address to fetch. Falls back to ``self.clean_url()`` when
            falsy.
        user_agent: Value sent in the ``User-Agent`` request header.
            Falls back to ``self.user_agent`` when falsy.
        encoding: Codec used to decode the response body. When falsy, the
            charset declared in the response ``Content-Type`` header is
            used, or UTF-8 if the header declares none.

    Returns:
        The response body decoded to ``str``.

    Raises:
        WaybackError: If the request fails twice in a row.
    """
    if not url:
        url = self.clean_url()
    if not user_agent:
        user_agent = self.user_agent

    headers = {"User-Agent": "%s" % user_agent}
    req = Request(url, headers=headers)  # nosec

    try:
        resp = urlopen(req)  # nosec
    except Exception:
        # Retry exactly once before giving up; the first failure is
        # deliberately ignored.
        try:
            resp = urlopen(req)  # nosec
        except Exception as e:
            raise WaybackError(e) from e

    # Close the response (and its socket) even if decoding fails.
    with resp:
        if not encoding:
            # Parse the charset parameter properly instead of splitting the
            # raw header: a header without a charset (e.g. "application/json")
            # or with trailing parameters must not end up as the codec name.
            encoding = resp.headers.get_content_charset() or "UTF-8"
        return resp.read().decode(encoding)
def near(self, **kwargs): def near(self, **kwargs):