Update wrapper.py

This commit is contained in:
akamhy 2020-05-06 20:07:25 +05:30 committed by GitHub
parent 45fe07ddb6
commit ed497bbd23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -4,9 +4,9 @@ from datetime import datetime
from waybackpy.exceptions import TooManyArchivingRequests, ArchivingNotAllowed, PageNotSaved, ArchiveNotFound, UrlNotFound, BadGateWay, InvalidUrl, WaybackUnavailable from waybackpy.exceptions import TooManyArchivingRequests, ArchivingNotAllowed, PageNotSaved, ArchiveNotFound, UrlNotFound, BadGateWay, InvalidUrl, WaybackUnavailable
try: try:
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
from urllib.error import HTTPError from urllib.error import HTTPError, URLError
except ImportError: except ImportError:
from urllib2 import Request, urlopen, HTTPError from urllib2 import Request, urlopen, HTTPError, URLError
default_UA = "waybackpy python package" default_UA = "waybackpy python package"
@@ -45,12 +45,16 @@ def get(url,encoding=None,UA=default_UA):
url_check(url) url_check(url)
hdr = { 'User-Agent' : '%s' % UA } hdr = { 'User-Agent' : '%s' % UA }
req = Request(clean_url(url), headers=hdr) #nosec req = Request(clean_url(url), headers=hdr) #nosec
try:
resp=urlopen(req) #nosec resp=urlopen(req) #nosec
except URLError as e:
raise UrlNotFound(e)
if encoding is None: if encoding is None:
try: try:
encoding= resp.headers['content-type'].split('charset=')[-1] encoding= resp.headers['content-type'].split('charset=')[-1]
except AttributeError: except AttributeError:
encoding = "UTF-8" encoding = "UTF-8"
encoding = encoding.replace("text/html","UTF-8",1)
return resp.read().decode(encoding) return resp.read().decode(encoding)
def wayback_timestamp(year,month,day,hour,minute): def wayback_timestamp(year,month,day,hour,minute):