Update wrapper.py
This commit is contained in:
parent
1abb46828b
commit
705f4164e2
@ -24,15 +24,13 @@ def save(url,UA="pywayback python module"):
|
|||||||
hdr = { 'User-Agent' : '%s' % UA }
|
hdr = { 'User-Agent' : '%s' % UA }
|
||||||
req = Request(request_url, headers=hdr)
|
req = Request(request_url, headers=hdr)
|
||||||
try:
|
try:
|
||||||
response = urlopen(req)
|
response = urlopen(req) #nosec
|
||||||
except urllib.error.HTTPError as e:
|
except urllib.error.HTTPError as e:
|
||||||
raise TooManyArchivingRequestsError(e)
|
raise TooManyArchivingRequestsError(e)
|
||||||
# print(response.read())
|
|
||||||
header = response.headers
|
header = response.headers
|
||||||
if "exclusion.robots.policy" in str(header):
|
if "exclusion.robots.policy" in str(header):
|
||||||
raise ArchivingNotAllowed("Can not archive %s. Disabled by site owner." % (url))
|
raise ArchivingNotAllowed("Can not archive %s. Disabled by site owner." % (url))
|
||||||
archive_id = header['Content-Location']
|
archive_id = header['Content-Location']
|
||||||
print(header)
|
|
||||||
archived_url = "https://web.archive.org" + archive_id
|
archived_url = "https://web.archive.org" + archive_id
|
||||||
return archived_url
|
return archived_url
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user