Update wrapper.py

This commit is contained in:
akamhy 2020-05-02 16:03:33 +05:30 committed by GitHub
parent af1ac96d48
commit 685fc730ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -10,19 +10,31 @@ class TooManyArchivingRequestsError(Exception):
Wayback machine doesn't support archiving any url too many times in a short period of time. Wayback machine doesn't support archiving any url too many times in a short period of time.
""" """
class ArchivingNotAllowed(Exception): class ArchivingNotAllowed(Exception):
""" """
Files like robots.txt are set to deny robot archiving. Files like robots.txt are set to deny robot archiving.
Wayback machine respects these files and will not archive. Wayback machine respects these files and will not archive.
""" """
class PageNotSavedError(Exception):
    """
    Raised when no archived copy of the requested url could be found,
    i.e. the "archived_snapshots" entry of the response data is empty.
    """
class InvalidUrlError(Exception):
    """
    Raised when the url passed in does not look like a valid url,
    e.g. save() rejects any url that contains no "." character.
    """
def save(url,UA="pywayback python module"): def save(url,UA="pywayback python module"):
base_save_url = "https://web.archive.org/save/" base_save_url = "https://web.archive.org/save/"
request_url = base_save_url + url request_url = base_save_url + url
hdr = { 'User-Agent' : '%s' % UA } hdr = { 'User-Agent' : '%s' % UA }
req = Request(request_url, headers=hdr) req = Request(request_url, headers=hdr)
if "." not in url:
raise InvalidUrlError("'%s' is not a vaild url." % url)
try: try:
response = urlopen(req) #nosec response = urlopen(req) #nosec
except urllib.error.HTTPError as e: except urllib.error.HTTPError as e:
@ -51,6 +63,10 @@ def near(
encoding = response.info().get_content_charset('utf8') encoding = response.info().get_content_charset('utf8')
import json import json
data = json.loads(response.read().decode(encoding)) data = json.loads(response.read().decode(encoding))
print(data)
if not data["archived_snapshots"]:
raise PageNotSavedError("'%s' was not archived." % url)
archive_url = (data["archived_snapshots"]["closest"]["url"]) archive_url = (data["archived_snapshots"]["closest"]["url"])
return archive_url return archive_url