Update wrapper.py

This commit is contained in:
akamhy 2020-05-06 19:35:01 +05:30 committed by GitHub
parent 0029d63d8a
commit 45fe07ddb6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import json import json
from datetime import datetime from datetime import datetime
from waybackpy.exceptions import TooManyArchivingRequests, ArchivingNotAllowed, PageNotSaved, ArchiveNotFound, UrlNotFound, BadGateWay, InvalidUrl from waybackpy.exceptions import TooManyArchivingRequests, ArchivingNotAllowed, PageNotSaved, ArchiveNotFound, UrlNotFound, BadGateWay, InvalidUrl, WaybackUnavailable
try: try:
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
from urllib.error import HTTPError from urllib.error import HTTPError
@ -19,13 +19,14 @@ def save(url,UA=default_UA):
request_url = (base_save_url + clean_url(url)) request_url = (base_save_url + clean_url(url))
hdr = { 'User-Agent' : '%s' % UA } #nosec hdr = { 'User-Agent' : '%s' % UA } #nosec
req = Request(request_url, headers=hdr) #nosec req = Request(request_url, headers=hdr) #nosec
if "." not in url: url_check(url)
raise InvalidUrl("'%s' is not a vaild url." % url)
try: try:
response = urlopen(req) #nosec response = urlopen(req) #nosec
except HTTPError as e: except HTTPError as e:
if e.code == 502: if e.code == 502:
raise BadGateWay(e) raise BadGateWay(e)
elif e.code == 503:
raise WaybackUnavailable(e)
elif e.code == 429: elif e.code == 429:
raise TooManyArchivingRequests(e) raise TooManyArchivingRequests(e)
elif e.code == 404: elif e.code == 404:
@ -41,9 +42,9 @@ def save(url,UA=default_UA):
return archived_url return archived_url
def get(url,encoding=None,UA=default_UA): def get(url,encoding=None,UA=default_UA):
url_check(url)
hdr = { 'User-Agent' : '%s' % UA } hdr = { 'User-Agent' : '%s' % UA }
request_url = clean_url(url) req = Request(clean_url(url), headers=hdr) #nosec
req = Request(request_url, headers=hdr) #nosec
resp=urlopen(req) #nosec resp=urlopen(req) #nosec
if encoding is None: if encoding is None:
try: try:
@ -60,6 +61,10 @@ def wayback_timestamp(year,month,day,hour,minute):
minute = str(minute).zfill(2) minute = str(minute).zfill(2)
return (year+month+day+hour+minute) return (year+month+day+hour+minute)
def url_check(url):
if "." not in url:
raise InvalidUrl("'%s' is not a vaild url." % url)
def near( def near(
url, url,
year=datetime.utcnow().strftime('%Y'), year=datetime.utcnow().strftime('%Y'),
@ -69,11 +74,20 @@ def near(
minute=datetime.utcnow().strftime('%M'), minute=datetime.utcnow().strftime('%M'),
UA=default_UA, UA=default_UA,
): ):
url_check(url)
timestamp = wayback_timestamp(year,month,day,hour,minute) timestamp = wayback_timestamp(year,month,day,hour,minute)
request_url = "https://archive.org/wayback/available?url=%s&timestamp=%s" % (clean_url(url), str(timestamp)) request_url = "https://archive.org/wayback/available?url=%s&timestamp=%s" % (clean_url(url), str(timestamp))
hdr = { 'User-Agent' : '%s' % UA } hdr = { 'User-Agent' : '%s' % UA }
req = Request(request_url, headers=hdr) # nosec req = Request(request_url, headers=hdr) # nosec
response = urlopen(req) #nosec try:
response = urlopen(req) #nosec
except HTTPError as e:
if e.code == 502:
raise BadGateWay(e)
elif e.code == 503:
raise WaybackUnavailable(e)
elif e.code == 404:
raise UrlNotFound(e)
data = json.loads(response.read().decode("UTF-8")) data = json.loads(response.read().decode("UTF-8"))
if not data["archived_snapshots"]: if not data["archived_snapshots"]:
raise ArchiveNotFound("'%s' is not yet archived." % url) raise ArchiveNotFound("'%s' is not yet archived." % url)