+ total_archives()

This commit is contained in:
akamhy 2020-05-07 14:52:05 +05:30 committed by GitHub
parent e7dac74906
commit 8c5c0153da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -11,9 +11,21 @@ except ImportError:
default_UA = "waybackpy python package" default_UA = "waybackpy python package"
def url_check(url):
    """Validate ``url`` with a minimal sanity check.

    The only test performed is whether ``url`` contains a "." anywhere.

    Raises:
        InvalidUrl: if ``url`` contains no "." at all.
    """
    if "." in url:
        return
    raise InvalidUrl("'%s' is not a vaild url." % url)
def clean_url(url):
    """Return ``url`` as a string, trimmed, with inner spaces replaced by "_"."""
    text = str(url)
    trimmed = text.strip()
    return trimmed.replace(" ", "_")
def wayback_timestamp(year, month, day, hour, minute):
    """Concatenate the date parts into a single timestamp string.

    ``year`` is converted with ``str`` as-is; ``month``, ``day``, ``hour``
    and ``minute`` are each converted with ``str`` and left-padded with
    zeros to a width of two before being appended in that order.
    """
    head = str(year)
    padded = [str(part).zfill(2) for part in (month, day, hour, minute)]
    return head + "".join(padded)
def save(url, UA=default_UA): def save(url, UA=default_UA):
base_save_url = "https://web.archive.org/save/" base_save_url = "https://web.archive.org/save/"
request_url = (base_save_url + clean_url(url)) request_url = (base_save_url + clean_url(url))
@ -57,18 +69,6 @@ def get(url,encoding=None,UA=default_UA):
encoding = encoding.replace("text/html","UTF-8",1) encoding = encoding.replace("text/html","UTF-8",1)
return resp.read().decode(encoding) return resp.read().decode(encoding)
def wayback_timestamp(year,month,day,hour,minute):
    """Join year, month, day, hour and minute into one timestamp string.

    The year is stringified unchanged; every other field is stringified
    and zero-filled to two characters.
    """
    stamp = str(year)
    for field in (month, day, hour, minute):
        stamp += str(field).zfill(2)
    return stamp
def url_check(url):
    """Raise ``InvalidUrl`` unless ``url`` contains at least one "."."""
    has_dot = "." in url
    if not has_dot:
        message = "'%s' is not a vaild url." % url
        raise InvalidUrl(message)
def near( def near(
url, url,
year=datetime.utcnow().strftime('%Y'), year=datetime.utcnow().strftime('%Y'),
@ -106,3 +106,12 @@ def oldest(url,UA=default_UA,year=1994):
def newest(url, UA=default_UA):
    """Return the result of ``near`` for ``url`` using its default date arguments.

    Only ``url`` and the user agent are forwarded; every date-related
    parameter of ``near`` is left at its default.
    """
    latest = near(url, UA=UA)
    return latest
def total_archives(url, UA=default_UA):
    """Return the number of captures the Wayback CDX server lists for ``url``.

    Args:
        url: Address to look up. It is validated with ``url_check`` and
            normalised with ``clean_url`` before being placed in the query.
        UA: Value sent as the ``User-Agent`` request header.

    Returns:
        The count of capture rows in the CDX response, as an ``int``;
        ``0`` when the server returns an empty list.

    Raises:
        InvalidUrl: propagated from ``url_check`` when ``url`` has no ".".
    """
    url_check(url)
    hdr = {'User-Agent': '%s' % UA}
    request_url = "https://web.archive.org/cdx/search/cdx?url=%s&output=json" % clean_url(url)
    req = Request(request_url, headers=hdr)  # nosec
    with urlopen(req) as response:  # nosec
        data = json.loads(response.read())
    # With output=json the first row of the array is the list of field
    # names, not a capture, so it must not be counted. An empty response
    # ("[]") has no header row either, hence the clamp at zero.
    return max(len(data) - 1, 0)