From fd5e85420c78c07e4f1dcb8fd2b2740510332c5c Mon Sep 17 00:00:00 2001 From: Akash Mahanty Date: Fri, 21 Jan 2022 22:47:44 +0530 Subject: [PATCH] waybackpy/availability_api.py : removed unused imports, added doc strings, removed redundant function. --- waybackpy/availability_api.py | 86 +++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 24 deletions(-) diff --git a/waybackpy/availability_api.py b/waybackpy/availability_api.py index 51ecf11..8d19211 100644 --- a/waybackpy/availability_api.py +++ b/waybackpy/availability_api.py @@ -1,30 +1,14 @@ -import re import time import requests from datetime import datetime -from .__version__ import __version__ from .utils import DEFAULT_USER_AGENT -def full_url(endpoint, params): - if not params: - return endpoint.strip() - - full_url = endpoint if endpoint.endswith("?") else (endpoint + "?") - - for key, val in params.items(): - key = "filter" if key.startswith("filter") else key - key = "collapse" if key.startswith("collapse") else key - amp = "" if full_url.endswith("?") else "&" - full_url = ( - full_url - + amp - + "{key}={val}".format(key=key, val=requests.utils.quote(str(val))) - ) - return full_url - - class WaybackMachineAvailabilityAPI: + """ + Class that interfaces the availability API of the Wayback Machine. + """ + def __init__(self, url, user_agent=DEFAULT_USER_AGENT): self.url = str(url).strip().replace(" ", "%20") self.user_agent = user_agent @@ -34,24 +18,50 @@ class WaybackMachineAvailabilityAPI: self.JSON = None def unix_timestamp_to_wayback_timestamp(self, unix_timestamp): + """ + Converts Unix time to wayback Machine timestamp. + """ return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S") def __repr__(self): - return str(self) # self.__str__() + """ + Same as string representation, just return the archive URL as a string. + """ + return str(self) def __str__(self): + """ + String representation of the class. 
If at least one API call was successfully + made then return the archive URL as a string. Else returns None. + """ if not self.JSON: return None return self.archive_url def json(self): - self.request_url = full_url(self.endpoint, self.payload) - self.response = requests.get(self.request_url, self.headers) + """ + Makes the API call to the availability API and sets the JSON response + to the JSON attribute of the instance and also returns the JSON attribute. + """ + self.response = requests.get( + self.endpoint, params=self.payload, headers=self.headers + ) self.JSON = self.response.json() return self.JSON def timestamp(self): - if not self.JSON["archived_snapshots"] or not self.JSON: + """ + Converts the timestamp from the JSON response to a datetime object. + If JSON attribute of the instance is None it implies that either + the last API call failed or one was never made. + + If not JSON or if JSON but no timestamp in the JSON response then returns + the maximum value for datetime object that is possible. + + If you get a URL as a response from the availability API it is guaranteed + that you can get the datetime object from the timestamp. + """ + if not self.JSON or not self.JSON["archived_snapshots"]: return datetime.max return datetime.strptime( @@ -60,6 +70,10 @@ class WaybackMachineAvailabilityAPI: @property def archive_url(self): + """ + Reads the JSON response data and tries to get the archive URL and returns + the archive URL if found else returns None. + """ data = self.JSON if not data["archived_snapshots"]: @@ -72,15 +86,29 @@ class WaybackMachineAvailabilityAPI: return archive_url def wayback_timestamp(self, **kwargs): + """ + Prepends zero before the year, month, day, hour and minute so that they + are conformable with the YYYYMMDDhhmmss wayback machine timestamp format. 
+ """ return "".join( str(kwargs[key]).zfill(2) for key in ["year", "month", "day", "hour", "minute"] ) def oldest(self): + """ + Passing the year 1994 should return the oldest archive because + wayback machine was started in May, 1996 and there should be no archive + before the year 1994. + """ return self.near(year=1994) def newest(self): + """ + Passing the current UNIX time should be sufficient to get the newest + archive considering the API request-response time delay and also the + database lags on Wayback machine. + """ return self.near(unix_timestamp=int(time.time())) def near( @@ -92,6 +120,16 @@ class WaybackMachineAvailabilityAPI: minute=None, unix_timestamp=None, ): + """ + The main method for this Class, oldest and newest methods are dependent on this + method. + + It generates the timestamp based on the input either by calling the + unix_timestamp_to_wayback_timestamp or wayback_timestamp method with + appropriate arguments for their respective parameters. + Adds the timestamp to the payload dictionary. + And finally invoking the json method to make the API call then returns the instance. + """ if unix_timestamp: timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp) else: