waybackpy/availability_api.py : removed unused imports, added doc strings, removed redundant function.

This commit is contained in:
Akash Mahanty 2022-01-21 22:47:44 +05:30
parent 5c685ef5d7
commit fd5e85420c

View File

@ -1,30 +1,14 @@
import re
import time
import requests
from datetime import datetime
from .__version__ import __version__
from .utils import DEFAULT_USER_AGENT
def full_url(endpoint, params):
if not params:
return endpoint.strip()
full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
for key, val in params.items():
key = "filter" if key.startswith("filter") else key
key = "collapse" if key.startswith("collapse") else key
amp = "" if full_url.endswith("?") else "&"
full_url = (
full_url
+ amp
+ "{key}={val}".format(key=key, val=requests.utils.quote(str(val)))
)
return full_url
class WaybackMachineAvailabilityAPI:
"""
Class that interfaces the availability API of the Wayback Machine.
"""
def __init__(self, url, user_agent=DEFAULT_USER_AGENT):
self.url = str(url).strip().replace(" ", "%20")
self.user_agent = user_agent
@ -34,24 +18,50 @@ class WaybackMachineAvailabilityAPI:
self.JSON = None
def unix_timestamp_to_wayback_timestamp(self, unix_timestamp):
"""
Converts Unix time to wayback Machine timestamp.
"""
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
def __repr__(self):
return str(self) # self.__str__()
"""
Same as string representation, just return the archive URL as a string.
"""
return str(self)
def __str__(self):
"""
String representation of the class. If atleast one API call was successfully
made then return the archive URL as a string. Else returns None.
"""
if not self.JSON:
return None
return self.archive_url
def json(self):
self.request_url = full_url(self.endpoint, self.payload)
self.response = requests.get(self.request_url, self.headers)
"""
Makes the API call to the availability API can set the JSON response
to the JSON attribute of the instance and also returns the JSON attribute.
"""
self.response = requests.get(
self.endpoint, params=self.payload, headers=self.headers
)
self.JSON = self.response.json()
return self.JSON
def timestamp(self):
if not self.JSON["archived_snapshots"] or not self.JSON:
"""
Converts the timestamp form the JSON response to datetime object.
If JSON attribute of the instance is None it implies that the either
the the last API call failed or one was never made.
If not JSON or if JSON but no timestamp in the JSON response then returns
the maximum value for datetime object that is possible.
If you get an URL as a response form the availability API it is guaranteed
that you can get the datetime object from the timestamp.
"""
if not self.JSON or not self.JSON["archived_snapshots"]:
return datetime.max
return datetime.strptime(
@ -60,6 +70,10 @@ class WaybackMachineAvailabilityAPI:
@property
def archive_url(self):
"""
Reads the the JSON response data and tries to get the timestamp and returns
the timestamp if found else returns None.
"""
data = self.JSON
if not data["archived_snapshots"]:
@ -72,15 +86,29 @@ class WaybackMachineAvailabilityAPI:
return archive_url
def wayback_timestamp(self, **kwargs):
"""
Prepends zero before the year, month, day, hour and minute so that they
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
"""
return "".join(
str(kwargs[key]).zfill(2)
for key in ["year", "month", "day", "hour", "minute"]
)
def oldest(self):
"""
Passing the year 1994 should return the oldest archive because
wayback machine was started in May, 1996 and there should be no archive
before the year 1994.
"""
return self.near(year=1994)
def newest(self):
"""
Passing the current UNIX time should be sufficient to get the newest
archive considering the API request-response time delay and also the
database lags on Wayback machine.
"""
return self.near(unix_timestamp=int(time.time()))
def near(
@ -92,6 +120,16 @@ class WaybackMachineAvailabilityAPI:
minute=None,
unix_timestamp=None,
):
"""
The main method for this Class, oldest and newest methods are dependent on this
method.
It generates the timestamp based on the input either by calling the
unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
appropriate arguments for their respective parameters.
Adds the timestamp to the payload dictionary.
And finally invoking the json method to make the API call then returns the instance.
"""
if unix_timestamp:
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
else: