waybackpy/availability_api.py : removed unused imports, added doc strings, removed redundant function.
This commit is contained in:
		@@ -1,30 +1,14 @@
 | 
			
		||||
import re
 | 
			
		||||
import time
 | 
			
		||||
import requests
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from .__version__ import __version__
 | 
			
		||||
from .utils import DEFAULT_USER_AGENT
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def full_url(endpoint, params):
    """
    Build the complete request URL from an endpoint and its query parameters.

    If ``params`` is empty (or None) the endpoint is returned with surrounding
    whitespace stripped. Otherwise every ``key=value`` pair is appended to the
    endpoint as a query string, with the value converted to ``str`` and
    percent-encoded.

    Any key starting with "filter" is emitted as "filter" and any key starting
    with "collapse" is emitted as "collapse", so a dict can carry several
    filters or collapses under distinct keys (e.g. "filter1", "filter2").

    :param endpoint: base URL, with or without a trailing "?".
    :param params: mapping of query parameter names to values.
    :return: the endpoint followed by the encoded query string.
    """
    # Function-scope import keeps this block self-contained; this is the
    # stdlib function that requests.utils.quote re-exports.
    from urllib.parse import quote

    if not params:
        return endpoint.strip()

    base = endpoint if endpoint.endswith("?") else endpoint + "?"

    pairs = []
    for key, val in params.items():
        # Collapse numbered variants onto the parameter name the API expects.
        if key.startswith("filter"):
            key = "filter"
        elif key.startswith("collapse"):
            key = "collapse"
        pairs.append("{key}={val}".format(key=key, val=quote(str(val))))

    return base + "&".join(pairs)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class WaybackMachineAvailabilityAPI:
 | 
			
		||||
    """
 | 
			
		||||
    Class that interfaces the availability API of the Wayback Machine.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def __init__(self, url, user_agent=DEFAULT_USER_AGENT):
 | 
			
		||||
        self.url = str(url).strip().replace(" ", "%20")
 | 
			
		||||
        self.user_agent = user_agent
 | 
			
		||||
@@ -34,24 +18,50 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
        self.JSON = None
 | 
			
		||||
 | 
			
		||||
    def unix_timestamp_to_wayback_timestamp(self, unix_timestamp):
        """
        Convert a Unix timestamp to a Wayback Machine timestamp.

        :param unix_timestamp: seconds since the Unix epoch; anything accepted
            by ``int()`` (fractional seconds are truncated).
        :return: the UTC time formatted as a 14-digit YYYYMMDDhhmmss string.
        """
        # datetime.utcfromtimestamp() is deprecated since Python 3.12; an
        # aware UTC datetime formats to the same string.
        from datetime import timezone

        moment = datetime.fromtimestamp(int(unix_timestamp), tz=timezone.utc)
        return moment.strftime("%Y%m%d%H%M%S")
 | 
			
		||||
 | 
			
		||||
    def __repr__(self):
 | 
			
		||||
        return str(self)  # self.__str__()
 | 
			
		||||
        """
 | 
			
		||||
        Same as string representation, just return the archive URL as a string.
 | 
			
		||||
        """
 | 
			
		||||
        return str(self)
 | 
			
		||||
 | 
			
		||||
    def __str__(self):
 | 
			
		||||
        """
 | 
			
		||||
        String representation of the class. If atleast one API call was successfully
 | 
			
		||||
        made then return the archive URL as a string. Else returns None.
 | 
			
		||||
        """
 | 
			
		||||
        if not self.JSON:
 | 
			
		||||
            return None
 | 
			
		||||
        return self.archive_url
 | 
			
		||||
 | 
			
		||||
    def json(self):
        """
        Make the API call to the availability API, store the decoded JSON
        response in the JSON attribute of the instance and return it.

        The raw response object is kept in the ``response`` attribute.
        """
        # A single request: the payload goes in the query string and the
        # headers are passed by keyword so they are not mistaken for params.
        self.response = requests.get(
            self.endpoint, params=self.payload, headers=self.headers
        )
        self.JSON = self.response.json()
        return self.JSON
 | 
			
		||||
 | 
			
		||||
    def timestamp(self):
 | 
			
		||||
        if not self.JSON["archived_snapshots"] or not self.JSON:
 | 
			
		||||
        """
 | 
			
		||||
        Converts the timestamp from the JSON response to datetime object.
 | 
			
		||||
        If JSON attribute of the instance is None it implies that either
 | 
			
		||||
        the last API call failed or one was never made.
 | 
			
		||||
 | 
			
		||||
        If not JSON or if JSON but no timestamp in the JSON response then returns
 | 
			
		||||
        the maximum value for datetime object that is possible.
 | 
			
		||||
 | 
			
		||||
        If you get a URL as a response from the availability API it is guaranteed
 | 
			
		||||
        that you can get the datetime object from the timestamp.
 | 
			
		||||
        """
 | 
			
		||||
        if not self.JSON or not self.JSON["archived_snapshots"]:
 | 
			
		||||
            return datetime.max
 | 
			
		||||
 | 
			
		||||
        return datetime.strptime(
 | 
			
		||||
@@ -60,6 +70,10 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def archive_url(self):
 | 
			
		||||
        """
 | 
			
		||||
        Reads the JSON response data and tries to get the timestamp and returns
 | 
			
		||||
        the timestamp if found else returns None.
 | 
			
		||||
        """
 | 
			
		||||
        data = self.JSON
 | 
			
		||||
 | 
			
		||||
        if not data["archived_snapshots"]:
 | 
			
		||||
@@ -72,15 +86,29 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
        return archive_url
 | 
			
		||||
 | 
			
		||||
    def wayback_timestamp(self, **kwargs):
        """
        Build a Wayback Machine timestamp from date and time components.

        The values for year, month, day, hour and minute are each converted to
        a string, left-padded with zeros to at least two characters and
        concatenated in that order. With a four-digit year this yields
        YYYYMMDDhhmm; no seconds component is produced.

        :param kwargs: must contain the keys "year", "month", "day", "hour"
            and "minute".
        :return: the concatenated, zero-padded timestamp string.
        :raises KeyError: if any of the five keys is missing.
        """
        components = ("year", "month", "day", "hour", "minute")
        return "".join(str(kwargs[name]).zfill(2) for name in components)
 | 
			
		||||
 | 
			
		||||
    def oldest(self):
 | 
			
		||||
        """
 | 
			
		||||
        Passing the year 1994 should return the oldest archive because
 | 
			
		||||
        wayback machine was started in May, 1996 and there should be no archive
 | 
			
		||||
        before the year 1994.
 | 
			
		||||
        """
 | 
			
		||||
        return self.near(year=1994)
 | 
			
		||||
 | 
			
		||||
    def newest(self):
 | 
			
		||||
        """
 | 
			
		||||
        Passing the current UNIX time should be sufficient to get the newest
 | 
			
		||||
        archive considering the API request-response time delay and also the
 | 
			
		||||
        database lags on Wayback machine.
 | 
			
		||||
        """
 | 
			
		||||
        return self.near(unix_timestamp=int(time.time()))
 | 
			
		||||
 | 
			
		||||
    def near(
 | 
			
		||||
@@ -92,6 +120,16 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
        minute=None,
 | 
			
		||||
        unix_timestamp=None,
 | 
			
		||||
    ):
 | 
			
		||||
        """
 | 
			
		||||
        The main method for this Class, oldest and newest methods are dependent on this
 | 
			
		||||
        method.
 | 
			
		||||
 | 
			
		||||
        It generates the timestamp based on the input either by calling the
 | 
			
		||||
        unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
 | 
			
		||||
        appropriate arguments for their respective parameters.
 | 
			
		||||
        Adds the timestamp to the payload dictionary.
 | 
			
		||||
        And finally invoking the json method to make the API call then returns the instance.
 | 
			
		||||
        """
 | 
			
		||||
        if unix_timestamp:
 | 
			
		||||
            timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
 | 
			
		||||
        else:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user