improve doc strings and comments and remove useless exceptions.
This commit is contained in:
		@@ -1,19 +1,19 @@
 | 
			
		||||
"""
 | 
			
		||||
This module interfaces the Wayback Machine's availability API.
 | 
			
		||||
 | 
			
		||||
The interface could be useful for looking up archives and finding archives
 | 
			
		||||
The interface is useful for looking up archives and finding archives
 | 
			
		||||
that are close to a specific date and time.
 | 
			
		||||
 | 
			
		||||
It has a class called WaybackMachineAvailabilityAPI, and the class has
 | 
			
		||||
methods such as:
 | 
			
		||||
It has a class WaybackMachineAvailabilityAPI, and the class has
 | 
			
		||||
methods like:
 | 
			
		||||
 | 
			
		||||
near() for looking up archives close to a specific date and time.
 | 
			
		||||
near() for retrieving archives close to a specific date and time.
 | 
			
		||||
 | 
			
		||||
oldest() for retrieving the first archive URL of the webpage.
 | 
			
		||||
 | 
			
		||||
newest() for retrieving the latest archive of an URL.
 | 
			
		||||
newest() for retrieving the latest archive of the webpage.
 | 
			
		||||
 | 
			
		||||
The Wayback Machine Availability response should be a valid JSON and
 | 
			
		||||
The Wayback Machine Availability API response must be a valid JSON and
 | 
			
		||||
if it is not, then an InvalidJSONInAvailabilityAPIResponse exception is raised.
 | 
			
		||||
 | 
			
		||||
If the Availability API returned valid JSON but archive URL could not be found
 | 
			
		||||
@@ -39,7 +39,7 @@ ResponseJSON = Dict[str, Any]
 | 
			
		||||
 | 
			
		||||
class WaybackMachineAvailabilityAPI:
 | 
			
		||||
    """
 | 
			
		||||
    Class that interfaces the availability API of the Wayback Machine.
 | 
			
		||||
    Class that interfaces the Wayback Machine's availability API.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def __init__(
 | 
			
		||||
@@ -61,7 +61,7 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str:
 | 
			
		||||
        """
 | 
			
		||||
        Converts Unix time to wayback Machine timestamp and the Wayback Machine
 | 
			
		||||
        Converts Unix time to a Wayback Machine timestamp; the Wayback Machine
 | 
			
		||||
        timestamp format is yyyyMMddhhmmss.
 | 
			
		||||
        """
 | 
			
		||||
        return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
 | 
			
		||||
@@ -76,10 +76,10 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
        """
 | 
			
		||||
        String representation of the class. If at least one API
 | 
			
		||||
        call was successfully made then return the archive URL
 | 
			
		||||
        as a string. Else returns "".
 | 
			
		||||
        as a string. Else returns "" (empty string literal).
 | 
			
		||||
        """
 | 
			
		||||
        # String should not return anything other than a string object
 | 
			
		||||
        # So, if a string repr is asked for before making any API requests
 | 
			
		||||
        # __str__ can not return anything other than a string object
 | 
			
		||||
        # So, if a string repr is asked for even before making an API request
 | 
			
		||||
        # just return ""
 | 
			
		||||
        if not self.json:
 | 
			
		||||
            return ""
 | 
			
		||||
@@ -147,7 +147,7 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
                self.json["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        raise ValueError("Could not get timestamp from result")
 | 
			
		||||
        raise ValueError("Timestamp not found in the Availability API's JSON response.")
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def archive_url(self) -> str:
 | 
			
		||||
@@ -159,8 +159,8 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
        archive_url = ""
 | 
			
		||||
        data = self.json
 | 
			
		||||
 | 
			
		||||
        # If the user didn't invoke oldest, newest or near but tries to access the
 | 
			
		||||
        # archive_url attribute then assume they are fine with any archive
 | 
			
		||||
        # If the user didn't invoke oldest, newest or near but tries to access
 | 
			
		||||
        # archive_url attribute then assume that they are fine with any archive
 | 
			
		||||
        # and invoke the oldest method.
 | 
			
		||||
        if not data:
 | 
			
		||||
            self.oldest()
 | 
			
		||||
@@ -172,10 +172,10 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
                not data or not data["archived_snapshots"]
 | 
			
		||||
            ):
 | 
			
		||||
                self.setup_json()  # It makes a new API call
 | 
			
		||||
                data = self.json  # json() updated the value of JSON attribute
 | 
			
		||||
                data = self.json  # setup_json() updates value of json attribute
 | 
			
		||||
 | 
			
		||||
            # If we exhausted the max_tries, then we give up and
 | 
			
		||||
            # raise exception.
 | 
			
		||||
            # If exhausted max_tries, then give up and
 | 
			
		||||
            # raise ArchiveNotInAvailabilityAPIResponse.
 | 
			
		||||
 | 
			
		||||
            if not data or not data["archived_snapshots"]:
 | 
			
		||||
                raise ArchiveNotInAvailabilityAPIResponse(
 | 
			
		||||
@@ -198,7 +198,7 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
    def wayback_timestamp(**kwargs: int) -> str:
 | 
			
		||||
        """
 | 
			
		||||
        Prepends zero before the year, month, day, hour and minute so that they
 | 
			
		||||
        are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
 | 
			
		||||
        are conformable with the YYYYMMDDhhmmss Wayback Machine timestamp format.
 | 
			
		||||
        """
 | 
			
		||||
        return "".join(
 | 
			
		||||
            str(kwargs[key]).zfill(2)
 | 
			
		||||
@@ -218,7 +218,7 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
        Passes the current UNIX time to near() for retrieving the newest archive
 | 
			
		||||
        from the availability API.
 | 
			
		||||
 | 
			
		||||
        We assume that wayback machine can not archive the future of a webpage.
 | 
			
		||||
        Remember UNIX time is UTC and Wayback Machine is also UTC based.
 | 
			
		||||
        """
 | 
			
		||||
        return self.near(unix_timestamp=int(time.time()))
 | 
			
		||||
 | 
			
		||||
@@ -232,7 +232,8 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
        unix_timestamp: Optional[int] = None,
 | 
			
		||||
    ) -> "WaybackMachineAvailabilityAPI":
 | 
			
		||||
        """
 | 
			
		||||
        The main method for the Class, oldest() and newest() are dependent on it.
 | 
			
		||||
        The most important method of this Class, oldest() and newest() are
 | 
			
		||||
        dependent on it.
 | 
			
		||||
 | 
			
		||||
        It generates the timestamp based on the input either by calling the
 | 
			
		||||
        unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
 | 
			
		||||
@@ -240,8 +241,8 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
 | 
			
		||||
        Adds the timestamp to the payload dictionary.
 | 
			
		||||
 | 
			
		||||
        And finally invoking the json method to make the API call then returns
 | 
			
		||||
        the instance.
 | 
			
		||||
        And finally invokes the setup_json method to make the API call then
 | 
			
		||||
        finally returns the instance.
 | 
			
		||||
        """
 | 
			
		||||
        if unix_timestamp:
 | 
			
		||||
            timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
 | 
			
		||||
 
 | 
			
		||||
@@ -173,7 +173,6 @@ class WaybackMachineCDXServerAPI:
 | 
			
		||||
            for i, collapse in enumerate(self.collapses):
 | 
			
		||||
                payload["collapse" + str(i)] = collapse
 | 
			
		||||
 | 
			
		||||
        # Don't need to return anything as it's dictionary.
 | 
			
		||||
        payload["url"] = self.url
 | 
			
		||||
 | 
			
		||||
    def snapshots(self) -> Generator[CDXSnapshot, None, None]:
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
"""
 | 
			
		||||
Module that contains the CDXSnapshot class, CDX records are casted
 | 
			
		||||
Module that contains the CDXSnapshot class, CDX records/lines are cast
 | 
			
		||||
to CDXSnapshot objects for easier access.
 | 
			
		||||
 | 
			
		||||
The CDX index format is plain text data. Each line ('record') indicates a
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
Utility functions required for accessing the CDX server API.
 | 
			
		||||
 | 
			
		||||
These are here in this module so that we don’t make any module too
 | 
			
		||||
big.
 | 
			
		||||
long.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
@@ -63,7 +63,7 @@ def get_response(
 | 
			
		||||
    backoff_factor: float = 0.5,
 | 
			
		||||
) -> Union[requests.Response, Exception]:
 | 
			
		||||
    """
 | 
			
		||||
    Make get request to the CDX server and return the response.
 | 
			
		||||
    Makes get request to the CDX server and returns the response.
 | 
			
		||||
    """
 | 
			
		||||
    session = requests.Session()
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
"""
 | 
			
		||||
Module that makes waybackpy a CLI tool.
 | 
			
		||||
Module responsible for enabling waybackpy to function as a CLI tool.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
@@ -18,24 +18,30 @@ from .cdx_api import WaybackMachineCDXServerAPI
 | 
			
		||||
from .save_api import WaybackMachineSaveAPI
 | 
			
		||||
from .utils import DEFAULT_USER_AGENT
 | 
			
		||||
from .wrapper import Url
 | 
			
		||||
from .exceptions import ArchiveNotInAvailabilityAPIResponse
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def echo_availability_api(
 | 
			
		||||
    availability_api_instance: WaybackMachineAvailabilityAPI, json: bool
 | 
			
		||||
) -> None:
 | 
			
		||||
    """
 | 
			
		||||
    Output availability API depending functions.
 | 
			
		||||
    Near, oldest and newest output by this method.
 | 
			
		||||
    Output for methods that use the availability API.
 | 
			
		||||
    Near, oldest and newest output via this function.
 | 
			
		||||
    """
 | 
			
		||||
    if not availability_api_instance.archive_url:
 | 
			
		||||
        archive_url = (
 | 
			
		||||
    try:
 | 
			
		||||
        if availability_api_instance.archive_url:
 | 
			
		||||
            archive_url = availability_api_instance.archive_url
 | 
			
		||||
    except ArchiveNotInAvailabilityAPIResponse as error:
 | 
			
		||||
        message = (
 | 
			
		||||
            "NO ARCHIVE FOUND - The requested URL is probably "
 | 
			
		||||
            + "not yet archived or if the URL was recently archived then it is "
 | 
			
		||||
            + "not yet available via the Wayback Machine's availability API "
 | 
			
		||||
            + "because of database lag and should be available after some time."
 | 
			
		||||
        )
 | 
			
		||||
    else:
 | 
			
		||||
        archive_url = availability_api_instance.archive_url
 | 
			
		||||
 | 
			
		||||
        click.echo(message + "\nJSON response:\n" + str(error), err=True)
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    click.echo("Archive URL:")
 | 
			
		||||
    click.echo(archive_url)
 | 
			
		||||
    if json:
 | 
			
		||||
@@ -45,7 +51,7 @@ def echo_availability_api(
 | 
			
		||||
 | 
			
		||||
def handle_cdx(data: List[Any]) -> None:
 | 
			
		||||
    """
 | 
			
		||||
    Handles the CDX CLI options and output.
 | 
			
		||||
    Handles the CDX CLI options and output format.
 | 
			
		||||
    """
 | 
			
		||||
    url = data[0]
 | 
			
		||||
    user_agent = data[1]
 | 
			
		||||
 
 | 
			
		||||
@@ -12,20 +12,7 @@ class WaybackError(Exception):
 | 
			
		||||
    1) Wayback Machine API Service is unreachable/down.
 | 
			
		||||
    2) You passed illegal arguments.
 | 
			
		||||
 | 
			
		||||
    All other exceptions are inherited from this class.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RedirectSaveError(WaybackError):
 | 
			
		||||
    """
 | 
			
		||||
    Raised when the original URL is redirected and the
 | 
			
		||||
    redirect URL is archived but not the original URL.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class URLError(Exception):
 | 
			
		||||
    """
 | 
			
		||||
    Raised when malformed URLs are passed as arguments.
 | 
			
		||||
    All other exceptions are inherited from this main exception.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -33,6 +20,8 @@ class TooManyRequestsError(WaybackError):
 | 
			
		||||
    """
 | 
			
		||||
    Raised when you make more than 15 requests per
 | 
			
		||||
    minute and the Wayback Machine returns 429.
 | 
			
		||||
 | 
			
		||||
    See https://github.com/akamhy/waybackpy/issues/131
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -113,6 +113,7 @@ class Url:
 | 
			
		||||
        """Set the attributes for total backwards compatibility."""
 | 
			
		||||
        self.archive_url = self.wayback_machine_availability_api.archive_url
 | 
			
		||||
        self.json = self.wayback_machine_availability_api.json
 | 
			
		||||
        self.JSON = self.json # for backwards compatibility, do not remove it.
 | 
			
		||||
        self.timestamp = self.wayback_machine_availability_api.timestamp()
 | 
			
		||||
 | 
			
		||||
    def total_archives(
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user