improve doc strings and comments and remove useless exceptions.

This commit is contained in:
Akash Mahanty 2022-02-09 14:32:15 +05:30
parent 6d233f24fc
commit 25eb709ade
7 changed files with 44 additions and 48 deletions

View File

@ -1,19 +1,19 @@
""" """
This module interfaces the Wayback Machine's availability API. This module interfaces the Wayback Machine's availability API.
The interface could be useful for looking up archives and finding archives The interface is useful for looking up archives and finding archives
that are close to a specific date and time. that are close to a specific date and time.
It has a class called WaybackMachineAvailabilityAPI, and the class has It has a class WaybackMachineAvailabilityAPI, and the class has
methods such as: methods like:
near() for looking up archives close to a specific date and time. near() for retrieving archives close to a specific date and time.
oldest() for retrieving the first archive URL of the webpage. oldest() for retrieving the first archive URL of the webpage.
newest() for retrieving the latest archive of an URL. newest() for retrieving the latest archive of the webpage.
The Wayback Machine Availability response should be a valid JSON and The Wayback Machine Availability API response must be a valid JSON and
if it is not then an exception, InvalidJSONInAvailabilityAPIResponse is raised. if it is not then an exception, InvalidJSONInAvailabilityAPIResponse is raised.
If the Availability API returned valid JSON but archive URL could not be found If the Availability API returned valid JSON but archive URL could not be found
@ -39,7 +39,7 @@ ResponseJSON = Dict[str, Any]
class WaybackMachineAvailabilityAPI: class WaybackMachineAvailabilityAPI:
""" """
Class that interfaces the availability API of the Wayback Machine. Class that interfaces the Wayback Machine's availability API.
""" """
def __init__( def __init__(
@ -61,7 +61,7 @@ class WaybackMachineAvailabilityAPI:
@staticmethod @staticmethod
def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str: def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str:
""" """
Converts Unix time to wayback Machine timestamp and the Wayback Machine Converts Unix time to Wayback Machine timestamp, Wayback Machine
timestamp format is yyyyMMddhhmmss. timestamp format is yyyyMMddhhmmss.
""" """
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S") return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
@ -76,10 +76,10 @@ class WaybackMachineAvailabilityAPI:
""" """
String representation of the class. If at least one API String representation of the class. If at least one API
call was successfully made then return the archive URL call was successfully made then return the archive URL
as a string. Else returns "". as a string. Else returns "" (empty string literal).
""" """
# String should not return anything other than a string object # __str__ can not return anything other than a string object
# So, if a string repr is asked for before making any API requests # So, if a string repr is asked even before making an API request
# just return "" # just return ""
if not self.json: if not self.json:
return "" return ""
@ -147,7 +147,7 @@ class WaybackMachineAvailabilityAPI:
self.json["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S" self.json["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
) )
raise ValueError("Could not get timestamp from result") raise ValueError("Timestamp not found in the Availability API's JSON response.")
@property @property
def archive_url(self) -> str: def archive_url(self) -> str:
@ -159,8 +159,8 @@ class WaybackMachineAvailabilityAPI:
archive_url = "" archive_url = ""
data = self.json data = self.json
# If the user didn't invoke oldest, newest or near but tries to access the # If the user didn't invoke oldest, newest or near but tries to access
# archive_url attribute then assume they are fine with any archive # archive_url attribute then assume that they are fine with any archive
# and invoke the oldest method. # and invoke the oldest method.
if not data: if not data:
self.oldest() self.oldest()
@ -172,10 +172,10 @@ class WaybackMachineAvailabilityAPI:
not data or not data["archived_snapshots"] not data or not data["archived_snapshots"]
): ):
self.setup_json() # It makes a new API call self.setup_json() # It makes a new API call
data = self.json # json() updated the value of JSON attribute data = self.json # setup_json() updates value of json attribute
# If we exhausted the max_tries, then we give up and # If exhausted max_tries, then give up and
# raise exception. # raise ArchiveNotInAvailabilityAPIResponse.
if not data or not data["archived_snapshots"]: if not data or not data["archived_snapshots"]:
raise ArchiveNotInAvailabilityAPIResponse( raise ArchiveNotInAvailabilityAPIResponse(
@ -198,7 +198,7 @@ class WaybackMachineAvailabilityAPI:
def wayback_timestamp(**kwargs: int) -> str: def wayback_timestamp(**kwargs: int) -> str:
""" """
Prepends zero before the year, month, day, hour and minute so that they Prepends zero before the year, month, day, hour and minute so that they
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format. are conformable with the YYYYMMDDhhmmss Wayback Machine timestamp format.
""" """
return "".join( return "".join(
str(kwargs[key]).zfill(2) str(kwargs[key]).zfill(2)
@ -218,7 +218,7 @@ class WaybackMachineAvailabilityAPI:
Passes the current UNIX time to near() for retrieving the newest archive Passes the current UNIX time to near() for retrieving the newest archive
from the availability API. from the availability API.
We assume that wayback machine can not archive the future of a webpage. Remember UNIX time is UTC and Wayback Machine is also UTC based.
""" """
return self.near(unix_timestamp=int(time.time())) return self.near(unix_timestamp=int(time.time()))
@ -232,7 +232,8 @@ class WaybackMachineAvailabilityAPI:
unix_timestamp: Optional[int] = None, unix_timestamp: Optional[int] = None,
) -> "WaybackMachineAvailabilityAPI": ) -> "WaybackMachineAvailabilityAPI":
""" """
The main method for the Class, oldest() and newest() are dependent on it. The most important method of this Class, oldest() and newest() are
dependent on it.
It generates the timestamp based on the input either by calling the It generates the timestamp based on the input either by calling the
unix_timestamp_to_wayback_timestamp or wayback_timestamp method with unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
@ -240,8 +241,8 @@ class WaybackMachineAvailabilityAPI:
Adds the timestamp to the payload dictionary. Adds the timestamp to the payload dictionary.
And finally invoking the json method to make the API call then returns And finally invokes the setup_json method to make the API call then
the instance. returns the instance.
""" """
if unix_timestamp: if unix_timestamp:
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp) timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)

View File

@ -173,7 +173,6 @@ class WaybackMachineCDXServerAPI:
for i, collapse in enumerate(self.collapses): for i, collapse in enumerate(self.collapses):
payload["collapse" + str(i)] = collapse payload["collapse" + str(i)] = collapse
# Don't need to return anything as it's dictionary.
payload["url"] = self.url payload["url"] = self.url
def snapshots(self) -> Generator[CDXSnapshot, None, None]: def snapshots(self) -> Generator[CDXSnapshot, None, None]:

View File

@ -1,5 +1,5 @@
""" """
Module that contains the CDXSnapshot class, CDX records are cast Module that contains the CDXSnapshot class, CDX records/lines are cast
to CDXSnapshot objects for easier access. to CDXSnapshot objects for easier access.
The CDX index format is plain text data. Each line ('record') indicates a The CDX index format is plain text data. Each line ('record') indicates a

View File

@ -2,7 +2,7 @@
Utility functions required for accessing the CDX server API. Utility functions required for accessing the CDX server API.
These are here in this module so that we don't make any module too These are here in this module so that we don't make any module too
big. long.
""" """
import re import re
@ -63,7 +63,7 @@ def get_response(
backoff_factor: float = 0.5, backoff_factor: float = 0.5,
) -> Union[requests.Response, Exception]: ) -> Union[requests.Response, Exception]:
""" """
Make get request to the CDX server and return the response. Makes get request to the CDX server and returns the response.
""" """
session = requests.Session() session = requests.Session()

View File

@ -1,5 +1,5 @@
""" """
Module that makes waybackpy a CLI tool. Module responsible for enabling waybackpy to function as a CLI tool.
""" """
import os import os
@ -18,24 +18,30 @@ from .cdx_api import WaybackMachineCDXServerAPI
from .save_api import WaybackMachineSaveAPI from .save_api import WaybackMachineSaveAPI
from .utils import DEFAULT_USER_AGENT from .utils import DEFAULT_USER_AGENT
from .wrapper import Url from .wrapper import Url
from .exceptions import ArchiveNotInAvailabilityAPIResponse
def echo_availability_api( def echo_availability_api(
availability_api_instance: WaybackMachineAvailabilityAPI, json: bool availability_api_instance: WaybackMachineAvailabilityAPI, json: bool
) -> None: ) -> None:
""" """
Output availability API depending functions. Output for methods that use the availability API.
Near, oldest and newest output by this method. Near, oldest and newest output via this function.
""" """
if not availability_api_instance.archive_url: try:
archive_url = ( if availability_api_instance.archive_url:
archive_url = availability_api_instance.archive_url
except ArchiveNotInAvailabilityAPIResponse as error:
message = (
"NO ARCHIVE FOUND - The requested URL is probably " "NO ARCHIVE FOUND - The requested URL is probably "
+ "not yet archived or if the URL was recently archived then it is " + "not yet archived or if the URL was recently archived then it is "
+ "not yet available via the Wayback Machine's availability API " + "not yet available via the Wayback Machine's availability API "
+ "because of database lag and should be available after some time." + "because of database lag and should be available after some time."
) )
else:
archive_url = availability_api_instance.archive_url click.echo(message + "\nJSON response:\n" + str(error), err=True)
return
click.echo("Archive URL:") click.echo("Archive URL:")
click.echo(archive_url) click.echo(archive_url)
if json: if json:
@ -45,7 +51,7 @@ def echo_availability_api(
def handle_cdx(data: List[Any]) -> None: def handle_cdx(data: List[Any]) -> None:
""" """
Handles the CDX CLI options and output. Handles the CDX CLI options and output format.
""" """
url = data[0] url = data[0]
user_agent = data[1] user_agent = data[1]

View File

@ -12,20 +12,7 @@ class WaybackError(Exception):
1) Wayback Machine API Service is unreachable/down. 1) Wayback Machine API Service is unreachable/down.
2) You passed illegal arguments. 2) You passed illegal arguments.
All other exceptions are inherited from this class. All other exceptions are inherited from this main exception.
"""
class RedirectSaveError(WaybackError):
"""
Raised when the original URL is redirected and the
redirect URL is archived but not the original URL.
"""
class URLError(Exception):
"""
Raised when malformed URLs are passed as arguments.
""" """
@ -33,6 +20,8 @@ class TooManyRequestsError(WaybackError):
""" """
Raised when you make more than 15 requests per Raised when you make more than 15 requests per
minute and the Wayback Machine returns 429. minute and the Wayback Machine returns 429.
See https://github.com/akamhy/waybackpy/issues/131
""" """

View File

@ -113,6 +113,7 @@ class Url:
"""Set the attributes for total backwards compatibility.""" """Set the attributes for total backwards compatibility."""
self.archive_url = self.wayback_machine_availability_api.archive_url self.archive_url = self.wayback_machine_availability_api.archive_url
self.json = self.wayback_machine_availability_api.json self.json = self.wayback_machine_availability_api.json
self.JSON = self.json # for backwards compatibility, do not remove it.
self.timestamp = self.wayback_machine_availability_api.timestamp() self.timestamp = self.wayback_machine_availability_api.timestamp()
def total_archives( def total_archives(