improve doc strings and comments and remove useless exceptions.

This commit is contained in:
Akash Mahanty 2022-02-09 14:32:15 +05:30
parent 6d233f24fc
commit 25eb709ade
7 changed files with 44 additions and 48 deletions

View File

@ -1,19 +1,19 @@
"""
This module interfaces the Wayback Machine's availability API.
The interface could be useful for looking up archives and finding archives
The interface is useful for looking up archives and finding archives
that are close to a specific date and time.
It has a class called WaybackMachineAvailabilityAPI, and the class has
methods such as:
It has a class WaybackMachineAvailabilityAPI, and the class has
methods like:
near() for looking up archives close to a specific date and time.
near() for retrieving archives close to a specific date and time.
oldest() for retrieving the first archive URL of the webpage.
newest() for retrieving the latest archive of an URL.
newest() for retrieving the latest archive of the webpage.
The Wayback Machine Availability response should be a valid JSON and
The Wayback Machine Availability API response must be valid JSON;
if it is not, the InvalidJSONInAvailabilityAPIResponse exception is raised.
If the Availability API returned valid JSON but archive URL could not be found
@ -39,7 +39,7 @@ ResponseJSON = Dict[str, Any]
class WaybackMachineAvailabilityAPI:
"""
Class that interfaces the availability API of the Wayback Machine.
Class that interfaces the Wayback Machine's availability API.
"""
def __init__(
@ -61,7 +61,7 @@ class WaybackMachineAvailabilityAPI:
@staticmethod
def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str:
"""
Converts Unix time to wayback Machine timestamp and the Wayback Machine
Converts Unix time to a Wayback Machine timestamp. The Wayback Machine
timestamp format is yyyyMMddhhmmss.
"""
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
@ -76,10 +76,10 @@ class WaybackMachineAvailabilityAPI:
"""
String representation of the class. If at least one API
call was successfully made then return the archive URL
as a string. Else returns "".
as a string. Else returns "" (empty string literal).
"""
# String should not return anything other than a string object
# So, if a string repr is asked for before making any API requests
# __str__ cannot return anything other than a string object.
# So, if a string repr is requested before any API request is made,
# just return ""
if not self.json:
return ""
@ -147,7 +147,7 @@ class WaybackMachineAvailabilityAPI:
self.json["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
)
raise ValueError("Could not get timestamp from result")
raise ValueError("Timestamp not found in the Availability API's JSON response.")
@property
def archive_url(self) -> str:
@ -159,8 +159,8 @@ class WaybackMachineAvailabilityAPI:
archive_url = ""
data = self.json
# If the user didn't invoke oldest, newest or near but tries to access the
# archive_url attribute then assume they are fine with any archive
# If the user didn't invoke oldest, newest or near but tries to access
# the archive_url attribute then assume that they are fine with any archive
# and invoke the oldest method.
if not data:
self.oldest()
@ -172,10 +172,10 @@ class WaybackMachineAvailabilityAPI:
not data or not data["archived_snapshots"]
):
self.setup_json() # It makes a new API call
data = self.json # json() updated the value of JSON attribute
data = self.json # setup_json() updates value of json attribute
# If we exhausted the max_tries, then we give up and
# raise exception.
# If max_tries is exhausted, then give up and
# raise ArchiveNotInAvailabilityAPIResponse.
if not data or not data["archived_snapshots"]:
raise ArchiveNotInAvailabilityAPIResponse(
@ -198,7 +198,7 @@ class WaybackMachineAvailabilityAPI:
def wayback_timestamp(**kwargs: int) -> str:
"""
Prepends zero before the year, month, day, hour and minute so that they
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
are conformable with the YYYYMMDDhhmmss Wayback Machine timestamp format.
"""
return "".join(
str(kwargs[key]).zfill(2)
@ -218,7 +218,7 @@ class WaybackMachineAvailabilityAPI:
Passes the current UNIX time to near() for retrieving the newest archive
from the availability API.
We assume that wayback machine can not archive the future of a webpage.
Remember UNIX time is UTC and Wayback Machine is also UTC based.
"""
return self.near(unix_timestamp=int(time.time()))
@ -232,7 +232,8 @@ class WaybackMachineAvailabilityAPI:
unix_timestamp: Optional[int] = None,
) -> "WaybackMachineAvailabilityAPI":
"""
The main method for the Class, oldest() and newest() are dependent on it.
The most important method of this class; oldest() and newest() are
dependent on it.
It generates the timestamp based on the input either by calling the
unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
@ -240,8 +241,8 @@ class WaybackMachineAvailabilityAPI:
Adds the timestamp to the payload dictionary.
And finally invoking the json method to make the API call then returns
the instance.
Finally, it invokes the setup_json method to make the API call and then
returns the instance.
"""
if unix_timestamp:
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)

View File

@ -173,7 +173,6 @@ class WaybackMachineCDXServerAPI:
for i, collapse in enumerate(self.collapses):
payload["collapse" + str(i)] = collapse
# Don't need to return anything as it's dictionary.
payload["url"] = self.url
def snapshots(self) -> Generator[CDXSnapshot, None, None]:

View File

@ -1,5 +1,5 @@
"""
Module that contains the CDXSnapshot class, CDX records are casted
Module that contains the CDXSnapshot class; CDX records/lines are cast
to CDXSnapshot objects for easier access.
The CDX index format is plain text data. Each line ('record') indicates a

View File

@ -2,7 +2,7 @@
Utility functions required for accessing the CDX server API.
These are here in this module so that we don't make any module too
big.
long.
"""
import re
@ -63,7 +63,7 @@ def get_response(
backoff_factor: float = 0.5,
) -> Union[requests.Response, Exception]:
"""
Make get request to the CDX server and return the response.
Makes a GET request to the CDX server and returns the response.
"""
session = requests.Session()

View File

@ -1,5 +1,5 @@
"""
Module that makes waybackpy a CLI tool.
Module responsible for enabling waybackpy to function as a CLI tool.
"""
import os
@ -18,24 +18,30 @@ from .cdx_api import WaybackMachineCDXServerAPI
from .save_api import WaybackMachineSaveAPI
from .utils import DEFAULT_USER_AGENT
from .wrapper import Url
from .exceptions import ArchiveNotInAvailabilityAPIResponse
def echo_availability_api(
availability_api_instance: WaybackMachineAvailabilityAPI, json: bool
) -> None:
"""
Output availability API depending functions.
Near, oldest and newest output by this method.
Output for methods that use the availability API.
The near, oldest and newest results are output via this function.
"""
if not availability_api_instance.archive_url:
archive_url = (
try:
if availability_api_instance.archive_url:
archive_url = availability_api_instance.archive_url
except ArchiveNotInAvailabilityAPIResponse as error:
message = (
"NO ARCHIVE FOUND - The requested URL is probably "
+ "not yet archived or if the URL was recently archived then it is "
+ "not yet available via the Wayback Machine's availability API "
+ "because of database lag and should be available after some time."
)
else:
archive_url = availability_api_instance.archive_url
click.echo(message + "\nJSON response:\n" + str(error), err=True)
return
click.echo("Archive URL:")
click.echo(archive_url)
if json:
@ -45,7 +51,7 @@ def echo_availability_api(
def handle_cdx(data: List[Any]) -> None:
"""
Handles the CDX CLI options and output.
Handles the CDX CLI options and output format.
"""
url = data[0]
user_agent = data[1]

View File

@ -12,20 +12,7 @@ class WaybackError(Exception):
1) Wayback Machine API Service is unreachable/down.
2) You passed illegal arguments.
All other exceptions are inherited from this class.
"""
class RedirectSaveError(WaybackError):
"""
Raised when the original URL is redirected and the
redirect URL is archived but not the original URL.
"""
class URLError(Exception):
"""
Raised when malformed URLs are passed as arguments.
All other exceptions are inherited from this main exception.
"""
@ -33,6 +20,8 @@ class TooManyRequestsError(WaybackError):
"""
Raised when you make more than 15 requests per
minute and the Wayback Machine returns 429.
See https://github.com/akamhy/waybackpy/issues/131
"""

View File

@ -113,6 +113,7 @@ class Url:
"""Set the attributes for total backwards compatibility."""
self.archive_url = self.wayback_machine_availability_api.archive_url
self.json = self.wayback_machine_availability_api.json
self.JSON = self.json # for backwards compatibility, do not remove it.
self.timestamp = self.wayback_machine_availability_api.timestamp()
def total_archives(