add: type annotation to waybackpy modules

This commit is contained in:
eggplants
2022-02-04 04:25:01 +09:00
parent c274c474b2
commit 38088fa0d8
9 changed files with 275 additions and 205 deletions

View File

@@ -1,6 +1,7 @@
import json
import time
from datetime import datetime
from typing import Any, Dict, Optional
import requests
@@ -10,37 +11,41 @@ from .exceptions import (
)
from .utils import DEFAULT_USER_AGENT
ResponseJSON = Dict[str, Any]
class WaybackMachineAvailabilityAPI:
class WaybackMachineAvailabilityAPI(object):
"""
Class that interfaces the availability API of the Wayback Machine.
"""
def __init__(self, url, user_agent=DEFAULT_USER_AGENT, max_tries=3):
def __init__(
self, url: str, user_agent: str = DEFAULT_USER_AGENT, max_tries: int = 3
) -> None:
self.url = str(url).strip().replace(" ", "%20")
self.user_agent = user_agent
self.headers = {"User-Agent": self.user_agent}
self.headers: Dict[str, str] = {"User-Agent": self.user_agent}
self.payload = {"url": "{url}".format(url=self.url)}
self.endpoint = "https://archive.org/wayback/available"
self.max_tries = max_tries
self.tries = 0
self.last_api_call_unix_time = int(time.time())
self.api_call_time_gap = 5
self.JSON = None
self.JSON: Optional[ResponseJSON] = None
def unix_timestamp_to_wayback_timestamp(self, unix_timestamp):
def unix_timestamp_to_wayback_timestamp(self, unix_timestamp: int) -> str:
"""
Converts Unix time to wayback Machine timestamp.
"""
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
def __repr__(self):
def __repr__(self) -> str:
"""
Same as string representation, just return the archive URL as a string.
"""
return str(self)
def __str__(self):
def __str__(self) -> str:
"""
String representation of the class. If atleast one API call was successfully
made then return the archive URL as a string. Else returns None.
@@ -54,7 +59,7 @@ class WaybackMachineAvailabilityAPI:
return self.archive_url
def json(self):
def json(self) -> Optional[ResponseJSON]:
"""
Makes the API call to the availability API can set the JSON response
to the JSON attribute of the instance and also returns the JSON attribute.
@@ -79,7 +84,7 @@ class WaybackMachineAvailabilityAPI:
return self.JSON
def timestamp(self):
def timestamp(self) -> datetime:
"""
Converts the timestamp form the JSON response to datetime object.
If JSON attribute of the instance is None it implies that the either
@@ -91,19 +96,29 @@ class WaybackMachineAvailabilityAPI:
If you get an URL as a response form the availability API it is guaranteed
that you can get the datetime object from the timestamp.
"""
if not self.JSON or not self.JSON["archived_snapshots"]:
if self.JSON is None or "archived_snapshots" not in self.JSON:
return datetime.max
return datetime.strptime(
self.JSON["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
)
elif (
self.JSON is not None
and "archived_snapshots" in self.JSON
and self.JSON["archived_snapshots"] is not None
and "closest" in self.JSON["archived_snapshots"]
and self.JSON["archived_snapshots"]["closest"] is not None
and "timestamp" in self.JSON["archived_snapshots"]["closest"]
):
return datetime.strptime(
self.JSON["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
)
else:
raise ValueError("Could not get timestamp from result")
@property
def archive_url(self):
def archive_url(self) -> str:
"""
Reads the the JSON response data and tries to get the timestamp and returns
the timestamp if found else returns None.
"""
archive_url = ""
data = self.JSON
# If the user didn't used oldest, newest or near but tries to access the
@@ -138,7 +153,7 @@ class WaybackMachineAvailabilityAPI:
)
return archive_url
def wayback_timestamp(self, **kwargs):
def wayback_timestamp(self, **kwargs: int) -> str:
"""
Prepends zero before the year, month, day, hour and minute so that they
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
@@ -148,7 +163,7 @@ class WaybackMachineAvailabilityAPI:
for key in ["year", "month", "day", "hour", "minute"]
)
def oldest(self):
def oldest(self) -> "WaybackMachineAvailabilityAPI":
"""
Passing the year 1994 should return the oldest archive because
wayback machine was started in May, 1996 and there should be no archive
@@ -156,7 +171,7 @@ class WaybackMachineAvailabilityAPI:
"""
return self.near(year=1994)
def newest(self):
def newest(self) -> "WaybackMachineAvailabilityAPI":
"""
Passing the current UNIX time should be sufficient to get the newest
archive considering the API request-response time delay and also the
@@ -166,13 +181,13 @@ class WaybackMachineAvailabilityAPI:
def near(
self,
year=None,
month=None,
day=None,
hour=None,
minute=None,
unix_timestamp=None,
):
year: Optional[int] = None,
month: Optional[int] = None,
day: Optional[int] = None,
hour: Optional[int] = None,
minute: Optional[int] = None,
unix_timestamp: Optional[int] = None,
) -> "WaybackMachineAvailabilityAPI":
"""
The main method for this Class, oldest and newest methods are dependent on this
method.
@@ -188,11 +203,11 @@ class WaybackMachineAvailabilityAPI:
else:
now = datetime.utcnow().timetuple()
timestamp = self.wayback_timestamp(
year=year if year else now.tm_year,
month=month if month else now.tm_mon,
day=day if day else now.tm_mday,
hour=hour if hour else now.tm_hour,
minute=minute if minute else now.tm_min,
year=now.tm_year if year is None else year,
month=now.tm_mon if month is None else month,
day=now.tm_mday if day is None else day,
hour=now.tm_hour if hour is None else hour,
minute=now.tm_min if minute is None else minute,
)
self.payload["timestamp"] = timestamp