add: type annotation to waybackpy modules
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import requests
|
||||
|
||||
@@ -10,37 +11,41 @@ from .exceptions import (
|
||||
)
|
||||
from .utils import DEFAULT_USER_AGENT
|
||||
|
||||
ResponseJSON = Dict[str, Any]
|
||||
|
||||
class WaybackMachineAvailabilityAPI:
|
||||
|
||||
class WaybackMachineAvailabilityAPI(object):
|
||||
"""
|
||||
Class that interfaces with the availability API of the Wayback Machine.
|
||||
"""
|
||||
|
||||
def __init__(self, url, user_agent=DEFAULT_USER_AGENT, max_tries=3):
|
||||
def __init__(
|
||||
self, url: str, user_agent: str = DEFAULT_USER_AGENT, max_tries: int = 3
|
||||
) -> None:
|
||||
self.url = str(url).strip().replace(" ", "%20")
|
||||
self.user_agent = user_agent
|
||||
self.headers = {"User-Agent": self.user_agent}
|
||||
self.headers: Dict[str, str] = {"User-Agent": self.user_agent}
|
||||
self.payload = {"url": "{url}".format(url=self.url)}
|
||||
self.endpoint = "https://archive.org/wayback/available"
|
||||
self.max_tries = max_tries
|
||||
self.tries = 0
|
||||
self.last_api_call_unix_time = int(time.time())
|
||||
self.api_call_time_gap = 5
|
||||
self.JSON = None
|
||||
self.JSON: Optional[ResponseJSON] = None
|
||||
|
||||
def unix_timestamp_to_wayback_timestamp(self, unix_timestamp):
|
||||
def unix_timestamp_to_wayback_timestamp(self, unix_timestamp: int) -> str:
|
||||
"""
|
||||
Converts Unix time to a Wayback Machine timestamp.
|
||||
"""
|
||||
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
|
||||
|
||||
def __repr__(self):
|
||||
def __repr__(self) -> str:
|
||||
"""
|
||||
Same as string representation, just return the archive URL as a string.
|
||||
"""
|
||||
return str(self)
|
||||
|
||||
def __str__(self):
|
||||
def __str__(self) -> str:
|
||||
"""
|
||||
String representation of the class. If at least one API call was successfully
|
||||
made then return the archive URL as a string. Else returns None.
|
||||
@@ -54,7 +59,7 @@ class WaybackMachineAvailabilityAPI:
|
||||
|
||||
return self.archive_url
|
||||
|
||||
def json(self):
|
||||
def json(self) -> Optional[ResponseJSON]:
|
||||
"""
|
||||
Makes the API call to the availability API and sets the JSON response
|
||||
to the JSON attribute of the instance and also returns the JSON attribute.
|
||||
@@ -79,7 +84,7 @@ class WaybackMachineAvailabilityAPI:
|
||||
|
||||
return self.JSON
|
||||
|
||||
def timestamp(self):
|
||||
def timestamp(self) -> datetime:
|
||||
"""
|
||||
Converts the timestamp from the JSON response to a datetime object.
|
||||
If JSON attribute of the instance is None it implies that either
|
||||
@@ -91,19 +96,29 @@ class WaybackMachineAvailabilityAPI:
|
||||
If you get a URL as a response from the availability API it is guaranteed
|
||||
that you can get the datetime object from the timestamp.
|
||||
"""
|
||||
if not self.JSON or not self.JSON["archived_snapshots"]:
|
||||
if self.JSON is None or "archived_snapshots" not in self.JSON:
|
||||
return datetime.max
|
||||
|
||||
return datetime.strptime(
|
||||
self.JSON["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
|
||||
)
|
||||
elif (
|
||||
self.JSON is not None
|
||||
and "archived_snapshots" in self.JSON
|
||||
and self.JSON["archived_snapshots"] is not None
|
||||
and "closest" in self.JSON["archived_snapshots"]
|
||||
and self.JSON["archived_snapshots"]["closest"] is not None
|
||||
and "timestamp" in self.JSON["archived_snapshots"]["closest"]
|
||||
):
|
||||
return datetime.strptime(
|
||||
self.JSON["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
|
||||
)
|
||||
else:
|
||||
raise ValueError("Could not get timestamp from result")
|
||||
|
||||
@property
|
||||
def archive_url(self):
|
||||
def archive_url(self) -> str:
|
||||
"""
|
||||
Reads the JSON response data and tries to get the timestamp and returns
|
||||
the timestamp if found else returns None.
|
||||
"""
|
||||
archive_url = ""
|
||||
data = self.JSON
|
||||
|
||||
# If the user didn't use oldest, newest or near but tries to access the
|
||||
@@ -138,7 +153,7 @@ class WaybackMachineAvailabilityAPI:
|
||||
)
|
||||
return archive_url
|
||||
|
||||
def wayback_timestamp(self, **kwargs):
|
||||
def wayback_timestamp(self, **kwargs: int) -> str:
|
||||
"""
|
||||
Prepends zero before the year, month, day, hour and minute so that they
|
||||
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
|
||||
@@ -148,7 +163,7 @@ class WaybackMachineAvailabilityAPI:
|
||||
for key in ["year", "month", "day", "hour", "minute"]
|
||||
)
|
||||
|
||||
def oldest(self):
|
||||
def oldest(self) -> "WaybackMachineAvailabilityAPI":
|
||||
"""
|
||||
Passing the year 1994 should return the oldest archive because
|
||||
wayback machine was started in May, 1996 and there should be no archive
|
||||
@@ -156,7 +171,7 @@ class WaybackMachineAvailabilityAPI:
|
||||
"""
|
||||
return self.near(year=1994)
|
||||
|
||||
def newest(self):
|
||||
def newest(self) -> "WaybackMachineAvailabilityAPI":
|
||||
"""
|
||||
Passing the current UNIX time should be sufficient to get the newest
|
||||
archive considering the API request-response time delay and also the
|
||||
@@ -166,13 +181,13 @@ class WaybackMachineAvailabilityAPI:
|
||||
|
||||
def near(
|
||||
self,
|
||||
year=None,
|
||||
month=None,
|
||||
day=None,
|
||||
hour=None,
|
||||
minute=None,
|
||||
unix_timestamp=None,
|
||||
):
|
||||
year: Optional[int] = None,
|
||||
month: Optional[int] = None,
|
||||
day: Optional[int] = None,
|
||||
hour: Optional[int] = None,
|
||||
minute: Optional[int] = None,
|
||||
unix_timestamp: Optional[int] = None,
|
||||
) -> "WaybackMachineAvailabilityAPI":
|
||||
"""
|
||||
The main method for this class; the oldest and newest methods are dependent on this
|
||||
method.
|
||||
@@ -188,11 +203,11 @@ class WaybackMachineAvailabilityAPI:
|
||||
else:
|
||||
now = datetime.utcnow().timetuple()
|
||||
timestamp = self.wayback_timestamp(
|
||||
year=year if year else now.tm_year,
|
||||
month=month if month else now.tm_mon,
|
||||
day=day if day else now.tm_mday,
|
||||
hour=hour if hour else now.tm_hour,
|
||||
minute=minute if minute else now.tm_min,
|
||||
year=now.tm_year if year is None else year,
|
||||
month=now.tm_mon if month is None else month,
|
||||
day=now.tm_mday if day is None else day,
|
||||
hour=now.tm_hour if hour is None else hour,
|
||||
minute=now.tm_min if minute is None else minute,
|
||||
)
|
||||
|
||||
self.payload["timestamp"] = timestamp
|
||||
|
Reference in New Issue
Block a user