waybackpy/availability_api.py : removed unused imports, added doc strings, removed redundant function.
This commit is contained in:
parent
5c685ef5d7
commit
fd5e85420c
@ -1,30 +1,14 @@
|
|||||||
import re
|
|
||||||
import time
|
import time
|
||||||
import requests
|
import requests
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from .__version__ import __version__
|
|
||||||
from .utils import DEFAULT_USER_AGENT
|
from .utils import DEFAULT_USER_AGENT
|
||||||
|
|
||||||
|
|
||||||
def full_url(endpoint, params):
|
|
||||||
if not params:
|
|
||||||
return endpoint.strip()
|
|
||||||
|
|
||||||
full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
|
|
||||||
|
|
||||||
for key, val in params.items():
|
|
||||||
key = "filter" if key.startswith("filter") else key
|
|
||||||
key = "collapse" if key.startswith("collapse") else key
|
|
||||||
amp = "" if full_url.endswith("?") else "&"
|
|
||||||
full_url = (
|
|
||||||
full_url
|
|
||||||
+ amp
|
|
||||||
+ "{key}={val}".format(key=key, val=requests.utils.quote(str(val)))
|
|
||||||
)
|
|
||||||
return full_url
|
|
||||||
|
|
||||||
|
|
||||||
class WaybackMachineAvailabilityAPI:
|
class WaybackMachineAvailabilityAPI:
|
||||||
|
"""
|
||||||
|
Class that interfaces the availability API of the Wayback Machine.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, url, user_agent=DEFAULT_USER_AGENT):
|
def __init__(self, url, user_agent=DEFAULT_USER_AGENT):
|
||||||
self.url = str(url).strip().replace(" ", "%20")
|
self.url = str(url).strip().replace(" ", "%20")
|
||||||
self.user_agent = user_agent
|
self.user_agent = user_agent
|
||||||
@ -34,24 +18,50 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
self.JSON = None
|
self.JSON = None
|
||||||
|
|
||||||
def unix_timestamp_to_wayback_timestamp(self, unix_timestamp):
|
def unix_timestamp_to_wayback_timestamp(self, unix_timestamp):
|
||||||
|
"""
|
||||||
|
Converts Unix time to a Wayback Machine timestamp.
|
||||||
|
"""
|
||||||
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
|
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(self) # self.__str__()
|
"""
|
||||||
|
Same as string representation, just return the archive URL as a string.
|
||||||
|
"""
|
||||||
|
return str(self)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
"""
|
||||||
|
String representation of the class. If at least one API call was successfully
|
||||||
|
made then return the archive URL as a string. Else returns None.
|
||||||
|
"""
|
||||||
if not self.JSON:
|
if not self.JSON:
|
||||||
return None
|
return None
|
||||||
return self.archive_url
|
return self.archive_url
|
||||||
|
|
||||||
def json(self):
|
def json(self):
|
||||||
self.request_url = full_url(self.endpoint, self.payload)
|
"""
|
||||||
self.response = requests.get(self.request_url, self.headers)
|
Makes the API call to the availability API and sets the JSON response
|
||||||
|
to the JSON attribute of the instance and also returns the JSON attribute.
|
||||||
|
"""
|
||||||
|
self.response = requests.get(
|
||||||
|
self.endpoint, params=self.payload, headers=self.headers
|
||||||
|
)
|
||||||
self.JSON = self.response.json()
|
self.JSON = self.response.json()
|
||||||
return self.JSON
|
return self.JSON
|
||||||
|
|
||||||
def timestamp(self):
|
def timestamp(self):
|
||||||
if not self.JSON["archived_snapshots"] or not self.JSON:
|
"""
|
||||||
|
Converts the timestamp from the JSON response to a datetime object.
|
||||||
|
If the JSON attribute of the instance is None, it implies that either
|
||||||
|
the last API call failed or one was never made.
|
||||||
|
|
||||||
|
If there is no JSON, or the JSON response has no timestamp, then returns
|
||||||
|
the maximum possible value of a datetime object.
|
||||||
|
|
||||||
|
If you get a URL as a response from the availability API, it is guaranteed
|
||||||
|
that you can get the datetime object from the timestamp.
|
||||||
|
"""
|
||||||
|
if not self.JSON or not self.JSON["archived_snapshots"]:
|
||||||
return datetime.max
|
return datetime.max
|
||||||
|
|
||||||
return datetime.strptime(
|
return datetime.strptime(
|
||||||
@ -60,6 +70,10 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def archive_url(self):
|
def archive_url(self):
|
||||||
|
"""
|
||||||
|
Reads the JSON response data and tries to get the timestamp and returns
|
||||||
|
the timestamp if found else returns None.
|
||||||
|
"""
|
||||||
data = self.JSON
|
data = self.JSON
|
||||||
|
|
||||||
if not data["archived_snapshots"]:
|
if not data["archived_snapshots"]:
|
||||||
@ -72,15 +86,29 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
return archive_url
|
return archive_url
|
||||||
|
|
||||||
def wayback_timestamp(self, **kwargs):
|
def wayback_timestamp(self, **kwargs):
|
||||||
|
"""
|
||||||
|
Prepends zero before the year, month, day, hour and minute so that they
|
||||||
|
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
|
||||||
|
"""
|
||||||
return "".join(
|
return "".join(
|
||||||
str(kwargs[key]).zfill(2)
|
str(kwargs[key]).zfill(2)
|
||||||
for key in ["year", "month", "day", "hour", "minute"]
|
for key in ["year", "month", "day", "hour", "minute"]
|
||||||
)
|
)
|
||||||
|
|
||||||
def oldest(self):
|
def oldest(self):
|
||||||
|
"""
|
||||||
|
Passing the year 1994 should return the oldest archive because
|
||||||
|
wayback machine was started in May, 1996 and there should be no archive
|
||||||
|
before the year 1994.
|
||||||
|
"""
|
||||||
return self.near(year=1994)
|
return self.near(year=1994)
|
||||||
|
|
||||||
def newest(self):
|
def newest(self):
|
||||||
|
"""
|
||||||
|
Passing the current UNIX time should be sufficient to get the newest
|
||||||
|
archive considering the API request-response time delay and also the
|
||||||
|
database lags on Wayback machine.
|
||||||
|
"""
|
||||||
return self.near(unix_timestamp=int(time.time()))
|
return self.near(unix_timestamp=int(time.time()))
|
||||||
|
|
||||||
def near(
|
def near(
|
||||||
@ -92,6 +120,16 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
minute=None,
|
minute=None,
|
||||||
unix_timestamp=None,
|
unix_timestamp=None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
The main method of this class; the oldest and newest methods depend on this
|
||||||
|
method.
|
||||||
|
|
||||||
|
It generates the timestamp based on the input either by calling the
|
||||||
|
unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
|
||||||
|
appropriate arguments for their respective parameters.
|
||||||
|
Adds the timestamp to the payload dictionary.
|
||||||
|
Finally, invokes the json method to make the API call, then returns the instance.
|
||||||
|
"""
|
||||||
if unix_timestamp:
|
if unix_timestamp:
|
||||||
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user