Created pytest.ini, added tests for waybackpy/availability_api.py, added new exceptions (all of which inherit from the main WaybackError), and created requirements-dev.txt
This commit is contained in:
parent
79901ba968
commit
1bacd73002
11
pytest.ini
Normal file
11
pytest.ini
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
[pytest]
|
||||||
|
addopts =
|
||||||
|
# show summary of all tests that did not pass
|
||||||
|
-ra
|
||||||
|
# enable all warnings
|
||||||
|
-Wd
|
||||||
|
# coverage and html report
|
||||||
|
--cov=waybackpy
|
||||||
|
--cov-report=html
|
||||||
|
testpaths =
|
||||||
|
tests
|
3
requirements-dev.txt
Normal file
3
requirements-dev.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
click
|
||||||
|
requests
|
||||||
|
pytest
|
93
tests/test_availability_api.py
Normal file
93
tests/test_availability_api.py
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
import pytest
|
||||||
|
import random
|
||||||
|
import string
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
from waybackpy.availability_api import WaybackMachineAvailabilityAPI
|
||||||
|
from waybackpy.exceptions import (
|
||||||
|
InvalidJSONInAvailabilityAPIResponse,
|
||||||
|
ArchiveNotInAvailabilityAPIResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Reference instant captured once at import time; the timestamp assertions
# below compare archive timestamps against it.
now = datetime.utcnow()

# Target URL used by the shared API instance in the oldest/newest tests.
url = "https://google.com"

# User agent string passed to every WaybackMachineAvailabilityAPI instance in this module.
user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0"
|
||||||
|
|
||||||
|
def rndstr(n):
    """
    Return a random string of length ``n`` built from uppercase ASCII
    letters and digits.

    The tests use it to build random ``https://<random>.com`` URLs.

    Written as a ``def`` rather than a lambda bound to a name (PEP 8, E731)
    so the callable has a real ``__name__`` and can carry this docstring.
    """
    # Build the alphabet once instead of on every character pick.
    alphabet = string.ascii_uppercase + string.digits
    return "".join(random.choice(alphabet) for _ in range(n))
|
||||||
|
|
||||||
|
# Shared instance for the oldest/newest tests; the other tests build their own.
availability_api = WaybackMachineAvailabilityAPI(url=url, user_agent=user_agent)
|
||||||
|
|
||||||
|
|
||||||
|
def test_oldest():
    """
    Request the oldest archive of google.com and check the archive URL,
    the timestamp, the raw JSON response and the repr/str output.
    """
    snapshot = availability_api.oldest()

    # The test expects the oldest capture to date from 1998, visible in its URL.
    assert "1998" in snapshot.archive_url

    # The capture must be more than 8400 days (over 20 years) away from `now`.
    age = abs(snapshot.timestamp() - now)
    assert age > timedelta(days=8400)

    closest = availability_api.JSON["archived_snapshots"]["closest"]
    assert closest["available"] is True

    assert "google.com" in repr(snapshot)
    assert "1998" in str(snapshot)
|
||||||
|
|
||||||
|
|
||||||
|
def test_newest():
    """
    Request the newest archive of google.com.

    Assumes the most recent capture was made less than one day
    (86400 seconds) away from the moment this module was imported.
    """
    one_day = timedelta(seconds=86400)
    snapshot = availability_api.newest()
    distance = abs(snapshot.timestamp() - now)
    assert distance < one_day
|
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_json():
    """
    With an empty URL (or a malfunctioning API) the response may not be valid
    JSON; accessing ``archive_url`` is then expected to raise
    InvalidJSONInAvailabilityAPIResponse.
    """
    with pytest.raises(InvalidJSONInAvailabilityAPIResponse):
        api = WaybackMachineAvailabilityAPI(url="", user_agent=user_agent)
        # Only the attribute access matters here; the value itself is discarded.
        _ = api.archive_url
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_archive():
    """
    ArchiveNotInAvailabilityAPIResponse is raised when the availability API
    response carries no archive.

    This happens when the requested URL really has no archives, which is the
    case exercised here with a random 30-character hostname. It may also
    happen when the Wayback Machine does not reply with an archive even though
    the site is known to have millions of them; the reason for that weird
    behavior is unknown.
    """
    random_url = "https://%s.com" % rndstr(30)
    with pytest.raises(ArchiveNotInAvailabilityAPIResponse):
        api = WaybackMachineAvailabilityAPI(url=random_url, user_agent=user_agent)
        # Only the attribute access matters here; the value itself is discarded.
        _ = api.archive_url
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_api_call_str_repr():
    """
    Check the string representation of an instance on which no API request
    has been made yet.

    str() must not return None, so an empty string is expected.
    """
    random_url = "https://%s.com" % rndstr(30)
    api = WaybackMachineAvailabilityAPI(url=random_url, user_agent=user_agent)
    assert str(api) == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_call_timestamp():
    """
    When no API request has been made, the timestamp() method is expected
    to return datetime.max as its default value.
    """
    random_url = "https://%s.com" % rndstr(30)
    api = WaybackMachineAvailabilityAPI(url=random_url, user_agent=user_agent)
    assert api.timestamp() == datetime.max
|
@ -1,7 +1,12 @@
|
|||||||
import time
|
import time
|
||||||
|
import json
|
||||||
import requests
|
import requests
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from .utils import DEFAULT_USER_AGENT
|
from .utils import DEFAULT_USER_AGENT
|
||||||
|
from .exceptions import (
|
||||||
|
ArchiveNotInAvailabilityAPIResponse,
|
||||||
|
InvalidJSONInAvailabilityAPIResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class WaybackMachineAvailabilityAPI:
|
class WaybackMachineAvailabilityAPI:
|
||||||
@ -34,8 +39,13 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
String representation of the class. If atleast one API call was successfully
|
String representation of the class. If atleast one API call was successfully
|
||||||
made then return the archive URL as a string. Else returns None.
|
made then return the archive URL as a string. Else returns None.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# String must not return anything other than a string object
|
||||||
|
# So, if some asks for string repr before making the API requests
|
||||||
|
# just return ""
|
||||||
if not self.JSON:
|
if not self.JSON:
|
||||||
return None
|
return ""
|
||||||
|
|
||||||
return self.archive_url
|
return self.archive_url
|
||||||
|
|
||||||
def json(self):
|
def json(self):
|
||||||
@ -46,7 +56,13 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
self.response = requests.get(
|
self.response = requests.get(
|
||||||
self.endpoint, params=self.payload, headers=self.headers
|
self.endpoint, params=self.payload, headers=self.headers
|
||||||
)
|
)
|
||||||
self.JSON = self.response.json()
|
try:
|
||||||
|
self.JSON = self.response.json()
|
||||||
|
except json.decoder.JSONDecodeError:
|
||||||
|
raise InvalidJSONInAvailabilityAPIResponse(
|
||||||
|
"Response data:\n{text}".format(text=self.response.text)
|
||||||
|
)
|
||||||
|
|
||||||
return self.JSON
|
return self.JSON
|
||||||
|
|
||||||
def timestamp(self):
|
def timestamp(self):
|
||||||
@ -76,8 +92,21 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
"""
|
"""
|
||||||
data = self.JSON
|
data = self.JSON
|
||||||
|
|
||||||
if not data["archived_snapshots"]:
|
# If the user didn't use oldest, newest or near but tries to access the
|
||||||
archive_url = None
|
# archive_url attribute then, we assume they are fine with any archive
|
||||||
|
# and invoke the oldest archive function.
|
||||||
|
if not data:
|
||||||
|
self.oldest()
|
||||||
|
|
||||||
|
# If data is still empty then there is probably no
|
||||||
|
# archive for the requested URL.
|
||||||
|
if not data or not data["archived_snapshots"]:
|
||||||
|
raise ArchiveNotInAvailabilityAPIResponse(
|
||||||
|
"Archive not found in the availability "
|
||||||
|
+ "API response, maybe the URL you requested does not have any "
|
||||||
|
+ "archive yet. You may retry after some time or archive the webpage now."
|
||||||
|
+ "\nResponse data:\n{response}".format(response=self.response.text)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
archive_url = data["archived_snapshots"]["closest"]["url"]
|
archive_url = data["archived_snapshots"]["closest"]["url"]
|
||||||
archive_url = archive_url.replace(
|
archive_url = archive_url.replace(
|
||||||
|
@ -38,3 +38,15 @@ class MaximumSaveRetriesExceeded(MaximumRetriesExceeded):
|
|||||||
"""
|
"""
|
||||||
MaximumSaveRetriesExceeded
|
MaximumSaveRetriesExceeded
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class ArchiveNotInAvailabilityAPIResponse(WaybackError):
    """
    The availability API response did not contain an archive.

    Raised when the response data is empty or its "archived_snapshots"
    entry is empty, for example when the requested URL has no archive yet.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidJSONInAvailabilityAPIResponse(WaybackError):
    """
    The availability API returned a response body that could not be
    decoded as JSON.
    """
|
||||||
|
Loading…
Reference in New Issue
Block a user