Created pytest.ini, added tests for waybackpy/availability_api.py, added new exceptions (all of which inherit from the main WaybackError) and created requirements-dev.txt
This commit is contained in:
		
							
								
								
									
										11
									
								
								pytest.ini
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								pytest.ini
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,11 @@
 | 
			
		||||
[pytest]
 | 
			
		||||
addopts =
 | 
			
		||||
    # show summary of all tests that did not pass
 | 
			
		||||
    -ra
 | 
			
		||||
    # enable all warnings
 | 
			
		||||
    -Wd
 | 
			
		||||
    # coverage and html report
 | 
			
		||||
    --cov=waybackpy
 | 
			
		||||
    --cov-report=html
 | 
			
		||||
testpaths =
 | 
			
		||||
    tests
 | 
			
		||||
							
								
								
									
										3
									
								
								requirements-dev.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								requirements-dev.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,3 @@
 | 
			
		||||
click
 | 
			
		||||
requests
 | 
			
		||||
pytest
 | 
			
		||||
							
								
								
									
										93
									
								
								tests/test_availability_api.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								tests/test_availability_api.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,93 @@
 | 
			
		||||
import pytest
 | 
			
		||||
import random
 | 
			
		||||
import string
 | 
			
		||||
from datetime import datetime, timedelta
 | 
			
		||||
 | 
			
		||||
from waybackpy.availability_api import WaybackMachineAvailabilityAPI
 | 
			
		||||
from waybackpy.exceptions import (
 | 
			
		||||
    InvalidJSONInAvailabilityAPIResponse,
 | 
			
		||||
    ArchiveNotInAvailabilityAPIResponse,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
now = datetime.utcnow()
 | 
			
		||||
url = "https://google.com"
 | 
			
		||||
user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0"
 | 
			
		||||
 | 
			
		||||
def rndstr(n):
    """
    Return a random string that is ``n`` characters long.

    Every character is picked independently from the ASCII uppercase
    letters and the decimal digits. When ``n`` is zero or negative the
    result is the empty string.
    """
    # Build the alphabet once instead of once per generated character.
    alphabet = string.ascii_uppercase + string.digits
    return "".join(random.choice(alphabet) for _ in range(n))
 | 
			
		||||
 | 
			
		||||
availability_api = WaybackMachineAvailabilityAPI(url, user_agent)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_oldest():
    """
    Check the oldest archive of the module-level URL (google.com) together
    with the attributes exposed after the lookup.
    """
    snapshot = availability_api.oldest()

    # The oldest capture is expected to date from 1998.
    assert "1998" in snapshot.archive_url

    # 8400 days is roughly 23 years, i.e. comfortably more than 20 years.
    age = abs(snapshot.timestamp() - now)
    assert age > timedelta(days=8400)

    closest = availability_api.JSON["archived_snapshots"]["closest"]
    assert closest["available"] is True

    assert "google.com" in repr(snapshot)
    assert "1998" in str(snapshot)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_newest():
    """
    Assumes that the most recent archive of Google was made no earlier than
    one day ago, which is 86400 seconds.
    """
    one_day = timedelta(seconds=86400)
    latest = availability_api.newest()
    drift = abs(latest.timestamp() - now)
    assert drift < one_day
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_invalid_json():
    """
    When the API is malfunctioning or we don't pass a URL it may return
    invalid JSON data; accessing ``archive_url`` must then raise
    InvalidJSONInAvailabilityAPIResponse.
    """
    # Construct the client outside the ``raises`` block so that only the
    # attribute access below can satisfy the expectation.
    empty_url_api = WaybackMachineAvailabilityAPI(url="", user_agent=user_agent)

    with pytest.raises(InvalidJSONInAvailabilityAPIResponse):
        # Only the access matters; the value itself is discarded.
        _ = empty_url_api.archive_url
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_no_archive():
    """
    ArchiveNotInAvailabilityAPIResponse may be raised if the Wayback Machine
    did not reply with the archive despite the fact that we know the site
    has millions of archives. The reason for this weird behavior is unknown.

    The exception is also raised if there really are no archives for the
    passed URL, which is what the random domain used here relies on.
    """
    # Construct the client outside the ``raises`` block so that only the
    # attribute access below can satisfy the expectation.
    unarchived_api = WaybackMachineAvailabilityAPI(
        url="https://%s.com" % rndstr(30), user_agent=user_agent
    )

    with pytest.raises(ArchiveNotInAvailabilityAPIResponse):
        # Only the access matters; the value itself is discarded.
        _ = unarchived_api.archive_url
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_no_api_call_str_repr():
    """
    Users may ask for the string representation before making any API
    request.

    str() must not return None, so an empty string is expected instead.
    """
    random_url = "https://%s.com" % rndstr(30)
    untouched_api = WaybackMachineAvailabilityAPI(
        url=random_url, user_agent=user_agent
    )
    assert str(untouched_api) == ""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_no_call_timestamp():
    """
    When no API request has been made, the bound timestamp() method is
    expected to return datetime.max as its default value.
    """
    random_url = "https://%s.com" % rndstr(30)
    untouched_api = WaybackMachineAvailabilityAPI(
        url=random_url, user_agent=user_agent
    )
    assert untouched_api.timestamp() == datetime.max
 | 
			
		||||
@@ -1,7 +1,12 @@
 | 
			
		||||
import time
 | 
			
		||||
import json
 | 
			
		||||
import requests
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from .utils import DEFAULT_USER_AGENT
 | 
			
		||||
from .exceptions import (
 | 
			
		||||
    ArchiveNotInAvailabilityAPIResponse,
 | 
			
		||||
    InvalidJSONInAvailabilityAPIResponse,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class WaybackMachineAvailabilityAPI:
 | 
			
		||||
@@ -34,8 +39,13 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
        String representation of the class. If at least one API call was successfully
 | 
			
		||||
        made then return the archive URL as a string. Else returns "".
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        # String must not return anything other than a string object
 | 
			
		||||
        # So, if someone asks for string repr before making the API requests
 | 
			
		||||
        # just return ""
 | 
			
		||||
        if not self.JSON:
 | 
			
		||||
            return None
 | 
			
		||||
            return ""
 | 
			
		||||
 | 
			
		||||
        return self.archive_url
 | 
			
		||||
 | 
			
		||||
    def json(self):
 | 
			
		||||
@@ -46,7 +56,13 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
        self.response = requests.get(
 | 
			
		||||
            self.endpoint, params=self.payload, headers=self.headers
 | 
			
		||||
        )
 | 
			
		||||
        try:
 | 
			
		||||
            self.JSON = self.response.json()
 | 
			
		||||
        except json.decoder.JSONDecodeError:
 | 
			
		||||
            raise InvalidJSONInAvailabilityAPIResponse(
 | 
			
		||||
                "Response data:\n{text}".format(text=self.response.text)
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        return self.JSON
 | 
			
		||||
 | 
			
		||||
    def timestamp(self):
 | 
			
		||||
@@ -76,8 +92,21 @@ class WaybackMachineAvailabilityAPI:
 | 
			
		||||
        """
 | 
			
		||||
        data = self.JSON
 | 
			
		||||
 | 
			
		||||
        if not data["archived_snapshots"]:
 | 
			
		||||
            archive_url = None
 | 
			
		||||
        # If the user didn't use oldest, newest or near but tries to access the
 | 
			
		||||
        # archive_url attribute then, we assume they are fine with any archive
 | 
			
		||||
        # and invoke the oldest archive function.
 | 
			
		||||
        if not data:
 | 
			
		||||
            self.oldest()
 | 
			
		||||
 | 
			
		||||
        # If data is still empty then probably there is no
 | 
			
		||||
        # archive for the requested URL.
 | 
			
		||||
        if not data or not data["archived_snapshots"]:
 | 
			
		||||
            raise ArchiveNotInAvailabilityAPIResponse(
 | 
			
		||||
                "Archive not found in the availability "
 | 
			
		||||
                + "API response, maybe the URL you requested does not have any "
 | 
			
		||||
                + "archive yet. You may retry after some time or archive the webpage now."
 | 
			
		||||
                + "\nResponse data:\n{response}".format(response=self.response.text)
 | 
			
		||||
            )
 | 
			
		||||
        else:
 | 
			
		||||
            archive_url = data["archived_snapshots"]["closest"]["url"]
 | 
			
		||||
            archive_url = archive_url.replace(
 | 
			
		||||
 
 | 
			
		||||
@@ -38,3 +38,15 @@ class MaximumSaveRetriesExceeded(MaximumRetriesExceeded):
 | 
			
		||||
    """
 | 
			
		||||
    MaximumSaveRetriesExceeded
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ArchiveNotInAvailabilityAPIResponse(WaybackError):
    """
    Raised when no archive could be found in the JSON response of the
    availability API.
    """
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class InvalidJSONInAvailabilityAPIResponse(WaybackError):
    """
    Raised when the availability API returns a response that is not
    valid JSON.
    """
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user