* fix: CI yml name

* add: mypy configuration

* add: type annotation to waybackpy modules

* add: type annotation to test modules

* fix: mypy command

* add: types-requests to dev deps

* fix: disable max-line-length

* fix: move pytest.ini into setup.cfg

* add: urllib3 to deps

* fix: Retry (ref: https://github.com/python/typeshed/issues/6893)

* fix: f-string

* fix: shorten long lines

* add: staticmethod decorator to no-self-use methods

* fix: str(headers)->headers_str

* fix: error message

* fix: revert "str(headers)->headers_str" and ignore assignment CaseInsensitiveDict with str

* fix: mypy error
This commit is contained in:
eggplants
2022-02-05 03:23:36 +09:00
committed by GitHub
parent 320ef30371
commit d8cabdfdb5
22 changed files with 537 additions and 364 deletions

View File

@@ -12,33 +12,42 @@ from waybackpy.exceptions import (
# Reference instant captured once at import time; the tests compare archive
# timestamps against it.
now = datetime.utcnow()
# Example page URL.
url = "https://example.com/"
# Browser-like User-Agent string handed to the API client; written as adjacent
# literals so no single source line is over-long.
user_agent = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36"
)
def rndstr(n: int) -> str:
    """Return a random string of ``n`` uppercase ASCII letters and digits."""
    alphabet = string.ascii_uppercase + string.digits
    return "".join(random.choice(alphabet) for _ in range(n))
def test_oldest() -> None:
    """
    Test the oldest archive of example.com and also check its attributes.
    """
    url = "https://example.com/"
    user_agent = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36"
    )
    availability_api = WaybackMachineAvailabilityAPI(url, user_agent)
    oldest = availability_api.oldest()
    oldest_archive_url = oldest.archive_url
    assert "2002" in oldest_archive_url
    oldest_timestamp = oldest.timestamp()
    assert abs(oldest_timestamp - now) > timedelta(days=7000)  # More than 19 years
    # Check for None first so the subscript is only evaluated on a populated
    # response.
    assert (
        availability_api.JSON is not None
        and availability_api.JSON["archived_snapshots"]["closest"]["available"] is True
    )
    assert repr(oldest).find("example.com") != -1
    assert "2002" in str(oldest)
def test_newest():
def test_newest() -> None:
"""
Assuming that the most recent Google archive was made no earlier than
the last day, which is 86400 seconds.
@@ -54,16 +63,17 @@ def test_newest():
assert abs(newest_timestamp - now) < timedelta(seconds=86400 * 3)
def test_invalid_json() -> None:
    """
    When the API is malfunctioning or we don't pass a URL,
    it may return invalid JSON data.
    """
    with pytest.raises(InvalidJSONInAvailabilityAPIResponse):
        # An empty URL is passed on purpose; reading archive_url is what is
        # expected to surface the invalid-JSON error.
        availability_api = WaybackMachineAvailabilityAPI(url="", user_agent=user_agent)
        _ = availability_api.archive_url
def test_no_archive():
def test_no_archive() -> None:
"""
ArchiveNotInAvailabilityAPIResponse may be raised if Wayback Machine did not
reply with the archive despite the fact that we know the site has million
@@ -74,12 +84,12 @@ def test_no_archive():
"""
with pytest.raises(ArchiveNotInAvailabilityAPIResponse):
availability_api = WaybackMachineAvailabilityAPI(
url="https://%s.cn" % rndstr(30), user_agent=user_agent
url=f"https://{rndstr(30)}.cn", user_agent=user_agent
)
_ = availability_api.archive_url
def test_no_api_call_str_repr():
def test_no_api_call_str_repr() -> None:
"""
Some entitled users may want to see what the string representation is
if they don't make any API requests.
@@ -87,17 +97,17 @@ def test_no_api_call_str_repr():
str() must not return None so we return ""
"""
availability_api = WaybackMachineAvailabilityAPI(
url="https://%s.gov" % rndstr(30), user_agent=user_agent
url=f"https://{rndstr(30)}.gov", user_agent=user_agent
)
assert "" == str(availability_api)
def test_no_call_timestamp() -> None:
    """
    If no API requests were made the bound timestamp() method returns
    the datetime.max as a default value.
    """
    # Only the object is constructed here; no lookup method is invoked before
    # timestamp() is read.
    availability_api = WaybackMachineAvailabilityAPI(
        url=f"https://{rndstr(30)}.in", user_agent=user_agent
    )
    assert datetime.max == availability_api.timestamp()