added docstrings, added some static type hints and also lint. (#141)

* added docstrings, added some static type hints and also lint.

* added doc strings and changed some internal variable names for more clarity.

* make flake8 happy

* add descriptive docstrings and type hints in waybackpy/cdx_snapshot.py

* remove useless code and add docstrings and also lint using pylint.

* remove unwarranted test

* added docstrings, lint using pylint and add a raise on 509 SC

* added docstrings and lint with pylint

* lint

* add doc strings and lint

* add docstrings and lint
This commit is contained in:
Akash Mahanty
2022-02-07 19:40:37 +05:30
committed by GitHub
parent 004ff26196
commit 97f8b96411
9 changed files with 400 additions and 127 deletions

View File

@@ -1,3 +1,9 @@
"""
This module exists because backwards compatibility matters.
Don't touch this or add any new functionality here and don't use
the Url class.
"""
from datetime import datetime, timedelta
from typing import Generator, Optional
@@ -49,12 +55,14 @@ class Url(object):
if not isinstance(self.timestamp, datetime):
raise TypeError("timestamp must be a datetime")
elif self.timestamp == datetime.max:
if self.timestamp == datetime.max:
return td_max.days
else:
return (datetime.utcnow() - self.timestamp).days
return (datetime.utcnow() - self.timestamp).days
def save(self) -> "Url":
"""Save the URL on wayback machine."""
self.wayback_machine_save_api = WaybackMachineSaveAPI(
self.url, user_agent=self.user_agent
)
@@ -72,7 +80,7 @@ class Url(object):
minute: Optional[int] = None,
unix_timestamp: Optional[int] = None,
) -> "Url":
"""Returns the archive of the URL close to a date and time."""
self.wayback_machine_availability_api.near(
year=year,
month=month,
@@ -85,16 +93,19 @@ class Url(object):
return self
def oldest(self) -> "Url":
    """
    Returns the oldest archive of the URL.

    Calls ``oldest()`` on ``self.wayback_machine_availability_api``, then
    runs ``set_availability_api_attrs()`` to refresh this object's
    attributes from that API object, and finally returns this same
    instance.
    """
    self.wayback_machine_availability_api.oldest()
    self.set_availability_api_attrs()
    return self
def newest(self) -> "Url":
    """
    Returns the newest archive of the URL.

    Calls ``newest()`` on ``self.wayback_machine_availability_api``, then
    runs ``set_availability_api_attrs()`` to refresh this object's
    attributes from that API object, and finally returns this same
    instance.
    """
    self.wayback_machine_availability_api.newest()
    self.set_availability_api_attrs()
    return self
def set_availability_api_attrs(self) -> None:
    """
    Set the attributes for total backwards compatibility.

    Copies ``archive_url`` and ``JSON`` from
    ``self.wayback_machine_availability_api`` onto this instance, and
    stores the return value of that object's ``timestamp()`` call as
    ``self.timestamp``.
    """
    self.archive_url = self.wayback_machine_availability_api.archive_url
    self.JSON = self.wayback_machine_availability_api.JSON
    # timestamp is invoked as a method here, unlike the two plain attribute
    # reads above; its result is what gets stored on this instance.
    self.timestamp = self.wayback_machine_availability_api.timestamp()
@@ -102,6 +113,10 @@ class Url(object):
def total_archives(
self, start_timestamp: Optional[str] = None, end_timestamp: Optional[str] = None
) -> int:
"""
Returns an integer which indicates total number of archives for an URL.
Useless in my opinion, only here because of backwards compatibility.
"""
cdx = WaybackMachineCDXServerAPI(
self.url,
user_agent=self.user_agent,
@@ -122,6 +137,7 @@ class Url(object):
end_timestamp: Optional[str] = None,
match_type: str = "prefix",
) -> Generator[str, None, None]:
"""Yields known URLs for any URL."""
if subdomain:
match_type = "domain"
if host:
@@ -137,4 +153,4 @@ class Url(object):
)
for snapshot in cdx.snapshots():
yield (snapshot.original)
yield snapshot.original