added docstrings, added some static type hints and also lint. (#141)
* added docstrings, added some static type hints and also lint. * added docstrings and changed some internal variable names for more clarity. * make flake8 happy * add descriptive docstrings and type hints in waybackpy/cdx_snapshot.py * remove useless code and add docstrings and also lint using pylint. * remove unwarranted test * added docstrings, lint using pylint and add a raise on 509 status code * added docstrings and lint with pylint * lint * add docstrings and lint * add docstrings and lint
This commit is contained in:
@@ -1,3 +1,9 @@
|
||||
"""
|
||||
This module exists because backwards compatibility matters.
|
||||
Don't touch this or add any new functionality here and don't use
|
||||
the Url class.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Generator, Optional
|
||||
|
||||
@@ -49,12 +55,14 @@ class Url(object):
|
||||
|
||||
if not isinstance(self.timestamp, datetime):
|
||||
raise TypeError("timestamp must be a datetime")
|
||||
elif self.timestamp == datetime.max:
|
||||
|
||||
if self.timestamp == datetime.max:
|
||||
return td_max.days
|
||||
else:
|
||||
return (datetime.utcnow() - self.timestamp).days
|
||||
|
||||
return (datetime.utcnow() - self.timestamp).days
|
||||
|
||||
def save(self) -> "Url":
|
||||
"""Save the URL on wayback machine."""
|
||||
self.wayback_machine_save_api = WaybackMachineSaveAPI(
|
||||
self.url, user_agent=self.user_agent
|
||||
)
|
||||
@@ -72,7 +80,7 @@ class Url(object):
|
||||
minute: Optional[int] = None,
|
||||
unix_timestamp: Optional[int] = None,
|
||||
) -> "Url":
|
||||
|
||||
"""Returns the archive of the URL close to a date and time."""
|
||||
self.wayback_machine_availability_api.near(
|
||||
year=year,
|
||||
month=month,
|
||||
@@ -85,16 +93,19 @@ class Url(object):
|
||||
return self
|
||||
|
||||
def oldest(self) -> "Url":
|
||||
"""Returns the oldest archive of the URL."""
|
||||
self.wayback_machine_availability_api.oldest()
|
||||
self.set_availability_api_attrs()
|
||||
return self
|
||||
|
||||
def newest(self) -> "Url":
|
||||
"""Returns the newest archive of the URL."""
|
||||
self.wayback_machine_availability_api.newest()
|
||||
self.set_availability_api_attrs()
|
||||
return self
|
||||
|
||||
def set_availability_api_attrs(self) -> None:
|
||||
"""Set the attributes for total backwards compatibility."""
|
||||
self.archive_url = self.wayback_machine_availability_api.archive_url
|
||||
self.JSON = self.wayback_machine_availability_api.JSON
|
||||
self.timestamp = self.wayback_machine_availability_api.timestamp()
|
||||
@@ -102,6 +113,10 @@ class Url(object):
|
||||
def total_archives(
|
||||
self, start_timestamp: Optional[str] = None, end_timestamp: Optional[str] = None
|
||||
) -> int:
|
||||
"""
|
||||
Returns an integer which indicates total number of archives for an URL.
|
||||
Useless in my opinion, only here because of backwards compatibility.
|
||||
"""
|
||||
cdx = WaybackMachineCDXServerAPI(
|
||||
self.url,
|
||||
user_agent=self.user_agent,
|
||||
@@ -122,6 +137,7 @@ class Url(object):
|
||||
end_timestamp: Optional[str] = None,
|
||||
match_type: str = "prefix",
|
||||
) -> Generator[str, None, None]:
|
||||
"""Yields known URLs for any URL."""
|
||||
if subdomain:
|
||||
match_type = "domain"
|
||||
if host:
|
||||
@@ -137,4 +153,4 @@ class Url(object):
|
||||
)
|
||||
|
||||
for snapshot in cdx.snapshots():
|
||||
yield (snapshot.original)
|
||||
yield snapshot.original
|
||||
|
||||
Reference in New Issue
Block a user