added docstrings and lint with pylint

This commit is contained in:
Akash Mahanty
2022-02-07 18:37:59 +05:30
parent f782a1343c
commit 03deaf43bc

View File

@@ -1,3 +1,9 @@
"""
This module exists because backwards compatibility matters.
Don't touch this or add any new functionality here and don't use
the Url class.
"""
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Generator, Optional from typing import Generator, Optional
@@ -49,12 +55,16 @@ class Url(object):
if not isinstance(self.timestamp, datetime): if not isinstance(self.timestamp, datetime):
raise TypeError("timestamp must be a datetime") raise TypeError("timestamp must be a datetime")
elif self.timestamp == datetime.max:
if self.timestamp == datetime.max:
return td_max.days return td_max.days
else:
return (datetime.utcnow() - self.timestamp).days return (datetime.utcnow() - self.timestamp).days
def save(self) -> "Url": def save(self) -> "Url":
"""
Save the URL on wayback machine.
"""
self.wayback_machine_save_api = WaybackMachineSaveAPI( self.wayback_machine_save_api = WaybackMachineSaveAPI(
self.url, user_agent=self.user_agent self.url, user_agent=self.user_agent
) )
@@ -72,7 +82,9 @@ class Url(object):
minute: Optional[int] = None, minute: Optional[int] = None,
unix_timestamp: Optional[int] = None, unix_timestamp: Optional[int] = None,
) -> "Url": ) -> "Url":
"""
Returns the archive of the URL close to a date and time.
"""
self.wayback_machine_availability_api.near( self.wayback_machine_availability_api.near(
year=year, year=year,
month=month, month=month,
@@ -85,16 +97,25 @@ class Url(object):
return self return self
def oldest(self) -> "Url": def oldest(self) -> "Url":
"""
Returns the oldest archive of the URL.
"""
self.wayback_machine_availability_api.oldest() self.wayback_machine_availability_api.oldest()
self.set_availability_api_attrs() self.set_availability_api_attrs()
return self return self
def newest(self) -> "Url": def newest(self) -> "Url":
"""
Returns the newest archive of the URL.
"""
self.wayback_machine_availability_api.newest() self.wayback_machine_availability_api.newest()
self.set_availability_api_attrs() self.set_availability_api_attrs()
return self return self
def set_availability_api_attrs(self) -> None: def set_availability_api_attrs(self) -> None:
"""
Set the attributes for total backwards compatibility.
"""
self.archive_url = self.wayback_machine_availability_api.archive_url self.archive_url = self.wayback_machine_availability_api.archive_url
self.JSON = self.wayback_machine_availability_api.JSON self.JSON = self.wayback_machine_availability_api.JSON
self.timestamp = self.wayback_machine_availability_api.timestamp() self.timestamp = self.wayback_machine_availability_api.timestamp()
@@ -102,6 +123,10 @@ class Url(object):
def total_archives( def total_archives(
self, start_timestamp: Optional[str] = None, end_timestamp: Optional[str] = None self, start_timestamp: Optional[str] = None, end_timestamp: Optional[str] = None
) -> int: ) -> int:
"""
Returns an integer indicating the total number of archives for a URL.
Useless in my opinion, only here because of backwards compatibility.
"""
cdx = WaybackMachineCDXServerAPI( cdx = WaybackMachineCDXServerAPI(
self.url, self.url,
user_agent=self.user_agent, user_agent=self.user_agent,
@@ -122,6 +147,9 @@ class Url(object):
end_timestamp: Optional[str] = None, end_timestamp: Optional[str] = None,
match_type: str = "prefix", match_type: str = "prefix",
) -> Generator[str, None, None]: ) -> Generator[str, None, None]:
"""
Yields known URLs for any URL.
"""
if subdomain: if subdomain:
match_type = "domain" match_type = "domain"
if host: if host:
@@ -137,4 +165,4 @@ class Url(object):
) )
for snapshot in cdx.snapshots(): for snapshot in cdx.snapshots():
yield (snapshot.original) yield snapshot.original