Remove blank lines after docstring (#146)
Co-authored-by: deepsource-autofix[bot] <62050782+deepsource-autofix[bot]@users.noreply.github.com>
This commit is contained in:
parent
e0dfbe0b7d
commit
7317bd7183
@ -64,14 +64,12 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
Converts Unix time to Wayback Machine timestamp and the Wayback Machine
|
Converts Unix time to Wayback Machine timestamp and the Wayback Machine
|
||||||
timestamp format is yyyyMMddhhmmss.
|
timestamp format is yyyyMMddhhmmss.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
|
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
"""
|
"""
|
||||||
Same as string representation, just return the archive URL as a string.
|
Same as string representation, just return the archive URL as a string.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return str(self)
|
return str(self)
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
@ -80,7 +78,6 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
call was successfully made then return the archive URL
|
call was successfully made then return the archive URL
|
||||||
as a string. Else returns "".
|
as a string. Else returns "".
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# String should not return anything other than a string object
|
# String should not return anything other than a string object
|
||||||
# So, if a string repr is asked for before making any API requests
|
# So, if a string repr is asked for before making any API requests
|
||||||
# just return ""
|
# just return ""
|
||||||
@ -103,7 +100,6 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
to increase or decrease the default time gap between two successive API
|
to increase or decrease the default time gap between two successive API
|
||||||
calls, but it is not recommended to increase it.
|
calls, but it is not recommended to increase it.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
time_diff = int(time.time()) - self.last_api_call_unix_time
|
time_diff = int(time.time()) - self.last_api_call_unix_time
|
||||||
sleep_time = self.api_call_time_gap - time_diff
|
sleep_time = self.api_call_time_gap - time_diff
|
||||||
|
|
||||||
@ -136,7 +132,6 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
If you get a URL as a response from the availability API it is
|
If you get a URL as a response from the availability API it is
|
||||||
guaranteed that you can get the datetime object from the timestamp.
|
guaranteed that you can get the datetime object from the timestamp.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if self.json is None or "archived_snapshots" not in self.json:
|
if self.json is None or "archived_snapshots" not in self.json:
|
||||||
return datetime.max
|
return datetime.max
|
||||||
|
|
||||||
@ -161,7 +156,6 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
the timestamp if found and if not found raises
|
the timestamp if found and if not found raises
|
||||||
ArchiveNotInAvailabilityAPIResponse.
|
ArchiveNotInAvailabilityAPIResponse.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
archive_url = ""
|
archive_url = ""
|
||||||
data = self.json
|
data = self.json
|
||||||
|
|
||||||
@ -206,7 +200,6 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
Prepends zero before the year, month, day, hour and minute so that they
|
Prepends zero before the year, month, day, hour and minute so that they
|
||||||
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
|
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return "".join(
|
return "".join(
|
||||||
str(kwargs[key]).zfill(2)
|
str(kwargs[key]).zfill(2)
|
||||||
for key in ["year", "month", "day", "hour", "minute"]
|
for key in ["year", "month", "day", "hour", "minute"]
|
||||||
@ -218,7 +211,6 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
because Wayback Machine was started in May, 1996 and it is assumed that
|
because Wayback Machine was started in May, 1996 and it is assumed that
|
||||||
there would be no archive older than January 1, 1994.
|
there would be no archive older than January 1, 1994.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return self.near(year=1994, month=1, day=1)
|
return self.near(year=1994, month=1, day=1)
|
||||||
|
|
||||||
def newest(self) -> "WaybackMachineAvailabilityAPI":
|
def newest(self) -> "WaybackMachineAvailabilityAPI":
|
||||||
@ -228,7 +220,6 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
|
|
||||||
We assume that wayback machine can not archive the future of a webpage.
|
We assume that wayback machine can not archive the future of a webpage.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return self.near(unix_timestamp=int(time.time()))
|
return self.near(unix_timestamp=int(time.time()))
|
||||||
|
|
||||||
def near(
|
def near(
|
||||||
@ -252,7 +243,6 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
And finally invoking the json method to make the API call then returns
|
And finally invoking the json method to make the API call then returns
|
||||||
the instance.
|
the instance.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if unix_timestamp:
|
if unix_timestamp:
|
||||||
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||||
else:
|
else:
|
||||||
|
@ -90,7 +90,6 @@ class WaybackMachineCDXServerAPI:
|
|||||||
not matter but for queries where the number of pages are less this
|
not matter but for queries where the number of pages are less this
|
||||||
method chooses accuracy over the pagination API.
|
method chooses accuracy over the pagination API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# number of pages that will be returned by the pagination API.
|
# number of pages that will be returned by the pagination API.
|
||||||
# get_total_pages adds the showNumPages=true param to pagination API
|
# get_total_pages adds the showNumPages=true param to pagination API
|
||||||
# requests.
|
# requests.
|
||||||
|
@ -23,7 +23,6 @@ def get_total_pages(url: str, user_agent: str = DEFAULT_USER_AGENT) -> int:
|
|||||||
URL makes the CDX server return an integer which is the number of pages
|
URL makes the CDX server return an integer which is the number of pages
|
||||||
of CDX pages available for us to query using the pagination API.
|
of CDX pages available for us to query using the pagination API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
endpoint = "https://web.archive.org/cdx/search/cdx?"
|
endpoint = "https://web.archive.org/cdx/search/cdx?"
|
||||||
payload = {"showNumPages": "true", "url": str(url)}
|
payload = {"showNumPages": "true", "url": str(url)}
|
||||||
headers = {"User-Agent": user_agent}
|
headers = {"User-Agent": user_agent}
|
||||||
@ -43,7 +42,6 @@ def full_url(endpoint: str, params: Dict[str, Any]) -> str:
|
|||||||
such as filter and collapse and this function adds them without
|
such as filter and collapse and this function adds them without
|
||||||
overwriting earlier added arguments.
|
overwriting earlier added arguments.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not params:
|
if not params:
|
||||||
return endpoint
|
return endpoint
|
||||||
_full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
|
_full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
|
||||||
@ -67,7 +65,6 @@ def get_response(
|
|||||||
"""
|
"""
|
||||||
Make get request to the CDX server and return the response.
|
Make get request to the CDX server and return the response.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
|
|
||||||
retries_ = Retry(
|
retries_ = Retry(
|
||||||
@ -87,7 +84,6 @@ def check_filters(filters: List[str]) -> None:
|
|||||||
Check that the filter arguments passed by the end-user are valid.
|
Check that the filter arguments passed by the end-user are valid.
|
||||||
If not valid then raise WaybackError.
|
If not valid then raise WaybackError.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not isinstance(filters, list):
|
if not isinstance(filters, list):
|
||||||
raise WaybackError("filters must be a list.")
|
raise WaybackError("filters must be a list.")
|
||||||
|
|
||||||
@ -110,7 +106,6 @@ def check_collapses(collapses: List[str]) -> bool:
|
|||||||
Check that the collapse arguments passed by the end-user are valid.
|
Check that the collapse arguments passed by the end-user are valid.
|
||||||
If not valid then raise WaybackError.
|
If not valid then raise WaybackError.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not isinstance(collapses, list):
|
if not isinstance(collapses, list):
|
||||||
raise WaybackError("collapses must be a list.")
|
raise WaybackError("collapses must be a list.")
|
||||||
|
|
||||||
@ -138,7 +133,6 @@ def check_match_type(match_type: Optional[str], url: str) -> bool:
|
|||||||
Check that the match_type argument passed by the end-user is valid.
|
Check that the match_type argument passed by the end-user is valid.
|
||||||
If not valid then raise WaybackError.
|
If not valid then raise WaybackError.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
legal_match_type = ["exact", "prefix", "host", "domain"]
|
legal_match_type = ["exact", "prefix", "host", "domain"]
|
||||||
|
|
||||||
if not match_type:
|
if not match_type:
|
||||||
|
@ -58,7 +58,6 @@ class WaybackMachineSaveAPI:
|
|||||||
else invoke the save method to save the archive which returns the
|
else invoke the save method to save the archive which returns the
|
||||||
archive thus we return the method's return value.
|
archive thus we return the method's return value.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if self._archive_url:
|
if self._archive_url:
|
||||||
return self._archive_url
|
return self._archive_url
|
||||||
|
|
||||||
@ -80,7 +79,6 @@ class WaybackMachineSaveAPI:
|
|||||||
to be very unreliable thus if it fails first check opening
|
to be very unreliable thus if it fails first check opening
|
||||||
the response URL yourself in the browser.
|
the response URL yourself in the browser.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
retries = Retry(
|
retries = Retry(
|
||||||
total=self.total_save_retries,
|
total=self.total_save_retries,
|
||||||
@ -120,7 +118,6 @@ class WaybackMachineSaveAPI:
|
|||||||
archive URL in the headers and finally look in the response URL
|
archive URL in the headers and finally look in the response URL
|
||||||
for the archive URL.
|
for the archive URL.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
regex1 = r"Content-Location: (/web/[0-9]{14}/.*)"
|
regex1 = r"Content-Location: (/web/[0-9]{14}/.*)"
|
||||||
match = re.search(regex1, str(self.headers))
|
match = re.search(regex1, str(self.headers))
|
||||||
if match:
|
if match:
|
||||||
@ -156,7 +153,6 @@ class WaybackMachineSaveAPI:
|
|||||||
|
|
||||||
If tries are multiple of 3 sleep 10 seconds else sleep 5 seconds.
|
If tries are multiple of 3 sleep 10 seconds else sleep 5 seconds.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
sleep_seconds = 5
|
sleep_seconds = 5
|
||||||
if tries % 3 == 0:
|
if tries % 3 == 0:
|
||||||
sleep_seconds = 10
|
sleep_seconds = 10
|
||||||
@ -176,7 +172,6 @@ class WaybackMachineSaveAPI:
|
|||||||
the Wayback Machine to serve cached archive if last archive was captured
|
the Wayback Machine to serve cached archive if last archive was captured
|
||||||
before last 45 minutes.
|
before last 45 minutes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
regex = r"https?://web\.archive.org/web/([0-9]{14})/http"
|
regex = r"https?://web\.archive.org/web/([0-9]{14})/http"
|
||||||
match = re.search(regex, str(self._archive_url))
|
match = re.search(regex, str(self._archive_url))
|
||||||
|
|
||||||
@ -205,7 +200,6 @@ class WaybackMachineSaveAPI:
|
|||||||
Raises MaximumSaveRetriesExceeded if maximum retries are exhausted but still
|
Raises MaximumSaveRetriesExceeded if maximum retries are exhausted but still
|
||||||
we were unable to retrieve the archive from the Wayback Machine.
|
we were unable to retrieve the archive from the Wayback Machine.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.saved_archive = None
|
self.saved_archive = None
|
||||||
tries = 0
|
tries = 0
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user