Remove blank lines after docstring (#146)
Co-authored-by: deepsource-autofix[bot] <62050782+deepsource-autofix[bot]@users.noreply.github.com>
This commit is contained in:
parent
e0dfbe0b7d
commit
7317bd7183
@ -64,14 +64,12 @@ class WaybackMachineAvailabilityAPI:
|
||||
Converts Unix time to Wayback Machine timestamp and the Wayback Machine
|
||||
timestamp format is yyyyMMddhhmmss.
|
||||
"""
|
||||
|
||||
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
"""
|
||||
Same as string representation, just return the archive URL as a string.
|
||||
"""
|
||||
|
||||
return str(self)
|
||||
|
||||
def __str__(self) -> str:
|
||||
@ -80,7 +78,6 @@ class WaybackMachineAvailabilityAPI:
|
||||
call was successfully made then return the archive URL
|
||||
as a string. Else returns "".
|
||||
"""
|
||||
|
||||
# String should not return anything other than a string object
|
||||
# So, if a string repr is asked for before making any API requests
|
||||
# just return ""
|
||||
@ -103,7 +100,6 @@ class WaybackMachineAvailabilityAPI:
|
||||
to increase or decrease the default time gap between two successive API
|
||||
calls, but it is not recommended to increase it.
|
||||
"""
|
||||
|
||||
time_diff = int(time.time()) - self.last_api_call_unix_time
|
||||
sleep_time = self.api_call_time_gap - time_diff
|
||||
|
||||
@ -136,7 +132,6 @@ class WaybackMachineAvailabilityAPI:
|
||||
If you get a URL as a response from the availability API it is
|
||||
guaranteed that you can get the datetime object from the timestamp.
|
||||
"""
|
||||
|
||||
if self.json is None or "archived_snapshots" not in self.json:
|
||||
return datetime.max
|
||||
|
||||
@ -161,7 +156,6 @@ class WaybackMachineAvailabilityAPI:
|
||||
the timestamp if found and if not found raises
|
||||
ArchiveNotInAvailabilityAPIResponse.
|
||||
"""
|
||||
|
||||
archive_url = ""
|
||||
data = self.json
|
||||
|
||||
@ -206,7 +200,6 @@ class WaybackMachineAvailabilityAPI:
|
||||
Prepends zero before the year, month, day, hour and minute so that they
|
||||
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
|
||||
"""
|
||||
|
||||
return "".join(
|
||||
str(kwargs[key]).zfill(2)
|
||||
for key in ["year", "month", "day", "hour", "minute"]
|
||||
@ -218,7 +211,6 @@ class WaybackMachineAvailabilityAPI:
|
||||
because Wayback Machine was started in May, 1996 and it is assumed that
|
||||
there would be no archive older than January 1, 1994.
|
||||
"""
|
||||
|
||||
return self.near(year=1994, month=1, day=1)
|
||||
|
||||
def newest(self) -> "WaybackMachineAvailabilityAPI":
|
||||
@ -228,7 +220,6 @@ class WaybackMachineAvailabilityAPI:
|
||||
|
||||
We assume that wayback machine can not archive the future of a webpage.
|
||||
"""
|
||||
|
||||
return self.near(unix_timestamp=int(time.time()))
|
||||
|
||||
def near(
|
||||
@ -252,7 +243,6 @@ class WaybackMachineAvailabilityAPI:
|
||||
And finally invoking the json method to make the API call then returns
|
||||
the instance.
|
||||
"""
|
||||
|
||||
if unix_timestamp:
|
||||
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||
else:
|
||||
|
@ -90,7 +90,6 @@ class WaybackMachineCDXServerAPI:
|
||||
not matter but for queries where the number of pages are less this
|
||||
method chooses accuracy over the pagination API.
|
||||
"""
|
||||
|
||||
# number of pages that will be returned by the pagination API.
|
||||
# get_total_pages adds the showNumPages=true param to pagination API
|
||||
# requests.
|
||||
|
@ -23,7 +23,6 @@ def get_total_pages(url: str, user_agent: str = DEFAULT_USER_AGENT) -> int:
|
||||
URL makes the CDX server return an integer which is the number of pages
|
||||
of CDX pages available for us to query using the pagination API.
|
||||
"""
|
||||
|
||||
endpoint = "https://web.archive.org/cdx/search/cdx?"
|
||||
payload = {"showNumPages": "true", "url": str(url)}
|
||||
headers = {"User-Agent": user_agent}
|
||||
@ -43,7 +42,6 @@ def full_url(endpoint: str, params: Dict[str, Any]) -> str:
|
||||
such as filter and collapse and this function adds them without
|
||||
overwriting earlier added arguments.
|
||||
"""
|
||||
|
||||
if not params:
|
||||
return endpoint
|
||||
_full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
|
||||
@ -67,7 +65,6 @@ def get_response(
|
||||
"""
|
||||
Make get request to the CDX server and return the response.
|
||||
"""
|
||||
|
||||
session = requests.Session()
|
||||
|
||||
retries_ = Retry(
|
||||
@ -87,7 +84,6 @@ def check_filters(filters: List[str]) -> None:
|
||||
Check that the filter arguments passed by the end-user are valid.
|
||||
If not valid then raise WaybackError.
|
||||
"""
|
||||
|
||||
if not isinstance(filters, list):
|
||||
raise WaybackError("filters must be a list.")
|
||||
|
||||
@ -110,7 +106,6 @@ def check_collapses(collapses: List[str]) -> bool:
|
||||
Check that the collapse arguments passed by the end-user are valid.
|
||||
If not valid then raise WaybackError.
|
||||
"""
|
||||
|
||||
if not isinstance(collapses, list):
|
||||
raise WaybackError("collapses must be a list.")
|
||||
|
||||
@ -138,7 +133,6 @@ def check_match_type(match_type: Optional[str], url: str) -> bool:
|
||||
Check that the match_type argument passed by the end-user is valid.
|
||||
If not valid then raise WaybackError.
|
||||
"""
|
||||
|
||||
legal_match_type = ["exact", "prefix", "host", "domain"]
|
||||
|
||||
if not match_type:
|
||||
|
@ -58,7 +58,6 @@ class WaybackMachineSaveAPI:
|
||||
else invoke the save method to save the archive which returns the
|
||||
archive thus we return the method's return value.
|
||||
"""
|
||||
|
||||
if self._archive_url:
|
||||
return self._archive_url
|
||||
|
||||
@ -80,7 +79,6 @@ class WaybackMachineSaveAPI:
|
||||
to be very unreliable thus if it fails first check opening
|
||||
the response URL yourself in the browser.
|
||||
"""
|
||||
|
||||
session = requests.Session()
|
||||
retries = Retry(
|
||||
total=self.total_save_retries,
|
||||
@ -120,7 +118,6 @@ class WaybackMachineSaveAPI:
|
||||
archive URL in the headers and finally look in the response URL
|
||||
for the archive URL.
|
||||
"""
|
||||
|
||||
regex1 = r"Content-Location: (/web/[0-9]{14}/.*)"
|
||||
match = re.search(regex1, str(self.headers))
|
||||
if match:
|
||||
@ -156,7 +153,6 @@ class WaybackMachineSaveAPI:
|
||||
|
||||
If tries is a multiple of 3, sleep 10 seconds; else sleep 5 seconds.
|
||||
"""
|
||||
|
||||
sleep_seconds = 5
|
||||
if tries % 3 == 0:
|
||||
sleep_seconds = 10
|
||||
@ -176,7 +172,6 @@ class WaybackMachineSaveAPI:
|
||||
the Wayback Machine to serve cached archive if last archive was captured
|
||||
before last 45 minutes.
|
||||
"""
|
||||
|
||||
regex = r"https?://web\.archive.org/web/([0-9]{14})/http"
|
||||
match = re.search(regex, str(self._archive_url))
|
||||
|
||||
@ -205,7 +200,6 @@ class WaybackMachineSaveAPI:
|
||||
Raises MaximumSaveRetriesExceeded if maximum retries are exhausted but still
|
||||
we were unable to retrieve the archive from the Wayback Machine.
|
||||
"""
|
||||
|
||||
self.saved_archive = None
|
||||
tries = 0
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user