fix: shorten long lines
This commit is contained in:
@@ -142,8 +142,8 @@ class WaybackMachineAvailabilityAPI(object):
|
||||
if not data or not data["archived_snapshots"]:
|
||||
raise ArchiveNotInAvailabilityAPIResponse(
|
||||
"Archive not found in the availability "
|
||||
"API response, the URL you requested may not have any "
|
||||
"archives yet. You may retry after some time or archive the webpage now.\n"
|
||||
"API response, the URL you requested may not have any archives "
|
||||
"yet. You may retry after some time or archive the webpage now.\n"
|
||||
f"Response data:\n{self.response.text}"
|
||||
)
|
||||
else:
|
||||
@@ -196,7 +196,8 @@ class WaybackMachineAvailabilityAPI(object):
|
||||
unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
|
||||
appropriate arguments for their respective parameters.
|
||||
Adds the timestamp to the payload dictionary.
|
||||
And finally invoking the json method to make the API call then returns the instance.
|
||||
Finally, invokes the json method to make the API call and then returns
|
||||
the instance.
|
||||
"""
|
||||
if unix_timestamp:
|
||||
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||
|
||||
@@ -177,8 +177,8 @@ class WaybackMachineCDXServerAPI(object):
|
||||
|
||||
if prop_values_len != properties_len:
|
||||
raise WaybackError(
|
||||
f"Snapshot returned by Cdx API has {prop_values_len} properties "
|
||||
f"instead of expected {properties_len} properties.\n"
|
||||
f"Snapshot returned by Cdx API has {prop_values_len} "
|
||||
f"properties instead of expected {properties_len} properties.\n"
|
||||
f"Problematic Snapshot: {snapshot}"
|
||||
)
|
||||
|
||||
|
||||
@@ -69,7 +69,8 @@ def check_filters(filters: List[str]) -> None:
|
||||
# [!]field:regex
|
||||
for _filter in filters:
|
||||
match = re.search(
|
||||
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
|
||||
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):"
|
||||
r"(.*)",
|
||||
_filter,
|
||||
)
|
||||
|
||||
@@ -87,11 +88,15 @@ def check_collapses(collapses: List[str]) -> bool:
|
||||
|
||||
for collapse in collapses:
|
||||
match = re.search(
|
||||
r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?",
|
||||
r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)"
|
||||
r"(:?[0-9]{1,99})?",
|
||||
collapse,
|
||||
)
|
||||
if match is None or len(match.groups()) != 2:
|
||||
exc_message = f"collapse argument '{collapse}' is not following the cdx collapse syntax."
|
||||
exc_message = (
|
||||
f"collapse argument '{collapse}' "
|
||||
"is not following the cdx collapse syntax."
|
||||
)
|
||||
raise WaybackError(exc_message)
|
||||
else:
|
||||
return True
|
||||
@@ -106,7 +111,10 @@ def check_match_type(match_type: Optional[str], url: str) -> bool:
|
||||
"Can not use wildcard in the URL along with the match_type arguments."
|
||||
)
|
||||
elif match_type not in legal_match_type:
|
||||
exc_message = f"{match_type} is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'"
|
||||
exc_message = (
|
||||
f"{match_type} is not an allowed match type.\n"
|
||||
"Use one from 'exact', 'prefix', 'host' or 'domain'"
|
||||
)
|
||||
raise WaybackError(exc_message)
|
||||
else:
|
||||
return True
|
||||
|
||||
@@ -241,7 +241,8 @@ def main(
|
||||
and not cdx
|
||||
):
|
||||
click.echo(
|
||||
"Only URL passed, but did not specify what to do with the URL. Use --help flag for help using waybackpy."
|
||||
"Only URL passed, but did not specify what to do with the URL. "
|
||||
"Use --help flag for help using waybackpy."
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@@ -72,7 +72,6 @@ class WaybackMachineSaveAPI(object):
|
||||
self.response = session.get(self.request_url, headers=self.request_headers)
|
||||
# requests.response.headers is requests.structures.CaseInsensitiveDict
|
||||
self.headers = self.response.headers
|
||||
self.headers_str = str(self.headers)
|
||||
self.status_code = self.response.status_code
|
||||
self.response_url = self.response.url
|
||||
session.close()
|
||||
@@ -85,17 +84,17 @@ class WaybackMachineSaveAPI(object):
|
||||
"""
|
||||
|
||||
regex1 = r"Content-Location: (/web/[0-9]{14}/.*)"
|
||||
match = re.search(regex1, self.headers_str)
|
||||
match = re.search(regex1, str(self.headers))
|
||||
if match:
|
||||
return "https://web.archive.org" + match.group(1)
|
||||
|
||||
regex2 = r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>"
|
||||
match = re.search(regex2, self.headers_str)
|
||||
match = re.search(regex2, str(self.headers))
|
||||
if match is not None and len(match.groups()) == 1:
|
||||
return "https://" + match.group(1)
|
||||
|
||||
regex3 = r"X-Cache-Key:\shttps(.*)[A-Z]{2}"
|
||||
match = re.search(regex3, self.headers_str)
|
||||
match = re.search(regex3, str(self.headers))
|
||||
if match is not None and len(match.groups()) == 1:
|
||||
return "https" + match.group(1)
|
||||
|
||||
@@ -132,10 +131,11 @@ class WaybackMachineSaveAPI(object):
|
||||
Also check the time in the archive URL and compare it to the instance birth
|
||||
time.
|
||||
|
||||
If time on the archive is older than the instance creation time set the cached_save
|
||||
to True else set it to False. The flag can be used to check if the Wayback Machine
|
||||
didn't serve a Cached URL. It is quite common for the Wayback Machine to serve
|
||||
cached archive if last archive was captured before last 45 minutes.
|
||||
If time on the archive is older than the instance creation time set the
|
||||
cached_save to True else set it to False. The flag can be used to check
|
||||
if the Wayback Machine didn't serve a Cached URL. It is quite common for
|
||||
the Wayback Machine to serve cached archive if last archive was captured
|
||||
before last 45 minutes.
|
||||
"""
|
||||
regex = r"https?://web\.archive.org/web/([0-9]{14})/http"
|
||||
m = re.search(regex, str(self._archive_url))
|
||||
@@ -167,7 +167,7 @@ class WaybackMachineSaveAPI(object):
|
||||
tries = 0
|
||||
|
||||
while True:
|
||||
if not self.saved_archive:
|
||||
if self.saved_archive is None:
|
||||
if tries >= 1:
|
||||
self.sleep(tries)
|
||||
|
||||
@@ -182,7 +182,8 @@ class WaybackMachineSaveAPI(object):
|
||||
tries += 1
|
||||
if tries >= self.max_tries:
|
||||
raise MaximumSaveRetriesExceeded(
|
||||
f"Tried {str(tries)} times but failed to save and retrieve the archive for {self.url}.\n"
|
||||
f"Tried {tries} times but failed to save "
|
||||
f"and retrieve the archive for {self.url}.\n"
|
||||
f"Response URL:\n{self.response_url}\n"
|
||||
f"Response Header:\n{self.headers_str}"
|
||||
f"Response Header:\n{self.headers}"
|
||||
)
|
||||
|
||||
@@ -15,8 +15,8 @@ The reason it is still in the code is backwards compatibility with 2.x.x version
|
||||
If you were using the Url before the update to version 3.x.x, your code should still be
|
||||
working fine and there is no hurry to update the interface but is recommended that you
|
||||
do not use the Url class for new code as it would be removed after 2025 also the first
|
||||
3.x.x versions was released in January 2022 and three years are more than enough to update
|
||||
the older interface code.
|
||||
3.x.x version was released in January 2022 and three years are more than enough to
|
||||
update the older interface code.
|
||||
"""
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user