fix: shorten long lines

This commit is contained in:
eggplants
2022-02-04 07:20:02 +09:00
parent b496f7008e
commit 5a324b9f61
12 changed files with 172 additions and 62 deletions

View File

@@ -142,8 +142,8 @@ class WaybackMachineAvailabilityAPI(object):
if not data or not data["archived_snapshots"]:
raise ArchiveNotInAvailabilityAPIResponse(
"Archive not found in the availability "
"API response, the URL you requested may not have any "
"archives yet. You may retry after some time or archive the webpage now.\n"
"API response, the URL you requested may not have any archives "
"yet. You may retry after some time or archive the webpage now.\n"
f"Response data:\n{self.response.text}"
)
else:
@@ -196,7 +196,8 @@ class WaybackMachineAvailabilityAPI(object):
unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
appropriate arguments for their respective parameters.
Adds the timestamp to the payload dictionary.
And finally invoking the json method to make the API call then returns the instance.
And finally invoking the json method to make the API call then returns
the instance.
"""
if unix_timestamp:
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)

View File

@@ -177,8 +177,8 @@ class WaybackMachineCDXServerAPI(object):
if prop_values_len != properties_len:
raise WaybackError(
f"Snapshot returned by Cdx API has {prop_values_len} properties "
f"instead of expected {properties_len} properties.\n"
f"Snapshot returned by Cdx API has {prop_values_len} "
f"properties instead of expected {properties_len} properties.\n"
f"Problematic Snapshot: {snapshot}"
)

View File

@@ -69,7 +69,8 @@ def check_filters(filters: List[str]) -> None:
# [!]field:regex
for _filter in filters:
match = re.search(
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):"
r"(.*)",
_filter,
)
@@ -87,11 +88,15 @@ def check_collapses(collapses: List[str]) -> bool:
for collapse in collapses:
match = re.search(
r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?",
r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)"
r"(:?[0-9]{1,99})?",
collapse,
)
if match is None or len(match.groups()) != 2:
exc_message = f"collapse argument '{collapse}' is not following the cdx collapse syntax."
exc_message = (
f"collapse argument '{collapse}' "
"is not following the cdx collapse syntax."
)
raise WaybackError(exc_message)
else:
return True
@@ -106,7 +111,10 @@ def check_match_type(match_type: Optional[str], url: str) -> bool:
"Can not use wildcard in the URL along with the match_type arguments."
)
elif match_type not in legal_match_type:
exc_message = f"{match_type} is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'"
exc_message = (
f"{match_type} is not an allowed match type.\n"
"Use one from 'exact', 'prefix', 'host' or 'domain'"
)
raise WaybackError(exc_message)
else:
return True

View File

@@ -241,7 +241,8 @@ def main(
and not cdx
):
click.echo(
"Only URL passed, but did not specify what to do with the URL. Use --help flag for help using waybackpy."
"Only URL passed, but did not specify what to do with the URL. "
"Use --help flag for help using waybackpy."
)
return

View File

@@ -72,7 +72,6 @@ class WaybackMachineSaveAPI(object):
self.response = session.get(self.request_url, headers=self.request_headers)
# requests.response.headers is requests.structures.CaseInsensitiveDict
self.headers = self.response.headers
self.headers_str = str(self.headers)
self.status_code = self.response.status_code
self.response_url = self.response.url
session.close()
@@ -85,17 +84,17 @@ class WaybackMachineSaveAPI(object):
"""
regex1 = r"Content-Location: (/web/[0-9]{14}/.*)"
match = re.search(regex1, self.headers_str)
match = re.search(regex1, str(self.headers))
if match:
return "https://web.archive.org" + match.group(1)
regex2 = r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>"
match = re.search(regex2, self.headers_str)
match = re.search(regex2, str(self.headers))
if match is not None and len(match.groups()) == 1:
return "https://" + match.group(1)
regex3 = r"X-Cache-Key:\shttps(.*)[A-Z]{2}"
match = re.search(regex3, self.headers_str)
match = re.search(regex3, str(self.headers))
if match is not None and len(match.groups()) == 1:
return "https" + match.group(1)
@@ -132,10 +131,11 @@ class WaybackMachineSaveAPI(object):
Also check if the time on archive is URL and compare it to instance birth
time.
If time on the archive is older than the instance creation time set the cached_save
to True else set it to False. The flag can be used to check if the Wayback Machine
didn't serve a Cached URL. It is quite common for the Wayback Machine to serve
cached archive if last archive was captured before last 45 minutes.
If time on the archive is older than the instance creation time set the
cached_save to True else set it to False. The flag can be used to check
if the Wayback Machine didn't serve a Cached URL. It is quite common for
the Wayback Machine to serve cached archive if last archive was captured
before last 45 minutes.
"""
regex = r"https?://web\.archive.org/web/([0-9]{14})/http"
m = re.search(regex, str(self._archive_url))
@@ -167,7 +167,7 @@ class WaybackMachineSaveAPI(object):
tries = 0
while True:
if not self.saved_archive:
if self.saved_archive is None:
if tries >= 1:
self.sleep(tries)
@@ -182,7 +182,8 @@ class WaybackMachineSaveAPI(object):
tries += 1
if tries >= self.max_tries:
raise MaximumSaveRetriesExceeded(
f"Tried {str(tries)} times but failed to save and retrieve the archive for {self.url}.\n"
f"Tried {tries} times but failed to save "
f"and retrieve the archive for {self.url}.\n"
f"Response URL:\n{self.response_url}\n"
f"Response Header:\n{self.headers_str}"
f"Response Header:\n{self.headers}"
)

View File

@@ -15,8 +15,8 @@ The reason it is still in the code is backwards compatibility with 2.x.x version
If you were using the Url before the update to version 3.x.x, your code should still be
working fine and there is no hurry to update the interface but is recommended that you
do not use the Url class for new code as it would be removed after 2025 also the first
3.x.x versions was released in January 2022 and three years are more than enough to update
the older interface code.
3.x.x versions were released in January 2022 and three years are more than enough to
update the older interface code.
"""