Check whether the URL was redirected before throwing exceptions; res.url is the redirect URL if a redirect occurred

This commit is contained in:
Akash Mahanty
2021-04-02 09:55:11 +05:30
parent 8171c8e394
commit 8d2cc112c1
2 changed files with 14 additions and 1 deletions

View File

@@ -302,7 +302,9 @@ def _get_total_pages(url, user_agent):
return int((_get_response(total_pages_url, headers=headers).text).strip())
def _archive_url_parser(header, url, latest_version=__version__, instance=None):
def _archive_url_parser(
header, url, latest_version=__version__, instance=None, response=None
):
"""Returns the archive after parsing it from the response header.
Parameters
@@ -388,6 +390,16 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
if m:
return m.group(1)
if response:
if response.url:
if "web.archive.org/web" in response.url:
m = re.search(
r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$",
str(response.url).strip(),
)
if m:
return m.group(0)
if instance:
newest_archive = None
try:

View File

@@ -225,6 +225,7 @@ class Url:
self.url,
latest_version=self.latest_version,
instance=self,
response=response,
)
m = re.search(