Check whether the URL was redirected before throwing exceptions; res.url holds the redirect URL if a redirect occurred.
This commit is contained in:
@@ -302,7 +302,9 @@ def _get_total_pages(url, user_agent):
|
||||
return int((_get_response(total_pages_url, headers=headers).text).strip())
|
||||
|
||||
|
||||
def _archive_url_parser(header, url, latest_version=__version__, instance=None):
|
||||
def _archive_url_parser(
|
||||
header, url, latest_version=__version__, instance=None, response=None
|
||||
):
|
||||
"""Returns the archive after parsing it from the response header.
|
||||
|
||||
Parameters
|
||||
@@ -388,6 +390,16 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
|
||||
if m:
|
||||
return m.group(1)
|
||||
|
||||
if response:
|
||||
if response.url:
|
||||
if "web.archive.org/web" in response.url:
|
||||
m = re.search(
|
||||
r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$",
|
||||
str(response.url).strip(),
|
||||
)
|
||||
if m:
|
||||
return m.group(0)
|
||||
|
||||
if instance:
|
||||
newest_archive = None
|
||||
try:
|
||||
|
@@ -225,6 +225,7 @@ class Url:
|
||||
self.url,
|
||||
latest_version=self.latest_version,
|
||||
instance=self,
|
||||
response=response,
|
||||
)
|
||||
|
||||
m = re.search(
|
||||
|
Reference in New Issue
Block a user