diff --git a/waybackpy/utils.py b/waybackpy/utils.py index 0fa79b5..c9f3c55 100644 --- a/waybackpy/utils.py +++ b/waybackpy/utils.py @@ -302,7 +302,9 @@ def _get_total_pages(url, user_agent): return int((_get_response(total_pages_url, headers=headers).text).strip()) -def _archive_url_parser(header, url, latest_version=__version__, instance=None): +def _archive_url_parser( + header, url, latest_version=__version__, instance=None, response=None +): """Returns the archive after parsing it from the response header. Parameters @@ -388,6 +390,16 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None): if m: return m.group(1) + if response: + if response.url: + if "web.archive.org/web" in response.url: + m = re.search( + r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$", + str(response.url).strip(), + ) + if m: + return m.group(0) + if instance: newest_archive = None try: diff --git a/waybackpy/wrapper.py b/waybackpy/wrapper.py index ef24a81..6b68d1c 100644 --- a/waybackpy/wrapper.py +++ b/waybackpy/wrapper.py @@ -225,6 +225,7 @@ class Url: self.url, latest_version=self.latest_version, instance=self, + response=response, ) m = re.search(