From bc1dc4dc96da7ab128c2e8481557d78a5fea4938 Mon Sep 17 00:00:00 2001
From: eggplants
Date: Thu, 3 Feb 2022 19:44:48 +0900
Subject: [PATCH] fix: save retry mechanism

---
 waybackpy/save_api.py | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/waybackpy/save_api.py b/waybackpy/save_api.py
index cfdb595..b549aa4 100644
--- a/waybackpy/save_api.py
+++ b/waybackpy/save_api.py
@@ -22,6 +22,8 @@ class WaybackMachineSaveAPI:
         self.request_url = "https://web.archive.org/save/" + self.url
         self.user_agent = user_agent
         self.request_headers = {"User-Agent": self.user_agent}
+        if max_tries < 1:
+            raise ValueError("max_tries should be positive")
         self.max_tries = max_tries
         self.total_save_retries = 5
         self.backoff_factor = 0.5
@@ -160,27 +162,22 @@ class WaybackMachineSaveAPI:
         tries = 0
 
         while True:
+            if not self.saved_archive:
+                if tries >= 1:
+                    self.sleep(tries)
+
+                self.get_save_request_headers()
+                self.saved_archive = self.archive_url_parser()
+
+                if self.saved_archive is not None:
+                    self._archive_url = self.saved_archive
+                    self.timestamp()
+                    return self.saved_archive
 
             tries += 1
-
             if tries >= self.max_tries:
                 raise MaximumSaveRetriesExceeded(
                     "Tried %s times but failed to save and retrieve the" % str(tries)
                     + " archive for %s.\nResponse URL:\n%s \nResponse Header:\n%s\n"
                     % (self.url, self.response_url, str(self.headers)),
                 )
-
-            if not self.saved_archive:
-
-                if tries > 1:
-                    self.sleep(tries)
-
-                self.get_save_request_headers()
-                self.saved_archive = self.archive_url_parser()
-
-                if not self.saved_archive:
-                    continue
-                else:
-                    self._archive_url = self.saved_archive
-                    self.timestamp()
-                    return self.saved_archive