Merge pull request #124 from eggplants/fix_save_retry

Fix save retry mechanism
This commit is contained in:
Akash Mahanty 2022-02-03 18:01:51 +05:30 committed by GitHub
commit 947647f2e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -22,6 +22,8 @@ class WaybackMachineSaveAPI:
self.request_url = "https://web.archive.org/save/" + self.url self.request_url = "https://web.archive.org/save/" + self.url
self.user_agent = user_agent self.user_agent = user_agent
self.request_headers = {"User-Agent": self.user_agent} self.request_headers = {"User-Agent": self.user_agent}
if max_tries < 1:
raise ValueError("max_tries should be positive")
self.max_tries = max_tries self.max_tries = max_tries
self.total_save_retries = 5 self.total_save_retries = 5
self.backoff_factor = 0.5 self.backoff_factor = 0.5
@@ -160,27 +162,22 @@ class WaybackMachineSaveAPI:
tries = 0 tries = 0
while True: while True:
if not self.saved_archive:
if tries >= 1:
self.sleep(tries)
self.get_save_request_headers()
self.saved_archive = self.archive_url_parser()
if self.saved_archive is not None:
self._archive_url = self.saved_archive
self.timestamp()
return self.saved_archive
tries += 1 tries += 1
if tries >= self.max_tries: if tries >= self.max_tries:
raise MaximumSaveRetriesExceeded( raise MaximumSaveRetriesExceeded(
"Tried %s times but failed to save and retrieve the" % str(tries) "Tried %s times but failed to save and retrieve the" % str(tries)
+ " archive for %s.\nResponse URL:\n%s \nResponse Header:\n%s\n" + " archive for %s.\nResponse URL:\n%s \nResponse Header:\n%s\n"
% (self.url, self.response_url, str(self.headers)), % (self.url, self.response_url, str(self.headers)),
) )
if not self.saved_archive:
if tries > 1:
self.sleep(tries)
self.get_save_request_headers()
self.saved_archive = self.archive_url_parser()
if not self.saved_archive:
continue
else:
self._archive_url = self.saved_archive
self.timestamp()
return self.saved_archive