backoff_factor=2 for save, incr success by 25%

This commit is contained in:
Akash Mahanty 2021-01-13 10:13:16 +05:30
parent ec0a0d04cc
commit 76205d9cf6
4 changed files with 14 additions and 16 deletions

View File

@ -41,16 +41,6 @@ def test_save():
with pytest.raises(Exception):
url2 = "ha ha ha ha"
Url(url2, user_agent)
# url3 = "http://www.archive.is/faq.html"
# with pytest.raises(Exception):
# target = Url(
# url3,
# "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) "
# "AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 "
# "Safari/533.20.27",
# )
# target.save()
def test_near():

View File

@ -252,7 +252,12 @@ def _wayback_timestamp(**kwargs):
def _get_response(
endpoint, params=None, headers=None, retries=5, return_full_url=False
endpoint,
params=None,
headers=None,
return_full_url=False,
retries=5,
backoff_factor=0.5,
):
"""
This function is used make get request.
@ -276,7 +281,9 @@ def _get_response(
# By https://stackoverflow.com/users/401467/datashaman
s = requests.Session()
retries = Retry(
total=retries, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504]
total=retries,
backoff_factor=backoff_factor,
status_forcelist=[500, 502, 503, 504],
)
s.mount("https://", HTTPAdapter(max_retries=retries))
url = _full_url(endpoint, params)

View File

@ -129,15 +129,16 @@ class Url:
And to get the archive URL we are required to read the
header of the API response.
_get_response() takes care of the get requests. It uses requests
package.
_get_response() takes care of the get requests.
_archive_url_parser() parses the archive from the header.
"""
request_url = "https://web.archive.org/save/" + _cleaned_url(self.url)
headers = {"User-Agent": self.user_agent}
response = _get_response(request_url, params=None, headers=headers)
response = _get_response(
request_url, params=None, headers=headers, backoff_factor=2
)
self._archive_url = "https://" + _archive_url_parser(response.headers, self.url)
self.timestamp = datetime.utcnow()
return self