backoff_factor=2 for save, incr success by 25%
parent ec0a0d04cc
commit 76205d9cf6
@@ -41,16 +41,6 @@ def test_save():
     with pytest.raises(Exception):
         url2 = "ha ha ha ha"
         Url(url2, user_agent)
-    # url3 = "http://www.archive.is/faq.html"
-
-    # with pytest.raises(Exception):
-    #     target = Url(
-    #         url3,
-    #         "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) "
-    #         "AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 "
-    #         "Safari/533.20.27",
-    #     )
-    #     target.save()
 
 
 def test_near():
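The assertion that survives this hunk checks that constructing a Url with a malformed address raises. A minimal self-contained sketch of that pattern, assuming a waybackpy-style Url class that validates its input on construction (the import path and user-agent string are assumptions, not shown in the diff):

# Sketch of the surviving assertion; Url is assumed to validate its
# first argument in __init__ (import path is an assumption).
import pytest

from waybackpy import Url

user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"  # any UA string

def test_save_rejects_malformed_url():
    # A string with spaces is not a valid URL, so Url() should raise.
    with pytest.raises(Exception):
        Url("ha ha ha ha", user_agent)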
@@ -84,7 +84,7 @@ class Cdx:
 
         endpoint = "https://web.archive.org/cdx/search/cdx"
         total_pages = _get_total_pages(self.url, self.user_agent)
-        #If we only have two or less pages of archives then we care for accuracy
+        # If we only have two or less pages of archives then we care for accuracy
         # pagination API can be lagged sometimes
         if use_page == True and total_pages >= 2:
             blank_pages = 0
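The total_pages guard above relies on the CDX server's pagination API. A hedged sketch of what a _get_total_pages-style helper plausibly does, using the CDX showNumPages parameter (the project's actual implementation is not shown in this diff):

# Assumed behavior of a _get_total_pages-style helper: the CDX server
# returns the page count as plain text when showNumPages=true is passed.
import requests

def get_total_pages(url, user_agent):
    endpoint = "https://web.archive.org/cdx/search/cdx"
    params = {"url": url, "showNumPages": "true"}
    headers = {"User-Agent": user_agent}
    response = requests.get(endpoint, params=params, headers=headers)
    return int(response.text.strip())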
@@ -252,7 +252,12 @@ def _wayback_timestamp(**kwargs):
 
 
 def _get_response(
-    endpoint, params=None, headers=None, retries=5, return_full_url=False
+    endpoint,
+    params=None,
+    headers=None,
+    return_full_url=False,
+    retries=5,
+    backoff_factor=0.5,
 ):
     """
     This function is used make get request.
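With the reflowed signature, callers can tune retry behaviour per request through keyword arguments. An illustrative call site after this change (the endpoint, params, and user agent here are made up for the example):

# Illustrative call: only the keywords a caller cares about need to be
# spelled out; retries and backoff_factor keep their defaults otherwise.
response = _get_response(
    "https://web.archive.org/cdx/search/cdx",
    params={"url": "example.com"},
    headers={"User-Agent": "my-agent"},
    backoff_factor=1,  # overrides the new 0.5 default for this call only
)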
@@ -276,7 +281,9 @@ def _get_response(
     # By https://stackoverflow.com/users/401467/datashaman
     s = requests.Session()
     retries = Retry(
-        total=retries, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504]
+        total=retries,
+        backoff_factor=backoff_factor,
+        status_forcelist=[500, 502, 503, 504],
     )
     s.mount("https://", HTTPAdapter(max_retries=retries))
     url = _full_url(endpoint, params)
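The hunk above parameterises the well-known requests-plus-urllib3 retry recipe. A self-contained sketch of that pattern, matching the calls used in the diff (note that exact sleep behaviour before the first retry varies between urllib3 versions):

# Self-contained version of the retry recipe used in the hunk above.
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def session_with_retries(retries=5, backoff_factor=0.5):
    s = requests.Session()
    retry = Retry(
        total=retries,                          # overall retry budget
        backoff_factor=backoff_factor,          # grows sleeps exponentially
        status_forcelist=[500, 502, 503, 504],  # retry only these statuses
    )
    s.mount("https://", HTTPAdapter(max_retries=retry))
    return s

# Usage: behaves like requests.get, but retries transient 5xx errors.
s = session_with_retries(backoff_factor=2)
# r = s.get("https://web.archive.org/save/https://example.com")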
@@ -129,15 +129,16 @@ class Url:
         And to get the archive URL we are required to read the
         header of the API response.
 
-        _get_response() takes care of the get requests. It uses requests
-        package.
+        _get_response() takes care of the get requests.
 
         _archive_url_parser() parses the archive from the header.
 
         """
         request_url = "https://web.archive.org/save/" + _cleaned_url(self.url)
         headers = {"User-Agent": self.user_agent}
-        response = _get_response(request_url, params=None, headers=headers)
+        response = _get_response(
+            request_url, params=None, headers=headers, backoff_factor=2
+        )
         self._archive_url = "https://" + _archive_url_parser(response.headers, self.url)
         self.timestamp = datetime.utcnow()
         return self
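Why backoff_factor=2 helps save(): urllib3 sleeps roughly backoff_factor * 2 ** (n - 1) seconds before the n-th retry, so raising the factor from 0.5 to 2 gives the Save Page Now endpoint far longer to recover between attempts. A quick sketch of the resulting schedules (approximate; some urllib3 versions skip the sleep before the very first retry):

# Approximate urllib3 sleep schedule: backoff_factor * 2 ** (retry - 1).
# Some urllib3 versions apply no sleep before the very first retry.
def backoff_schedule(backoff_factor, retries=5):
    return [backoff_factor * 2 ** (n - 1) for n in range(1, retries + 1)]

print(backoff_schedule(0.5))  # [0.5, 1.0, 2.0, 4.0, 8.0]  -> old default
print(backoff_schedule(2))    # [2, 4, 8, 16, 32]          -> used by save()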