backoff_factor=2 for save, incr success by 25%
This commit is contained in:
		| @@ -41,16 +41,6 @@ def test_save(): | ||||
|     with pytest.raises(Exception): | ||||
|         url2 = "ha ha ha ha" | ||||
|         Url(url2, user_agent) | ||||
| #     url3 = "http://www.archive.is/faq.html" | ||||
|  | ||||
| #     with pytest.raises(Exception): | ||||
| #         target = Url( | ||||
| #             url3, | ||||
| #             "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) " | ||||
| #             "AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 " | ||||
| #             "Safari/533.20.27", | ||||
| #         ) | ||||
| #         target.save() | ||||
|  | ||||
|  | ||||
| def test_near(): | ||||
|   | ||||
| @@ -252,7 +252,12 @@ def _wayback_timestamp(**kwargs): | ||||
|  | ||||
|  | ||||
| def _get_response( | ||||
|     endpoint, params=None, headers=None, retries=5, return_full_url=False | ||||
|     endpoint, | ||||
|     params=None, | ||||
|     headers=None, | ||||
|     return_full_url=False, | ||||
|     retries=5, | ||||
|     backoff_factor=0.5, | ||||
| ): | ||||
|     """ | ||||
|     This function is used make get request. | ||||
| @@ -276,7 +281,9 @@ def _get_response( | ||||
|     # By https://stackoverflow.com/users/401467/datashaman | ||||
|     s = requests.Session() | ||||
|     retries = Retry( | ||||
|         total=retries, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504] | ||||
|         total=retries, | ||||
|         backoff_factor=backoff_factor, | ||||
|         status_forcelist=[500, 502, 503, 504], | ||||
|     ) | ||||
|     s.mount("https://", HTTPAdapter(max_retries=retries)) | ||||
|     url = _full_url(endpoint, params) | ||||
|   | ||||
| @@ -129,15 +129,16 @@ class Url: | ||||
|         And to get the archive URL we are required to read the | ||||
|         header of the API response. | ||||
|  | ||||
|         _get_response() takes care of the get requests. It uses requests | ||||
|         package. | ||||
|         _get_response() takes care of the get requests. | ||||
|  | ||||
|         _archive_url_parser() parses the archive from the header. | ||||
|  | ||||
|         """ | ||||
|         request_url = "https://web.archive.org/save/" + _cleaned_url(self.url) | ||||
|         headers = {"User-Agent": self.user_agent} | ||||
|         response = _get_response(request_url, params=None, headers=headers) | ||||
|         response = _get_response( | ||||
|             request_url, params=None, headers=headers, backoff_factor=2 | ||||
|         ) | ||||
|         self._archive_url = "https://" + _archive_url_parser(response.headers, self.url) | ||||
|         self.timestamp = datetime.utcnow() | ||||
|         return self | ||||
|   | ||||
		Reference in New Issue
	
	Block a user