Retry support for GET requests

Akash Mahanty
2021-01-06 01:58:38 +05:30
parent 15ef5816db
commit a2550f17d7
5 changed files with 36 additions and 23 deletions


@@ -1,7 +1,9 @@
 import re
 import requests
 import concurrent.futures
+from urllib3.util.retry import Retry
 from datetime import datetime, timedelta
+from requests.adapters import HTTPAdapter
 from waybackpy.__version__ import __version__
 from waybackpy.exceptions import WaybackError, URLError
@@ -102,15 +104,15 @@ def _wayback_timestamp(**kwargs):
 )
-def _get_response(endpoint, params=None, headers=None):
+def _get_response(endpoint, params=None, headers=None, retries=5):
     """
     This function is used to make GET requests.
     We use the requests package to make the
     requests.
-    We try twice and if both the times is fails And
-    raises exceptions we give-up and raise WaybackError.
+    We try five times and if all attempts fail, we raise a
+    WaybackError exception.
     You can handle WaybackError by importing:
     from waybackpy.exceptions import WaybackError
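
The docstring goes on to show catching the exception. A minimal sketch of that pattern from the caller's side, with an illustrative endpoint and params (not taken from this commit):

    from waybackpy.exceptions import WaybackError

    try:
        response = _get_response(
            'https://archive.org/wayback/available',
            params={'url': 'example.com'},
        )
    except WaybackError as e:
        # The underlying requests exception is chained via __cause__
        print('Request to the Wayback Machine failed:', e)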
@@ -121,15 +123,18 @@ def _get_response(endpoint, params=None, headers=None):
         # handle it
     """
+    # From https://stackoverflow.com/a/35504626
+    # By https://stackoverflow.com/users/401467/datashaman
+    s = requests.Session()
+    retries = Retry(total=retries, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
+    s.mount('https://', HTTPAdapter(max_retries=retries))
     try:
-        return requests.get(endpoint, params=params, headers=headers)
-    except Exception:
-        try:
-            return requests.get(endpoint, params=params, headers=headers)
-        except Exception as e:
-            exc = WaybackError("Error while retrieving %s" % endpoint)
-            exc.__cause__ = e
-            raise exc
+        return s.get(endpoint, params=params, headers=headers)
     except Exception as e:
         exc = WaybackError("Error while retrieving %s" % endpoint)
         exc.__cause__ = e
         raise exc

 class Url:
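
The rewrite replaces the hand-rolled second attempt with urllib3's Retry mounted on a requests Session: any response with status 500, 502, 503, or 504 is retried up to `retries` times, with increasing backoff between attempts. A self-contained sketch of the same pattern, using an illustrative URL:

    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    session = requests.Session()
    retries = Retry(total=5, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
    # Every https:// request made through this session is now retried transparently
    session.mount('https://', HTTPAdapter(max_retries=retries))
    response = session.get('https://example.com')

Note that the adapter is mounted only for 'https://', so plain http:// requests through the same session would not get the retry behavior.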
@@ -450,12 +455,13 @@ class Url:
     ):
         """
         Returns list of URLs known to exist for given domain name
-        because these URLs were crawled by WayBack Machine bots.
-        Useful for pen-testers and others.
-        Idea by Mohammed Diaa (https://github.com/mhmdiaa) from:
-        https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050
+        because these URLs were crawled by WayBack Machine spider.
+        Useful for pen-testing.
         """
+        # Idea by Mohammed Diaa (https://github.com/mhmdiaa) from:
+        # https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050
         url_list = []
         if subdomain:
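
The linked gist builds such a list from the Wayback Machine's CDX API. A rough sketch of the idea, assuming the public CDX endpoint; the helper name and parameter choices are illustrative, not this commit's code:

    import requests

    def cdx_known_urls(domain):
        # Ask the CDX API for every archived URL under the domain and its
        # subdomains, collapsing duplicate URLs into a single row.
        params = {
            'url': '*.%s/*' % domain,
            'output': 'json',
            'fl': 'original',
            'collapse': 'urlkey',
        }
        response = requests.get('https://web.archive.org/cdx/search/cdx', params=params)
        rows = response.json() if response.text else []
        return [row[0] for row in rows[1:]]  # rows[0] is the column header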