retries support for get requests
This commit is contained in:
parent
15ef5816db
commit
a2550f17d7
@ -5,3 +5,4 @@
|
||||
|
||||
## ACKNOWLEDGEMENTS
|
||||
- mhmdiaa (<https://github.com/mhmdiaa>) for <https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050>. known_urls is based on this gist.
|
||||
- datashaman (<https://stackoverflow.com/users/401467/datashaman>) for <https://stackoverflow.com/a/35504626>. _get_response is based on this amazing answer.
|
||||
|
@ -208,7 +208,7 @@ def test_known_urls():
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://akamhy.github.io",
|
||||
url="https://www.keybr.com",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
@ -217,13 +217,13 @@ def test_known_urls():
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=True,
|
||||
subdomain=True,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=True,
|
||||
get=None,
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "github" in str(reply)
|
||||
assert "keybr" in str(reply)
|
||||
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
@ -305,7 +305,7 @@ def test_get():
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
url="https://github.com/akamhy",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
@ -325,7 +325,7 @@ def test_get():
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
url="https://github.com/akamhy/waybackpy",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
@ -345,7 +345,7 @@ def test_get():
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
url="https://akamhy.github.io/waybackpy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
|
@ -223,7 +223,7 @@ def test_total_archives():
|
||||
def test_known_urls():
|
||||
|
||||
target = waybackpy.Url("akamhy.github.io", user_agent)
|
||||
assert len(target.known_urls(alive=True, subdomain=True)) > 2
|
||||
assert len(target.known_urls(alive=True, subdomain=False)) > 2
|
||||
|
||||
target = waybackpy.Url("akamhy.github.io", user_agent)
|
||||
assert len(target.known_urls()) > 3
|
||||
|
@ -1,3 +1,9 @@
|
||||
"""
|
||||
waybackpy.exceptions
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
This module contains the set of Waybackpy's exceptions.
|
||||
"""
|
||||
|
||||
class WaybackError(Exception):
|
||||
"""
|
||||
Raised when Wayback Machine API Service is unreachable/down.
|
||||
|
@ -1,7 +1,9 @@
|
||||
import re
|
||||
import requests
|
||||
import concurrent.futures
|
||||
from urllib3.util.retry import Retry
|
||||
from datetime import datetime, timedelta
|
||||
from requests.adapters import HTTPAdapter
|
||||
from waybackpy.__version__ import __version__
|
||||
from waybackpy.exceptions import WaybackError, URLError
|
||||
|
||||
@ -102,15 +104,15 @@ def _wayback_timestamp(**kwargs):
|
||||
)
|
||||
|
||||
|
||||
def _get_response(endpoint, params=None, headers=None):
|
||||
def _get_response(endpoint, params=None, headers=None, retries=5):
|
||||
"""
|
||||
This function is used to make a GET request.
|
||||
We use the requests package to make the
|
||||
requests.
|
||||
|
||||
|
||||
We try twice and if both the times is fails And
|
||||
raises exceptions we give-up and raise WaybackError.
|
||||
We try five times and if it fails it raises
|
||||
WaybackError exception.
|
||||
|
||||
You can handle WaybackError by importing:
|
||||
from waybackpy.exceptions import WaybackError
|
||||
@ -121,11 +123,14 @@ def _get_response(endpoint, params=None, headers=None):
|
||||
# handle it
|
||||
"""
|
||||
|
||||
# From https://stackoverflow.com/a/35504626
|
||||
# By https://stackoverflow.com/users/401467/datashaman
|
||||
s = requests.Session()
|
||||
retries = Retry(total=retries, backoff_factor=0.5, status_forcelist=[ 500, 502, 503, 504 ])
|
||||
s.mount('https://', HTTPAdapter(max_retries=retries))
|
||||
|
||||
try:
|
||||
return requests.get(endpoint, params=params, headers=headers)
|
||||
except Exception:
|
||||
try:
|
||||
return requests.get(endpoint, params=params, headers=headers)
|
||||
return s.get(endpoint, params=params, headers=headers)
|
||||
except Exception as e:
|
||||
exc = WaybackError("Error while retrieving %s" % endpoint)
|
||||
exc.__cause__ = e
|
||||
@ -450,12 +455,13 @@ class Url:
|
||||
):
|
||||
"""
|
||||
Returns list of URLs known to exist for given domain name
|
||||
because these URLs were crawled by WayBack Machine bots.
|
||||
Useful for pen-testers and others.
|
||||
Idea by Mohammed Diaa (https://github.com/mhmdiaa) from:
|
||||
https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050
|
||||
because these URLs were crawled by WayBack Machine spider.
|
||||
Useful for pen-testing.
|
||||
"""
|
||||
|
||||
# Idea by Mohammed Diaa (https://github.com/mhmdiaa) from:
|
||||
# https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050
|
||||
|
||||
url_list = []
|
||||
|
||||
if subdomain:
|
||||
|
Loading…
Reference in New Issue
Block a user