retries support for get requests
This commit is contained in:
parent
15ef5816db
commit
a2550f17d7
@ -5,3 +5,4 @@
|
|||||||
|
|
||||||
## ACKNOWLEDGEMENTS
|
## ACKNOWLEDGEMENTS
|
||||||
- mhmdiaa (<https://github.com/mhmdiaa>) for <https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050>. known_urls is based on this gist.
|
- mhmdiaa (<https://github.com/mhmdiaa>) for <https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050>. known_urls is based on this gist.
|
||||||
|
- datashaman (<https://stackoverflow.com/users/401467/datashaman>) for <https://stackoverflow.com/a/35504626>. _get_response is based on this amazing answer.
|
||||||
|
@ -208,7 +208,7 @@ def test_known_urls():
|
|||||||
args = argparse.Namespace(
|
args = argparse.Namespace(
|
||||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
url="https://akamhy.github.io",
|
url="https://www.keybr.com",
|
||||||
total=False,
|
total=False,
|
||||||
version=False,
|
version=False,
|
||||||
oldest=False,
|
oldest=False,
|
||||||
@ -217,13 +217,13 @@ def test_known_urls():
|
|||||||
archive_url=False,
|
archive_url=False,
|
||||||
newest=False,
|
newest=False,
|
||||||
near=False,
|
near=False,
|
||||||
alive=True,
|
alive=False,
|
||||||
subdomain=True,
|
subdomain=False,
|
||||||
known_urls=True,
|
known_urls=True,
|
||||||
get=None,
|
get=None,
|
||||||
)
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "github" in str(reply)
|
assert "keybr" in str(reply)
|
||||||
|
|
||||||
args = argparse.Namespace(
|
args = argparse.Namespace(
|
||||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
@ -305,7 +305,7 @@ def test_get():
|
|||||||
args = argparse.Namespace(
|
args = argparse.Namespace(
|
||||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
url="https://pypi.org/user/akamhy/",
|
url="https://github.com/akamhy",
|
||||||
total=False,
|
total=False,
|
||||||
version=False,
|
version=False,
|
||||||
oldest=False,
|
oldest=False,
|
||||||
@ -325,7 +325,7 @@ def test_get():
|
|||||||
args = argparse.Namespace(
|
args = argparse.Namespace(
|
||||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
url="https://pypi.org/user/akamhy/",
|
url="https://github.com/akamhy/waybackpy",
|
||||||
total=False,
|
total=False,
|
||||||
version=False,
|
version=False,
|
||||||
oldest=False,
|
oldest=False,
|
||||||
@ -345,7 +345,7 @@ def test_get():
|
|||||||
args = argparse.Namespace(
|
args = argparse.Namespace(
|
||||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
url="https://pypi.org/user/akamhy/",
|
url="https://akamhy.github.io/waybackpy/",
|
||||||
total=False,
|
total=False,
|
||||||
version=False,
|
version=False,
|
||||||
oldest=False,
|
oldest=False,
|
||||||
|
@ -223,7 +223,7 @@ def test_total_archives():
|
|||||||
def test_known_urls():
|
def test_known_urls():
|
||||||
|
|
||||||
target = waybackpy.Url("akamhy.github.io", user_agent)
|
target = waybackpy.Url("akamhy.github.io", user_agent)
|
||||||
assert len(target.known_urls(alive=True, subdomain=True)) > 2
|
assert len(target.known_urls(alive=True, subdomain=False)) > 2
|
||||||
|
|
||||||
target = waybackpy.Url("akamhy.github.io", user_agent)
|
target = waybackpy.Url("akamhy.github.io", user_agent)
|
||||||
assert len(target.known_urls()) > 3
|
assert len(target.known_urls()) > 3
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
"""
|
||||||
|
waybackpy.exceptions
|
||||||
|
~~~~~~~~~~~~~~~~~~~
|
||||||
|
This module contains the set of Waybackpy's exceptions.
|
||||||
|
"""
|
||||||
|
|
||||||
class WaybackError(Exception):
|
class WaybackError(Exception):
|
||||||
"""
|
"""
|
||||||
Raised when Wayback Machine API Service is unreachable/down.
|
Raised when Wayback Machine API Service is unreachable/down.
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
import re
|
import re
|
||||||
import requests
|
import requests
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
|
from urllib3.util.retry import Retry
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
from requests.adapters import HTTPAdapter
|
||||||
from waybackpy.__version__ import __version__
|
from waybackpy.__version__ import __version__
|
||||||
from waybackpy.exceptions import WaybackError, URLError
|
from waybackpy.exceptions import WaybackError, URLError
|
||||||
|
|
||||||
@ -102,15 +104,15 @@ def _wayback_timestamp(**kwargs):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _get_response(endpoint, params=None, headers=None):
|
def _get_response(endpoint, params=None, headers=None, retries=5):
|
||||||
"""
|
"""
|
||||||
This function is used to make a GET request.
|
This function is used to make a GET request.
|
||||||
We use the requests package to make the
|
We use the requests package to make the
|
||||||
requests.
|
requests.
|
||||||
|
|
||||||
|
|
||||||
We try twice and if both the times is fails And
|
We try five times and if it fails it raises
|
||||||
raises exceptions we give-up and raise WaybackError.
|
WaybackError exception.
|
||||||
|
|
||||||
You can handle WaybackError by importing:
|
You can handle WaybackError by importing:
|
||||||
from waybackpy.exceptions import WaybackError
|
from waybackpy.exceptions import WaybackError
|
||||||
@ -121,11 +123,14 @@ def _get_response(endpoint, params=None, headers=None):
|
|||||||
# handle it
|
# handle it
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# From https://stackoverflow.com/a/35504626
|
||||||
|
# By https://stackoverflow.com/users/401467/datashaman
|
||||||
|
s = requests.Session()
|
||||||
|
retries = Retry(total=retries, backoff_factor=0.5, status_forcelist=[ 500, 502, 503, 504 ])
|
||||||
|
s.mount('https://', HTTPAdapter(max_retries=retries))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return requests.get(endpoint, params=params, headers=headers)
|
return s.get(endpoint, params=params, headers=headers)
|
||||||
except Exception:
|
|
||||||
try:
|
|
||||||
return requests.get(endpoint, params=params, headers=headers)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
exc = WaybackError("Error while retrieving %s" % endpoint)
|
exc = WaybackError("Error while retrieving %s" % endpoint)
|
||||||
exc.__cause__ = e
|
exc.__cause__ = e
|
||||||
@ -450,12 +455,13 @@ class Url:
|
|||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Returns list of URLs known to exist for given domain name
|
Returns list of URLs known to exist for given domain name
|
||||||
because these URLs were crawled by WayBack Machine bots.
|
because these URLs were crawled by WayBack Machine spider.
|
||||||
Useful for pen-testers and others.
|
Useful for pen-testing.
|
||||||
Idea by Mohammed Diaa (https://github.com/mhmdiaa) from:
|
|
||||||
https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Idea by Mohammed Diaa (https://github.com/mhmdiaa) from:
|
||||||
|
# https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050
|
||||||
|
|
||||||
url_list = []
|
url_list = []
|
||||||
|
|
||||||
if subdomain:
|
if subdomain:
|
||||||
|
Loading…
Reference in New Issue
Block a user