Merge pull request #136 from eggplants/429_error
Add TooManyRequestsError
This commit is contained in:
commit
f49d67a411
@ -8,14 +8,13 @@ This module contains the set of Waybackpy's exceptions.
|
|||||||
class WaybackError(Exception):
|
class WaybackError(Exception):
|
||||||
"""
|
"""
|
||||||
Raised when Waybackpy can not return what you asked for.
|
Raised when Waybackpy can not return what you asked for.
|
||||||
|
|
||||||
1) Wayback Machine API Service is unreachable/down.
|
1) Wayback Machine API Service is unreachable/down.
|
||||||
2) You passed illegal arguments.
|
2) You passed illegal arguments.
|
||||||
|
|
||||||
All other exceptions are inherited from this class.
|
All other exceptions are inherited from this class.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class RedirectSaveError(WaybackError):
|
class RedirectSaveError(WaybackError):
|
||||||
"""
|
"""
|
||||||
@ -23,15 +22,18 @@ class RedirectSaveError(WaybackError):
|
|||||||
redirect URL is archived but not the original URL.
|
redirect URL is archived but not the original URL.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class URLError(Exception):
|
class URLError(Exception):
|
||||||
"""
|
"""
|
||||||
Raised when malformed URLs are passed as arguments.
|
Raised when malformed URLs are passed as arguments.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
pass
|
|
||||||
|
class TooManyRequestsError(WaybackError):
|
||||||
|
"""
|
||||||
|
Raised when you make more than 15 requests per
|
||||||
|
minute and the Wayback Machine returns 429.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class MaximumRetriesExceeded(WaybackError):
|
class MaximumRetriesExceeded(WaybackError):
|
||||||
@ -39,28 +41,20 @@ class MaximumRetriesExceeded(WaybackError):
|
|||||||
MaximumRetriesExceeded
|
MaximumRetriesExceeded
|
||||||
"""
|
"""
|
||||||
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class MaximumSaveRetriesExceeded(MaximumRetriesExceeded):
|
class MaximumSaveRetriesExceeded(MaximumRetriesExceeded):
|
||||||
"""
|
"""
|
||||||
MaximumSaveRetriesExceeded
|
MaximumSaveRetriesExceeded
|
||||||
"""
|
"""
|
||||||
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class ArchiveNotInAvailabilityAPIResponse(WaybackError):
|
class ArchiveNotInAvailabilityAPIResponse(WaybackError):
|
||||||
"""
|
"""
|
||||||
Could not parse the archive in the JSON response of the availability API.
|
Could not parse the archive in the JSON response of the availability API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class InvalidJSONInAvailabilityAPIResponse(WaybackError):
|
class InvalidJSONInAvailabilityAPIResponse(WaybackError):
|
||||||
"""
|
"""
|
||||||
availability api returned invalid JSON
|
availability api returned invalid JSON
|
||||||
"""
|
"""
|
||||||
|
|
||||||
pass
|
|
||||||
|
@ -8,7 +8,7 @@ from requests.adapters import HTTPAdapter
|
|||||||
from requests.structures import CaseInsensitiveDict
|
from requests.structures import CaseInsensitiveDict
|
||||||
from urllib3.util.retry import Retry
|
from urllib3.util.retry import Retry
|
||||||
|
|
||||||
from .exceptions import MaximumSaveRetriesExceeded
|
from .exceptions import MaximumSaveRetriesExceeded, TooManyRequestsError
|
||||||
from .utils import DEFAULT_USER_AGENT
|
from .utils import DEFAULT_USER_AGENT
|
||||||
|
|
||||||
|
|
||||||
@ -79,6 +79,12 @@ class WaybackMachineSaveAPI(object):
|
|||||||
self.status_code = self.response.status_code
|
self.status_code = self.response.status_code
|
||||||
self.response_url = self.response.url
|
self.response_url = self.response.url
|
||||||
session.close()
|
session.close()
|
||||||
|
if self.status_code == 429:
|
||||||
|
raise TooManyRequestsError(
|
||||||
|
"Seem to be refused to request by the server. "
|
||||||
|
"Save Page Now receives up to 15 URLs per minutes. "
|
||||||
|
"Wait a moment and run again."
|
||||||
|
)
|
||||||
|
|
||||||
def archive_url_parser(self) -> Optional[str]:
|
def archive_url_parser(self) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
@ -102,12 +108,10 @@ class WaybackMachineSaveAPI(object):
|
|||||||
if match is not None and len(match.groups()) == 1:
|
if match is not None and len(match.groups()) == 1:
|
||||||
return "https" + match.group(1)
|
return "https" + match.group(1)
|
||||||
|
|
||||||
if self.response_url:
|
|
||||||
self.response_url = self.response_url.strip()
|
self.response_url = self.response_url.strip()
|
||||||
if "web.archive.org/web" in self.response_url:
|
regex4 = r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$"
|
||||||
regex = r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$"
|
match = re.search(regex4, self.response_url)
|
||||||
match = re.search(regex, self.response_url)
|
if match is not None:
|
||||||
if match:
|
|
||||||
return "https://" + match.group(0)
|
return "https://" + match.group(0)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
Loading…
Reference in New Issue
Block a user