Merge pull request #136 from eggplants/429_error

Add TooManyRequestsError
This commit is contained in:
Akash Mahanty 2022-02-05 11:28:27 +05:30 committed by GitHub
commit f49d67a411
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 24 deletions

View File

@ -8,14 +8,13 @@ This module contains the set of Waybackpy's exceptions.
class WaybackError(Exception):
    """
    Signals that Waybackpy could not return what was asked for.

    Typical causes:
     1) The Wayback Machine API service is unreachable or down.
     2) Illegal arguments were passed.

    Base class for the Waybackpy exceptions that subclass it. Note that
    URLError derives from Exception directly rather than from this class.
    """
class RedirectSaveError(WaybackError):
"""
@ -23,15 +22,18 @@ class RedirectSaveError(WaybackError):
redirect URL is archived but not the original URL.
"""
pass
class URLError(Exception):
    """
    Signals that a malformed URL was supplied as an argument.

    Derives from Exception directly, so an ``except WaybackError`` clause
    does not catch it.
    """
class TooManyRequestsError(WaybackError):
    """
    Signals that the Wayback Machine answered with HTTP status 429,
    which happens when more than 15 requests are made per minute.
    """
class MaximumRetriesExceeded(WaybackError):
@ -39,28 +41,20 @@ class MaximumRetriesExceeded(WaybackError):
MaximumRetriesExceeded
"""
pass
class MaximumSaveRetriesExceeded(MaximumRetriesExceeded):
    """
    Save-specific variant of MaximumRetriesExceeded.

    NOTE(review): presumably raised once the retry budget for a save
    request is used up; confirm against the save API code that raises it.
    """
class ArchiveNotInAvailabilityAPIResponse(WaybackError):
    """
    Signals that the archive could not be parsed out of the JSON
    response of the availability API.
    """
class InvalidJSONInAvailabilityAPIResponse(WaybackError):
    """
    Signals that the availability API returned invalid JSON.
    """

View File

@ -8,7 +8,7 @@ from requests.adapters import HTTPAdapter
from requests.structures import CaseInsensitiveDict
from urllib3.util.retry import Retry
from .exceptions import MaximumSaveRetriesExceeded
from .exceptions import MaximumSaveRetriesExceeded, TooManyRequestsError
from .utils import DEFAULT_USER_AGENT
@ -79,6 +79,12 @@ class WaybackMachineSaveAPI(object):
self.status_code = self.response.status_code
self.response_url = self.response.url
session.close()
if self.status_code == 429:
raise TooManyRequestsError(
"Seem to be refused to request by the server. "
"Save Page Now receives up to 15 URLs per minutes. "
"Wait a moment and run again."
)
def archive_url_parser(self) -> Optional[str]:
"""
@ -102,12 +108,10 @@ class WaybackMachineSaveAPI(object):
if match is not None and len(match.groups()) == 1:
return "https" + match.group(1)
if self.response_url:
self.response_url = self.response_url.strip()
if "web.archive.org/web" in self.response_url:
regex = r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$"
match = re.search(regex, self.response_url)
if match:
regex4 = r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$"
match = re.search(regex4, self.response_url)
if match is not None:
return "https://" + match.group(0)
return None