Merge pull request #136 from eggplants/429_error

Add TooManyRequestsError
This commit is contained in:
Akash Mahanty 2022-02-05 11:28:27 +05:30 committed by GitHub
commit f49d67a411
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 24 deletions

View File

@ -8,14 +8,13 @@ This module contains the set of Waybackpy's exceptions.
class WaybackError(Exception): class WaybackError(Exception):
""" """
Raised when Waybackpy can not return what you asked for. Raised when Waybackpy can not return what you asked for.
1) Wayback Machine API Service is unreachable/down.
2) You passed illegal arguments.
All other exceptions are inherited from this class. 1) Wayback Machine API Service is unreachable/down.
2) You passed illegal arguments.
All other exceptions are inherited from this class.
""" """
pass
class RedirectSaveError(WaybackError): class RedirectSaveError(WaybackError):
""" """
@ -23,15 +22,18 @@ class RedirectSaveError(WaybackError):
redirect URL is archived but not the original URL. redirect URL is archived but not the original URL.
""" """
pass
class URLError(Exception): class URLError(Exception):
""" """
Raised when malformed URLs are passed as arguments. Raised when malformed URLs are passed as arguments.
""" """
pass
class TooManyRequestsError(WaybackError):
    """
    Raised when the Wayback Machine answers a request with HTTP status
    code 429 (Too Many Requests).

    The save API raises this as soon as it sees a 429 response. According
    to the accompanying error message, Save Page Now accepts up to 15 URLs
    per minute, so wait a moment before trying again.
    """
class MaximumRetriesExceeded(WaybackError): class MaximumRetriesExceeded(WaybackError):
@ -39,28 +41,20 @@ class MaximumRetriesExceeded(WaybackError):
MaximumRetriesExceeded MaximumRetriesExceeded
""" """
pass
class MaximumSaveRetriesExceeded(MaximumRetriesExceeded): class MaximumSaveRetriesExceeded(MaximumRetriesExceeded):
""" """
MaximumSaveRetriesExceeded MaximumSaveRetriesExceeded
""" """
pass
class ArchiveNotInAvailabilityAPIResponse(WaybackError): class ArchiveNotInAvailabilityAPIResponse(WaybackError):
""" """
Could not parse the archive in the JSON response of the availability API. Could not parse the archive in the JSON response of the availability API.
""" """
pass
class InvalidJSONInAvailabilityAPIResponse(WaybackError): class InvalidJSONInAvailabilityAPIResponse(WaybackError):
""" """
availability api returned invalid JSON availability api returned invalid JSON
""" """
pass

View File

@ -8,7 +8,7 @@ from requests.adapters import HTTPAdapter
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from urllib3.util.retry import Retry from urllib3.util.retry import Retry
from .exceptions import MaximumSaveRetriesExceeded from .exceptions import MaximumSaveRetriesExceeded, TooManyRequestsError
from .utils import DEFAULT_USER_AGENT from .utils import DEFAULT_USER_AGENT
@ -79,6 +79,12 @@ class WaybackMachineSaveAPI(object):
self.status_code = self.response.status_code self.status_code = self.response.status_code
self.response_url = self.response.url self.response_url = self.response.url
session.close() session.close()
if self.status_code == 429:
raise TooManyRequestsError(
"Seem to be refused to request by the server. "
"Save Page Now receives up to 15 URLs per minutes. "
"Wait a moment and run again."
)
def archive_url_parser(self) -> Optional[str]: def archive_url_parser(self) -> Optional[str]:
""" """
@ -102,13 +108,11 @@ class WaybackMachineSaveAPI(object):
if match is not None and len(match.groups()) == 1: if match is not None and len(match.groups()) == 1:
return "https" + match.group(1) return "https" + match.group(1)
if self.response_url: self.response_url = self.response_url.strip()
self.response_url = self.response_url.strip() regex4 = r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$"
if "web.archive.org/web" in self.response_url: match = re.search(regex4, self.response_url)
regex = r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$" if match is not None:
match = re.search(regex, self.response_url) return "https://" + match.group(0)
if match:
return "https://" + match.group(0)
return None return None