added RedirectSaveError - for failed saves if the URL is a permanent redirect (#93)
* added RedirectSaveError - for failed saves if the URL is a permanent redirect. * check if url is redirect before throwing exceptions, res.url is the redirect url if redirected at all * update tests and cli errors
This commit is contained in:
parent
db8f902cff
commit
dd1917c77e
@ -29,7 +29,7 @@ def test_save():
|
|||||||
get=None,
|
get=None,
|
||||||
)
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "could happen because either your waybackpy" in str(reply)
|
# Each expected message needs its own membership test: a bare non-empty
# string on the left of `or` is always truthy, so the previous form of this
# assertion could never fail regardless of what `reply` contained.
assert (
    "could happen because either your waybackpy" in str(reply)
    or "cannot be archived by wayback machine as it is a redirect" in str(reply)
)
|
||||||
|
|
||||||
|
|
||||||
def test_json():
|
def test_json():
|
||||||
|
@ -29,6 +29,8 @@ def _save(obj):
|
|||||||
version=__version__, header=header
|
version=__version__, header=header
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
if "URL cannot be archived by wayback machine as it is a redirect" in e:
|
||||||
|
return ("URL cannot be archived by wayback machine as it is a redirect")
|
||||||
raise WaybackError(err)
|
raise WaybackError(err)
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,6 +13,13 @@ class WaybackError(Exception):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class RedirectSaveError(WaybackError):
|
||||||
|
"""
|
||||||
|
Raised when the original URL is redirected and the
|
||||||
|
redirect URL is archived but not the original URL.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class URLError(Exception):
|
class URLError(Exception):
|
||||||
"""
|
"""
|
||||||
Raised when malformed URLs are passed as arguments.
|
Raised when malformed URLs are passed as arguments.
|
||||||
|
@ -3,7 +3,7 @@ import time
|
|||||||
import requests
|
import requests
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from .exceptions import WaybackError, URLError
|
from .exceptions import WaybackError, URLError, RedirectSaveError
|
||||||
from .__version__ import __version__
|
from .__version__ import __version__
|
||||||
|
|
||||||
from urllib3.util.retry import Retry
|
from urllib3.util.retry import Retry
|
||||||
@ -302,7 +302,9 @@ def _get_total_pages(url, user_agent):
|
|||||||
return int((_get_response(total_pages_url, headers=headers).text).strip())
|
return int((_get_response(total_pages_url, headers=headers).text).strip())
|
||||||
|
|
||||||
|
|
||||||
def _archive_url_parser(header, url, latest_version=__version__, instance=None):
|
def _archive_url_parser(
|
||||||
|
header, url, latest_version=__version__, instance=None, response=None
|
||||||
|
):
|
||||||
"""Returns the archive after parsing it from the response header.
|
"""Returns the archive after parsing it from the response header.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
@ -388,6 +390,16 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
|
|||||||
if m:
|
if m:
|
||||||
return m.group(1)
|
return m.group(1)
|
||||||
|
|
||||||
|
if response:
|
||||||
|
if response.url:
|
||||||
|
if "web.archive.org/web" in response.url:
|
||||||
|
m = re.search(
|
||||||
|
r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$",
|
||||||
|
str(response.url).strip(),
|
||||||
|
)
|
||||||
|
if m:
|
||||||
|
return m.group(0)
|
||||||
|
|
||||||
if instance:
|
if instance:
|
||||||
newest_archive = None
|
newest_archive = None
|
||||||
try:
|
try:
|
||||||
@ -414,6 +426,13 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
|
|||||||
"Wayback Machine is malfunctioning or it refused to archive your URL."
|
"Wayback Machine is malfunctioning or it refused to archive your URL."
|
||||||
"\nHeader:\n{header}".format(url=url, header=header)
|
"\nHeader:\n{header}".format(url=url, header=header)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if "save redirected" == header.strip():
|
||||||
|
raise RedirectSaveError(
|
||||||
|
"URL cannot be archived by wayback machine as it is a redirect.\nHeader:\n{header}".format(
|
||||||
|
header=header
|
||||||
|
)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
exc_message = (
|
exc_message = (
|
||||||
"No archive URL found in the API response. "
|
"No archive URL found in the API response. "
|
||||||
|
@ -225,6 +225,7 @@ class Url:
|
|||||||
self.url,
|
self.url,
|
||||||
latest_version=self.latest_version,
|
latest_version=self.latest_version,
|
||||||
instance=self,
|
instance=self,
|
||||||
|
response=response,
|
||||||
)
|
)
|
||||||
|
|
||||||
m = re.search(
|
m = re.search(
|
||||||
|
Loading…
Reference in New Issue
Block a user