added RedirectSaveError - for failed saves if the URL is a permanent … (#93)
* Added RedirectSaveError, raised when a save fails because the URL is a permanent redirect. * Check whether the URL is a redirect before throwing exceptions; res.url is the redirect URL if the request was redirected at all. * Updated the tests and the CLI error messages.
This commit is contained in:
		@@ -29,7 +29,7 @@ def test_save():
 | 
				
			|||||||
        get=None,
 | 
					        get=None,
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    reply = cli.args_handler(args)
 | 
					    reply = cli.args_handler(args)
 | 
				
			||||||
    assert "could happen because either your waybackpy" in str(reply)
 | 
					    assert "could happen because either your waybackpy" or "cannot be archived by wayback machine as it is a redirect" in str(reply)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_json():
 | 
					def test_json():
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -29,6 +29,8 @@ def _save(obj):
 | 
				
			|||||||
                    version=__version__, header=header
 | 
					                    version=__version__, header=header
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					        if "URL cannot be archived by wayback machine as it is a redirect" in e:
 | 
				
			||||||
 | 
					            return ("URL cannot be archived by wayback machine as it is a redirect")
 | 
				
			||||||
        raise WaybackError(err)
 | 
					        raise WaybackError(err)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -13,6 +13,13 @@ class WaybackError(Exception):
 | 
				
			|||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class RedirectSaveError(WaybackError):
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Raised when the original URL is redirected and the
 | 
				
			||||||
 | 
					    redirect URL is archived but not the original URL.
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class URLError(Exception):
 | 
					class URLError(Exception):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Raised when malformed URLs are passed as arguments.
 | 
					    Raised when malformed URLs are passed as arguments.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,7 +3,7 @@ import time
 | 
				
			|||||||
import requests
 | 
					import requests
 | 
				
			||||||
from datetime import datetime
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .exceptions import WaybackError, URLError
 | 
					from .exceptions import WaybackError, URLError, RedirectSaveError
 | 
				
			||||||
from .__version__ import __version__
 | 
					from .__version__ import __version__
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from urllib3.util.retry import Retry
 | 
					from urllib3.util.retry import Retry
 | 
				
			||||||
@@ -302,7 +302,9 @@ def _get_total_pages(url, user_agent):
 | 
				
			|||||||
    return int((_get_response(total_pages_url, headers=headers).text).strip())
 | 
					    return int((_get_response(total_pages_url, headers=headers).text).strip())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _archive_url_parser(header, url, latest_version=__version__, instance=None):
 | 
					def _archive_url_parser(
 | 
				
			||||||
 | 
					    header, url, latest_version=__version__, instance=None, response=None
 | 
				
			||||||
 | 
					):
 | 
				
			||||||
    """Returns the archive after parsing it from the response header.
 | 
					    """Returns the archive after parsing it from the response header.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Parameters
 | 
					    Parameters
 | 
				
			||||||
@@ -388,6 +390,16 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
 | 
				
			|||||||
    if m:
 | 
					    if m:
 | 
				
			||||||
        return m.group(1)
 | 
					        return m.group(1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if response:
 | 
				
			||||||
 | 
					        if response.url:
 | 
				
			||||||
 | 
					            if "web.archive.org/web" in response.url:
 | 
				
			||||||
 | 
					                m = re.search(
 | 
				
			||||||
 | 
					                    r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$",
 | 
				
			||||||
 | 
					                    str(response.url).strip(),
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					                if m:
 | 
				
			||||||
 | 
					                    return m.group(0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if instance:
 | 
					    if instance:
 | 
				
			||||||
        newest_archive = None
 | 
					        newest_archive = None
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
@@ -414,6 +426,13 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
 | 
				
			|||||||
            "Wayback Machine is malfunctioning or it refused to archive your URL."
 | 
					            "Wayback Machine is malfunctioning or it refused to archive your URL."
 | 
				
			||||||
            "\nHeader:\n{header}".format(url=url, header=header)
 | 
					            "\nHeader:\n{header}".format(url=url, header=header)
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if "save redirected" == header.strip():
 | 
				
			||||||
 | 
					            raise RedirectSaveError(
 | 
				
			||||||
 | 
					                "URL cannot be archived by wayback machine as it is a redirect.\nHeader:\n{header}".format(
 | 
				
			||||||
 | 
					                    header=header
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        exc_message = (
 | 
					        exc_message = (
 | 
				
			||||||
            "No archive URL found in the API response. "
 | 
					            "No archive URL found in the API response. "
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -225,6 +225,7 @@ class Url:
 | 
				
			|||||||
            self.url,
 | 
					            self.url,
 | 
				
			||||||
            latest_version=self.latest_version,
 | 
					            latest_version=self.latest_version,
 | 
				
			||||||
            instance=self,
 | 
					            instance=self,
 | 
				
			||||||
 | 
					            response=response,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        m = re.search(
 | 
					        m = re.search(
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user