From 8ab116f276daac6b86ec66113841891e4e4498fd Mon Sep 17 00:00:00 2001
From: Akash <64683866+akamhy@users.noreply.github.com>
Date: Sun, 19 Jul 2020 20:39:07 +0530
Subject: [PATCH] API changed again; updated

* Update wrapper.py
* Update wrapper.py
* Update wrapper.py
* Update wrapper.py
* Update wrapper.py
* api changed; fix archive url parser
* Update wrapper.py
* - Trailing whitespace
* include the header in exception
---
 waybackpy/wrapper.py | 55 +++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/waybackpy/wrapper.py b/waybackpy/wrapper.py
index 2931a93..e7e4c91 100644
--- a/waybackpy/wrapper.py
+++ b/waybackpy/wrapper.py
@@ -8,9 +8,9 @@ from waybackpy.exceptions import WaybackError
 
 if sys.version_info >= (3, 0):  # If the python ver >= 3
     from urllib.request import Request, urlopen
-    from urllib.error import HTTPError, URLError
+    from urllib.error import URLError
 else:  # For python2.x
-    from urllib2 import Request, urlopen, HTTPError, URLError
+    from urllib2 import Request, urlopen, URLError
 
 default_UA = "waybackpy python package - https://github.com/akamhy/waybackpy"
 
@@ -59,13 +59,6 @@ class Url():
             str(kwargs["minute"]).zfill(2)
         )
 
-    def handle_HTTPError(self, e):
-        """Handle some common HTTPErrors."""
-        if e.code == 404:
-            raise HTTPError(e)
-        if e.code >= 400:
-            raise WaybackError(e)
-
     def save(self):
         """Create a new archives for an URL on the Wayback Machine."""
         request_url = ("https://web.archive.org/save/" + self.clean_url())
@@ -79,32 +72,42 @@ class Url():
         except Exception as e:
             raise WaybackError(e)
         header = response.headers
-        try:
-            arch = re.search(r"rel=\"memento.*?web\.archive\.org(/web/[0-9]{14}/.*?)>", str(header)).group(1)
-        except KeyError as e:
-            raise WaybackError(e)
-        return "https://web.archive.org" + arch
+
+        def archive_url_parser(header):
+            arch = re.search(r"X-Cache-Key:\shttps(.*)[A-Z]{2}", str(header))
+            if arch:
+                return arch.group(1)
+            raise WaybackError(
+                "No archive url found in the API response. Visit https://github.com/akamhy/waybackpy for latest version of waybackpy.\nHeader:\n%s" % str(header)
+            )
+
+        return "https://" + archive_url_parser(header)
 
     def get(self, url=None, user_agent=None, encoding=None):
         """Returns the source code of the supplied URL. Auto detects the encoding if not supplied."""
+
         if not url:
             url = self.clean_url()
         if not user_agent:
             user_agent = self.user_agent
+
         hdr = { 'User-Agent' : '%s' % user_agent }
         req = Request(url, headers=hdr) #nosec
+
         try:
             resp=urlopen(req) #nosec
-        except URLError:
+        except Exception:
             try:
                 resp=urlopen(req) #nosec
-            except URLError as e:
-                raise HTTPError(e)
+            except Exception as e:
+                raise WaybackError(e)
+
         if not encoding:
             try:
                 encoding= resp.headers['content-type'].split('charset=')[-1]
             except AttributeError:
                 encoding = "UTF-8"
+
         return resp.read().decode(encoding.replace("text/html", "UTF-8", 1))
 
     def near(self, **kwargs):
@@ -121,10 +124,15 @@ class Url():
         request_url = "https://archive.org/wayback/available?url=%s&timestamp=%s" % (self.clean_url(), str(timestamp))
         hdr = { 'User-Agent' : '%s' % self.user_agent }
         req = Request(request_url, headers=hdr) # nosec
+
         try:
             response = urlopen(req) #nosec
-        except Exception as e:
-            self.handle_HTTPError(e)
+        except Exception:
+            try:
+                response = urlopen(req) #nosec
+            except Exception as e:
+                raise WaybackError(e)
+
         data = json.loads(response.read().decode("UTF-8"))
         if not data["archived_snapshots"]:
             raise WaybackError("'%s' is not yet archived." % url)
@@ -146,8 +154,13 @@ class Url():
         hdr = { 'User-Agent' : '%s' % self.user_agent }
         request_url = "https://web.archive.org/cdx/search/cdx?url=%s&output=json&fl=statuscode" % self.clean_url()
         req = Request(request_url, headers=hdr) # nosec
+
         try:
             response = urlopen(req) #nosec
-        except Exception as e:
-            self.handle_HTTPError(e)
+        except Exception:
+            try:
+                response = urlopen(req) #nosec
+            except Exception as e:
+                raise WaybackError(e)
+
         return str(response.read()).count(",") # Most efficient method to count number of archives (yet)
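
Example usage of the patched methods (an illustrative sketch, not part of the commit:
the Url(url, user_agent) constructor, the total_archives() method name, and the
year/month keywords for near() are assumed from the rest of wrapper.py and are not
shown in this diff; the target URL and user agent are placeholders):

    from waybackpy.wrapper import Url

    # Placeholder URL and user agent -- substitute your own values.
    target = Url("https://example.com", "my-user-agent")

    archive_url = target.save()                 # snapshot URL parsed from the X-Cache-Key header
    page_source = target.get()                  # source of the live page, encoding auto-detected
    snapshot = target.near(year=2020, month=7)  # closest snapshot to July 2020 (assumed keywords)
    count = target.total_archives()             # comma count of the CDX API response (assumed name)

With this patch, failed requests in these methods raise waybackpy.exceptions.WaybackError
rather than urllib's HTTPError.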