From 38088fa0d8d13d9e90cbc3fc95c70a0839f55a76 Mon Sep 17 00:00:00 2001
From: eggplants <w10776e8w@yahoo.co.jp>
Date: Fri, 4 Feb 2022 04:25:01 +0900
Subject: [PATCH] add: type annotations to waybackpy modules

---
 waybackpy/availability_api.py | 75 +++++++++++++++++------------
 waybackpy/cdx_api.py          | 66 ++++++++++++++-----------
 waybackpy/cdx_snapshot.py     |  7 +--
 waybackpy/cdx_utils.py        | 91 +++++++++++++++++------------------
 waybackpy/cli.py              | 72 ++++++++++++++-------------
 waybackpy/exceptions.py       | 14 ++++++
 waybackpy/save_api.py         | 61 ++++++++++++-----------
 waybackpy/utils.py            | 31 ++++++++++--
 waybackpy/wrapper.py          | 63 +++++++++++++-----------
 9 files changed, 275 insertions(+), 205 deletions(-)

diff --git a/waybackpy/availability_api.py b/waybackpy/availability_api.py
index 6e76bb8..bab92f7 100644
--- a/waybackpy/availability_api.py
+++ b/waybackpy/availability_api.py
@@ -1,6 +1,7 @@
 import json
 import time
 from datetime import datetime
+from typing import Any, Dict, Optional
 
 import requests
 
@@ -10,37 +11,41 @@ from .exceptions import (
 )
 from .utils import DEFAULT_USER_AGENT
 
+ResponseJSON = Dict[str, Any]
 
-class WaybackMachineAvailabilityAPI:
+
+class WaybackMachineAvailabilityAPI(object):
     """
     Class that interfaces the availability API of the Wayback Machine.
     """
 
-    def __init__(self, url, user_agent=DEFAULT_USER_AGENT, max_tries=3):
+    def __init__(
+        self, url: str, user_agent: str = DEFAULT_USER_AGENT, max_tries: int = 3
+    ) -> None:
         self.url = str(url).strip().replace(" ", "%20")
         self.user_agent = user_agent
-        self.headers = {"User-Agent": self.user_agent}
+        self.headers: Dict[str, str] = {"User-Agent": self.user_agent}
         self.payload = {"url": "{url}".format(url=self.url)}
         self.endpoint = "https://archive.org/wayback/available"
         self.max_tries = max_tries
         self.tries = 0
         self.last_api_call_unix_time = int(time.time())
         self.api_call_time_gap = 5
-        self.JSON = None
+        self.JSON: Optional[ResponseJSON] = None
 
-    def unix_timestamp_to_wayback_timestamp(self, unix_timestamp):
+    def unix_timestamp_to_wayback_timestamp(self, unix_timestamp: int) -> str:
         """
         Converts Unix time to wayback Machine timestamp.
         """
         return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         """
         Same as string representation, just return the archive URL as a string.
         """
         return str(self)
 
-    def __str__(self):
+    def __str__(self) -> str:
         """
         String representation of the class. If atleast one API call was successfully
         made then return the archive URL as a string. Else returns None.
@@ -54,7 +59,7 @@ class WaybackMachineAvailabilityAPI:
 
         return self.archive_url
 
-    def json(self):
+    def json(self) -> Optional[ResponseJSON]:
         """
         Makes the API call to the availability API can set the JSON response
         to the JSON attribute of the instance and also returns the JSON attribute.
@@ -79,7 +84,7 @@ class WaybackMachineAvailabilityAPI:
 
         return self.JSON
 
-    def timestamp(self):
+    def timestamp(self) -> datetime:
         """
         Converts the timestamp form the JSON response to datetime object.
         If JSON attribute of the instance is None it implies that the either
@@ -91,19 +96,29 @@ class WaybackMachineAvailabilityAPI:
         If you get an URL as a response form the availability API it is guaranteed
         that you can get the datetime object from the timestamp.
         """
-        if not self.JSON or not self.JSON["archived_snapshots"]:
+        if self.JSON is None or "archived_snapshots" not in self.JSON:
             return datetime.max
-
-        return datetime.strptime(
-            self.JSON["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
-        )
+        elif (
+            self.JSON is not None
+            and "archived_snapshots" in self.JSON
+            and self.JSON["archived_snapshots"] is not None
+            and "closest" in self.JSON["archived_snapshots"]
+            and self.JSON["archived_snapshots"]["closest"] is not None
+            and "timestamp" in self.JSON["archived_snapshots"]["closest"]
+        ):
+            return datetime.strptime(
+                self.JSON["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
+            )
+        else:
+            raise ValueError("Could not get timestamp from result")
 
     @property
-    def archive_url(self):
+    def archive_url(self) -> str:
         """
         Reads the the JSON response data and tries to get the timestamp and returns
         the timestamp if found else returns None.
         """
+        archive_url = ""
         data = self.JSON
 
         # If the user didn't used oldest, newest or near but tries to access the
@@ -138,7 +153,7 @@ class WaybackMachineAvailabilityAPI:
             )
         return archive_url
 
-    def wayback_timestamp(self, **kwargs):
+    def wayback_timestamp(self, **kwargs: int) -> str:
         """
         Prepends zero before the year, month, day, hour and minute so that they
         are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
@@ -148,7 +163,7 @@ class WaybackMachineAvailabilityAPI:
             for key in ["year", "month", "day", "hour", "minute"]
         )
 
-    def oldest(self):
+    def oldest(self) -> "WaybackMachineAvailabilityAPI":
         """
         Passing the year 1994 should return the oldest archive because
         wayback machine was started in May, 1996 and there should be no archive
@@ -156,7 +171,7 @@ class WaybackMachineAvailabilityAPI:
         """
         return self.near(year=1994)
 
-    def newest(self):
+    def newest(self) -> "WaybackMachineAvailabilityAPI":
         """
         Passing the current UNIX time should be sufficient to get the newest
         archive considering the API request-response time delay and also the
@@ -166,13 +181,13 @@ class WaybackMachineAvailabilityAPI:
 
     def near(
         self,
-        year=None,
-        month=None,
-        day=None,
-        hour=None,
-        minute=None,
-        unix_timestamp=None,
-    ):
+        year: Optional[int] = None,
+        month: Optional[int] = None,
+        day: Optional[int] = None,
+        hour: Optional[int] = None,
+        minute: Optional[int] = None,
+        unix_timestamp: Optional[int] = None,
+    ) -> "WaybackMachineAvailabilityAPI":
         """
         The main method for this Class, oldest and newest methods are dependent on this
         method.
@@ -188,11 +203,11 @@ class WaybackMachineAvailabilityAPI:
         else:
             now = datetime.utcnow().timetuple()
             timestamp = self.wayback_timestamp(
-                year=year if year else now.tm_year,
-                month=month if month else now.tm_mon,
-                day=day if day else now.tm_mday,
-                hour=hour if hour else now.tm_hour,
-                minute=minute if minute else now.tm_min,
+                year=now.tm_year if year is None else year,
+                month=now.tm_mon if month is None else month,
+                day=now.tm_mday if day is None else day,
+                hour=now.tm_hour if hour is None else hour,
+                minute=now.tm_min if minute is None else minute,
             )
 
         self.payload["timestamp"] = timestamp
diff --git a/waybackpy/cdx_api.py b/waybackpy/cdx_api.py
index a04f8af..c39c83d 100644
--- a/waybackpy/cdx_api.py
+++ b/waybackpy/cdx_api.py
@@ -1,3 +1,5 @@
+from typing import Dict, Generator, List, Optional, cast
+
 from .cdx_snapshot import CDXSnapshot
 from .cdx_utils import (
     check_collapses,
@@ -11,43 +13,48 @@ from .exceptions import WaybackError
 from .utils import DEFAULT_USER_AGENT
 
 
-class WaybackMachineCDXServerAPI:
+class WaybackMachineCDXServerAPI(object):
     """
     Class that interfaces the CDX server API of the Wayback Machine.
     """
 
+    # start_timestamp is sent as the CDX "from" parameter; "from" is a Python keyword.
+    # end_timestamp is sent as the CDX "to" parameter; named to match start_timestamp.
     def __init__(
         self,
-        url,
-        user_agent=DEFAULT_USER_AGENT,
-        start_timestamp=None,  # from, can not use from as it's a keyword
-        end_timestamp=None,  # to, not using to as can not use from
-        filters=[],
-        match_type=None,
-        gzip=None,
-        collapses=[],
-        limit=None,
-        max_tries=3,
-    ):
+        url: str,
+        user_agent: str = DEFAULT_USER_AGENT,
+        start_timestamp: Optional[str] = None,
+        end_timestamp: Optional[str] = None,
+        filters: List[str] = [],
+        match_type: Optional[str] = None,
+        gzip: Optional[str] = None,
+        collapses: List[str] = [],
+        limit: Optional[str] = None,
+        max_tries: int = 3,
+    ) -> None:
         self.url = str(url).strip().replace(" ", "%20")
         self.user_agent = user_agent
-        self.start_timestamp = str(start_timestamp) if start_timestamp else None
-        self.end_timestamp = str(end_timestamp) if end_timestamp else None
+        self.start_timestamp = (
+            str(start_timestamp) if start_timestamp is not None else None
+        )
+        self.end_timestamp = str(end_timestamp) if end_timestamp is not None else None
         self.filters = filters
         check_filters(self.filters)
-        self.match_type = str(match_type).strip() if match_type else None
+        self.match_type = str(match_type).strip() if match_type is not None else None
         check_match_type(self.match_type, self.url)
-        self.gzip = gzip if gzip else True
+        self.gzip = gzip
         self.collapses = collapses
         check_collapses(self.collapses)
-        self.limit = limit if limit else 5000
+        self.limit = limit if limit is not None else 5000
         self.max_tries = max_tries
-        self.last_api_request_url = None
+        self.last_api_request_url: Optional[str] = None
         self.use_page = False
         self.endpoint = "https://web.archive.org/cdx/search/cdx"
 
-    def cdx_api_manager(self, payload, headers, use_page=False):
-
+    def cdx_api_manager(
+        self, payload: Dict[str, str], headers: Dict[str, str], use_page: bool = False
+    ) -> Generator[str, None, None]:
         total_pages = get_total_pages(self.url, self.user_agent)
         # If we only have two or less pages of archives then we care for more accuracy
         # pagination API is lagged sometimes
@@ -58,6 +65,8 @@ class WaybackMachineCDXServerAPI:
 
                 url = full_url(self.endpoint, params=payload)
                 res = get_response(url, headers=headers)
+                if isinstance(res, Exception):
+                    raise res
 
                 self.last_api_request_url = url
                 text = res.text
@@ -69,19 +78,18 @@ class WaybackMachineCDXServerAPI:
 
                 yield text
         else:
-
             payload["showResumeKey"] = "true"
             payload["limit"] = str(self.limit)
             resumeKey = None
-
             more = True
             while more:
-
                 if resumeKey:
                     payload["resumeKey"] = resumeKey
 
                 url = full_url(self.endpoint, params=payload)
                 res = get_response(url, headers=headers)
+                if isinstance(res, Exception):
+                    raise res
 
                 self.last_api_request_url = url
 
@@ -102,14 +110,14 @@ class WaybackMachineCDXServerAPI:
 
                 yield text
 
-    def add_payload(self, payload):
+    def add_payload(self, payload: Dict[str, str]) -> None:
         if self.start_timestamp:
             payload["from"] = self.start_timestamp
 
         if self.end_timestamp:
             payload["to"] = self.end_timestamp
 
-        if self.gzip is not True:
+        if self.gzip is None:
             payload["gzip"] = "false"
 
         if self.match_type:
@@ -126,8 +134,8 @@ class WaybackMachineCDXServerAPI:
         # Don't need to return anything as it's dictionary.
         payload["url"] = self.url
 
-    def snapshots(self):
-        payload = {}
+    def snapshots(self) -> Generator[CDXSnapshot, None, None]:
+        payload: Dict[str, str] = {}
         headers = {"User-Agent": self.user_agent}
 
         self.add_payload(payload)
@@ -152,7 +160,7 @@ class WaybackMachineCDXServerAPI:
                 if len(snapshot) < 46:  # 14 + 32 (timestamp+digest)
                     continue
 
-                properties = {
+                properties: Dict[str, Optional[str]] = {
                     "urlkey": None,
                     "timestamp": None,
                     "original": None,
@@ -190,4 +198,4 @@ class WaybackMachineCDXServerAPI:
                     properties["length"],
                 ) = prop_values
 
-                yield CDXSnapshot(properties)
+                yield CDXSnapshot(cast(Dict[str, str], properties))
diff --git a/waybackpy/cdx_snapshot.py b/waybackpy/cdx_snapshot.py
index 58d4e8b..d8419ea 100644
--- a/waybackpy/cdx_snapshot.py
+++ b/waybackpy/cdx_snapshot.py
@@ -1,7 +1,8 @@
 from datetime import datetime
+from typing import Dict
 
 
-class CDXSnapshot:
+class CDXSnapshot(object):
     """
     Class for the CDX snapshot lines returned by the CDX API,
     Each valid line of the CDX API is casted to an CDXSnapshot object
@@ -10,7 +11,7 @@ class CDXSnapshot:
     of the CDXSnapshot.
     """
 
-    def __init__(self, properties):
+    def __init__(self, properties: Dict[str, str]) -> None:
         self.urlkey = properties["urlkey"]
         self.timestamp = properties["timestamp"]
         self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
@@ -23,7 +24,7 @@ class CDXSnapshot:
             "https://web.archive.org/web/" + self.timestamp + "/" + self.original
         )
 
-    def __str__(self):
+    def __str__(self) -> str:
         return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
             urlkey=self.urlkey,
             timestamp=self.timestamp,
diff --git a/waybackpy/cdx_utils.py b/waybackpy/cdx_utils.py
index 06f043c..b4eff44 100644
--- a/waybackpy/cdx_utils.py
+++ b/waybackpy/cdx_utils.py
@@ -1,23 +1,30 @@
 import re
+from typing import Any, Dict, List, Optional, Union
+from urllib.parse import quote
 
 import requests
 from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
+
+# from urllib3.util.retry import Retry
+from requests.packages.urllib3.util.retry import Retry
 
 from .exceptions import WaybackError
 from .utils import DEFAULT_USER_AGENT
 
 
-def get_total_pages(url, user_agent=DEFAULT_USER_AGENT):
+def get_total_pages(url: str, user_agent: str = DEFAULT_USER_AGENT) -> int:
     endpoint = "https://web.archive.org/cdx/search/cdx?"
     payload = {"showNumPages": "true", "url": str(url)}
     headers = {"User-Agent": user_agent}
     request_url = full_url(endpoint, params=payload)
     response = get_response(request_url, headers=headers)
-    return int(response.text.strip())
+    if isinstance(response, requests.Response):
+        return int(response.text.strip())
+    else:
+        raise response
 
 
-def full_url(endpoint, params):
+def full_url(endpoint: str, params: Dict[str, Any]) -> str:
     if not params:
         return endpoint
     full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
@@ -26,27 +33,25 @@ def full_url(endpoint, params):
         key = "collapse" if key.startswith("collapse") else key
         amp = "" if full_url.endswith("?") else "&"
         full_url = (
-            full_url
-            + amp
-            + "{key}={val}".format(key=key, val=requests.utils.quote(str(val)))
+            full_url + amp + "{key}={val}".format(key=key, val=quote(str(val), safe=""))
         )
     return full_url
 
 
 def get_response(
-    url,
-    headers=None,
-    retries=5,
-    backoff_factor=0.5,
-    no_raise_on_redirects=False,
-):
+    url: str,
+    headers: Optional[Dict[str, str]] = None,
+    retries: int = 5,
+    backoff_factor: float = 0.5,
+    # no_raise_on_redirects=False,
+) -> Union[requests.Response, Exception]:
     session = requests.Session()
-    retries = Retry(
+    retries_ = Retry(
         total=retries,
         backoff_factor=backoff_factor,
         status_forcelist=[500, 502, 503, 504],
     )
-    session.mount("https://", HTTPAdapter(max_retries=retries))
+    session.mount("https://", HTTPAdapter(max_retries=retries_))
 
     try:
         response = session.get(url, headers=headers)
@@ -62,23 +67,18 @@ def get_response(
         raise exc
 
 
-def check_filters(filters):
+def check_filters(filters: List[str]) -> None:
     if not isinstance(filters, list):
         raise WaybackError("filters must be a list.")
 
     # [!]field:regex
     for _filter in filters:
-        try:
+        match = re.search(
+            r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
+            _filter,
+        )
 
-            match = re.search(
-                r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
-                _filter,
-            )
-
-            match.group(1)
-            match.group(2)
-
-        except Exception:
+        if match is None or len(match.groups()) != 2:
 
             exc_message = (
                 "Filter '{_filter}' is not following the cdx filter syntax.".format(
@@ -88,43 +88,38 @@ def check_filters(filters):
             raise WaybackError(exc_message)
 
 
-def check_collapses(collapses):
-
+def check_collapses(collapses: List[str]) -> bool:
     if not isinstance(collapses, list):
         raise WaybackError("collapses must be a list.")
-
-    if len(collapses) == 0:
-        return
+    elif len(collapses) == 0:
+        return True
 
     for collapse in collapses:
-        try:
-            match = re.search(
-                r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?",
-                collapse,
-            )
-            match.group(1)
-            if 2 == len(match.groups()):
-                match.group(2)
-        except Exception:
+        match = re.search(
+            r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?",
+            collapse,
+        )
+        if match is None or len(match.groups()) != 2:
             exc_message = "collapse argument '{collapse}' is not following the cdx collapse syntax.".format(
                 collapse=collapse
             )
             raise WaybackError(exc_message)
+    else:
+        return True
 
 
-def check_match_type(match_type, url):
+def check_match_type(match_type: Optional[str], url: str) -> bool:
+    legal_match_type = ["exact", "prefix", "host", "domain"]
     if not match_type:
-        return
-
-    if "*" in url:
+        return True
+    elif "*" in url:
         raise WaybackError(
             "Can not use wildcard in the URL along with the match_type arguments."
         )
-
-    legal_match_type = ["exact", "prefix", "host", "domain"]
-
-    if match_type not in legal_match_type:
+    elif match_type not in legal_match_type:
         exc_message = "{match_type} is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'".format(
             match_type=match_type
         )
         raise WaybackError(exc_message)
+    else:
+        return True
diff --git a/waybackpy/cli.py b/waybackpy/cli.py
index f1117c2..8fca775 100644
--- a/waybackpy/cli.py
+++ b/waybackpy/cli.py
@@ -3,6 +3,7 @@ import os
 import random
 import re
 import string
+from typing import Generator, List, Optional
 
 import click
 import requests
@@ -163,34 +164,34 @@ from .wrapper import Url
     + "will be printed.",
 )
 def main(
-    url,
-    user_agent,
-    version,
-    license,
-    newest,
-    oldest,
-    json,
-    near,
-    year,
-    month,
-    day,
-    hour,
-    minute,
-    save,
-    headers,
-    known_urls,
-    subdomain,
-    file,
-    cdx,
-    start_timestamp,
-    end_timestamp,
-    filter,
-    match_type,
-    gzip,
-    collapse,
-    limit,
-    cdx_print,
-):
+    url: Optional[str],
+    user_agent: str,
+    version: bool,
+    license: bool,
+    newest: bool,
+    oldest: bool,
+    json: bool,
+    near: bool,
+    year: Optional[int],
+    month: Optional[int],
+    day: Optional[int],
+    hour: Optional[int],
+    minute: Optional[int],
+    save: bool,
+    headers: bool,
+    known_urls: bool,
+    subdomain: bool,
+    file: bool,
+    cdx: bool,
+    start_timestamp: Optional[str],
+    end_timestamp: Optional[str],
+    filter: List[str],
+    match_type: Optional[str],
+    gzip: Optional[str],
+    collapse: List[str],
+    limit: Optional[str],
+    cdx_print: List[str],
+) -> None:
     """\b
                          _                _
                         | |              | |
@@ -244,7 +245,9 @@ def main(
         )
         return
 
-    def echo_availability_api(availability_api_instance):
+    def echo_availability_api(
+        availability_api_instance: WaybackMachineAvailabilityAPI,
+    ) -> None:
         click.echo("Archive URL:")
         if not availability_api_instance.archive_url:
             archive_url = (
@@ -295,13 +298,14 @@ def main(
             click.echo(save_api.headers)
         return
 
-    def save_urls_on_file(url_gen):
+    def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
         domain = None
         sys_random = random.SystemRandom()
         uid = "".join(
             sys_random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
         )
         url_count = 0
+        file_name = None
 
         for url in url_gen:
             url_count += 1
@@ -310,7 +314,7 @@ def main(
 
                 domain = "domain-unknown"
 
-                if match:
+                if match is not None:
                     domain = match.group(1)
 
                 file_name = "{domain}-urls-{uid}.txt".format(domain=domain, uid=uid)
@@ -318,12 +322,12 @@ def main(
                 if not os.path.isfile(file_path):
                     open(file_path, "w+").close()
 
-            with open(file_path, "a") as f:
-                f.write("{url}\n".format(url=url))
+                with open(file_path, "a") as f:
+                    f.write("{url}\n".format(url=url))
 
             click.echo(url)
 
-        if url_count > 0:
+        if url_count > 0 or file_name is not None:
             click.echo(
                 "\n\n'{file_name}' saved in current working directory".format(
                     file_name=file_name
diff --git a/waybackpy/exceptions.py b/waybackpy/exceptions.py
index 8e75aea..53f00c2 100644
--- a/waybackpy/exceptions.py
+++ b/waybackpy/exceptions.py
@@ -14,6 +14,8 @@ class WaybackError(Exception):
      All other exceptions are inherited from this class.
     """
 
+    pass
+
 
 class RedirectSaveError(WaybackError):
     """
@@ -21,32 +23,44 @@ class RedirectSaveError(WaybackError):
     redirect URL is archived but not the original URL.
     """
 
+    pass
+
 
 class URLError(Exception):
     """
     Raised when malformed URLs are passed as arguments.
     """
 
+    pass
+
 
 class MaximumRetriesExceeded(WaybackError):
     """
     MaximumRetriesExceeded
     """
 
+    pass
+
 
 class MaximumSaveRetriesExceeded(MaximumRetriesExceeded):
     """
     MaximumSaveRetriesExceeded
     """
 
+    pass
+
 
 class ArchiveNotInAvailabilityAPIResponse(WaybackError):
     """
     Could not parse the archive in the JSON response of the availability API.
     """
 
+    pass
+
 
 class InvalidJSONInAvailabilityAPIResponse(WaybackError):
     """
     availability api returned invalid JSON
     """
+
+    pass
diff --git a/waybackpy/save_api.py b/waybackpy/save_api.py
index 530e03a..af71dee 100644
--- a/waybackpy/save_api.py
+++ b/waybackpy/save_api.py
@@ -1,38 +1,42 @@
 import re
 import time
 from datetime import datetime
+from typing import Dict, Optional
 
 import requests
 from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
+
+# from urllib3.util.retry import Retry
+from requests.packages.urllib3.util.retry import Retry
 
 from .exceptions import MaximumSaveRetriesExceeded
 from .utils import DEFAULT_USER_AGENT
 
 
-class WaybackMachineSaveAPI:
-
+class WaybackMachineSaveAPI(object):
     """
     WaybackMachineSaveAPI class provides an interface for saving URLs on the
     Wayback Machine.
     """
 
-    def __init__(self, url, user_agent=DEFAULT_USER_AGENT, max_tries=8):
+    def __init__(
+        self, url: str, user_agent: str = DEFAULT_USER_AGENT, max_tries: int = 8
+    ) -> None:
         self.url = str(url).strip().replace(" ", "%20")
         self.request_url = "https://web.archive.org/save/" + self.url
         self.user_agent = user_agent
-        self.request_headers = {"User-Agent": self.user_agent}
+        self.request_headers: Dict[str, str] = {"User-Agent": self.user_agent}
         if max_tries < 1:
             raise ValueError("max_tries should be positive")
         self.max_tries = max_tries
         self.total_save_retries = 5
         self.backoff_factor = 0.5
         self.status_forcelist = [500, 502, 503, 504]
-        self._archive_url = None
+        self._archive_url: Optional[str] = None
         self.instance_birth_time = datetime.utcnow()
 
     @property
-    def archive_url(self):
+    def archive_url(self) -> str:
         """
         Returns the archive URL is already cached by _archive_url
         else invoke the save method to save the archive which returns the
@@ -44,7 +48,7 @@ class WaybackMachineSaveAPI:
         else:
             return self.save()
 
-    def get_save_request_headers(self):
+    def get_save_request_headers(self) -> None:
         """
         Creates a session and tries 'retries' number of times to
         retrieve the archive.
@@ -61,21 +65,21 @@ class WaybackMachineSaveAPI:
         the response URL yourself in the browser.
         """
         session = requests.Session()
-        retries = Retry(
+        retries_ = Retry(
             total=self.total_save_retries,
             backoff_factor=self.backoff_factor,
             status_forcelist=self.status_forcelist,
         )
-        session.mount("https://", HTTPAdapter(max_retries=retries))
+        session.mount("https://", HTTPAdapter(max_retries=retries_))
         self.response = session.get(self.request_url, headers=self.request_headers)
-        self.headers = (
-            self.response.headers
-        )  # <class 'requests.structures.CaseInsensitiveDict'>
+        # requests.Response.headers is a requests.structures.CaseInsensitiveDict
+        self.headers = self.response.headers
+        self.headers_str = str(self.headers)
         self.status_code = self.response.status_code
         self.response_url = self.response.url
         session.close()
 
-    def archive_url_parser(self):
+    def archive_url_parser(self) -> Optional[str]:
         """
         Three regexen (like oxen?) are used to search for the
         archive URL in the headers and finally look in the response URL
@@ -83,18 +87,18 @@ class WaybackMachineSaveAPI:
         """
 
         regex1 = r"Content-Location: (/web/[0-9]{14}/.*)"
-        match = re.search(regex1, str(self.headers))
+        match = re.search(regex1, self.headers_str)
         if match:
             return "https://web.archive.org" + match.group(1)
 
         regex2 = r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>"
-        match = re.search(regex2, str(self.headers))
-        if match:
+        match = re.search(regex2, self.headers_str)
+        if match is not None and len(match.groups()) == 1:
             return "https://" + match.group(1)
 
         regex3 = r"X-Cache-Key:\shttps(.*)[A-Z]{2}"
-        match = re.search(regex3, str(self.headers))
-        if match:
+        match = re.search(regex3, self.headers_str)
+        if match is not None and len(match.groups()) == 1:
             return "https" + match.group(1)
 
         if self.response_url:
@@ -105,7 +109,9 @@ class WaybackMachineSaveAPI:
                 if match:
                     return "https://" + match.group(0)
 
-    def sleep(self, tries):
+        return None
+
+    def sleep(self, tries: int) -> None:
         """
         Ensure that the we wait some time before succesive retries so that we
         don't waste the retries before the page is even captured by the Wayback
@@ -120,7 +126,7 @@ class WaybackMachineSaveAPI:
             sleep_seconds = 10
         time.sleep(sleep_seconds)
 
-    def timestamp(self):
+    def timestamp(self) -> datetime:
         """
         Read the timestamp off the archive URL and convert the Wayback Machine
         timestamp to datetime object.
@@ -133,9 +139,10 @@ class WaybackMachineSaveAPI:
         didn't serve a Cached URL. It is quite common for the Wayback Machine to serve
         cached archive if last archive was captured before last 45 minutes.
         """
-        m = re.search(
-            r"https?://web\.archive.org/web/([0-9]{14})/http", self._archive_url
-        )
+        regex = r"https?://web\.archive.org/web/([0-9]{14})/http"
+        m = re.search(regex, str(self._archive_url))
+        if m is None or len(m.groups()) != 1:
+            raise ValueError("Could not find get timestamp")
         string_timestamp = m.group(1)
         timestamp = datetime.strptime(string_timestamp, "%Y%m%d%H%M%S")
 
@@ -149,7 +156,7 @@ class WaybackMachineSaveAPI:
 
         return timestamp
 
-    def save(self):
+    def save(self) -> str:
         """
         Calls the SavePageNow API of the Wayback Machine with required parameters
         and headers to save the URL.
@@ -169,7 +176,7 @@ class WaybackMachineSaveAPI:
                 self.get_save_request_headers()
                 self.saved_archive = self.archive_url_parser()
 
-                if self.saved_archive is not None:
+                if isinstance(self.saved_archive, str):
                     self._archive_url = self.saved_archive
                     self.timestamp()
                     return self.saved_archive
@@ -179,5 +186,5 @@ class WaybackMachineSaveAPI:
                 raise MaximumSaveRetriesExceeded(
                     "Tried %s times but failed to save and retrieve the" % str(tries)
                     + " archive for %s.\nResponse URL:\n%s \nResponse Header:\n%s\n"
-                    % (self.url, self.response_url, str(self.headers)),
+                    % (self.url, self.response_url, self.headers_str),
                 )
diff --git a/waybackpy/utils.py b/waybackpy/utils.py
index 7201403..ac95a4e 100644
--- a/waybackpy/utils.py
+++ b/waybackpy/utils.py
@@ -2,22 +2,43 @@ import requests
 
 from . import __version__
 
-DEFAULT_USER_AGENT = "waybackpy %s - https://github.com/akamhy/waybackpy" % __version__
+DEFAULT_USER_AGENT: str = (
+    "waybackpy %s - https://github.com/akamhy/waybackpy" % __version__
+)
 
 
-def latest_version_pypi(package_name, user_agent=DEFAULT_USER_AGENT):
+def latest_version_pypi(package_name: str, user_agent: str = DEFAULT_USER_AGENT) -> str:
     request_url = "https://pypi.org/pypi/" + package_name + "/json"
     headers = {"User-Agent": user_agent}
     response = requests.get(request_url, headers=headers)
     data = response.json()
-    return data["info"]["version"]
+    if (
+        data is not None
+        and "info" in data
+        and data["info"] is not None
+        and "version" in data["info"]
+        and data["info"]["version"] is not None
+    ):
+        return str(data["info"]["version"])
+    else:
+        raise ValueError("Could not get latest pypi version")
 
 
-def latest_version_github(package_name, user_agent=DEFAULT_USER_AGENT):
+def latest_version_github(
+    package_name: str, user_agent: str = DEFAULT_USER_AGENT
+) -> str:
     request_url = (
         "https://api.github.com/repos/akamhy/" + package_name + "/releases?per_page=1"
     )
     headers = {"User-Agent": user_agent}
     response = requests.get(request_url, headers=headers)
     data = response.json()
-    return data[0]["tag_name"]
+    if (
+        data is not None
+        and len(data) > 0
+        and data[0] is not None
+        and "tag_name" in data[0]
+    ):
+        return str(data[0]["tag_name"])
+    else:
+        raise ValueError("Could not get latest github version")
diff --git a/waybackpy/wrapper.py b/waybackpy/wrapper.py
index 3121b77..91d1d92 100644
--- a/waybackpy/wrapper.py
+++ b/waybackpy/wrapper.py
@@ -1,4 +1,5 @@
 from datetime import datetime, timedelta
+from typing import Generator, Optional
 
 from .availability_api import WaybackMachineAvailabilityAPI
 from .cdx_api import WaybackMachineCDXServerAPI
@@ -19,35 +20,37 @@ the older interface code.
 """
 
 
-class Url:
-    def __init__(self, url, user_agent=DEFAULT_USER_AGENT):
+class Url(object):
+    def __init__(self, url: str, user_agent: str = DEFAULT_USER_AGENT) -> None:
         self.url = url
         self.user_agent = str(user_agent)
-        self.archive_url = None
-        self.timestamp = None
+        self.archive_url: Optional[str] = None
+        self.timestamp: Optional[datetime] = None
         self.wayback_machine_availability_api = WaybackMachineAvailabilityAPI(
             self.url, user_agent=self.user_agent
         )
 
-    def __str__(self):
+    def __str__(self) -> str:
         if not self.archive_url:
             self.newest()
-        return self.archive_url
+        return str(self.archive_url)
 
-    def __len__(self):
+    def __len__(self) -> int:
         td_max = timedelta(
             days=999999999, hours=23, minutes=59, seconds=59, microseconds=999999
         )
 
-        if not self.timestamp:
+        if not isinstance(self.timestamp, datetime):
             self.oldest()
 
-        if self.timestamp == datetime.max:
+        if not isinstance(self.timestamp, datetime):
+            raise TypeError("timestamp must be a datetime")
+        elif self.timestamp == datetime.max:
             return td_max.days
+        else:
+            return (datetime.utcnow() - self.timestamp).days
 
-        return (datetime.utcnow() - self.timestamp).days
-
-    def save(self):
+    def save(self) -> "Url":
         self.wayback_machine_save_api = WaybackMachineSaveAPI(
             self.url, user_agent=self.user_agent
         )
@@ -58,13 +61,13 @@ class Url:
 
     def near(
         self,
-        year=None,
-        month=None,
-        day=None,
-        hour=None,
-        minute=None,
-        unix_timestamp=None,
-    ):
+        year: Optional[int] = None,
+        month: Optional[int] = None,
+        day: Optional[int] = None,
+        hour: Optional[int] = None,
+        minute: Optional[int] = None,
+        unix_timestamp: Optional[int] = None,
+    ) -> "Url":
 
         self.wayback_machine_availability_api.near(
             year=year,
@@ -77,22 +80,24 @@ class Url:
         self.set_availability_api_attrs()
         return self
 
-    def oldest(self):
+    def oldest(self) -> "Url":
         self.wayback_machine_availability_api.oldest()
         self.set_availability_api_attrs()
         return self
 
-    def newest(self):
+    def newest(self) -> "Url":
         self.wayback_machine_availability_api.newest()
         self.set_availability_api_attrs()
         return self
 
-    def set_availability_api_attrs(self):
+    def set_availability_api_attrs(self) -> None:
         self.archive_url = self.wayback_machine_availability_api.archive_url
         self.JSON = self.wayback_machine_availability_api.JSON
         self.timestamp = self.wayback_machine_availability_api.timestamp()
 
-    def total_archives(self, start_timestamp=None, end_timestamp=None):
+    def total_archives(
+        self, start_timestamp: Optional[str] = None, end_timestamp: Optional[str] = None
+    ) -> int:
         cdx = WaybackMachineCDXServerAPI(
             self.url,
             user_agent=self.user_agent,
@@ -107,12 +112,12 @@ class Url:
 
     def known_urls(
         self,
-        subdomain=False,
-        host=False,
-        start_timestamp=None,
-        end_timestamp=None,
-        match_type="prefix",
-    ):
+        subdomain: bool = False,
+        host: bool = False,
+        start_timestamp: Optional[str] = None,
+        end_timestamp: Optional[str] = None,
+        match_type: str = "prefix",
+    ) -> Generator[str, None, None]:
         if subdomain:
             match_type = "domain"
         if host: