From db8f902cfff592f07386bcc92c7910ccb50c98d0 Mon Sep 17 00:00:00 2001
From: Akash Mahanty <akamhy@yahoo.com>
Date: Tue, 26 Jan 2021 11:56:03 +0530
Subject: [PATCH] Add doc strings (#90)

* Added some docstrings in utils.py

* renamed some func/meth to better names and added doc strings + lint

* added more docstrings

* more docstrings

* improve docstrings

* docstrings

* added more docstrings, lint

* fix import error
---
 tests/test_cdx.py     |   4 +-
 tests/test_cli.py     |   3 +-
 tests/test_utils.py   |  12 +-
 tests/test_wrapper.py |   4 -
 waybackpy/cdx.py      |  23 +++-
 waybackpy/cli.py      |   1 +
 waybackpy/snapshot.py |  19 +++-
 waybackpy/utils.py    | 252 ++++++++++++++++++++++++++++++++++--------
 waybackpy/wrapper.py  | 246 ++++++++++++++++++++++++++++++++---------
 9 files changed, 443 insertions(+), 121 deletions(-)

diff --git a/tests/test_cdx.py b/tests/test_cdx.py
index 887afd7..fdc6bbe 100644
--- a/tests/test_cdx.py
+++ b/tests/test_cdx.py
@@ -79,7 +79,7 @@ def test_all_cdx():
     c = 0
     for snapshot in snapshots:
         c += 1
-        if c > 30_529:  # deafult limit is 10k
+        if c > 30529:  # default limit is 10k
             break
 
     url = "https://github.com/*"
@@ -89,5 +89,5 @@ def test_all_cdx():
 
     for snapshot in snapshots:
         c += 1
-        if c > 100_529:
+        if c > 100529:
             break
diff --git a/tests/test_cli.py b/tests/test_cli.py
index d8593c7..f788c2e 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -5,8 +5,7 @@ import random
 import string
 import argparse
 
-sys.path.append("..")
-import waybackpy.cli as cli  # noqa: E402
+import waybackpy.cli as cli
 from waybackpy.wrapper import Url  # noqa: E402
 from waybackpy.__version__ import __version__
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 08cfaec..4c869d7 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -14,14 +14,14 @@ from waybackpy.utils import (
     _check_match_type,
     _check_collapses,
     _check_filters,
-    _ts,
+    _timestamp_manager,
 )
 
 
-def test_ts():
+def test_timestamp_manager():
     timestamp = True
     data = {}
-    assert _ts(timestamp, data)
+    assert _timestamp_manager(timestamp, data)
 
     data = """
     {"archived_snapshots": {"closest": {"timestamp": "20210109155628", "available": true, "status": "200", "url": "http://web.archive.org/web/20210109155628/https://www.google.com/"}}, "url": "https://www.google.com/"}
@@ -61,10 +61,10 @@ def test_check_collapses():
 
 
 def test_check_match_type():
-    assert None == _check_match_type(None, "url")
+    assert _check_match_type(None, "url") is None
     match_type = "exact"
     url = "test_url"
-    assert None == _check_match_type(match_type, url)
+    assert _check_match_type(match_type, url) is None
 
     url = "has * in it"
     with pytest.raises(WaybackError):
@@ -82,7 +82,7 @@ def test_cleaned_url():
 
 def test_url_check():
     good_url = "https://akamhy.github.io"
-    assert None == _url_check(good_url)
+    assert _url_check(good_url) is None
 
     bad_url = "https://github-com"
     with pytest.raises(URLError):
diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py
index 359ba91..100608f 100644
--- a/tests/test_wrapper.py
+++ b/tests/test_wrapper.py
@@ -1,8 +1,4 @@
-import sys
 import pytest
-import random
-import requests
-from datetime import datetime
 
 from waybackpy.wrapper import Url
 
diff --git a/waybackpy/cdx.py b/waybackpy/cdx.py
index 3ce30bf..b2295c7 100644
--- a/waybackpy/cdx.py
+++ b/waybackpy/cdx.py
@@ -11,6 +11,7 @@ from .utils import (
 )
 
 # TODO : Threading support for pagination API. It's designed for Threading.
+# TODO : Add get method here if type is valid HTML, SVG or other, but not - or warc. Test it.
 
 
 class Cdx:
@@ -42,7 +43,22 @@ class Cdx:
         self.use_page = False
 
     def cdx_api_manager(self, payload, headers, use_page=False):
-        """
+        """Act as button, we can choose between the normal API and pagination API.
+
+        Parameters
+        ----------
+        self : waybackpy.cdx.Cdx
+            The instance itself
+
+        payload : dict
+            Get request parameters name value pairs
+
+        headers : dict
+            The headers for making the GET request.
+
+        use_page : bool
+            If True use pagination API else use normal resume key based API.
+
         We have two options to get the snapshots, we use this
         method to make a selection between pagination API and
         the normal one with Resumption Key, sequential querying
@@ -141,7 +157,7 @@ class Cdx:
     def snapshots(self):
         """
         This function yeilds snapshots encapsulated
-        in CdxSnapshot for more usability.
+        in CdxSnapshot for increased usability.
 
         All the get request values are set if the conditions match
 
@@ -188,10 +204,9 @@ class Cdx:
 
                 prop_values = snapshot.split(" ")
 
-                # Making sure that we get the same number of
-                # property values as the number of properties
                 prop_values_len = len(prop_values)
                 properties_len = len(properties)
+
                 if prop_values_len != properties_len:
                     raise WaybackError(
                         "Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(
diff --git a/waybackpy/cli.py b/waybackpy/cli.py
index adbf1aa..45f305a 100644
--- a/waybackpy/cli.py
+++ b/waybackpy/cli.py
@@ -5,6 +5,7 @@ import json
 import random
 import string
 import argparse
+
 from .wrapper import Url
 from .exceptions import WaybackError
 from .__version__ import __version__
diff --git a/waybackpy/snapshot.py b/waybackpy/snapshot.py
index 992ad2e..e3dc027 100644
--- a/waybackpy/snapshot.py
+++ b/waybackpy/snapshot.py
@@ -3,15 +3,24 @@ from datetime import datetime
 
 class CdxSnapshot:
     """
-    This class helps to use the Cdx Snapshots easily.
+    This class encapsulates the snapshots for greater usability.
 
     Raw Snapshot data looks like:
     org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
 
-    properties is a dict containg all of the 7 cdx snapshot properties.
     """
 
     def __init__(self, properties):
+        """
+        Parameters
+        ----------
+        self : waybackpy.snapshot.CdxSnapshot
+            The instance itself
+
+        properties : dict
+            Properties is a dict containing all of the 7 cdx snapshot properties.
+
+        """
         self.urlkey = properties["urlkey"]
         self.timestamp = properties["timestamp"]
         self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
@@ -25,6 +34,12 @@ class CdxSnapshot:
         )
 
     def __str__(self):
+        """Returns the Cdx snapshot line.
+
+        Output format:
+        org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
+
+        """
         return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
             urlkey=self.urlkey,
             timestamp=self.timestamp,
diff --git a/waybackpy/utils.py b/waybackpy/utils.py
index 8bfee70..7c6958d 100644
--- a/waybackpy/utils.py
+++ b/waybackpy/utils.py
@@ -1,28 +1,72 @@
 import re
 import time
 import requests
-from .exceptions import WaybackError, URLError
 from datetime import datetime
 
+from .exceptions import WaybackError, URLError
+from .__version__ import __version__
+
 from urllib3.util.retry import Retry
 from requests.adapters import HTTPAdapter
-from .__version__ import __version__
 
 quote = requests.utils.quote
 default_user_agent = "waybackpy python package - https://github.com/akamhy/waybackpy"
 
 
 def _latest_version(package_name, headers):
-    endpoint = "https://pypi.org/pypi/" + package_name + "/json"
-    json = _get_response(endpoint, headers=headers).json()
-    return json["info"]["version"]
+    """Returns the latest version of package_name.
+
+    Parameters
+    ----------
+    package_name : str
+        The name of the python package
+
+    headers : dict
+        Headers that will be used while making get requests
+
+    Return type is str
+
+    Use API <https://pypi.org/pypi/> to get the latest version of
+    waybackpy, but can be used to get latest version of any package
+    on PyPi.
+    """
+
+    request_url = "https://pypi.org/pypi/" + package_name + "/json"
+    response = _get_response(request_url, headers=headers)
+    data = response.json()
+    return data["info"]["version"]
 
 
-def _unix_ts_to_wayback_ts(unix_ts):
-    return datetime.utcfromtimestamp(int(unix_ts)).strftime("%Y%m%d%H%M%S")
+def _unix_timestamp_to_wayback_timestamp(unix_timestamp):
+    """Returns unix timestamp converted to a Wayback Machine timestamp.
+
+    Parameters
+    ----------
+    unix_timestamp : str, int or float
+        Unix-timestamp that needs to be converted to a wayback timestamp
+
+    Converts the input unix_timestamp to a 14 digit (YYYYMMDDhhmmss) UTC
+    string. Does not matter if unix_timestamp is str, float or int.
+    """
+
+    return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
 
 
 def _add_payload(instance, payload):
+    """Adds payload from instance that can be used to make get requests.
+
+    Parameters
+    ----------
+    instance : waybackpy.cdx.Cdx
+        instance of the Cdx class
+
+    payload : dict
+        A dict onto which we need to add keys and values based on instance.
+
+    instance is object of Cdx class and it contains the data required to fill
+    the payload dictionary.
+    """
+
     if instance.start_timestamp:
         payload["from"] = instance.start_timestamp
 
@@ -43,18 +87,27 @@ def _add_payload(instance, payload):
         for i, f in enumerate(instance.collapses):
             payload["collapse" + str(i)] = f
 
+    # No need to return anything: payload is a dict and is mutated in place.
     payload["url"] = instance.url
 
 
-def _ts(timestamp, data):
-    """
-    Get timestamp of last fetched archive.
-    If used before fetching any archive, will
-    use whatever self.JSON returns.
+def _timestamp_manager(timestamp, data):
+    """Returns the timestamp.
 
-    self.timestamp is None implies that
-    self.JSON will return any archive's JSON
-    that wayback machine provides it.
+    Parameters
+    ----------
+    timestamp : datetime.datetime
+        datetime object
+
+    data : dict
+        A python dictionary, which is loaded JSON of the availability API.
+
+    Return type:
+        datetime.datetime
+
+    If timestamp is not None then sets the value to timestamp itself.
+    If timestamp is None then returns the value from the last fetched API data.
+    If not timestamp and can not read the archived_snapshots from data return datetime.max
     """
 
     if timestamp:
@@ -69,6 +122,21 @@ def _ts(timestamp, data):
 
 
 def _check_match_type(match_type, url):
+    """Checks the validity of match_type parameter of the CDX GET requests.
+
+    Parameters
+    ----------
+    match_type : str
+        One of "exact", "prefix", "host" or "domain".
+        See https://github.com/akamhy/waybackpy/wiki/Python-package-docs#url-match-scope
+
+    url : str
+        The URL used to create the waybackpy Url object.
+
+    If match_type is not valid, raise an exception.
+
+    """
+
     if not match_type:
         return
 
@@ -85,6 +153,19 @@ def _check_match_type(match_type, url):
 
 
 def _check_collapses(collapses):
+    """Checks the validity of collapse parameter of the CDX GET request.
+
+    One or more field or field:N to 'collapses=[]' where
+    field is one of (urlkey, timestamp, original, mimetype, statuscode,
+    digest and length) and N is the first N characters of field to test.
+
+    Parameters
+    ----------
+    collapses : list
+
+    If collapses is not valid, raise an exception.
+
+    """
 
     if not isinstance(collapses, list):
         raise WaybackError("collapses must be a list.")
@@ -119,12 +200,26 @@ def _check_collapses(collapses):
 
 
 def _check_filters(filters):
+    """Checks the validity of filter parameter of the CDX GET request.
+
+    Any number of filter params of the following form may be specified:
+        filters=["[!]field:regex"]
+
+    Parameters
+    ----------
+    filters : list
+
+    If filters is not valid, raise an exception.
+
+    """
+
     if not isinstance(filters, list):
         raise WaybackError("filters must be a list.")
 
     # [!]field:regex
     for _filter in filters:
         try:
+
             match = re.search(
                 r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
                 _filter,
@@ -134,8 +229,9 @@ def _check_filters(filters):
             val = match.group(2)
 
         except Exception:
+
             exc_message = (
-                "Filter '{_filter}' not following the cdx filter syntax.".format(
+                "Filter '{_filter}' is not following the cdx filter syntax.".format(
                     _filter=_filter
                 )
             )
@@ -143,6 +239,9 @@ def _check_filters(filters):
 
 
 def _cleaned_url(url):
+    """Sanitize the URL.
+    Strip surrounding whitespace and replace spaces in the URL with %20.
+    """
     return str(url).strip().replace(" ", "%20")
 
 
@@ -161,16 +260,29 @@ def _url_check(url):
 
 
 def _full_url(endpoint, params):
-    full_url = endpoint
-    if params:
-        full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
-        for key, val in params.items():
-            key = "filter" if key.startswith("filter") else key
-            key = "collapse" if key.startswith("collapse") else key
-            amp = "" if full_url.endswith("?") else "&"
-            full_url = (
-                full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
-            )
+    """API endpoint + GET parameters = full_url
+
+    Parameters
+    ----------
+    endpoint : str
+        The API endpoint
+
+    params : dict
+        Dictionary that has name-value pairs.
+
+    Return type is str
+
+    """
+
+    if not params:
+        return endpoint
+
+    full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
+    for key, val in params.items():
+        key = "filter" if key.startswith("filter") else key
+        key = "collapse" if key.startswith("collapse") else key
+        amp = "" if full_url.endswith("?") else "&"
+        full_url = full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
     return full_url
 
 
@@ -191,17 +303,31 @@ def _get_total_pages(url, user_agent):
 
 
 def _archive_url_parser(header, url, latest_version=__version__, instance=None):
-    """
+    """Returns the archive after parsing it from the response header.
+
+    Parameters
+    ----------
+    header : str
+        The response header of WayBack Machine's Save API
+
+    url : str
+        The input url, the one used to create the Url object.
+
+    latest_version : str
+        The latest version of waybackpy (default is __version__)
+
+    instance : waybackpy.wrapper.Url
+        Instance of Url class
+
+
     The wayback machine's save API doesn't
     return JSON response, we are required
     to read the header of the API response
-    and look for the archive URL.
+    and find the archive URL.
 
-    This method has some regexen (or regexes)
-    that search for archive url in header.
-
-    This method is used when you try to
-    save a webpage on wayback machine.
+    This method has some regular expressions
+    that are used to search for the archive url
+    in the response header of Save API.
 
     Two cases are possible:
     1) Either we find the archive url in
@@ -213,7 +339,6 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
     If we found the archive URL we return it.
 
     Return format:
-
     web.archive.org/web/<TIMESTAMP>/<URL>
 
     And if we couldn't find it, we raise
@@ -304,9 +429,7 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
 
 
 def _wayback_timestamp(**kwargs):
-    """
-    Wayback Machine archive URLs
-    have a timestamp in them.
+    """Returns a valid waybackpy timestamp.
 
     The standard archive URL format is
     https://web.archive.org/web/20191214041711/https://www.youtube.com
@@ -316,13 +439,17 @@ def _wayback_timestamp(**kwargs):
     2 ) timestamp (20191214041711)
     3 ) https://www.youtube.com, the original URL
 
-    The near method takes year, month, day, hour and minute
-    as Arguments, their type is int.
+
+    The near method of Url class in wrapper.py takes year, month, day, hour
+    and minute as arguments, their type is int.
 
     This method takes those integers and converts it to
     wayback machine timestamp and returns it.
 
-    Return format is string.
+
+    zfill(2) adds 1 zero in front of single digit days, months hour etc.
+
+    Return type is string.
     """
 
     return "".join(
@@ -339,16 +466,37 @@ def _get_response(
     backoff_factor=0.5,
     no_raise_on_redirects=False,
 ):
-    """
-    This function is used make get request.
-    We use the requests package to make the
-    requests.
+    """Makes get requests.
+
+    Parameters
+    ----------
+    endpoint : str
+        The API endpoint.
+
+    params : dict
+        The get request parameters. (default is None)
+
+    headers : dict
+        Headers for the get request. (default is None)
+
+    return_full_url : bool
+        Determines whether the full URL of the request is returned along
+        with the response, as a (url, response) tuple. (default is False)
+
+    retries : int
+        Maximum number of retries for the get request. (default is 5)
+
+    backoff_factor : float
+        The factor by which we determine the next retry time after wait.
+        https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html
+        (default is 0.5)
+
+    no_raise_on_redirects : bool
+        If redirected more than 30 times (the default limit of requests),
+        return None instead of raising an exception. (default is False)
 
 
-    We try five times and if it fails it raises
-    WaybackError exception.
-
-    You can handles WaybackError by importing:
+    To handle WaybackError:
     from waybackpy.exceptions import WaybackError
 
     try:
@@ -370,20 +518,28 @@ def _get_response(
 
     s.mount("https://", HTTPAdapter(max_retries=retries))
 
+    # The URL with parameters required for the get request
     url = _full_url(endpoint, params)
 
     try:
+
         if not return_full_url:
             return s.get(url, headers=headers)
+
         return (url, s.get(url, headers=headers))
+
     except Exception as e:
+
         reason = str(e)
+
         if no_raise_on_redirects:
             if "Exceeded 30 redirects" in reason:
                 return
+
         exc_message = "Error while retrieving {url}.\n{reason}".format(
             url=url, reason=reason
         )
+
         exc = WaybackError(exc_message)
         exc.__cause__ = e
         raise exc
diff --git a/waybackpy/wrapper.py b/waybackpy/wrapper.py
index 77add29..ef24a81 100644
--- a/waybackpy/wrapper.py
+++ b/waybackpy/wrapper.py
@@ -1,5 +1,6 @@
 import re
 from datetime import datetime, timedelta
+
 from .exceptions import WaybackError
 from .cdx import Cdx
 from .utils import (
@@ -9,13 +10,85 @@ from .utils import (
     default_user_agent,
     _url_check,
     _cleaned_url,
-    _ts,
-    _unix_ts_to_wayback_ts,
+    _timestamp_manager,
+    _unix_timestamp_to_wayback_timestamp,
     _latest_version,
 )
 
 
 class Url:
+    """
+
+    Attributes
+    ----------
+    url : str
+        The input URL, wayback machine API operations are performed
+        on this URL after sanitizing it.
+
+    user_agent : str
+        The user_agent used while making the GET requests to the
+        Wayback machine APIs
+
+    _archive_url : str
+        Caches the last fetched archive.
+
+    timestamp : datetime.datetime
+        timestamp of the archive URL as datetime object for
+        greater usability
+
+    _JSON : dict
+        Caches the last fetched availability API data
+
+    latest_version : str
+        The latest version of waybackpy on PyPi
+
+    cached_save : bool
+        Flag to check if WayBack machine returned a cached
+        archive instead of creating a new archive. WayBack
+        machine allows only one archive for an URL in
+        30 minutes. If the archive returned by WayBack machine
+        is older than 3 minutes then this flag is set to True
+
+    Methods turned properties
+    ----------
+    JSON : dict
+        JSON response of availability API as dictionary / loaded JSON
+
+    archive_url : str
+        Return the archive url, returns str
+
+    _timestamp : datetime.datetime
+        Timestamp of the archive as returned by _timestamp_manager
+
+    Methods
+    -------
+    save()
+        Archives the URL on WayBack machine
+
+    get(url="", user_agent="", encoding="")
+        Gets the source of archive url, can also be used to get source
+        of any URL if passed into it.
+
+    near(year=None, month=None, day=None, hour=None, minute=None, unix_timestamp=None)
+        Wayback Machine can have many archives for a URL/webpage, sometimes we want
+        archive close to a specific time.
+        This method takes year, month, day, hour, minute and unix_timestamp as input.
+
+    oldest(year=1994)
+        The oldest archive of an URL.
+
+    newest()
+        The newest archive of an URL
+
+    total_archives(start_timestamp=None, end_timestamp=None)
+        total number of archives of an URL, the timeframe can be confined by
+        start_timestamp and end_timestamp
+
+    known_urls(subdomain=False, host=False, start_timestamp=None, end_timestamp=None, match_type="prefix")
+        Known URLs for an URL, subdomain, URL as prefix etc.
+
+    """
+
     def __init__(self, url, user_agent=default_user_agent):
         self.url = url
         self.user_agent = str(user_agent)
@@ -32,29 +105,17 @@ class Url:
         )
 
     def __str__(self):
-        """
-        Output when print() is used on <class 'waybackpy.wrapper.Url'>
-        This should print an archive URL.
-
-        We check if self._archive_url is not None.
-        If not None, good. We return string of self._archive_url.
-
-        If self._archive_url is None, it means we ain't used any method that
-        sets self._archive_url, we now set self._archive_url to self.archive_url
-        and return it.
-        """
-
         if not self._archive_url:
             self._archive_url = self.archive_url
+
         return "{archive_url}".format(archive_url=self._archive_url)
 
     def __len__(self):
-        """
-        Why do we have len here?
+        """Number of days between today and the date of archive based on the timestamp
 
-        Applying len() on <class 'waybackpy.wrapper.Url'>
-        will calculate the number of days between today and
-        the archive timestamp.
+        len() of waybackpy.wrapper.Url should return
+        the number of days between today and the
+        archive timestamp.
 
         Can be applied on return values of near and its
         childs (e.g. oldest) and if applied on waybackpy.Url()
@@ -76,32 +137,30 @@ class Url:
 
     @property
     def JSON(self):
-        """
-        If the end user has used near() or its childs like oldest, newest
-        and archive_url then the JSON response of these are cached in self._JSON
+        """Returns JSON response of availability API as dictionary / loaded JSON
 
-        If we find that self._JSON is not None we return it.
-        else we get the response of 'https://archive.org/wayback/available?url=YOUR-URL'
-        and return it.
+        return type : dict
         """
 
+        # If user used the near method or any method that depends on near, we
+        # are certain that we have a loaded dictionary cached in self._JSON.
+        # Return the loaded JSON data.
         if self._JSON:
             return self._JSON
 
+        # If no cached data found, get data and return + cache it.
         endpoint = "https://archive.org/wayback/available"
         headers = {"User-Agent": self.user_agent}
         payload = {"url": "{url}".format(url=_cleaned_url(self.url))}
         response = _get_response(endpoint, params=payload, headers=headers)
-        return response.json()
+        self._JSON = response.json()
+        return self._JSON
 
     @property
     def archive_url(self):
-        """
-        Returns any random archive for the instance.
-        But if near, oldest, newest were used before
-        then it returns the same archive again.
+        """Return the archive url.
 
-        We cache archive in self._archive_url
+        return type : str
         """
 
         if self._archive_url:
@@ -121,11 +180,16 @@ class Url:
 
     @property
     def _timestamp(self):
-        self.timestamp = _ts(self.timestamp, self.JSON)
-        return self.timestamp
+        """Returns the timestamp computed by _timestamp_manager.
+
+        Return type : datetime.datetime
+        The value is only returned; this property does not assign self.timestamp.
+        """
+        return _timestamp_manager(self.timestamp, self.JSON)
 
     def save(self):
-        """
+        """Saves/Archive the URL.
+
         To save a webpage on WayBack machine we
         need to send get request to https://web.archive.org/save/
 
@@ -136,6 +200,8 @@ class Url:
 
         _archive_url_parser() parses the archive from the header.
 
+        return type : waybackpy.wrapper.Url
+
         """
         request_url = "https://web.archive.org/save/" + _cleaned_url(self.url)
         headers = {"User-Agent": self.user_agent}
@@ -161,7 +227,9 @@ class Url:
             instance=self,
         )
 
-        m = re.search(r"https?://web.archive.org/web/([0-9]{14})/http", self._archive_url)
+        m = re.search(
+            r"https?://web.archive.org/web/([0-9]{14})/http", self._archive_url
+        )
         str_ts = m.group(1)
         ts = datetime.strptime(str_ts, "%Y%m%d%H%M%S")
         now = datetime.utcnow()
@@ -175,9 +243,22 @@ class Url:
         return self
 
     def get(self, url="", user_agent="", encoding=""):
-        """
-        Return the source code of the last archived URL,
-        if no URL is passed to this method.
+        """GET the source of archive or any other URL.
+
+        url : str, waybackpy.wrapper.Url
+            The method will return the source code of
+            this URL instead of last fetched archive.
+
+        user_agent : str
+            The user_agent for GET request to API
+
+        encoding : str
+            If user is using any other encoding that
+            can't be detected by response.encoding
+
+        Return the source code of the last fetched
+        archive URL if no URL is passed to this method
+        else it returns the source code of url passed.
 
         If encoding is not supplied, it is auto-detected
          from the response itself by requests package.
@@ -213,6 +294,27 @@ class Url:
         unix_timestamp=None,
     ):
         """
+        Parameters
+        ----------
+
+        year : int
+            Archive close to year
+
+        month : int
+            Archive close to month
+
+        day : int
+            Archive close to day
+
+        hour : int
+            Archive close to hour
+
+        minute : int
+            Archive close to minute
+
+        unix_timestamp : str, float or int
+            Archive close to this unix_timestamp
+
         Wayback Machine can have many archives of a webpage,
         sometimes we want archive close to a specific time.
 
@@ -235,7 +337,7 @@ class Url:
         """
 
         if unix_timestamp:
-            timestamp = _unix_ts_to_wayback_ts(unix_timestamp)
+            timestamp = _unix_timestamp_to_wayback_timestamp(unix_timestamp)
         else:
             now = datetime.utcnow().timetuple()
             timestamp = _wayback_timestamp(
@@ -285,28 +387,45 @@ class Url:
 
         We simply pass the year in near() and return it.
         """
+
         return self.near(year=year)
 
     def newest(self):
-        """
-        Return the newest Wayback Machine archive available for this URL.
+        """Return the newest Wayback Machine archive available.
 
-        We return the output of self.near() as it deafults to current utc time.
+        We return the return value of self.near() as it defaults to current UTC time.
 
         Due to Wayback Machine database lag, this may not always be the
         most recent archive.
+
+        return type : waybackpy.wrapper.Url
         """
+
         return self.near()
 
     def total_archives(self, start_timestamp=None, end_timestamp=None):
-        """
+        """Returns the total number of archives for an URL
+
+        Parameters
+        ----------
+        start_timestamp : str
+            1 to 14 digit string of numbers, you are not required to
+            pass a full 14 digit timestamp.
+
+        end_timestamp : str
+            1 to 14 digit string of numbers, you are not required to
+            pass a full 14 digit timestamp.
+
+
+        return type : int
+
+
         A webpage can have multiple archives on the wayback machine
         If someone wants to count the total number of archives of a
         webpage on wayback machine they can use this method.
 
         Returns the total number of Wayback Machine archives for the URL.
 
-        Return type in integer.
         """
 
         cdx = Cdx(
@@ -315,6 +434,8 @@ class Url:
             start_timestamp=start_timestamp,
             end_timestamp=end_timestamp,
         )
+
+        # cdx.snapshots() is generator not list.
         i = 0
         for _ in cdx.snapshots():
             i = i + 1
@@ -328,15 +449,36 @@ class Url:
         end_timestamp=None,
         match_type="prefix",
     ):
-        """
+        """Yields known_urls URLs from the CDX API.
+
+        Parameters
+        ----------
+
+        subdomain : bool
+            If True fetch subdomain URLs along with the host URLs.
+
+        host : bool
+            Only fetch host URLs.
+
+        start_timestamp : str
+            1 to 14 digit string of numbers, you are not required to
+            pass a full 14 digit timestamp.
+
+        end_timestamp : str
+            1 to 14 digit string of numbers, you are not required to
+            pass a full 14 digit timestamp.
+
+        match_type : str
+            One of  (exact, prefix, host and domain)
+
+        yield type : str (the original attribute of each CdxSnapshot)
+
         Yields list of URLs known to exist for given input.
         Defaults to input URL as prefix.
 
-        This method is kept for compatibility, use the Cdx class instead.
-        This method itself depends on Cdx.
-
-         Idea by Mohammed Diaa (https://github.com/mhmdiaa) from:
-         https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050
+        Based on:
+        https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050
+        By Mohammed Diaa (https://github.com/mhmdiaa)
         """
 
         if subdomain:
@@ -353,7 +495,5 @@ class Url:
             collapses=["urlkey"],
         )
 
-        snapshots = cdx.snapshots()
-
-        for snapshot in snapshots:
+        for snapshot in cdx.snapshots():
             yield (snapshot.original)