Add doc strings (#90)

* Added some docstrings in utils.py * renamed some func/meth to better names and added doc strings + lint * added more docstrings * more docstrings * improve docstrings * docstrings * added more docstrings, lint * fix import error
2021-01-26 11:56:03 +05:30
parent 88cda94c0b
commit db8f902cff
9 changed files with 443 additions and 121 deletions
--- a/tests/test_cdx.py
+++ b/tests/test_cdx.py
@@ -79,7 +79,7 @@ def test_all_cdx():
    c = 0
    for snapshot in snapshots:
        c += 1
-        if c > 30_529:  # deafult limit is 10k
+        if c > 30529:  # default limit is 10k
            break

    url = "https://github.com/*"
@@ -89,5 +89,5 @@ def test_all_cdx():

    for snapshot in snapshots:
        c += 1
-        if c > 100_529:
+        if c > 100529:
            break
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -5,8 +5,7 @@ import random
 import string
 import argparse

-sys.path.append("..")
-import waybackpy.cli as cli  # noqa: E402
+import waybackpy.cli as cli
 from waybackpy.wrapper import Url  # noqa: E402
 from waybackpy.__version__ import __version__

--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -14,14 +14,14 @@ from waybackpy.utils import (
    _check_match_type,
    _check_collapses,
    _check_filters,
-    _ts,
+    _timestamp_manager,
 )


-def test_ts():
+def test_timestamp_manager():
    timestamp = True
    data = {}
-    assert _ts(timestamp, data)
+    assert _timestamp_manager(timestamp, data)

    data = """
    {"archived_snapshots": {"closest": {"timestamp": "20210109155628", "available": true, "status": "200", "url": "http://web.archive.org/web/20210109155628/https://www.google.com/"}}, "url": "https://www.google.com/"}
@@ -61,10 +61,10 @@ def test_check_collapses():


 def test_check_match_type():
-    assert None == _check_match_type(None, "url")
+    assert _check_match_type(None, "url") is None
    match_type = "exact"
    url = "test_url"
-    assert None == _check_match_type(match_type, url)
+    assert _check_match_type(match_type, url) is None

    url = "has * in it"
    with pytest.raises(WaybackError):
@@ -82,7 +82,7 @@ def test_cleaned_url():

 def test_url_check():
    good_url = "https://akamhy.github.io"
-    assert None == _url_check(good_url)
+    assert _url_check(good_url) is None

    bad_url = "https://github-com"
    with pytest.raises(URLError):
--- a/tests/test_wrapper.py
+++ b/tests/test_wrapper.py
@@ -1,8 +1,4 @@
-import sys
 import pytest
-import random
-import requests
-from datetime import datetime

 from waybackpy.wrapper import Url

--- a/waybackpy/cdx.py
+++ b/waybackpy/cdx.py
@@ -11,6 +11,7 @@ from .utils import (
 )

 # TODO : Threading support for pagination API. It's designed for Threading.
+# TODO : Add get method here if type is Valid HTML, SVG other but not - or warc. Test it.


 class Cdx:
@@ -42,7 +43,22 @@ class Cdx:
        self.use_page = False

    def cdx_api_manager(self, payload, headers, use_page=False):
-        """
+        """Act as a switch; we can choose between the normal API and pagination API.
+
+        Parameters
+        ----------
+        self : waybackpy.cdx.Cdx
+            The instance itself
+
+        payload : dict
+            Get request parameters name value pairs
+
+        headers : dict
+            The headers for making the GET request.
+
+        use_page : bool
+            If True use pagination API else use normal resume key based API.
+
        We have two options to get the snapshots, we use this
        method to make a selection between pagination API and
        the normal one with Resumption Key, sequential querying
@@ -141,7 +157,7 @@ class Cdx:
    def snapshots(self):
        """
        This function yeilds snapshots encapsulated
-        in CdxSnapshot for more usability.
+        in CdxSnapshot for increased usability.

        All the get request values are set if the conditions match

@@ -188,10 +204,9 @@ class Cdx:

                prop_values = snapshot.split(" ")

-                # Making sure that we get the same number of
-                # property values as the number of properties
                prop_values_len = len(prop_values)
                properties_len = len(properties)
+
                if prop_values_len != properties_len:
                    raise WaybackError(
                        "Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(
--- a/waybackpy/cli.py
+++ b/waybackpy/cli.py
@@ -5,6 +5,7 @@ import json
 import random
 import string
 import argparse
+
 from .wrapper import Url
 from .exceptions import WaybackError
 from .__version__ import __version__
--- a/waybackpy/snapshot.py
+++ b/waybackpy/snapshot.py
@@ -3,15 +3,24 @@ from datetime import datetime

 class CdxSnapshot:
    """
-    This class helps to use the Cdx Snapshots easily.
+    This class encapsulates the snapshots for greater usability.

    Raw Snapshot data looks like:
    org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415

-    properties is a dict containg all of the 7 cdx snapshot properties.
    """

    def __init__(self, properties):
+        """
+        Parameters
+        ----------
+        self : waybackpy.snapshot.CdxSnapshot
+            The instance itself
+
+        properties : dict
+            Properties is a dict containing all of the 7 cdx snapshot properties.
+
+        """
        self.urlkey = properties["urlkey"]
        self.timestamp = properties["timestamp"]
        self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
@@ -25,6 +34,12 @@ class CdxSnapshot:
        )

    def __str__(self):
+        """Returns the Cdx snapshot line.
+
+        Output format:
+        org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
+
+        """
        return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
            urlkey=self.urlkey,
            timestamp=self.timestamp,
--- a/waybackpy/utils.py
+++ b/waybackpy/utils.py
@@ -1,28 +1,72 @@
 import re
 import time
 import requests
-from .exceptions import WaybackError, URLError
 from datetime import datetime

+from .exceptions import WaybackError, URLError
+from .__version__ import __version__
+
 from urllib3.util.retry import Retry
 from requests.adapters import HTTPAdapter
-from .__version__ import __version__

 quote = requests.utils.quote
 default_user_agent = "waybackpy python package - https://github.com/akamhy/waybackpy"


 def _latest_version(package_name, headers):
-    endpoint = "https://pypi.org/pypi/" + package_name + "/json"
-    json = _get_response(endpoint, headers=headers).json()
-    return json["info"]["version"]
+    """Returns the latest version of package_name.
+
+    Parameters
+    ----------
+    package_name : str
+        The name of the python package
+
+    headers : dict
+        Headers that will be used while making get requests
+
+    Return type is str
+
+    Use API <https://pypi.org/pypi/> to get the latest version of
+    waybackpy, but can be used to get latest version of any package
+    on PyPi.
+    """
+
+    request_url = "https://pypi.org/pypi/" + package_name + "/json"
+    response = _get_response(request_url, headers=headers)
+    data = response.json()
+    return data["info"]["version"]


-def _unix_ts_to_wayback_ts(unix_ts):
-    return datetime.utcfromtimestamp(int(unix_ts)).strftime("%Y%m%d%H%M%S")
+def _unix_timestamp_to_wayback_timestamp(unix_timestamp):
+    """Returns unix timestamp converted to a wayback timestamp string.
+
+    Parameters
+    ----------
+    unix_timestamp : str, int or float
+        Unix-timestamp that needs to be converted to a wayback timestamp.
+
+    Converts the input unix_timestamp (UTC) to a 14 digit YYYYMMDDhhmmss string.
+    Does not matter if unix_timestamp is str, float or int.
+    """
+
+    return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")


 def _add_payload(instance, payload):
+    """Adds payload from instance that can be used to make get requests.
+
+    Parameters
+    ----------
+    instance : waybackpy.cdx.Cdx
+        instance of the Cdx class
+
+    payload : dict
+        A dict onto which we need to add keys and values based on instance.
+
+    instance is object of Cdx class and it contains the data required to fill
+    the payload dictionary.
+    """
+
    if instance.start_timestamp:
        payload["from"] = instance.start_timestamp

@@ -43,18 +87,27 @@ def _add_payload(instance, payload):
        for i, f in enumerate(instance.collapses):
            payload["collapse" + str(i)] = f

+    # No need to return anything as payload is a dict and is mutated in place.
    payload["url"] = instance.url


-def _ts(timestamp, data):
-    """
-    Get timestamp of last fetched archive.
-    If used before fetching any archive, will
-    use whatever self.JSON returns.
+def _timestamp_manager(timestamp, data):
+    """Returns the timestamp.

-    self.timestamp is None implies that
-    self.JSON will return any archive's JSON
-    that wayback machine provides it.
+    Parameters
+    ----------
+    timestamp : datetime.datetime
+        datetime object
+
+    data : dict
+        A python dictionary, which is loaded JSON of the availability API.
+
+    Return type:
+        datetime.datetime
+
+    If timestamp is not None then sets the value to timestamp itself.
+    If timestamp is None then returns the value from the last fetched API data.
+    If not timestamp and can not read the archived_snapshots from data return datetime.max
    """

    if timestamp:
@@ -69,6 +122,21 @@ def _ts(timestamp, data):


 def _check_match_type(match_type, url):
+    """Checks the validity of match_type parameter of the CDX GET requests.
+
+    Parameters
+    ----------
+    match_type : str
+        One of "exact", "prefix", "host" or "domain".
+        See https://github.com/akamhy/waybackpy/wiki/Python-package-docs#url-match-scope
+
+    url : str
+        The URL used to create the waybackpy Url object.
+
+    If not valid match_type raise Exception.
+
+    """
+
    if not match_type:
        return

@@ -85,6 +153,19 @@ def _check_match_type(match_type, url):


 def _check_collapses(collapses):
+    """Checks the validity of collapse parameter of the CDX GET request.
+
+    One or more field or field:N to 'collapses=[]' where
+    field is one of (urlkey, timestamp, original, mimetype, statuscode,
+    digest and length) and N is the first N characters of field to test.
+
+    Parameters
+    ----------
+    collapses : list
+
+    If not valid collapses raise Exception.
+
+    """

    if not isinstance(collapses, list):
        raise WaybackError("collapses must be a list.")
@@ -119,12 +200,26 @@ def _check_collapses(collapses):


 def _check_filters(filters):
+    """Checks the validity of filter parameter of the CDX GET request.
+
+    Any number of filter params of the following form may be specified:
+        filters=["[!]field:regex"]
+
+    Parameters
+    ----------
+    filters : list
+
+    If not valid filters raise Exception.
+
+    """
+
    if not isinstance(filters, list):
        raise WaybackError("filters must be a list.")

    # [!]field:regex
    for _filter in filters:
        try:
+
            match = re.search(
                r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
                _filter,
@@ -134,8 +229,9 @@ def _check_filters(filters):
            val = match.group(2)

        except Exception:
+
            exc_message = (
-                "Filter '{_filter}' not following the cdx filter syntax.".format(
+                "Filter '{_filter}' is not following the cdx filter syntax.".format(
                    _filter=_filter
                )
            )
@@ -143,6 +239,9 @@ def _check_filters(filters):


 def _cleaned_url(url):
+    """Sanitize the url
+    Strip surrounding whitespace and replace remaining spaces with %20.
+    """
    return str(url).strip().replace(" ", "%20")


@@ -161,16 +260,29 @@ def _url_check(url):


 def _full_url(endpoint, params):
-    full_url = endpoint
-    if params:
+    """API endpoint + GET parameters = full_url
+
+    Parameters
+    ----------
+    endpoint : str
+        The API endpoint
+
+    params : dict
+        Dictionary that has name-value pairs.
+
+    Return type is str
+
+    """
+
+    if not params:
+        return endpoint
+
    full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
    for key, val in params.items():
        key = "filter" if key.startswith("filter") else key
        key = "collapse" if key.startswith("collapse") else key
        amp = "" if full_url.endswith("?") else "&"
-            full_url = (
-                full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
-            )
+        full_url = full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
    return full_url


@@ -191,17 +303,31 @@ def _get_total_pages(url, user_agent):


 def _archive_url_parser(header, url, latest_version=__version__, instance=None):
-    """
+    """Returns the archive after parsing it from the response header.
+
+    Parameters
+    ----------
+    header : str
+        The response header of WayBack Machine's Save API
+
+    url : str
+        The input url, the one used to create the Url object.
+
+    latest_version : str
+        The latest version of waybackpy (default is __version__)
+
+    instance : waybackpy.wrapper.Url
+        Instance of Url class
+
+
    The wayback machine's save API doesn't
    return JSON response, we are required
    to read the header of the API response
-    and look for the archive URL.
+    and find the archive URL.

-    This method has some regexen (or regexes)
-    that search for archive url in header.
-
-    This method is used when you try to
-    save a webpage on wayback machine.
+    This method has some regular expressions
+    that are used to search for the archive url
+    in the response header of Save API.

    Two cases are possible:
    1) Either we find the archive url in
@@ -213,7 +339,6 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
    If we found the archive URL we return it.

    Return format:
-
    web.archive.org/web/<TIMESTAMP>/<URL>

    And if we couldn't find it, we raise
@@ -304,9 +429,7 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):


 def _wayback_timestamp(**kwargs):
-    """
-    Wayback Machine archive URLs
-    have a timestamp in them.
+    """Returns a valid waybackpy timestamp.

    The standard archive URL format is
    https://web.archive.org/web/20191214041711/https://www.youtube.com
@@ -316,13 +439,17 @@ def _wayback_timestamp(**kwargs):
    2 ) timestamp (20191214041711)
    3 ) https://www.youtube.com, the original URL

-    The near method takes year, month, day, hour and minute
-    as Arguments, their type is int.
+
+    The near method of Url class in wrapper.py takes year, month, day, hour
+    and minute as arguments, their type is int.

    This method takes those integers and converts it to
    wayback machine timestamp and returns it.

-    Return format is string.
+
+    zfill(2) adds 1 zero in front of single digit days, months hour etc.
+
+    Return type is string.
    """

    return "".join(
@@ -339,16 +466,37 @@ def _get_response(
    backoff_factor=0.5,
    no_raise_on_redirects=False,
 ):
-    """
-    This function is used make get request.
-    We use the requests package to make the
-    requests.
+    """Makes get requests.
+
+    Parameters
+    ----------
+    endpoint : str
+        The API endpoint.
+
+    params : dict
+        The get request parameters. (default is None)
+
+    headers : dict
+        Headers for the get request. (default is None)
+
+    return_full_url : bool
+        Determines whether the full url is returned along with the
+        response. (default is False)
+
+    retries : int
+        Maximum number of retries for the get request. (default is 5)
+
+    backoff_factor : float
+        The factor by which we determine the next retry time after wait.
+        https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html
+        (default is 0.5)
+
+    no_raise_on_redirects : bool
+        If redirected more than the maximum of 30 times (default for requests)
+        then return None instead of raising an exception. (default is False)


-    We try five times and if it fails it raises
-    WaybackError exception.
-
-    You can handles WaybackError by importing:
+    To handle WaybackError:
    from waybackpy.exceptions import WaybackError

    try:
@@ -370,20 +518,28 @@ def _get_response(

    s.mount("https://", HTTPAdapter(max_retries=retries))

+    # The URL with parameters required for the get request
    url = _full_url(endpoint, params)

    try:
+
        if not return_full_url:
            return s.get(url, headers=headers)
+
        return (url, s.get(url, headers=headers))
+
    except Exception as e:
+
        reason = str(e)
+
        if no_raise_on_redirects:
            if "Exceeded 30 redirects" in reason:
                return
+
        exc_message = "Error while retrieving {url}.\n{reason}".format(
            url=url, reason=reason
        )
+
        exc = WaybackError(exc_message)
        exc.__cause__ = e
        raise exc
--- a/waybackpy/wrapper.py
+++ b/waybackpy/wrapper.py
@@ -1,5 +1,6 @@
 import re
 from datetime import datetime, timedelta
+
 from .exceptions import WaybackError
 from .cdx import Cdx
 from .utils import (
@@ -9,13 +10,85 @@ from .utils import (
    default_user_agent,
    _url_check,
    _cleaned_url,
-    _ts,
-    _unix_ts_to_wayback_ts,
+    _timestamp_manager,
+    _unix_timestamp_to_wayback_timestamp,
    _latest_version,
 )


 class Url:
+    """
+
+    Attributes
+    ----------
+    url : str
+        The input URL, wayback machine API operations are performed
+        on this URL after sanitizing it.
+
+    user_agent : str
+        The user_agent used while making the GET requests to the
+        Wayback machine APIs
+
+    _archive_url : str
+        Caches the last fetched archive.
+
+    timestamp : datetime.datetime
+        timestamp of the archive URL as datetime object for
+        greater usability
+
+    _JSON : dict
+        Caches the last fetched availability API data
+
+    latest_version : str
+        The latest version of waybackpy on PyPi
+
+    cached_save : bool
+        Flag to check if WayBack machine returned a cached
+        archive instead of creating a new archive. WayBack
+        machine allows only 1 archive for an URL in
+        30 minutes. If the archive returned by WayBack machine
+        is older than 3 minutes then this flag is set to True
+
+    Methods turned properties
+    ----------
+    JSON : dict
+        JSON response of availability API as dictionary / loaded JSON
+
+    archive_url : str
+        Return the archive url, returns str
+
+    _timestamp : datetime.datetime
+        Returns the timestamp resolved from self.timestamp and self.JSON
+
+    Methods
+    -------
+    save()
+        Archives the URL on WayBack machine
+
+    get(url="", user_agent="", encoding="")
+        Gets the source of archive url, can also be used to get source
+        of any URL if passed into it.
+
+    near(year=None, month=None, day=None, hour=None, minute=None, unix_timestamp=None)
+        Wayback Machine can have many archives for a URL/webpage, sometimes we want
+        archive close to a specific time.
+        This method takes year, month, day, hour, minute and unix_timestamp as input.
+
+    oldest(year=1994)
+        The oldest archive of an URL.
+
+    newest()
+        The newest archive of an URL
+
+    total_archives(start_timestamp=None, end_timestamp=None)
+        total number of archives of an URL, the timeframe can be confined by
+        start_timestamp and end_timestamp
+
+    known_urls(subdomain=False, host=False, start_timestamp=None, end_timestamp=None, match_type="prefix")
+        Known URLs for an URL, subdomain, URL as prefix etc.
+
+    """
+
    def __init__(self, url, user_agent=default_user_agent):
        self.url = url
        self.user_agent = str(user_agent)
@@ -32,29 +105,17 @@ class Url:
        )

    def __str__(self):
-        """
-        Output when print() is used on <class 'waybackpy.wrapper.Url'>
-        This should print an archive URL.
-
-        We check if self._archive_url is not None.
-        If not None, good. We return string of self._archive_url.
-
-        If self._archive_url is None, it means we ain't used any method that
-        sets self._archive_url, we now set self._archive_url to self.archive_url
-        and return it.
-        """
-
        if not self._archive_url:
            self._archive_url = self.archive_url
+
        return "{archive_url}".format(archive_url=self._archive_url)

    def __len__(self):
-        """
-        Why do we have len here?
+        """Number of days between today and the date of archive based on the timestamp

-        Applying len() on <class 'waybackpy.wrapper.Url'>
-        will calculate the number of days between today and
-        the archive timestamp.
+        len() of waybackpy.wrapper.Url should return
+        the number of days between today and the
+        archive timestamp.

        Can be applied on return values of near and its
        childs (e.g. oldest) and if applied on waybackpy.Url()
@@ -76,32 +137,30 @@ class Url:

    @property
    def JSON(self):
-        """
-        If the end user has used near() or its childs like oldest, newest
-        and archive_url then the JSON response of these are cached in self._JSON
+        """Returns JSON response of availability API as dictionary / loaded JSON

-        If we find that self._JSON is not None we return it.
-        else we get the response of 'https://archive.org/wayback/available?url=YOUR-URL'
-        and return it.
+        return type : dict
        """

+        # If user used the near method or any method that depends on near, we
+        # are certain that we have a loaded dictionary cached in self._JSON.
+        # Return the loaded JSON data.
        if self._JSON:
            return self._JSON

+        # If no cached data found, get data and return + cache it.
        endpoint = "https://archive.org/wayback/available"
        headers = {"User-Agent": self.user_agent}
        payload = {"url": "{url}".format(url=_cleaned_url(self.url))}
        response = _get_response(endpoint, params=payload, headers=headers)
-        return response.json()
+        self._JSON = response.json()
+        return self._JSON

    @property
    def archive_url(self):
-        """
-        Returns any random archive for the instance.
-        But if near, oldest, newest were used before
-        then it returns the same archive again.
+        """Return the archive url.

-        We cache archive in self._archive_url
+        return type : str
        """

        if self._archive_url:
@@ -121,11 +180,16 @@ class Url:

    @property
    def _timestamp(self):
-        self.timestamp = _ts(self.timestamp, self.JSON)
-        return self.timestamp
+        """Returns the timestamp resolved from self.timestamp and self.JSON.
+
+        Return type : datetime.datetime
+
+        """
+        return _timestamp_manager(self.timestamp, self.JSON)

    def save(self):
-        """
+        """Saves/Archive the URL.
+
        To save a webpage on WayBack machine we
        need to send get request to https://web.archive.org/save/

@@ -136,6 +200,8 @@ class Url:

        _archive_url_parser() parses the archive from the header.

+        return type : waybackpy.wrapper.Url
+
        """
        request_url = "https://web.archive.org/save/" + _cleaned_url(self.url)
        headers = {"User-Agent": self.user_agent}
@@ -161,7 +227,9 @@ class Url:
            instance=self,
        )

-        m = re.search(r"https?://web.archive.org/web/([0-9]{14})/http", self._archive_url)
+        m = re.search(
+            r"https?://web.archive.org/web/([0-9]{14})/http", self._archive_url
+        )
        str_ts = m.group(1)
        ts = datetime.strptime(str_ts, "%Y%m%d%H%M%S")
        now = datetime.utcnow()
@@ -175,9 +243,22 @@ class Url:
        return self

    def get(self, url="", user_agent="", encoding=""):
-        """
-        Return the source code of the last archived URL,
-        if no URL is passed to this method.
+        """GET the source of archive or any other URL.
+
+        url : str, waybackpy.wrapper.Url
+            The method will return the source code of
+            this URL instead of last fetched archive.
+
+        user_agent : str
+            The user_agent for GET request to API
+
+        encoding : str
+            If user is using any other encoding that
+            can't be detected by response.encoding
+
+        Return the source code of the last fetched
+        archive URL if no URL is passed to this method
+        else it returns the source code of url passed.

        If encoding is not supplied, it is auto-detected
         from the response itself by requests package.
@@ -213,6 +294,27 @@ class Url:
        unix_timestamp=None,
    ):
        """
+        Parameters
+        ----------
+
+        year : int
+            Archive close to year
+
+        month : int
+            Archive close to month
+
+        day : int
+            Archive close to day
+
+        hour : int
+            Archive close to hour
+
+        minute : int
+            Archive close to minute
+
+        unix_timestamp : str, float or int
+            Archive close to this unix_timestamp
+
        Wayback Machine can have many archives of a webpage,
        sometimes we want archive close to a specific time.

@@ -235,7 +337,7 @@ class Url:
        """

        if unix_timestamp:
-            timestamp = _unix_ts_to_wayback_ts(unix_timestamp)
+            timestamp = _unix_timestamp_to_wayback_timestamp(unix_timestamp)
        else:
            now = datetime.utcnow().timetuple()
            timestamp = _wayback_timestamp(
@@ -285,28 +387,45 @@ class Url:

        We simply pass the year in near() and return it.
        """
+
        return self.near(year=year)

    def newest(self):
-        """
-        Return the newest Wayback Machine archive available for this URL.
+        """Return the newest Wayback Machine archive available.

-        We return the output of self.near() as it deafults to current utc time.
+        We return the return value of self.near() as it defaults to current UTC time.

        Due to Wayback Machine database lag, this may not always be the
        most recent archive.
+
+        return type : waybackpy.wrapper.Url
        """
+
        return self.near()

    def total_archives(self, start_timestamp=None, end_timestamp=None):
-        """
+        """Returns the total number of archives for an URL
+
+        Parameters
+        ----------
+        start_timestamp : str
+            1 to 14 digit string of numbers, you are not required to
+            pass a full 14 digit timestamp.
+
+        end_timestamp : str
+            1 to 14 digit string of numbers, you are not required to
+            pass a full 14 digit timestamp.
+
+
+        return type : int
+
+
        A webpage can have multiple archives on the wayback machine
        If someone wants to count the total number of archives of a
        webpage on wayback machine they can use this method.

        Returns the total number of Wayback Machine archives for the URL.

-        Return type in integer.
        """

        cdx = Cdx(
@@ -315,6 +434,8 @@ class Url:
            start_timestamp=start_timestamp,
            end_timestamp=end_timestamp,
        )
+
+        # cdx.snapshots() is generator not list.
        i = 0
        for _ in cdx.snapshots():
            i = i + 1
@@ -328,15 +449,36 @@ class Url:
        end_timestamp=None,
        match_type="prefix",
    ):
-        """
+        """Yields known_urls URLs from the CDX API.
+
+        Parameters
+        ----------
+
+        subdomain : bool
+            If True fetch subdomain URLs along with the host URLs.
+
+        host : bool
+            Only fetch host URLs.
+
+        start_timestamp : str
+            1 to 14 digit string of numbers, you are not required to
+            pass a full 14 digit timestamp.
+
+        end_timestamp : str
+            1 to 14 digit string of numbers, you are not required to
+            pass a full 14 digit timestamp.
+
+        match_type : str
+            One of  (exact, prefix, host and domain)
+
+        return type : generator of str (the original URL of each snapshot)
+
        Yields list of URLs known to exist for given input.
        Defaults to input URL as prefix.

-        This method is kept for compatibility, use the Cdx class instead.
-        This method itself depends on Cdx.
-
-         Idea by Mohammed Diaa (https://github.com/mhmdiaa) from:
+        Based on:
        https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050
+        By Mohammed Diaa (https://github.com/mhmdiaa)
        """

        if subdomain:
@@ -353,7 +495,5 @@ class Url:
            collapses=["urlkey"],
        )

-        snapshots = cdx.snapshots()
-
-        for snapshot in snapshots:
+        for snapshot in cdx.snapshots():
            yield (snapshot.original)