Add doc strings (#90)

* Added some docstrings in utils.py * renamed some func/meth to better names and added doc strings + lint * added more docstrings * more docstrings * improve docstrings * docstrings * added more docstrings, lint * fix import error
2021-01-26 11:56:03 +05:30
parent 88cda94c0b
commit db8f902cff
9 changed files with 443 additions and 121 deletions
--- a/tests/test_cdx.py
+++ b/tests/test_cdx.py
@@ -79,7 +79,7 @@ def test_all_cdx():
    c = 0
    for snapshot in snapshots:
        c += 1
-        if c > 30_529:  # deafult limit is 10k
+        if c > 30529:  # deafult limit is 10k
            break
    url = "https://github.com/*"
@@ -89,5 +89,5 @@ def test_all_cdx():
    for snapshot in snapshots:
        c += 1
-        if c > 100_529:
+        if c > 100529:
            break
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -5,8 +5,7 @@ import random
 import string
 import argparse
-sys.path.append("..")
+import waybackpy.cli as cli
 import waybackpy.cli as cli  # noqa: E402
 from waybackpy.wrapper import Url  # noqa: E402
 from waybackpy.__version__ import __version__
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -14,14 +14,14 @@ from waybackpy.utils import (
    _check_match_type,
    _check_collapses,
    _check_filters,
-    _ts,
+    _timestamp_manager,
 )
-def test_ts():
+def test_timestamp_manager():
    timestamp = True
    data = {}
-    assert _ts(timestamp, data)
+    assert _timestamp_manager(timestamp, data)
    data = """
    {"archived_snapshots": {"closest": {"timestamp": "20210109155628", "available": true, "status": "200", "url": "http://web.archive.org/web/20210109155628/https://www.google.com/"}}, "url": "https://www.google.com/"}
@@ -61,10 +61,10 @@ def test_check_collapses():
 def test_check_match_type():
-    assert None == _check_match_type(None, "url")
+    assert _check_match_type(None, "url") is None
    match_type = "exact"
    url = "test_url"
-    assert None == _check_match_type(match_type, url)
+    assert _check_match_type(match_type, url) is None
    url = "has * in it"
    with pytest.raises(WaybackError):
@@ -82,7 +82,7 @@ def test_cleaned_url():
 def test_url_check():
    good_url = "https://akamhy.github.io"
-    assert None == _url_check(good_url)
+    assert _url_check(good_url) is None
    bad_url = "https://github-com"
    with pytest.raises(URLError):
--- a/tests/test_wrapper.py
+++ b/tests/test_wrapper.py
@@ -1,8 +1,4 @@
 import sys
 import pytest
 import random
 import requests
 from datetime import datetime
 from waybackpy.wrapper import Url
--- a/waybackpy/cdx.py
+++ b/waybackpy/cdx.py
@@ -11,6 +11,7 @@ from .utils import (
 )
 # TODO : Threading support for pagination API. It's designed for Threading.
 # TODO : Add get method here if type is Valid HTML, SVG other but not - or warc. Test it.
 class Cdx:
@@ -42,7 +43,22 @@ class Cdx:
        self.use_page = False
    def cdx_api_manager(self, payload, headers, use_page=False):
-        """
+        """Act as button, we can choose between the normal API and pagination API.
        Parameters
        ----------
        self : waybackpy.cdx.Cdx
            The instance itself
        payload : dict
            Get request parameters name value pairs
        headers : dict
            The headers for making the GET request.
        use_page : bool
            If True use pagination API else use normal resume key based API.
        We have two options to get the snapshots, we use this
        method to make a selection between pagination API and
        the normal one with Resumption Key, sequential querying
@@ -141,7 +157,7 @@ class Cdx:
    def snapshots(self):
        """
        This function yields snapshots encapsulated
-        in CdxSnapshot for more usability.
+        in CdxSnapshot for increased usability.
        All the get request values are set if the conditions match
@@ -188,10 +204,9 @@ class Cdx:
                prop_values = snapshot.split(" ")
                # Making sure that we get the same number of
                # property values as the number of properties
                prop_values_len = len(prop_values)
                properties_len = len(properties)
                if prop_values_len != properties_len:
                    raise WaybackError(
                        "Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(
--- a/waybackpy/cli.py
+++ b/waybackpy/cli.py
@@ -5,6 +5,7 @@ import json
 import random
 import string
 import argparse
 from .wrapper import Url
 from .exceptions import WaybackError
 from .__version__ import __version__
--- a/waybackpy/snapshot.py
+++ b/waybackpy/snapshot.py
@@ -3,15 +3,24 @@ from datetime import datetime
 class CdxSnapshot:
    """
-    This class helps to use the Cdx Snapshots easily.
+    This class encapsulates the snapshots for greater usability.
    Raw Snapshot data looks like:
    org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
    properties is a dict containing all of the 7 cdx snapshot properties.
    """
    def __init__(self, properties):
        """
        Parameters
        ----------
        self : waybackpy.snapshot.CdxSnapshot
            The instance itself
        properties : dict
            Properties is a dict containing all of the 7 cdx snapshot properties.
        """
        self.urlkey = properties["urlkey"]
        self.timestamp = properties["timestamp"]
        self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
@@ -25,6 +34,12 @@ class CdxSnapshot:
        )
    def __str__(self):
        """Returns the Cdx snapshot line.
        Output format:
        org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
        """
        return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
            urlkey=self.urlkey,
            timestamp=self.timestamp,
--- a/waybackpy/utils.py
+++ b/waybackpy/utils.py
@@ -1,28 +1,72 @@
 import re
 import time
 import requests
 from .exceptions import WaybackError, URLError
 from datetime import datetime
 from .exceptions import WaybackError, URLError
 from .__version__ import __version__
 from urllib3.util.retry import Retry
 from requests.adapters import HTTPAdapter
 from .__version__ import __version__
 quote = requests.utils.quote
 default_user_agent = "waybackpy python package - https://github.com/akamhy/waybackpy"
 def _latest_version(package_name, headers):
-    endpoint = "https://pypi.org/pypi/" + package_name + "/json"
+    """Returns the latest version of package_name.
-    json = _get_response(endpoint, headers=headers).json()
+
-    return json["info"]["version"]
+    Parameters
    ----------
    package_name : str
        The name of the python package
    headers : dict
        Headers that will be used while making get requests
    Return type is str
    Use API <https://pypi.org/pypi/> to get the latest version of
    waybackpy, but can be used to get latest version of any package
    on PyPi.
    """
    request_url = "https://pypi.org/pypi/" + package_name + "/json"
    response = _get_response(request_url, headers=headers)
    data = response.json()
    return data["info"]["version"]
-def _unix_ts_to_wayback_ts(unix_ts):
+def _unix_timestamp_to_wayback_timestamp(unix_timestamp):
-    return datetime.utcfromtimestamp(int(unix_ts)).strftime("%Y%m%d%H%M%S")
+    """Returns unix timestamp converted to a wayback machine timestamp string
    Parameters
    ----------
    unix_timestamp : str, int or float
        Unix-timestamp that needs to be converted to a wayback timestamp
    Converts input unix_timestamp to a YYYYmmddHHMMSS (UTC) string and returns it.
    Does not matter if unix_timestamp is str, float or int.
    """
    return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
 def _add_payload(instance, payload):
    """Adds payload from instance that can be used to make get requests.
    Parameters
    ----------
    instance : waybackpy.cdx.Cdx
        instance of the Cdx class
    payload : dict
        A dict onto which we need to add keys and values based on instance.
    instance is object of Cdx class and it contains the data required to fill
    the payload dictionary.
    """
    if instance.start_timestamp:
        payload["from"] = instance.start_timestamp
@@ -43,18 +87,27 @@ def _add_payload(instance, payload):
        for i, f in enumerate(instance.collapses):
            payload["collapse" + str(i)] = f
    # Don't need to return anything as it's dictionary.
    payload["url"] = instance.url
-def _ts(timestamp, data):
+def _timestamp_manager(timestamp, data):
-    """
+    """Returns the timestamp.
    Get timestamp of last fetched archive.
    If used before fetching any archive, will
    use whatever self.JSON returns.
-    self.timestamp is None implies that
+    Parameters
-    self.JSON will return any archive's JSON
+    ----------
-    that wayback machine provides it.
+    timestamp : datetime.datetime
        datetime object
    data : dict
        A python dictionary, which is the loaded JSON of the availability API.
    Return type:
        datetime.datetime
     If timestamp is not None then sets the value to timestamp itself.
     If timestamp is None then returns the value from the last fetched API data.
     If not timestamp and can not read the archived_snapshots from data return datetime.max
    """
    if timestamp:
@@ -69,6 +122,21 @@ def _ts(timestamp, data):
 def _check_match_type(match_type, url):
    """Checks the validity of match_type parameter of the CDX GET requests.
    Parameters
    ----------
    match_type : str
        One of "exact", "prefix", "host" or "domain".
        See https://github.com/akamhy/waybackpy/wiki/Python-package-docs#url-match-scope
    url : str
        The URL used to create the waybackpy Url object.
    If not valid match_type raise Exception.
    """
    if not match_type:
        return
@@ -85,6 +153,19 @@ def _check_match_type(match_type, url):
 def _check_collapses(collapses):
    """Checks the validity of collapse parameter of the CDX GET request.
    One or more field or field:N to 'collapses=[]' where
    field is one of (urlkey, timestamp, original, mimetype, statuscode,
    digest and length) and N is the first N characters of field to test.
    Parameters
    ----------
    collapses : list
    If not valid collapses raise Exception.
    """
    if not isinstance(collapses, list):
        raise WaybackError("collapses must be a list.")
@@ -119,12 +200,26 @@ def _check_collapses(collapses):
 def _check_filters(filters):
    """Checks the validity of filter parameter of the CDX GET request.
    Any number of filter params of the following form may be specified:
        filters=["[!]field:regex"]
    Parameters
    ----------
    filters : list
    If not valid filters raise Exception.
    """
    if not isinstance(filters, list):
        raise WaybackError("filters must be a list.")
    # [!]field:regex
    for _filter in filters:
        try:
            match = re.search(
                r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
                _filter,
@@ -134,8 +229,9 @@ def _check_filters(filters):
            val = match.group(2)
        except Exception:
            exc_message = (
-                "Filter '{_filter}' not following the cdx filter syntax.".format(
+                "Filter '{_filter}' is not following the cdx filter syntax.".format(
                    _filter=_filter
                )
            )
@@ -143,6 +239,9 @@ def _check_filters(filters):
 def _cleaned_url(url):
    """Sanitize the url
    Remove and replace illegal whitespace characters from the URL.
    """
    return str(url).strip().replace(" ", "%20")
@@ -161,16 +260,29 @@ def _url_check(url):
 def _full_url(endpoint, params):
-    full_url = endpoint
+    """API endpoint + GET parameters = full_url
-    if params:
+
    Parameters
    ----------
    endpoint : str
        The API endpoint
    params : dict
        Dictionary that has name-value pairs.
    Return type is str
    """
    if not params:
        return endpoint
    full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
    for key, val in params.items():
        key = "filter" if key.startswith("filter") else key
        key = "collapse" if key.startswith("collapse") else key
        amp = "" if full_url.endswith("?") else "&"
-            full_url = (
+        full_url = full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
                full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
            )
    return full_url
@@ -191,17 +303,31 @@ def _get_total_pages(url, user_agent):
 def _archive_url_parser(header, url, latest_version=__version__, instance=None):
-    """
+    """Returns the archive after parsing it from the response header.
    Parameters
    ----------
    header : str
        The response header of WayBack Machine's Save API
    url : str
        The input url, the one used to create the Url object.
    latest_version : str
        The latest version of waybackpy (default is __version__)
    instance : waybackpy.wrapper.Url
        Instance of Url class
    The wayback machine's save API doesn't
    return JSON response, we are required
    to read the header of the API response
-    and look for the archive URL.
+    and find the archive URL.
-    This method has some regexen (or regexes)
+    This method has some regular expressions
-    that search for archive url in header.
+    that are used to search for the archive url
-
+    in the response header of Save API.
    This method is used when you try to
    save a webpage on wayback machine.
    Two cases are possible:
    1) Either we find the archive url in
@@ -213,7 +339,6 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
    If we found the archive URL we return it.
    Return format:
    web.archive.org/web/<TIMESTAMP>/<URL>
    And if we couldn't find it, we raise
@@ -304,9 +429,7 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
 def _wayback_timestamp(**kwargs):
-    """
+    """Returns a valid waybackpy timestamp.
    Wayback Machine archive URLs
    have a timestamp in them.
    The standard archive URL format is
    https://web.archive.org/web/20191214041711/https://www.youtube.com
@@ -316,13 +439,17 @@ def _wayback_timestamp(**kwargs):
    2 ) timestamp (20191214041711)
    3 ) https://www.youtube.com, the original URL
-    The near method takes year, month, day, hour and minute
+
-    as Arguments, their type is int.
+    The near method of Url class in wrapper.py takes year, month, day, hour
    and minute as arguments, their type is int.
    This method takes those integers and converts it to
    wayback machine timestamp and returns it.
-    Return format is string.
+
    zfill(2) adds 1 zero in front of single digit days, months hour etc.
    Return type is string.
    """
    return "".join(
@@ -339,16 +466,37 @@ def _get_response(
    backoff_factor=0.5,
    no_raise_on_redirects=False,
 ):
-    """
+    """Makes get requests.
-    This function is used make get request.
+
-    We use the requests package to make the
+    Parameters
-    requests.
+    ----------
    endpoint : str
        The API endpoint.
    params : dict
        The get request parameters. (default is None)
    headers : dict
        Headers for the get request. (default is None)
    return_full_url : bool
        Determines whether the full url is returned along with the
        response. (default is False)
    retries : int
        Maximum number of retries for the get request. (default is 5)
    backoff_factor : float
        The factor by which we determine the next retry time after wait.
        https://urllib3.readthedocs.io/en/latest/reference/urllib3.util.html
        (default is 0.5)
    no_raise_on_redirects : bool
        If redirected the maximum 30 (default for requests) times, then return
        instead of raising an exception. (default is False)
-    We try five times and if it fails it raises
+    To handle WaybackError:
    WaybackError exception.
    You can handle WaybackError by importing:
    from waybackpy.exceptions import WaybackError
    try:
@@ -370,20 +518,28 @@ def _get_response(
    s.mount("https://", HTTPAdapter(max_retries=retries))
    # The URL with parameters required for the get request
    url = _full_url(endpoint, params)
    try:
        if not return_full_url:
            return s.get(url, headers=headers)
        return (url, s.get(url, headers=headers))
    except Exception as e:
        reason = str(e)
        if no_raise_on_redirects:
            if "Exceeded 30 redirects" in reason:
                return
        exc_message = "Error while retrieving {url}.\n{reason}".format(
            url=url, reason=reason
        )
        exc = WaybackError(exc_message)
        exc.__cause__ = e
        raise exc
--- a/waybackpy/wrapper.py
+++ b/waybackpy/wrapper.py
@@ -1,5 +1,6 @@
 import re
 from datetime import datetime, timedelta
 from .exceptions import WaybackError
 from .cdx import Cdx
 from .utils import (
@@ -9,13 +10,85 @@ from .utils import (
    default_user_agent,
    _url_check,
    _cleaned_url,
-    _ts,
+    _timestamp_manager,
-    _unix_ts_to_wayback_ts,
+    _unix_timestamp_to_wayback_timestamp,
    _latest_version,
 )
 class Url:
    """
    Attributes
    ----------
    url : str
        The input URL, wayback machine API operations are performed
        on this URL after sanitizing it.
    user_agent : str
        The user_agent used while making the GET requests to the
        Wayback machine APIs
    _archive_url : str
        Caches the last fetched archive.
    timestamp : datetime.datetime
        timestamp of the archive URL as datetime object for
        greater usability
    _JSON : dict
        Caches the last fetched availability API data
    latest_version : str
        The latest version of waybackpy on PyPi
    cached_save : bool
        Flag to check if WayBack machine returned a cached
        archive instead of creating a new archive. WayBack
        machine allows only one archive for an URL in
        30 minutes. If the archive returned by WayBack machine
        is older than 3 minutes then this flag is set to True
    Methods turned properties
    ----------
    JSON : dict
        JSON response of availability API as dictionary / loaded JSON
    archive_url : str
        Return the archive url, returns str
    _timestamp : datetime.datetime
        Sets the value of self.timestamp if still not set
    Methods
    -------
    save()
        Archives the URL on WayBack machine
    get(url="", user_agent="", encoding="")
        Gets the source of archive url, can also be used to get source
        of any URL if passed into it.
    near(year=None, month=None, day=None, hour=None, minute=None, unix_timestamp=None)
        Wayback Machine can have many archives for a URL/webpage, sometimes we want
        archive close to a specific time.
        This method takes year, month, day, hour, minute and unix_timestamp as input.
    oldest(year=1994)
        The oldest archive of an URL.
    newest()
        The newest archive of an URL
    total_archives(start_timestamp=None, end_timestamp=None)
        total number of archives of an URL, the timeframe can be confined by
        start_timestamp and end_timestamp
    known_urls(subdomain=False, host=False, start_timestamp=None, end_timestamp=None, match_type="prefix")
        Known URLs for an URL, subdomain, URL as prefix etc.
    """
    def __init__(self, url, user_agent=default_user_agent):
        self.url = url
        self.user_agent = str(user_agent)
@@ -32,29 +105,17 @@ class Url:
        )
    def __str__(self):
        """
        Output when print() is used on <class 'waybackpy.wrapper.Url'>
        This should print an archive URL.
        We check if self._archive_url is not None.
        If not None, good. We return string of self._archive_url.
        If self._archive_url is None, it means we have not used any method that
        sets self._archive_url, we now set self._archive_url to self.archive_url
        and return it.
        """
        if not self._archive_url:
            self._archive_url = self.archive_url
        return "{archive_url}".format(archive_url=self._archive_url)
    def __len__(self):
-        """
+        """Number of days between today and the date of archive based on the timestamp
        Why do we have len here?
-        Applying len() on <class 'waybackpy.wrapper.Url'>
+        len() of waybackpy.wrapper.Url should return
-        will calculate the number of days between today and
+        the number of days between today and the
-        the archive timestamp.
+        archive timestamp.
        Can be applied on return values of near and its
        childs (e.g. oldest) and if applied on waybackpy.Url()
@@ -76,32 +137,30 @@ class Url:
    @property
    def JSON(self):
-        """
+        """Returns JSON response of availability API as dictionary / loaded JSON
        If the end user has used near() or its childs like oldest, newest
        and archive_url then the JSON response of these are cached in self._JSON
-        If we find that self._JSON is not None we return it.
+        return type : dict
        else we get the response of 'https://archive.org/wayback/available?url=YOUR-URL'
        and return it.
        """
        # If user used the near method or any method that depends on near, we
        # are certain that we have a loaded dictionary cached in self._JSON.
        # Return the loaded JSON data.
        if self._JSON:
            return self._JSON
        # If no cached data found, get data and return + cache it.
        endpoint = "https://archive.org/wayback/available"
        headers = {"User-Agent": self.user_agent}
        payload = {"url": "{url}".format(url=_cleaned_url(self.url))}
        response = _get_response(endpoint, params=payload, headers=headers)
-        return response.json()
+        self._JSON = response.json()
        return self._JSON
    @property
    def archive_url(self):
-        """
+        """Return the archive url.
        Returns any random archive for the instance.
        But if near, oldest, newest were used before
        then it returns the same archive again.
-        We cache archive in self._archive_url
+        return type : str
        """
        if self._archive_url:
@@ -121,11 +180,16 @@ class Url:
    @property
    def _timestamp(self):
-        self.timestamp = _ts(self.timestamp, self.JSON)
+        """Sets the value of self.timestamp if still not set.
-        return self.timestamp
+
        Return type : datetime.datetime
        """
        return _timestamp_manager(self.timestamp, self.JSON)
    def save(self):
-        """
+        """Saves/Archive the URL.
        To save a webpage on WayBack machine we
        need to send get request to https://web.archive.org/save/
@@ -136,6 +200,8 @@ class Url:
        _archive_url_parser() parses the archive from the header.
        return type : waybackpy.wrapper.Url
        """
        request_url = "https://web.archive.org/save/" + _cleaned_url(self.url)
        headers = {"User-Agent": self.user_agent}
@@ -161,7 +227,9 @@ class Url:
            instance=self,
        )
-        m = re.search(r"https?://web.archive.org/web/([0-9]{14})/http", self._archive_url)
+        m = re.search(
            r"https?://web.archive.org/web/([0-9]{14})/http", self._archive_url
        )
        str_ts = m.group(1)
        ts = datetime.strptime(str_ts, "%Y%m%d%H%M%S")
        now = datetime.utcnow()
@@ -175,9 +243,22 @@ class Url:
        return self
    def get(self, url="", user_agent="", encoding=""):
-        """
+        """GET the source of archive or any other URL.
-        Return the source code of the last archived URL,
+
-        if no URL is passed to this method.
+        url : str, waybackpy.wrapper.Url
            The method will return the source code of
            this URL instead of last fetched archive.
        user_agent : str
            The user_agent for GET request to API
        encoding : str
            If user is using any other encoding that
            can't be detected by response.encoding
        Return the source code of the last fetched
        archive URL if no URL is passed to this method
        else it returns the source code of url passed.
        If encoding is not supplied, it is auto-detected
         from the response itself by requests package.
@@ -213,6 +294,27 @@ class Url:
        unix_timestamp=None,
    ):
        """
        Parameters
        ----------
        year : int
            Archive close to year
        month : int
            Archive close to month
        day : int
            Archive close to day
        hour : int
            Archive close to hour
        minute : int
            Archive close to minute
        unix_timestamp : str, float or int
            Archive close to this unix_timestamp
        Wayback Machine can have many archives of a webpage,
        sometimes we want archive close to a specific time.
@@ -235,7 +337,7 @@ class Url:
        """
        if unix_timestamp:
-            timestamp = _unix_ts_to_wayback_ts(unix_timestamp)
+            timestamp = _unix_timestamp_to_wayback_timestamp(unix_timestamp)
        else:
            now = datetime.utcnow().timetuple()
            timestamp = _wayback_timestamp(
@@ -285,28 +387,45 @@ class Url:
        We simply pass the year in near() and return it.
        """
        return self.near(year=year)
    def newest(self):
-        """
+        """Return the newest Wayback Machine archive available.
        Return the newest Wayback Machine archive available for this URL.
-        We return the output of self.near() as it deafults to current utc time.
+        We return the return value of self.near() as it defaults to current UTC time.
        Due to Wayback Machine database lag, this may not always be the
        most recent archive.
        return type : waybackpy.wrapper.Url
        """
        return self.near()
    def total_archives(self, start_timestamp=None, end_timestamp=None):
-        """
+        """Returns the total number of archives for an URL
        Parameters
        ----------
        start_timestamp : str
            1 to 14 digit string of numbers, you are not required to
            pass a full 14 digit timestamp.
        end_timestamp : str
            1 to 14 digit string of numbers, you are not required to
            pass a full 14 digit timestamp.
        return type : int
        A webpage can have multiple archives on the wayback machine
        If someone wants to count the total number of archives of a
        webpage on wayback machine they can use this method.
        Returns the total number of Wayback Machine archives for the URL.
        Return type is integer.
        """
        cdx = Cdx(
@@ -315,6 +434,8 @@ class Url:
            start_timestamp=start_timestamp,
            end_timestamp=end_timestamp,
        )
        # cdx.snapshots() is generator not list.
        i = 0
        for _ in cdx.snapshots():
            i = i + 1
@@ -328,15 +449,36 @@ class Url:
        end_timestamp=None,
        match_type="prefix",
    ):
-        """
+        """Yields known_urls URLs from the CDX API.
        Parameters
        ----------
        subdomain : bool
            If True fetch subdomain URLs along with the host URLs.
        host : bool
            Only fetch host URLs.
        start_timestamp : str
            1 to 14 digit string of numbers, you are not required to
            pass a full 14 digit timestamp.
        end_timestamp : str
            1 to 14 digit string of numbers, you are not required to
            pass a full 14 digit timestamp.
        match_type : str
            One of  (exact, prefix, host and domain)
        return type : generator of str (the original URL of each snapshot)
        Yields list of URLs known to exist for given input.
        Defaults to input URL as prefix.
-        This method is kept for compatibility, use the Cdx class instead.
+        Based on:
        This method itself depends on Cdx.
         Idea by Mohammed Diaa (https://github.com/mhmdiaa) from:
        https://gist.github.com/mhmdiaa/adf6bff70142e5091792841d4b372050
        By Mohammed Diaa (https://github.com/mhmdiaa)
        """
        if subdomain:
@@ -353,7 +495,5 @@ class Url:
            collapses=["urlkey"],
        )
-        snapshots = cdx.snapshots()
+        for snapshot in cdx.snapshots():
        for snapshot in snapshots:
            yield (snapshot.original)