From e0a4b007d5b55042c8dd0489e30e987d91d41c7a Mon Sep 17 00:00:00 2001
From: Akash Mahanty <akamhy@yahoo.com>
Date: Tue, 5 Jan 2021 01:46:12 +0530
Subject: [PATCH] improve docs

---
 waybackpy/__init__.py   | 41 +++++++++++++++++--------
 waybackpy/exceptions.py |  3 --
 waybackpy/wrapper.py    | 67 +++++++++++++++++++++++++++++++++--------
 3 files changed, 84 insertions(+), 27 deletions(-)

diff --git a/waybackpy/__init__.py b/waybackpy/__init__.py
index 8ec185e..223d098 100644
--- a/waybackpy/__init__.py
+++ b/waybackpy/__init__.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 # ┏┓┏┓┏┓━━━━━━━━━━┏━━┓━━━━━━━━━━┏┓━━┏━━━┓━━━━━
 # ┃┃┃┃┃┃━━━━━━━━━━┃┏┓┃━━━━━━━━━━┃┃━━┃┏━┓┃━━━━━
 # ┃┃┃┃┃┃┏━━┓━┏┓━┏┓┃┗┛┗┓┏━━┓━┏━━┓┃┃┏┓┃┗━┛┃┏┓━┏┓
@@ -10,24 +8,43 @@
 # ━━━━━━━━━━━┗━━┛━━━━━━━━━━━━━━━━━━━━━━━━┗━━┛━
 
 """
-Waybackpy is a Python package that interfaces with the Internet Archive's Wayback Machine API.
+Waybackpy is a Python package & command-line program that interfaces with the Internet Archive's Wayback Machine API.
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Archive pages and retrieve archived pages easily.
+Archive webpages and retrieve archived URLs easily.
 
 Usage:
-   >>> import waybackpy
-   >>> target_url = waybackpy.Url('https://www.python.org', 'Your-apps-cool-user-agent')
-   >>> new_archive = target_url.save()
-   >>> print(new_archive)
-   https://web.archive.org/web/20200502170312/https://www.python.org/
+    >>> import waybackpy
 
-Full documentation @ <https://akamhy.github.io/waybackpy/>.
-:copyright: (c) 2020 by akamhy.
+    >>> url = "https://en.wikipedia.org/wiki/Multivariable_calculus"
+    >>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0"
+
+    >>> wayback = waybackpy.Url(url, user_agent)
+
+    >>> archive = wayback.save()
+    >>> str(archive)
+    'https://web.archive.org/web/20210104173410/https://en.wikipedia.org/wiki/Multivariable_calculus'
+
+    >>> archive.timestamp
+    datetime.datetime(2021, 1, 4, 17, 35, 12, 691741)
+
+    >>> oldest_archive = wayback.oldest()
+    >>> str(oldest_archive)
+    'https://web.archive.org/web/20050422130129/http://en.wikipedia.org:80/wiki/Multivariable_calculus'
+
+    >>> archive_close_to_2010_feb = wayback.near(year=2010, month=2)
+    >>> str(archive_close_to_2010_feb)
+    'https://web.archive.org/web/20100215001541/http://en.wikipedia.org:80/wiki/Multivariable_calculus'
+
+    >>> str(wayback.newest())
+    'https://web.archive.org/web/20210104173410/https://en.wikipedia.org/wiki/Multivariable_calculus'
+
+Full documentation @ <https://github.com/akamhy/waybackpy/wiki>.
+:copyright: (c) 2020-2021 Akash Mahanty et al.
 :license: MIT
 """
 
-from .wrapper import Url
+from .wrapper import Url, Cdx
 from .__version__ import (
     __title__,
     __description__,
diff --git a/waybackpy/exceptions.py b/waybackpy/exceptions.py
index 2de2b6f..b08f6d6 100644
--- a/waybackpy/exceptions.py
+++ b/waybackpy/exceptions.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-
-
 class WaybackError(Exception):
     """
     Raised when Wayback Machine API Service is unreachable/down.
diff --git a/waybackpy/wrapper.py b/waybackpy/wrapper.py
index 79b61ee..d305960 100644
--- a/waybackpy/wrapper.py
+++ b/waybackpy/wrapper.py
@@ -11,10 +11,10 @@ default_user_agent = "waybackpy python package - https://github.com/akamhy/wayba
 
 def _get_total_pages(url, user_agent):
     """
-    If showNumPages is passed in cdx API, it returns 'number of pages of'
-    and each page has many archives.
+    If showNumPages is passed in cdx API, it returns
+    'number of archive pages' and each page has many archives.
 
-    This func returns number of pages (type int).
+    This func returns number of pages of archives (type int).
     """
     total_pages_url = (
         "https://web.archive.org/cdx/search/cdx?url=%s&showNumPages=true" % url
@@ -167,6 +167,19 @@ class Url:
         return "%s" % self._archive_url
 
     def __len__(self):
+        """
+        Why do we have len here?
+
+        Applying len() on <class 'waybackpy.wrapper.Url'>
+        will calculate the number of days between today and
+        the archive timestamp.
+
+        Can be applied to the return values of near() and its
+        children (e.g. oldest()). If applied to waybackpy.Url()
+        without calling any of those methods first, it just
+        uses self._timestamp, which in turn gets its value
+        from self.JSON.
+        """
         td_max = timedelta(
             days=999999999, hours=23, minutes=59, seconds=59, microseconds=999999
         )
@@ -194,7 +207,12 @@ class Url:
     @property
     def JSON(self):
         """
-        Returns JSON data from 'https://archive.org/wayback/available?url=YOUR-URL'.
+        If the end user has used near() or its children (oldest, newest
+        and archive_url), their JSON response is cached in self._JSON.
+
+        If self._JSON is already set (truthy) we return it;
+        otherwise we fetch 'https://archive.org/wayback/available?url=YOUR-URL'
+        and return that response.
         """
 
         if self._JSON:
@@ -235,8 +253,12 @@ class Url:
     def _timestamp(self):
         """
         Get timestamp of last fetched archive.
-        If used before fetching any archive, This
-        randomly picks archive.
+        If used before fetching any archive, this will
+        use whatever self.JSON returns.
+
+        When self.timestamp is not set, self.JSON
+        returns the JSON of whichever archive the
+        Wayback Machine chooses to provide.
         """
 
         if self.timestamp:
@@ -256,13 +278,25 @@ class Url:
 
     def _cleaned_url(self):
         """
-        Remove newlines
+        Strip leading/trailing whitespace (including EOL) and
         replace " " with "_"
         """
         return str(self.url).strip().replace(" ", "_")
 
     def save(self):
-        """Create a new Wayback Machine archive for this URL."""
+        """
+        To save a webpage on the Wayback Machine we
+        need to send a GET request to https://web.archive.org/save/
+
+        And to get the archive URL we are required to read the
+        header of the API response.
+
+        _get_response() takes care of the get requests. It uses requests
+        package.
+
+        _archive_url_parser() parses the archive from the header.
+
+        """
         request_url = "https://web.archive.org/save/" + self._cleaned_url()
         headers = {"User-Agent": self.user_agent}
         response = _get_response(request_url, params=None, headers=headers)
@@ -271,8 +305,10 @@ class Url:
         return self
 
     def get(self, url="", user_agent="", encoding=""):
-        """Return the source code of the supplied URL.
-        If encoding is not supplied, it is auto-detected from the response.
+        """
+        Return the source code of the supplied URL.
+        If encoding is not supplied, it is auto-detected
+        from the response itself by the requests package.
         """
 
         if not url:
@@ -452,8 +488,10 @@ class Url:
 
 class CdxSnapshot:
     """
-    [["urlkey","timestamp","original","mimetype","statuscode","digest","length"],
-    ["org,archive)/", "19970126045828", "http://www.archive.org:80/", "text/html", "200", "Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY", "1415"]]
+    This class helps to handle the Cdx Snapshots easily.
+
+    What the raw data looks like:
+    org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
     """
 
     def __init__(
@@ -478,6 +516,7 @@ class CdxSnapshot:
 class Cdx:
     """
     waybackpy Cdx class, Type : <class 'waybackpy.wrapper.Cdx'>
+
     Cdx keys are :
     urlkey
     timestamp
@@ -501,6 +540,10 @@ class Cdx:
         self.end_timestamp = str(end_timestamp) if end_timestamp else None
 
     def snapshots(self):
+        """
+        This function yields snapshots encapsulated
+        in CdxSnapshot for more usability.
+        """
         payload = {}
         endpoint = "https://web.archive.org/cdx/search/cdx"
         total_pages = _get_total_pages(self.url, self.user_agent)