more docstrings

This commit is contained in:
Akash Mahanty
2021-01-25 23:18:09 +05:30
parent 5e2fac666a
commit 36ab6405be
3 changed files with 39 additions and 7 deletions

View File

@@ -11,6 +11,7 @@ from .utils import (
)
# TODO : Threading support for pagination API. It's designed for Threading.
# TODO : Add get method here if type is Valid HTML, SVG other but not - or warc. Test it.
class Cdx:
@@ -42,7 +43,22 @@ class Cdx:
self.use_page = False
def cdx_api_manager(self, payload, headers, use_page=False):
"""
"""Act as button, we can choose between the normal API and pagination API.
Parameters
----------
self : waybackpy.cdx.Cdx
The instance itself
payload : dict
Get request parameters name value pairs
headers : dict
The headers for making the GET request.
use_page : bool
If True use pagination API else use normal resume key based API.
We have two options to get the snapshots, we use this
method to make a selection between pagination API and
the normal one with Resumption Key, sequential querying
@@ -141,7 +157,7 @@ class Cdx:
def snapshots(self):
"""
This function yields snapshots encapsulated
in CdxSnapshot for more usability.
in CdxSnapshot for increased usability.
All the get request values are set if the conditions match
@@ -188,10 +204,9 @@ class Cdx:
prop_values = snapshot.split(" ")
# Making sure that we get the same number of
# property values as the number of properties
prop_values_len = len(prop_values)
properties_len = len(properties)
if prop_values_len != properties_len:
raise WaybackError(
"Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(

View File

@@ -3,15 +3,24 @@ from datetime import datetime
class CdxSnapshot:
"""
This class helps to use the Cdx Snapshots easily.
This class encapsulates the snapshots for greater usability.
Raw Snapshot data looks like:
org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
properties is a dict containing all of the 7 cdx snapshot properties.
"""
def __init__(self, properties):
"""
Parameters
----------
self : waybackpy.snapshot.CdxSnapshot
The instance itself
properties : dict
Properties is a dict containing all of the 7 cdx snapshot properties.
"""
self.urlkey = properties["urlkey"]
self.timestamp = properties["timestamp"]
self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
@@ -25,6 +34,12 @@ class CdxSnapshot:
)
def __str__(self):
"""Returns the Cdx snapshot line.
Output format:
org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
"""
return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
urlkey=self.urlkey,
timestamp=self.timestamp,

View File

@@ -439,15 +439,17 @@ def _wayback_timestamp(**kwargs):
2 ) timestamp (20191214041711)
3 ) https://www.youtube.com, the original URL
The near method of Url class in wrapper.py takes year, month, day, hour
and minute as arguments, their type is int.
This method takes those integers and converts them to
wayback machine timestamp and returns it.
zfill(2) adds 1 zero in front of single digit days, months, hours etc.
Return format is string.
Return type is string.
"""
return "".join(