diff --git a/waybackpy/cdx.py b/waybackpy/cdx.py index 3ce30bf..b2295c7 100644 --- a/waybackpy/cdx.py +++ b/waybackpy/cdx.py @@ -11,6 +11,7 @@ from .utils import ( ) # TODO : Threading support for pagination API. It's designed for Threading. +# TODO : Add get method here if type is Valid HTML, SVG other but not - or warc. Test it. class Cdx: @@ -42,7 +43,22 @@ class Cdx: self.use_page = False def cdx_api_manager(self, payload, headers, use_page=False): - """ + """Act as a switch that lets us choose between the normal API and the pagination API. + + Parameters + ---------- + self : waybackpy.cdx.Cdx + The instance itself + + payload : dict + Get request parameters name value pairs + + headers : dict + The headers for making the GET request. + + use_page : bool + If True use pagination API else use normal resume key based API. + We have two options to get the snapshots, we use this method to make a selection between pagination API and the normal one with Resumption Key, sequential querying @@ -141,7 +157,7 @@ class Cdx: def snapshots(self): """ This function yeilds snapshots encapsulated - in CdxSnapshot for more usability. + in CdxSnapshot for increased usability. All the get request values are set if the conditions match @@ -188,10 +204,9 @@ class Cdx: prop_values = snapshot.split(" ") - # Making sure that we get the same number of - # property values as the number of properties prop_values_len = len(prop_values) properties_len = len(properties) + if prop_values_len != properties_len: raise WaybackError( "Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format( diff --git a/waybackpy/snapshot.py b/waybackpy/snapshot.py index 992ad2e..e3dc027 100644 --- a/waybackpy/snapshot.py +++ b/waybackpy/snapshot.py @@ -3,15 +3,24 @@ from datetime import datetime class CdxSnapshot: """ - This class helps to use the Cdx Snapshots easily. 
+ This class encapsulates the snapshots for greater usability. Raw Snapshot data looks like: org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415 - properties is a dict containg all of the 7 cdx snapshot properties. """ def __init__(self, properties): + """ + Parameters + ---------- + self : waybackpy.snapshot.CdxSnapshot + The instance itself + + properties : dict + Properties is a dict containing all of the 7 cdx snapshot properties. + + """ self.urlkey = properties["urlkey"] self.timestamp = properties["timestamp"] self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S") @@ -25,6 +34,12 @@ class CdxSnapshot: ) def __str__(self): + """Returns the Cdx snapshot line. + + Output format: + org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415 + + """ return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format( urlkey=self.urlkey, timestamp=self.timestamp, diff --git a/waybackpy/utils.py b/waybackpy/utils.py index 6d9c83f..7c6958d 100644 --- a/waybackpy/utils.py +++ b/waybackpy/utils.py @@ -439,15 +439,17 @@ def _wayback_timestamp(**kwargs): 2 ) timestamp (20191214041711) 3 ) https://www.youtube.com, the original URL + The near method of Url class in wrapper.py takes year, month, day, hour and minute as arguments, their type is int. This method takes those integers and converts it to wayback machine timestamp and returns it. + zfill(2) adds 1 zero in front of single digit days, months hour etc. - Return format is string. + Return type is string. """ return "".join(