more docstrings
This commit is contained in:
@@ -11,6 +11,7 @@ from .utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
# TODO : Threading support for pagination API. It's designed for Threading.
|
# TODO : Threading support for pagination API. It's designed for Threading.
|
||||||
|
# TODO : Add get method here if type is Valid HTML, SVG other but not - or warc. Test it.
|
||||||
|
|
||||||
|
|
||||||
class Cdx:
|
class Cdx:
|
||||||
@@ -42,7 +43,22 @@ class Cdx:
|
|||||||
self.use_page = False
|
self.use_page = False
|
||||||
|
|
||||||
def cdx_api_manager(self, payload, headers, use_page=False):
|
def cdx_api_manager(self, payload, headers, use_page=False):
|
||||||
"""
|
"""Act as button, we can choose between the normal API and pagination API.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
self : waybackpy.cdx.Cdx
|
||||||
|
The instance itself
|
||||||
|
|
||||||
|
payload : dict
|
||||||
|
Get request parameters name value pairs
|
||||||
|
|
||||||
|
headers : dict
|
||||||
|
The headers for making the GET request.
|
||||||
|
|
||||||
|
use_page : bool
|
||||||
|
If True use pagination API else use normal resume key based API.
|
||||||
|
|
||||||
We have two options to get the snapshots, we use this
|
We have two options to get the snapshots, we use this
|
||||||
method to make a selection between pagination API and
|
method to make a selection between pagination API and
|
||||||
the normal one with Resumption Key, sequential querying
|
the normal one with Resumption Key, sequential querying
|
||||||
@@ -141,7 +157,7 @@ class Cdx:
|
|||||||
def snapshots(self):
|
def snapshots(self):
|
||||||
"""
|
"""
|
||||||
This function yields snapshots encapsulated
|
This function yields snapshots encapsulated
|
||||||
in CdxSnapshot for more usability.
|
in CdxSnapshot for increased usability.
|
||||||
|
|
||||||
All the get request values are set if the conditions match
|
All the get request values are set if the conditions match
|
||||||
|
|
||||||
@@ -188,10 +204,9 @@ class Cdx:
|
|||||||
|
|
||||||
prop_values = snapshot.split(" ")
|
prop_values = snapshot.split(" ")
|
||||||
|
|
||||||
# Making sure that we get the same number of
|
|
||||||
# property values as the number of properties
|
|
||||||
prop_values_len = len(prop_values)
|
prop_values_len = len(prop_values)
|
||||||
properties_len = len(properties)
|
properties_len = len(properties)
|
||||||
|
|
||||||
if prop_values_len != properties_len:
|
if prop_values_len != properties_len:
|
||||||
raise WaybackError(
|
raise WaybackError(
|
||||||
"Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(
|
"Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(
|
||||||
|
@@ -3,15 +3,24 @@ from datetime import datetime
|
|||||||
|
|
||||||
class CdxSnapshot:
|
class CdxSnapshot:
|
||||||
"""
|
"""
|
||||||
This class helps to use the Cdx Snapshots easily.
|
This class encapsulates the snapshots for greater usability.
|
||||||
|
|
||||||
Raw Snapshot data looks like:
|
Raw Snapshot data looks like:
|
||||||
org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
|
org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
|
||||||
|
|
||||||
properties is a dict containing all of the 7 cdx snapshot properties.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, properties):
|
def __init__(self, properties):
|
||||||
|
"""
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
self : waybackpy.snapshot.CdxSnapshot
|
||||||
|
The instance itself
|
||||||
|
|
||||||
|
properties : dict
|
||||||
|
Properties is a dict containing all of the 7 cdx snapshot properties.
|
||||||
|
|
||||||
|
"""
|
||||||
self.urlkey = properties["urlkey"]
|
self.urlkey = properties["urlkey"]
|
||||||
self.timestamp = properties["timestamp"]
|
self.timestamp = properties["timestamp"]
|
||||||
self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
|
self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
|
||||||
@@ -25,6 +34,12 @@ class CdxSnapshot:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
"""Returns the Cdx snapshot line.
|
||||||
|
|
||||||
|
Output format:
|
||||||
|
org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
|
||||||
|
|
||||||
|
"""
|
||||||
return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
|
return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
|
||||||
urlkey=self.urlkey,
|
urlkey=self.urlkey,
|
||||||
timestamp=self.timestamp,
|
timestamp=self.timestamp,
|
||||||
|
@@ -439,15 +439,17 @@ def _wayback_timestamp(**kwargs):
|
|||||||
2 ) timestamp (20191214041711)
|
2 ) timestamp (20191214041711)
|
||||||
3 ) https://www.youtube.com, the original URL
|
3 ) https://www.youtube.com, the original URL
|
||||||
|
|
||||||
|
|
||||||
The near method of Url class in wrapper.py takes year, month, day, hour
|
The near method of Url class in wrapper.py takes year, month, day, hour
|
||||||
and minute as arguments, their type is int.
|
and minute as arguments, their type is int.
|
||||||
|
|
||||||
This method takes those integers and converts it to
|
This method takes those integers and converts it to
|
||||||
wayback machine timestamp and returns it.
|
wayback machine timestamp and returns it.
|
||||||
|
|
||||||
|
|
||||||
zfill(2) adds 1 zero in front of single digit days, months, hours etc.
|
zfill(2) adds 1 zero in front of single digit days, months, hours etc.
|
||||||
|
|
||||||
Return format is string.
|
Return type is string.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return "".join(
|
return "".join(
|
||||||
|
Reference in New Issue
Block a user