more docstrings
This commit is contained in:
@@ -11,6 +11,7 @@ from .utils import (
|
||||
)
|
||||
|
||||
# TODO : Threading support for pagination API. It's designed for Threading.
|
||||
# TODO : Add get method here if type is valid HTML, SVG or other, but not - or warc. Test it.
|
||||
|
||||
|
||||
class Cdx:
|
||||
@@ -42,7 +43,22 @@ class Cdx:
|
||||
self.use_page = False
|
||||
|
||||
def cdx_api_manager(self, payload, headers, use_page=False):
|
||||
"""
|
||||
"""Act as a switch to choose between the normal API and the pagination API.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
self : waybackpy.cdx.Cdx
|
||||
The instance itself
|
||||
|
||||
payload : dict
|
||||
GET request parameters as name-value pairs
|
||||
|
||||
headers : dict
|
||||
The headers for making the GET request.
|
||||
|
||||
use_page : bool
|
||||
If True, use the pagination API; otherwise use the normal resume-key-based API.
|
||||
|
||||
We have two options to get the snapshots, we use this
|
||||
method to make a selection between pagination API and
|
||||
the normal one with Resumption Key, sequential querying
|
||||
@@ -141,7 +157,7 @@ class Cdx:
|
||||
def snapshots(self):
|
||||
"""
|
||||
This function yields snapshots encapsulated
|
||||
in CdxSnapshot for more usability.
|
||||
in CdxSnapshot for increased usability.
|
||||
|
||||
All the get request values are set if the conditions match
|
||||
|
||||
@@ -188,10 +204,9 @@ class Cdx:
|
||||
|
||||
prop_values = snapshot.split(" ")
|
||||
|
||||
# Making sure that we get the same number of
|
||||
# property values as the number of properties
|
||||
prop_values_len = len(prop_values)
|
||||
properties_len = len(properties)
|
||||
|
||||
if prop_values_len != properties_len:
|
||||
raise WaybackError(
|
||||
"Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(
|
||||
|
@@ -3,15 +3,24 @@ from datetime import datetime
|
||||
|
||||
class CdxSnapshot:
|
||||
"""
|
||||
This class helps to use the Cdx Snapshots easily.
|
||||
This class encapsulates the snapshots for greater usability.
|
||||
|
||||
Raw Snapshot data looks like:
|
||||
org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
|
||||
|
||||
properties is a dict containing all of the 7 cdx snapshot properties.
|
||||
"""
|
||||
|
||||
def __init__(self, properties):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
self : waybackpy.snapshot.CdxSnapshot
|
||||
The instance itself
|
||||
|
||||
properties : dict
|
||||
Properties is a dict containing all of the 7 cdx snapshot properties.
|
||||
|
||||
"""
|
||||
self.urlkey = properties["urlkey"]
|
||||
self.timestamp = properties["timestamp"]
|
||||
self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
|
||||
@@ -25,6 +34,12 @@ class CdxSnapshot:
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
"""Returns the Cdx snapshot line.
|
||||
|
||||
Output format:
|
||||
org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
|
||||
|
||||
"""
|
||||
return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
|
||||
urlkey=self.urlkey,
|
||||
timestamp=self.timestamp,
|
||||
|
@@ -439,15 +439,17 @@ def _wayback_timestamp(**kwargs):
|
||||
2 ) timestamp (20191214041711)
|
||||
3 ) https://www.youtube.com, the original URL
|
||||
|
||||
|
||||
The near method of Url class in wrapper.py takes year, month, day, hour
|
||||
and minute as arguments, their type is int.
|
||||
|
||||
This method takes those integers and converts them to a
|
||||
wayback machine timestamp and returns it.
|
||||
|
||||
|
||||
zfill(2) adds a leading zero to single-digit days, months, hours etc.
|
||||
|
||||
Return format is string.
|
||||
Return type is string.
|
||||
"""
|
||||
|
||||
return "".join(
|
||||
|
Reference in New Issue
Block a user