full cdx api support

This commit is contained in:
Akash Mahanty
2021-01-10 02:23:53 +05:30
parent a2550f17d7
commit a03813315f
10 changed files with 915 additions and 403 deletions

26
waybackpy/snapshot.py Normal file
View File

@@ -0,0 +1,26 @@
from datetime import datetime
class CdxSnapshot:
    """
    Convenience wrapper around one CDX snapshot record.

    A raw record looks like this:
    org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415

    The seven constructor arguments appear to correspond, in order, to the
    seven fields of such a line. Every argument is kept unchanged on the
    instance under the same name, and two derived attributes are added:

    datetime_timestamp -- ``timestamp`` parsed into a naive ``datetime``
    archive_url        -- the web.archive.org URL built from ``timestamp``
                          and ``original``
    """

    def __init__(
        self, urlkey, timestamp, original, mimetype, statuscode, digest, length
    ):
        """
        Store the record fields and compute the derived attributes.

        ``timestamp`` must be a string in ``YYYYMMDDhhmmss`` form;
        ``datetime.strptime`` raises ``ValueError`` when it does not match
        that format. ``timestamp`` and ``original`` must both be strings,
        otherwise building the archive URL raises ``TypeError``.
        """
        # Work out the derived values up front; the attribute assignments
        # below then stay a plain field-by-field copy.
        parsed_timestamp = datetime.strptime(timestamp, "%Y%m%d%H%M%S")
        url_pieces = ("https://web.archive.org/web/", timestamp, "/", original)

        self.urlkey = urlkey
        self.timestamp = timestamp
        self.datetime_timestamp = parsed_timestamp
        self.original = original
        self.mimetype = mimetype
        self.statuscode = statuscode
        self.digest = digest
        self.length = length
        self.archive_url = "".join(url_pieces)

    def __str__(self):
        """Return the archive URL of this snapshot."""
        return self.archive_url