Change the str() representation of CdxSnapshot to the CDX line

This commit is contained in:
Akash Mahanty 2021-01-11 09:34:37 +05:30
parent f4f2e51315
commit 4693dbf9c1
3 changed files with 36 additions and 6 deletions

View File

@@ -26,7 +26,15 @@ def test_CdxSnapshot():
assert properties["statuscode"] == snapshot.statuscode
assert properties["digest"] == snapshot.digest
assert properties["length"] == snapshot.length
assert datetime.strptime(properties["timestamp"], "%Y%m%d%H%M%S") == snapshot.datetime_timestamp
archive_url = "https://web.archive.org/web/" + properties["timestamp"] + "/" + properties["original"]
assert (
datetime.strptime(properties["timestamp"], "%Y%m%d%H%M%S")
== snapshot.datetime_timestamp
)
archive_url = (
"https://web.archive.org/web/"
+ properties["timestamp"]
+ "/"
+ properties["original"]
)
assert archive_url == snapshot.archive_url
assert archive_url == str(snapshot)
assert sample_input == str(snapshot)

View File

@@ -25,4 +25,12 @@ class CdxSnapshot:
)
def __str__(self):
return self.archive_url
return ("%s %s %s %s %s %s %s") % (
self.urlkey,
self.timestamp,
self.original,
self.mimetype,
self.statuscode,
self.digest,
self.length,
)

View File

@@ -298,9 +298,23 @@ class Url:
url_list = []
if subdomain:
cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="domain", collapses=["urlkey"])
cdx = Cdx(
_cleaned_url(self.url),
user_agent=self.user_agent,
start_timestamp=start_timestamp,
end_timestamp=end_timestamp,
match_type="domain",
collapses=["urlkey"],
)
else:
cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="host", collapses=["urlkey"])
cdx = Cdx(
_cleaned_url(self.url),
user_agent=self.user_agent,
start_timestamp=start_timestamp,
end_timestamp=end_timestamp,
match_type="host",
collapses=["urlkey"],
)
snapshots = cdx.snapshots()