Change the string representation of CdxSnapshot to the CDX line

This commit is contained in:
Akash Mahanty
2021-01-11 09:34:37 +05:30
parent f4f2e51315
commit 4693dbf9c1
3 changed files with 36 additions and 6 deletions

View File

@@ -25,4 +25,12 @@ class CdxSnapshot:
)
def __str__(self):
    """Return the snapshot formatted as a single space-separated CDX line.

    The fields are emitted in this order: urlkey, timestamp, original,
    mimetype, statuscode, digest, length. Each value is rendered with
    ``%s``, i.e. through ``str()``.
    """
    # The stale ``return self.archive_url`` used to run first and left this
    # CDX-line return unreachable; it is dropped so the method yields the
    # CDX line.
    return ("%s %s %s %s %s %s %s") % (
        self.urlkey,
        self.timestamp,
        self.original,
        self.mimetype,
        self.statuscode,
        self.digest,
        self.length,
    )

View File

@@ -298,9 +298,23 @@ class Url:
url_list = []
if subdomain:
cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="domain", collapses=["urlkey"])
cdx = Cdx(
_cleaned_url(self.url),
user_agent=self.user_agent,
start_timestamp=start_timestamp,
end_timestamp=end_timestamp,
match_type="domain",
collapses=["urlkey"],
)
else:
cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="host", collapses=["urlkey"])
cdx = Cdx(
_cleaned_url(self.url),
user_agent=self.user_agent,
start_timestamp=start_timestamp,
end_timestamp=end_timestamp,
match_type="host",
collapses=["urlkey"],
)
snapshots = cdx.snapshots()