Change the str() representation of CdxSnapshot from the archive URL to the CDX line
This commit is contained in:
parent
f4f2e51315
commit
4693dbf9c1
@ -26,7 +26,15 @@ def test_CdxSnapshot():
|
|||||||
assert properties["statuscode"] == snapshot.statuscode
|
assert properties["statuscode"] == snapshot.statuscode
|
||||||
assert properties["digest"] == snapshot.digest
|
assert properties["digest"] == snapshot.digest
|
||||||
assert properties["length"] == snapshot.length
|
assert properties["length"] == snapshot.length
|
||||||
assert datetime.strptime(properties["timestamp"], "%Y%m%d%H%M%S") == snapshot.datetime_timestamp
|
assert (
|
||||||
archive_url = "https://web.archive.org/web/" + properties["timestamp"] + "/" + properties["original"]
|
datetime.strptime(properties["timestamp"], "%Y%m%d%H%M%S")
|
||||||
|
== snapshot.datetime_timestamp
|
||||||
|
)
|
||||||
|
archive_url = (
|
||||||
|
"https://web.archive.org/web/"
|
||||||
|
+ properties["timestamp"]
|
||||||
|
+ "/"
|
||||||
|
+ properties["original"]
|
||||||
|
)
|
||||||
assert archive_url == snapshot.archive_url
|
assert archive_url == snapshot.archive_url
|
||||||
assert archive_url == str(snapshot)
|
assert sample_input == str(snapshot)
|
||||||
|
@ -25,4 +25,12 @@ class CdxSnapshot:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.archive_url
|
return ("%s %s %s %s %s %s %s") % (
|
||||||
|
self.urlkey,
|
||||||
|
self.timestamp,
|
||||||
|
self.original,
|
||||||
|
self.mimetype,
|
||||||
|
self.statuscode,
|
||||||
|
self.digest,
|
||||||
|
self.length,
|
||||||
|
)
|
||||||
|
@ -298,9 +298,23 @@ class Url:
|
|||||||
url_list = []
|
url_list = []
|
||||||
|
|
||||||
if subdomain:
|
if subdomain:
|
||||||
cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="domain", collapses=["urlkey"])
|
cdx = Cdx(
|
||||||
|
_cleaned_url(self.url),
|
||||||
|
user_agent=self.user_agent,
|
||||||
|
start_timestamp=start_timestamp,
|
||||||
|
end_timestamp=end_timestamp,
|
||||||
|
match_type="domain",
|
||||||
|
collapses=["urlkey"],
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="host", collapses=["urlkey"])
|
cdx = Cdx(
|
||||||
|
_cleaned_url(self.url),
|
||||||
|
user_agent=self.user_agent,
|
||||||
|
start_timestamp=start_timestamp,
|
||||||
|
end_timestamp=end_timestamp,
|
||||||
|
match_type="host",
|
||||||
|
collapses=["urlkey"],
|
||||||
|
)
|
||||||
|
|
||||||
snapshots = cdx.snapshots()
|
snapshots = cdx.snapshots()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user