Before and After methods (#175)
* Added before and after functions * add tests * formatting
This commit is contained in:
parent
0202efd39d
commit
3b3e78d901
@ -176,3 +176,39 @@ def test_near() -> None:
|
|||||||
filters=["statuscode:200"],
|
filters=["statuscode:200"],
|
||||||
)
|
)
|
||||||
cdx.near(unix_timestamp=1286705410)
|
cdx.near(unix_timestamp=1286705410)
|
||||||
|
|
||||||
|
|
||||||
|
def test_before() -> None:
    """Check that ``before`` returns the expected archived Google snapshot.

    NOTE(review): this appears to query the live CDX server, so it needs
    network access and depends on the archive contents staying the same --
    confirm before relying on it in offline CI.
    """
    agent = (
        "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
    )
    api = WaybackMachineCDXServerAPI(
        url="http://www.google.com/",
        user_agent=agent,
        filters=["statuscode:200"],
    )

    snapshot = api.before(wayback_machine_timestamp=20160731235949)

    # The expected capture is the one taken at 2016-07-31 23:33:47.
    assert "20160731233347" in snapshot.timestamp
    assert "google" in snapshot.urlkey
    assert "google.com" in snapshot.original
    assert "google.com" in snapshot.archive_url
|
||||||
|
|
||||||
|
|
||||||
|
def test_after() -> None:
    """Check that ``after`` returns the expected archived Google snapshot.

    NOTE(review): this appears to query the live CDX server, so it needs
    network access and depends on the archive contents staying the same --
    confirm before relying on it in offline CI.
    """
    agent = (
        "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
    )
    api = WaybackMachineCDXServerAPI(
        url="http://www.google.com/",
        user_agent=agent,
        filters=["statuscode:200"],
    )

    snapshot = api.after(wayback_machine_timestamp=20160731235949)

    # The expected capture is the one taken at 2016-08-01 00:09:17; the
    # actual timestamp is echoed on failure to ease debugging.
    assert "20160801000917" in snapshot.timestamp, snapshot.timestamp
    assert "google" in snapshot.urlkey
    assert "google.com" in snapshot.original
    assert "google.com" in snapshot.archive_url
|
||||||
|
@ -191,6 +191,88 @@ class WaybackMachineCDXServerAPI:
|
|||||||
|
|
||||||
payload["url"] = self.url
|
payload["url"] = self.url
|
||||||
|
|
||||||
|
def before(
    self,
    year: Optional[int] = None,
    month: Optional[int] = None,
    day: Optional[int] = None,
    hour: Optional[int] = None,
    minute: Optional[int] = None,
    unix_timestamp: Optional[int] = None,
    wayback_machine_timestamp: Optional[Union[int, str]] = None,
) -> CDXSnapshot:
    """
    Gets the nearest archive before the given datetime.

    The reference moment is taken from the first of these that is supplied:
    ``unix_timestamp``, then ``wayback_machine_timestamp``, then the
    ``year``/``month``/``day``/``hour``/``minute`` fields, where every field
    left as ``None`` is filled in from the current UTC time.

    Side effect: ``self.closest``, ``self.sort`` and ``self.limit`` are
    overwritten before querying and keep their new values afterwards.

    Returns the first snapshot yielded by ``self.snapshots()`` whose
    ``timestamp`` compares lower than the reference timestamp.

    Raises ``NoCDXRecordFound`` when no yielded snapshot is earlier than the
    reference timestamp.
    """
    # Compare against None rather than truthiness: 0 (the Unix epoch) is a
    # valid timestamp and must not be treated as "not supplied".
    if unix_timestamp is not None:
        timestamp = unix_timestamp_to_wayback_timestamp(unix_timestamp)
    elif wayback_machine_timestamp:
        timestamp = str(wayback_machine_timestamp)
    else:
        now = datetime.utcnow().timetuple()
        timestamp = wayback_timestamp(
            year=now.tm_year if year is None else year,
            month=now.tm_mon if month is None else month,
            day=now.tm_mday if day is None else day,
            hour=now.tm_hour if hour is None else hour,
            minute=now.tm_min if minute is None else minute,
        )

    # Presumably asks the CDX server for records ordered by proximity to
    # ``timestamp`` so the first earlier hit is the nearest one -- confirm
    # against the CDX server API.
    self.closest = timestamp
    self.sort = "closest"
    self.limit = 25000

    for snapshot in self.snapshots():
        # NOTE(review): this relies on both sides being comparable
        # timestamp strings (digit strings order chronologically when
        # they have the same width) -- confirm snapshot.timestamp is str.
        if snapshot.timestamp < timestamp:
            return snapshot

    # If a snapshot isn't returned, then none were found.
    raise NoCDXRecordFound(
        "No records were found before the given date for the query. "
        "Either there are no archives before the given date,"
        " the URL may not have any archives, or the URL may have been"
        " recently archived and is still not available on the CDX server."
    )
|
||||||
|
|
||||||
|
def after(
    self,
    year: Optional[int] = None,
    month: Optional[int] = None,
    day: Optional[int] = None,
    hour: Optional[int] = None,
    minute: Optional[int] = None,
    unix_timestamp: Optional[int] = None,
    wayback_machine_timestamp: Optional[Union[int, str]] = None,
) -> CDXSnapshot:
    """
    Gets the nearest archive after the given datetime.

    The reference moment is taken from the first of these that is supplied:
    ``unix_timestamp``, then ``wayback_machine_timestamp``, then the
    ``year``/``month``/``day``/``hour``/``minute`` fields, where every field
    left as ``None`` is filled in from the current UTC time.

    Side effect: ``self.closest``, ``self.sort`` and ``self.limit`` are
    overwritten before querying and keep their new values afterwards.

    Returns the first snapshot yielded by ``self.snapshots()`` whose
    ``timestamp`` compares greater than the reference timestamp.

    Raises ``NoCDXRecordFound`` when no yielded snapshot is later than the
    reference timestamp.
    """
    # Compare against None rather than truthiness: 0 (the Unix epoch) is a
    # valid timestamp and must not be treated as "not supplied".
    if unix_timestamp is not None:
        timestamp = unix_timestamp_to_wayback_timestamp(unix_timestamp)
    elif wayback_machine_timestamp:
        timestamp = str(wayback_machine_timestamp)
    else:
        now = datetime.utcnow().timetuple()
        timestamp = wayback_timestamp(
            year=now.tm_year if year is None else year,
            month=now.tm_mon if month is None else month,
            day=now.tm_mday if day is None else day,
            hour=now.tm_hour if hour is None else hour,
            minute=now.tm_min if minute is None else minute,
        )

    # Presumably asks the CDX server for records ordered by proximity to
    # ``timestamp`` so the first later hit is the nearest one -- confirm
    # against the CDX server API.
    self.closest = timestamp
    self.sort = "closest"
    self.limit = 25000

    for snapshot in self.snapshots():
        # NOTE(review): this relies on both sides being comparable
        # timestamp strings (digit strings order chronologically when
        # they have the same width) -- confirm snapshot.timestamp is str.
        if snapshot.timestamp > timestamp:
            return snapshot

    # If a snapshot isn't returned, then none were found.
    raise NoCDXRecordFound(
        "No records were found after the given date for the query. "
        "Either there are no archives after the given date,"
        " the URL may not have any archives, or the URL may have been"
        " recently archived and is still not available on the CDX server."
    )
|
||||||
|
|
||||||
def near(
|
def near(
|
||||||
self,
|
self,
|
||||||
year: Optional[int] = None,
|
year: Optional[int] = None,
|
||||||
|
Loading…
Reference in New Issue
Block a user