Before and After methods (#175)
* Added before and after functions * add tests * formatting
This commit is contained in:
parent
0202efd39d
commit
3b3e78d901
@ -176,3 +176,39 @@ def test_near() -> None:
|
||||
filters=["statuscode:200"],
|
||||
)
|
||||
cdx.near(unix_timestamp=1286705410)
|
||||
|
||||
|
||||
def test_before() -> None:
|
||||
user_agent = (
|
||||
"Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) "
|
||||
"AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
|
||||
)
|
||||
|
||||
cdx = WaybackMachineCDXServerAPI(
|
||||
url="http://www.google.com/",
|
||||
user_agent=user_agent,
|
||||
filters=["statuscode:200"],
|
||||
)
|
||||
before = cdx.before(wayback_machine_timestamp=20160731235949)
|
||||
assert "20160731233347" in before.timestamp
|
||||
assert "google" in before.urlkey
|
||||
assert before.original.find("google.com") != -1
|
||||
assert before.archive_url.find("google.com") != -1
|
||||
|
||||
|
||||
def test_after() -> None:
|
||||
user_agent = (
|
||||
"Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) "
|
||||
"AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
|
||||
)
|
||||
|
||||
cdx = WaybackMachineCDXServerAPI(
|
||||
url="http://www.google.com/",
|
||||
user_agent=user_agent,
|
||||
filters=["statuscode:200"],
|
||||
)
|
||||
after = cdx.after(wayback_machine_timestamp=20160731235949)
|
||||
assert "20160801000917" in after.timestamp, after.timestamp
|
||||
assert "google" in after.urlkey
|
||||
assert after.original.find("google.com") != -1
|
||||
assert after.archive_url.find("google.com") != -1
|
||||
|
@ -191,6 +191,88 @@ class WaybackMachineCDXServerAPI:
|
||||
|
||||
payload["url"] = self.url
|
||||
|
||||
def before(
|
||||
self,
|
||||
year: Optional[int] = None,
|
||||
month: Optional[int] = None,
|
||||
day: Optional[int] = None,
|
||||
hour: Optional[int] = None,
|
||||
minute: Optional[int] = None,
|
||||
unix_timestamp: Optional[int] = None,
|
||||
wayback_machine_timestamp: Optional[Union[int, str]] = None,
|
||||
) -> CDXSnapshot:
|
||||
"""
|
||||
Gets the nearest archive before the given datetime.
|
||||
"""
|
||||
if unix_timestamp:
|
||||
timestamp = unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||
elif wayback_machine_timestamp:
|
||||
timestamp = str(wayback_machine_timestamp)
|
||||
else:
|
||||
now = datetime.utcnow().timetuple()
|
||||
timestamp = wayback_timestamp(
|
||||
year=now.tm_year if year is None else year,
|
||||
month=now.tm_mon if month is None else month,
|
||||
day=now.tm_mday if day is None else day,
|
||||
hour=now.tm_hour if hour is None else hour,
|
||||
minute=now.tm_min if minute is None else minute,
|
||||
)
|
||||
self.closest = timestamp
|
||||
self.sort = "closest"
|
||||
self.limit = 25000
|
||||
for snapshot in self.snapshots():
|
||||
if snapshot.timestamp < timestamp:
|
||||
return snapshot
|
||||
|
||||
# If a snapshot isn't returned, then none were found.
|
||||
raise NoCDXRecordFound(
|
||||
"No records were found before the given date for the query."
|
||||
+ "Either there are no archives before the given date,"
|
||||
+ " the URL may not have any archived, or the URL may have been"
|
||||
+ " recently archived and is still not available on the CDX server."
|
||||
)
|
||||
|
||||
def after(
|
||||
self,
|
||||
year: Optional[int] = None,
|
||||
month: Optional[int] = None,
|
||||
day: Optional[int] = None,
|
||||
hour: Optional[int] = None,
|
||||
minute: Optional[int] = None,
|
||||
unix_timestamp: Optional[int] = None,
|
||||
wayback_machine_timestamp: Optional[Union[int, str]] = None,
|
||||
) -> CDXSnapshot:
|
||||
"""
|
||||
Gets the nearest archive after the given datetime.
|
||||
"""
|
||||
if unix_timestamp:
|
||||
timestamp = unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||
elif wayback_machine_timestamp:
|
||||
timestamp = str(wayback_machine_timestamp)
|
||||
else:
|
||||
now = datetime.utcnow().timetuple()
|
||||
timestamp = wayback_timestamp(
|
||||
year=now.tm_year if year is None else year,
|
||||
month=now.tm_mon if month is None else month,
|
||||
day=now.tm_mday if day is None else day,
|
||||
hour=now.tm_hour if hour is None else hour,
|
||||
minute=now.tm_min if minute is None else minute,
|
||||
)
|
||||
self.closest = timestamp
|
||||
self.sort = "closest"
|
||||
self.limit = 25000
|
||||
for snapshot in self.snapshots():
|
||||
if snapshot.timestamp > timestamp:
|
||||
return snapshot
|
||||
|
||||
# If a snapshot isn't returned, then none were found.
|
||||
raise NoCDXRecordFound(
|
||||
"No records were found after the given date for the query."
|
||||
+ "Either there are no archives after the given date,"
|
||||
+ " the URL may not have any archives, or the URL may have been"
|
||||
+ " recently archived and is still not available on the CDX server."
|
||||
)
|
||||
|
||||
def near(
|
||||
self,
|
||||
year: Optional[int] = None,
|
||||
|
Loading…
Reference in New Issue
Block a user