use cdx matchtype for domain and host
This commit is contained in:
parent
a6470b1036
commit
6c71dfbe41
@ -7,7 +7,9 @@ This module contains the set of Waybackpy's exceptions.
|
||||
|
||||
class WaybackError(Exception):
|
||||
"""
|
||||
Raised when Wayback Machine API Service is unreachable/down.
|
||||
Raised when Waybackpy cannot return what you asked for.
|
||||
1) Wayback Machine API Service is unreachable/down.
|
||||
2) You passed illegal arguments.
|
||||
"""
|
||||
|
||||
|
||||
|
@ -3,10 +3,12 @@ from datetime import datetime
|
||||
|
||||
class CdxSnapshot:
|
||||
"""
|
||||
This class helps to handle the Cdx Snapshots easily.
|
||||
This class helps to use the Cdx Snapshots easily.
|
||||
|
||||
What the raw data looks like:
|
||||
Raw Snapshot data looks like:
|
||||
org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
|
||||
|
||||
properties is a dict containing all 7 of the cdx snapshot properties.
|
||||
"""
|
||||
|
||||
def __init__(self, properties):
|
||||
|
@ -298,16 +298,10 @@ class Url:
|
||||
url_list = []
|
||||
|
||||
if subdomain:
|
||||
url = "*.%s/*" % _cleaned_url(self.url)
|
||||
cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="domain")
|
||||
else:
|
||||
url = "%s/*" % _cleaned_url(self.url)
|
||||
cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="host")
|
||||
|
||||
cdx = Cdx(
|
||||
url,
|
||||
user_agent=self.user_agent,
|
||||
start_timestamp=start_timestamp,
|
||||
end_timestamp=end_timestamp,
|
||||
)
|
||||
snapshots = cdx.snapshots()
|
||||
|
||||
url_list = []
|
||||
|
Loading…
Reference in New Issue
Block a user