use cdx matchtype for domain and host

This commit is contained in:
Akash Mahanty 2021-01-10 11:10:49 +05:30
parent a6470b1036
commit 6c71dfbe41
3 changed files with 9 additions and 11 deletions

View File

@ -7,7 +7,9 @@ This module contains the set of Waybackpy's exceptions.
class WaybackError(Exception): class WaybackError(Exception):
""" """
Raised when Wayback Machine API Service is unreachable/down. Raised when Waybackpy cannot return what you asked for.
1) Wayback Machine API Service is unreachable/down.
2) You passed illegal arguments.
""" """

View File

@ -3,10 +3,12 @@ from datetime import datetime
class CdxSnapshot: class CdxSnapshot:
""" """
This class helps to handle the Cdx Snapshots easily. This class helps to use the Cdx Snapshots easily.
What the raw data looks like: Raw Snapshot data looks like:
org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415 org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
properties is a dict containing all of the 7 cdx snapshot properties.
""" """
def __init__(self, properties): def __init__(self, properties):

View File

@ -298,16 +298,10 @@ class Url:
url_list = [] url_list = []
if subdomain: if subdomain:
url = "*.%s/*" % _cleaned_url(self.url) cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="domain")
else: else:
url = "%s/*" % _cleaned_url(self.url) cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="host")
cdx = Cdx(
url,
user_agent=self.user_agent,
start_timestamp=start_timestamp,
end_timestamp=end_timestamp,
)
snapshots = cdx.snapshots() snapshots = cdx.snapshots()
url_list = [] url_list = []