From 6c71dfbe41ce8791ebd352817e6cfc0833f38140 Mon Sep 17 00:00:00 2001 From: Akash Mahanty Date: Sun, 10 Jan 2021 11:10:49 +0530 Subject: [PATCH] use cdx matchtype for domain and host --- waybackpy/exceptions.py | 4 +++- waybackpy/snapshot.py | 6 ++++-- waybackpy/wrapper.py | 10 ++-------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/waybackpy/exceptions.py b/waybackpy/exceptions.py index f220d15..71e62ec 100644 --- a/waybackpy/exceptions.py +++ b/waybackpy/exceptions.py @@ -7,7 +7,9 @@ This module contains the set of Waybackpy's exceptions. class WaybackError(Exception): """ - Raised when Wayback Machine API Service is unreachable/down. + Raised when Waybackpy cannot return what you asked for. + 1) Wayback Machine API Service is unreachable/down. + 2) You passed illegal arguments. """ diff --git a/waybackpy/snapshot.py b/waybackpy/snapshot.py index 7fa2653..e6546f3 100644 --- a/waybackpy/snapshot.py +++ b/waybackpy/snapshot.py @@ -3,10 +3,12 @@ from datetime import datetime class CdxSnapshot: """ - This class helps to handle the Cdx Snapshots easily. + This class helps to use the Cdx Snapshots easily. - What the raw data looks like: + Raw Snapshot data looks like: org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415 + + properties is a dict containing all of the 7 cdx snapshot properties. 
""" def __init__(self, properties): diff --git a/waybackpy/wrapper.py b/waybackpy/wrapper.py index e12e34c..0922a8c 100644 --- a/waybackpy/wrapper.py +++ b/waybackpy/wrapper.py @@ -298,16 +298,10 @@ class Url: url_list = [] if subdomain: - url = "*.%s/*" % _cleaned_url(self.url) + cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="domain") else: - url = "%s/*" % _cleaned_url(self.url) + cdx = Cdx(_cleaned_url(self.url), user_agent=self.user_agent, start_timestamp=start_timestamp, end_timestamp=end_timestamp, match_type="host") - cdx = Cdx( - url, - user_agent=self.user_agent, - start_timestamp=start_timestamp, - end_timestamp=end_timestamp, - ) snapshots = cdx.snapshots() url_list = []