now passing dict to CdxSnapshot
This commit is contained in:
@@ -7,6 +7,7 @@ from .utils import (
|
||||
_check_filters,
|
||||
_check_collapses,
|
||||
_check_match_type,
|
||||
_add_payload,
|
||||
)
|
||||
|
||||
# TODO: Add threading support for the pagination API, which is designed to be used with threads.
|
||||
@@ -147,27 +148,7 @@ class Cdx:
|
||||
payload = {}
|
||||
headers = {"User-Agent": self.user_agent}
|
||||
|
||||
if self.start_timestamp:
|
||||
payload["from"] = self.start_timestamp
|
||||
|
||||
if self.end_timestamp:
|
||||
payload["to"] = self.end_timestamp
|
||||
|
||||
if self.gzip != True:
|
||||
payload["gzip"] = "false"
|
||||
|
||||
if self.match_type:
|
||||
payload["matchType"] = self.match_type
|
||||
|
||||
if self.filters and len(self.filters) > 0:
|
||||
for i, f in enumerate(self.filters):
|
||||
payload["filter" + str(i)] = f
|
||||
|
||||
if self.collapses and len(self.collapses) > 0:
|
||||
for i, f in enumerate(self.collapses):
|
||||
payload["collapse" + str(i)] = f
|
||||
|
||||
payload["url"] = self.url
|
||||
_add_payload(self, payload)
|
||||
|
||||
if not self.start_timestamp or self.end_timestamp:
|
||||
self.use_page = True
|
||||
@@ -221,12 +202,4 @@ class Cdx:
|
||||
properties["length"],
|
||||
) = prop_values
|
||||
|
||||
yield CdxSnapshot(
|
||||
properties["urlkey"],
|
||||
properties["timestamp"],
|
||||
properties["original"],
|
||||
properties["mimetype"],
|
||||
properties["statuscode"],
|
||||
properties["digest"],
|
||||
properties["length"],
|
||||
)
|
||||
yield CdxSnapshot(properties)
|
||||
|
@@ -9,18 +9,18 @@ class CdxSnapshot:
|
||||
org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, urlkey, timestamp, original, mimetype, statuscode, digest, length
|
||||
):
|
||||
self.urlkey = urlkey
|
||||
self.timestamp = timestamp
|
||||
self.datetime_timestamp = datetime.strptime(timestamp, "%Y%m%d%H%M%S")
|
||||
self.original = original
|
||||
self.mimetype = mimetype
|
||||
self.statuscode = statuscode
|
||||
self.digest = digest
|
||||
self.length = length
|
||||
self.archive_url = "https://web.archive.org/web/" + timestamp + "/" + original
|
||||
def __init__(self, properties):
|
||||
self.urlkey = properties["urlkey"]
|
||||
self.timestamp = properties["timestamp"]
|
||||
self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
|
||||
self.original = properties["original"]
|
||||
self.mimetype = properties["mimetype"]
|
||||
self.statuscode = properties["statuscode"]
|
||||
self.digest = properties["digest"]
|
||||
self.length = properties["length"]
|
||||
self.archive_url = (
|
||||
"https://web.archive.org/web/" + self.timestamp + "/" + self.original
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
return self.archive_url
|
||||
|
@@ -11,6 +11,30 @@ quote = requests.utils.quote
|
||||
default_user_agent = "waybackpy python package - https://github.com/akamhy/waybackpy"
|
||||
|
||||
|
||||
def _add_payload(self, payload):
    """
    Copy the query options held on ``self`` into ``payload``.

    ``payload`` is modified in place and nothing is returned. An option is
    only written when its attribute is truthy (``gzip`` is the exception, see
    below); ``url`` is always written.

    self    : object exposing ``start_timestamp``, ``end_timestamp``,
              ``gzip``, ``match_type``, ``filters``, ``collapses`` and ``url``
    payload : dict that receives the request parameters
    """
    if self.start_timestamp:
        payload["from"] = self.start_timestamp

    if self.end_timestamp:
        payload["to"] = self.end_timestamp

    # Kept as an equality test against True rather than a truthiness test, so
    # a value such as "yes" still results in gzip=false exactly as before.
    if self.gzip != True:  # noqa: E712
        payload["gzip"] = "false"

    if self.match_type:
        payload["matchType"] = self.match_type

    # Entries are stored under numbered keys ("filter0", "filter1", ...), one
    # key per entry. ``or ()`` covers both None and an empty sequence, which
    # replaces the former ``x and len(x) > 0`` guards.
    for index, cdx_filter in enumerate(self.filters or ()):
        payload["filter" + str(index)] = cdx_filter

    for index, collapse in enumerate(self.collapses or ()):
        payload["collapse" + str(index)] = collapse

    payload["url"] = self.url
|
||||
|
||||
|
||||
def _ts(timestamp, data):
|
||||
"""
|
||||
Get timestamp of last fetched archive.
|
||||
@@ -96,18 +120,12 @@ def _check_filters(filters):
|
||||
key = match.group(1)
|
||||
val = match.group(2)
|
||||
|
||||
|
||||
except Exception:
|
||||
e = "Filter '%s' not following the cdx filter syntax." % f
|
||||
raise WaybackError(e)
|
||||
|
||||
|
||||
def _cleaned_url(url):
    """
    Return ``url`` as a string that is safe to embed in a request URL.

    The value is converted with ``str()``, leading and trailing whitespace
    (including end-of-line characters) is stripped, and every remaining
    space is replaced with "%20".
    """
    # The leftover debug ``print(1)`` that preceded the docstring is removed:
    # it wrote to stdout on every call and stopped the string above from
    # being picked up as the function's docstring.
    return str(url).strip().replace(" ", "%20")
|
||||
|
||||
|
||||
@@ -258,7 +276,6 @@ def _get_response(
|
||||
)
|
||||
s.mount("https://", HTTPAdapter(max_retries=retries))
|
||||
url = _full_url(endpoint, params)
|
||||
print(url)
|
||||
try:
|
||||
if not return_full_url:
|
||||
return s.get(url, headers=headers)
|
||||
|
Reference in New Issue
Block a user