now passing dict to CdxSnapshot

2021-01-10 10:40:32 +05:30
parent 04cda4558e
commit a6470b1036
5 changed files with 59 additions and 71 deletions
--- a/waybackpy/cdx.py
+++ b/waybackpy/cdx.py
@@ -7,6 +7,7 @@ from .utils import (
    _check_filters,
    _check_collapses,
    _check_match_type,
+    _add_payload,
 )

 # TODO : Threading support for pagination API. It's designed for Threading.
@@ -147,27 +148,7 @@ class Cdx:
        payload = {}
        headers = {"User-Agent": self.user_agent}

-        if self.start_timestamp:
-            payload["from"] = self.start_timestamp
-
-        if self.end_timestamp:
-            payload["to"] = self.end_timestamp
-
-        if self.gzip != True:
-            payload["gzip"] = "false"
-
-        if self.match_type:
-            payload["matchType"] = self.match_type
-
-        if self.filters and len(self.filters) > 0:
-            for i, f in enumerate(self.filters):
-                payload["filter" + str(i)] = f
-
-        if self.collapses and len(self.collapses) > 0:
-            for i, f in enumerate(self.collapses):
-                payload["collapse" + str(i)] = f
-
-        payload["url"] = self.url
+        _add_payload(self, payload)

        if not self.start_timestamp or self.end_timestamp:
            self.use_page = True
@@ -221,12 +202,4 @@ class Cdx:
                    properties["length"],
                ) = prop_values

-                yield CdxSnapshot(
-                    properties["urlkey"],
-                    properties["timestamp"],
-                    properties["original"],
-                    properties["mimetype"],
-                    properties["statuscode"],
-                    properties["digest"],
-                    properties["length"],
-                )
+                yield CdxSnapshot(properties)
--- a/waybackpy/snapshot.py
+++ b/waybackpy/snapshot.py
@@ -9,18 +9,18 @@ class CdxSnapshot:
    org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
    """

-    def __init__(
-        self, urlkey, timestamp, original, mimetype, statuscode, digest, length
-    ):
-        self.urlkey = urlkey
-        self.timestamp = timestamp
-        self.datetime_timestamp = datetime.strptime(timestamp, "%Y%m%d%H%M%S")
-        self.original = original
-        self.mimetype = mimetype
-        self.statuscode = statuscode
-        self.digest = digest
-        self.length = length
-        self.archive_url = "https://web.archive.org/web/" + timestamp + "/" + original
+    def __init__(self, properties):
+        self.urlkey = properties["urlkey"]
+        self.timestamp = properties["timestamp"]
+        self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
+        self.original = properties["original"]
+        self.mimetype = properties["mimetype"]
+        self.statuscode = properties["statuscode"]
+        self.digest = properties["digest"]
+        self.length = properties["length"]
+        self.archive_url = (
+            "https://web.archive.org/web/" + self.timestamp + "/" + self.original
+        )

    def __str__(self):
        return self.archive_url
--- a/waybackpy/utils.py
+++ b/waybackpy/utils.py
@@ -11,6 +11,30 @@ quote = requests.utils.quote
 default_user_agent = "waybackpy python package - https://github.com/akamhy/waybackpy"


+def _add_payload(self, payload):
+    if self.start_timestamp:
+        payload["from"] = self.start_timestamp
+
+    if self.end_timestamp:
+        payload["to"] = self.end_timestamp
+
+    if self.gzip != True:
+        payload["gzip"] = "false"
+
+    if self.match_type:
+        payload["matchType"] = self.match_type
+
+    if self.filters and len(self.filters) > 0:
+        for i, f in enumerate(self.filters):
+            payload["filter" + str(i)] = f
+
+    if self.collapses and len(self.collapses) > 0:
+        for i, f in enumerate(self.collapses):
+            payload["collapse" + str(i)] = f
+
+    payload["url"] = self.url
+
+
 def _ts(timestamp, data):
    """
    Get timestamp of last fetched archive.
@@ -96,18 +120,12 @@ def _check_filters(filters):
            key = match.group(1)
            val = match.group(2)

-
        except Exception:
            e = "Filter '%s' not following the cdx filter syntax." % f
            raise WaybackError(e)


 def _cleaned_url(url):
-    print(1)
-    """
-    Remove EOL
-    replace " " with "_"
-    """
    return str(url).strip().replace(" ", "%20")


@@ -258,7 +276,6 @@ def _get_response(
    )
    s.mount("https://", HTTPAdapter(max_retries=retries))
    url = _full_url(endpoint, params)
-    print(url)
    try:
        if not return_full_url:
            return s.get(url, headers=headers)