now passing dict to CdxSnapshot

parent 04cda4558e
commit a6470b1036
@@ -11,7 +11,6 @@
<a href="https://github.com/akamhy/waybackpy/actions?query=workflow%3ACI"><img alt="Build Status" src="https://github.com/akamhy/waybackpy/workflows/CI/badge.svg"></a>
<a href="https://www.codacy.com/manual/akamhy/waybackpy?utm_source=github.com&utm_medium=referral&utm_content=akamhy/waybackpy&utm_campaign=Badge_Grade"><img alt="Codacy Badge" src="https://api.codacy.com/project/badge/Grade/255459cede9341e39436ec8866d3fb65"></a>
<a href="https://codecov.io/gh/akamhy/waybackpy"><img alt="codecov" src="https://codecov.io/gh/akamhy/waybackpy/branch/master/graph/badge.svg"></a>
<a href="https://codeclimate.com/github/akamhy/waybackpy/maintainability"><img alt="Maintainability" src="https://api.codeclimate.com/v1/badges/942f13d8177a56c1c906/maintainability"></a>
<a href="https://github.com/akamhy/waybackpy/blob/master/CONTRIBUTING.md"><img alt="Contributions Welcome" src="https://img.shields.io/static/v1.svg?label=Contributions&message=Welcome&color=0059b3&style=flat-square"></a>
<a href="https://pepy.tech/project/waybackpy?versions=2*&versions=1*&versions=3*"><img alt="Downloads" src="https://pepy.tech/badge/waybackpy/month"></a>
<a href="https://github.com/akamhy/waybackpy/commits/master"><img alt="GitHub lastest commit" src="https://img.shields.io/github/last-commit/akamhy/waybackpy?color=blue&style=flat-square"></a>
@@ -100,4 +99,3 @@ Released under the MIT License. See


-----------------------------------------------------------------------------------------------------------------------------------------------

@@ -5,28 +5,28 @@ from waybackpy.snapshot import CdxSnapshot, datetime

def test_CdxSnapshot():
    sample_input = "org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415"
+    prop_values = sample_input.split(" ")
+    properties = {}
    (
-        urlkey,
-        timestamp,
-        original,
-        mimetype,
-        statuscode,
-        digest,
-        length,
-    ) = sample_input.split(" ")
+        properties["urlkey"],
+        properties["timestamp"],
+        properties["original"],
+        properties["mimetype"],
+        properties["statuscode"],
+        properties["digest"],
+        properties["length"],
+    ) = prop_values

-    snapshot = CdxSnapshot(
-        urlkey, timestamp, original, mimetype, statuscode, digest, length
-    )
+    snapshot = CdxSnapshot(properties)

-    assert urlkey == snapshot.urlkey
-    assert timestamp == snapshot.timestamp
-    assert original == snapshot.original
-    assert mimetype == snapshot.mimetype
-    assert statuscode == snapshot.statuscode
-    assert digest == snapshot.digest
-    assert length == snapshot.length
-    assert datetime.strptime(timestamp, "%Y%m%d%H%M%S") == snapshot.datetime_timestamp
-    archive_url = "https://web.archive.org/web/" + timestamp + "/" + original
+    assert properties["urlkey"] == snapshot.urlkey
+    assert properties["timestamp"] == snapshot.timestamp
+    assert properties["original"] == snapshot.original
+    assert properties["mimetype"] == snapshot.mimetype
+    assert properties["statuscode"] == snapshot.statuscode
+    assert properties["digest"] == snapshot.digest
+    assert properties["length"] == snapshot.length
+    assert datetime.strptime(properties["timestamp"], "%Y%m%d%H%M%S") == snapshot.datetime_timestamp
+    archive_url = "https://web.archive.org/web/" + properties["timestamp"] + "/" + properties["original"]
    assert archive_url == snapshot.archive_url
    assert archive_url == str(snapshot)
@@ -7,6 +7,7 @@ from .utils import (
    _check_filters,
    _check_collapses,
    _check_match_type,
+    _add_payload,
)

# TODO : Threading support for pagination API. It's designed for Threading.
@@ -147,27 +148,7 @@ class Cdx:
        payload = {}
        headers = {"User-Agent": self.user_agent}

-        if self.start_timestamp:
-            payload["from"] = self.start_timestamp
-
-        if self.end_timestamp:
-            payload["to"] = self.end_timestamp
-
-        if self.gzip != True:
-            payload["gzip"] = "false"
-
-        if self.match_type:
-            payload["matchType"] = self.match_type
-
-        if self.filters and len(self.filters) > 0:
-            for i, f in enumerate(self.filters):
-                payload["filter" + str(i)] = f
-
-        if self.collapses and len(self.collapses) > 0:
-            for i, f in enumerate(self.collapses):
-                payload["collapse" + str(i)] = f
-
-        payload["url"] = self.url
+        _add_payload(self, payload)

        if not self.start_timestamp or self.end_timestamp:
            self.use_page = True
@@ -221,12 +202,4 @@ class Cdx:
                properties["length"],
            ) = prop_values

-            yield CdxSnapshot(
-                properties["urlkey"],
-                properties["timestamp"],
-                properties["original"],
-                properties["mimetype"],
-                properties["statuscode"],
-                properties["digest"],
-                properties["length"],
-            )
+            yield CdxSnapshot(properties)
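With CdxSnapshot now built from a single properties dict, the snapshots generator remains the public way to consume the CDX API. A minimal usage sketch, assuming Cdx is importable from waybackpy's cdx module, that its constructor accepts url and user_agent keyword arguments, and that snapshots() is the generator shown in the hunk above; the user agent string is a placeholder:

from waybackpy.cdx import Cdx

# Query the Wayback Machine CDX API for captures of a URL.
cdx = Cdx(
    url="github.com",
    user_agent="my-tool/1.0",  # placeholder; pass your own identifying agent
)

# Each yielded CdxSnapshot carries the seven CDX fields parsed upstream.
for snapshot in cdx.snapshots():
    print(snapshot.timestamp, snapshot.statuscode, snapshot.archive_url)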
@@ -9,18 +9,18 @@ class CdxSnapshot:
    org,archive)/ 20080126045828 http://github.com text/html 200 Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY 1415
    """

-    def __init__(
-        self, urlkey, timestamp, original, mimetype, statuscode, digest, length
-    ):
-        self.urlkey = urlkey
-        self.timestamp = timestamp
-        self.datetime_timestamp = datetime.strptime(timestamp, "%Y%m%d%H%M%S")
-        self.original = original
-        self.mimetype = mimetype
-        self.statuscode = statuscode
-        self.digest = digest
-        self.length = length
-        self.archive_url = "https://web.archive.org/web/" + timestamp + "/" + original
+    def __init__(self, properties):
+        self.urlkey = properties["urlkey"]
+        self.timestamp = properties["timestamp"]
+        self.datetime_timestamp = datetime.strptime(self.timestamp, "%Y%m%d%H%M%S")
+        self.original = properties["original"]
+        self.mimetype = properties["mimetype"]
+        self.statuscode = properties["statuscode"]
+        self.digest = properties["digest"]
+        self.length = properties["length"]
+        self.archive_url = (
+            "https://web.archive.org/web/" + self.timestamp + "/" + self.original
+        )

    def __str__(self):
        return self.archive_url
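For reference, the new dict-based constructor in use; the field values are taken verbatim from the sample CDX line in the class docstring above:

from waybackpy.snapshot import CdxSnapshot

# The seven CDX fields for one capture, keyed exactly as __init__ expects.
properties = {
    "urlkey": "org,archive)/",
    "timestamp": "20080126045828",
    "original": "http://github.com",
    "mimetype": "text/html",
    "statuscode": "200",
    "digest": "Q4YULN754FHV2U6Q5JUT6Q2P57WEWNNY",
    "length": "1415",
}

snapshot = CdxSnapshot(properties)
print(snapshot)  # __str__ returns snapshot.archive_url:
# https://web.archive.org/web/20080126045828/http://github.com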
@@ -11,6 +11,30 @@ quote = requests.utils.quote
default_user_agent = "waybackpy python package - https://github.com/akamhy/waybackpy"


+def _add_payload(self, payload):
+    if self.start_timestamp:
+        payload["from"] = self.start_timestamp
+
+    if self.end_timestamp:
+        payload["to"] = self.end_timestamp
+
+    if self.gzip != True:
+        payload["gzip"] = "false"
+
+    if self.match_type:
+        payload["matchType"] = self.match_type
+
+    if self.filters and len(self.filters) > 0:
+        for i, f in enumerate(self.filters):
+            payload["filter" + str(i)] = f
+
+    if self.collapses and len(self.collapses) > 0:
+        for i, f in enumerate(self.collapses):
+            payload["collapse" + str(i)] = f
+
+    payload["url"] = self.url
+
+
def _ts(timestamp, data):
    """
    Get timestamp of last fetched archive.
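A quick sketch of the request payload this helper builds. _add_payload only reads attributes off its first argument, so a SimpleNamespace stand-in (illustrative only, not how the library calls it) is enough to exercise it:

from types import SimpleNamespace

from waybackpy.utils import _add_payload

# Stand-in for a Cdx instance, carrying just the attributes the helper reads.
cdx_like = SimpleNamespace(
    url="github.com",
    start_timestamp="2019",
    end_timestamp="2021",
    gzip=False,  # anything other than True adds gzip=false
    match_type="exact",
    filters=["statuscode:200"],
    collapses=["urlkey"],
)

payload = {}
_add_payload(cdx_like, payload)
# payload is now:
# {"from": "2019", "to": "2021", "gzip": "false", "matchType": "exact",
#  "filter0": "statuscode:200", "collapse0": "urlkey", "url": "github.com"}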
@@ -96,18 +120,12 @@ def _check_filters(filters):
            key = match.group(1)
            val = match.group(2)
-
-
        except Exception:
            e = "Filter '%s' not following the cdx filter syntax." % f
            raise WaybackError(e)

-
def _cleaned_url(url):
-    print(1)
    """
    Remove EOL
    replace " " with "_"
    """
    return str(url).strip().replace(" ", "%20")
-
-
@@ -258,7 +276,6 @@ def _get_response(
    )
    s.mount("https://", HTTPAdapter(max_retries=retries))
    url = _full_url(endpoint, params)
-    print(url)
    try:
        if not return_full_url:
            return s.get(url, headers=headers)