code formatted with black (#47)

Akash Mahanty
2020-12-14 01:18:04 +05:30
committed by GitHub
parent fde28d57aa
commit d3e68d0e70
6 changed files with 395 additions and 149 deletions

View File

@@ -10,27 +10,27 @@ from waybackpy.__version__ import __version__
def _save(obj):
return (obj.save())
return obj.save()
def _archive_url(obj):
return (obj.archive_url)
return obj.archive_url
def _json(obj):
return (obj.JSON)
return obj.JSON
def _oldest(obj):
return (obj.oldest())
return obj.oldest()
def _newest(obj):
return (obj.newest())
return obj.newest()
def _total_archives(obj):
return (obj.total_archives())
return obj.total_archives()
def _near(obj, args):
@@ -45,17 +45,19 @@ def _near(obj, args):
_near_args["hour"] = args.hour
if args.minute:
_near_args["minute"] = args.minute
return (obj.near(**_near_args))
return obj.near(**_near_args)
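These CLI helpers are thin wrappers that forward to the Url API. A minimal sketch of the calls they map to, with an illustrative URL, user agent and import path (assumptions, not taken from this diff):

from waybackpy import Url  # import path assumed

obj = Url("https://example.com", "my-user-agent")  # illustrative values for --url / --user_agent
obj.save()                            # _save
print(obj.archive_url)                # _archive_url
print(obj.JSON)                       # _json
print(obj.oldest())                   # _oldest
print(obj.newest())                   # _newest
print(obj.total_archives())           # _total_archives
print(obj.near(year=2020, month=12))  # _near, built from --year/--month/...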
def _save_urls_on_file(input_list, live_url_count):
m = re.search('https?://([A-Za-z_0-9.-]+).*', input_list[0])
m = re.search("https?://([A-Za-z_0-9.-]+).*", input_list[0])
if m:
domain = m.group(1)
else:
domain = "domain-unknown"
uid = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(6))
uid = "".join(
random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
)
file_name = "%s-%d-urls-%s.txt" % (domain, live_url_count, uid)
file_content = "\n".join(input_list)
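A standalone sketch of the file-naming scheme used above; the input list is illustrative and len(input_list) stands in for live_url_count:

import random
import re
import string

input_list = ["https://example.com/page", "https://example.com/other"]  # illustrative
m = re.search("https?://([A-Za-z_0-9.-]+).*", input_list[0])
domain = m.group(1) if m else "domain-unknown"
uid = "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(6))
print("%s-%d-urls-%s.txt" % (domain, len(input_list), uid))  # e.g. example.com-2-urls-k3f9qz.txt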
@@ -89,34 +91,37 @@ def _known_urls(obj, args):
def _get(obj, args):
if args.get.lower() == "url":
return (obj.get())
return obj.get()
if args.get.lower() == "archive_url":
return (obj.get(obj.archive_url))
return obj.get(obj.archive_url)
if args.get.lower() == "oldest":
return (obj.get(obj.oldest()))
return obj.get(obj.oldest())
if args.get.lower() == "latest" or args.get.lower() == "newest":
return (obj.get(obj.newest()))
return obj.get(obj.newest())
if args.get.lower() == "save":
return (obj.get(obj.save()))
return obj.get(obj.save())
return ("Use get as \"--get 'source'\", 'source' can be one of the following: \
return "Use get as \"--get 'source'\", 'source' can be one of the following: \
\n1) url - get the source code of the url specified using --url/-u.\
\n2) archive_url - get the source code of the newest archive for the supplied url, alias of newest.\
\n3) oldest - get the source code of the oldest archive for the supplied url.\
\n4) newest - get the source code of the newest archive for the supplied url.\
\n5) save - Create a new archive and get the source code of this new archive for the supplied url.")
\n5) save - Create a new archive and get the source code of this new archive for the supplied url."
def args_handler(args):
if args.version:
return ("waybackpy version %s" % __version__)
return "waybackpy version %s" % __version__
if not args.url:
return ("waybackpy %s \nSee 'waybackpy --help' for help using this tool." % __version__)
return (
"waybackpy %s \nSee 'waybackpy --help' for help using this tool."
% __version__
)
if args.user_agent:
obj = Url(args.url, args.user_agent)
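For example, "--get oldest" boils down to the following calls on the Url object; the URL, user agent and import path are illustrative assumptions:

from waybackpy import Url  # import path assumed

obj = Url("https://example.com", "my-user-agent")  # illustrative
print(obj.get(obj.oldest())[:500])  # what _get returns for "--get oldest", truncated for display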
@@ -151,52 +156,93 @@ def args_handler(args):
def parse_args(argv):
parser = argparse.ArgumentParser()
requiredArgs = parser.add_argument_group('URL argument (required)')
requiredArgs.add_argument("--url", "-u", help="URL on which Wayback machine operations would occur")
requiredArgs = parser.add_argument_group("URL argument (required)")
requiredArgs.add_argument(
"--url", "-u", help="URL on which Wayback machine operations would occur"
)
userAgentArg = parser.add_argument_group('User Agent')
help_text = "User agent, default user_agent is \"waybackpy python package - https://github.com/akamhy/waybackpy\""
userAgentArg = parser.add_argument_group("User Agent")
help_text = 'User agent, default user_agent is "waybackpy python package - https://github.com/akamhy/waybackpy"'
userAgentArg.add_argument("--user_agent", "-ua", help=help_text)
saveArg = parser.add_argument_group("Create new archive/save URL")
saveArg.add_argument("--save", "-s", action='store_true', help="Save the URL on the Wayback machine")
saveArg.add_argument(
"--save", "-s", action="store_true", help="Save the URL on the Wayback machine"
)
auArg = parser.add_argument_group("Get the latest Archive")
auArg.add_argument("--archive_url", "-au", action='store_true', help="Get the latest archive URL, alias for --newest")
auArg.add_argument(
"--archive_url",
"-au",
action="store_true",
help="Get the latest archive URL, alias for --newest",
)
jsonArg = parser.add_argument_group("Get the JSON data")
jsonArg.add_argument("--json", "-j", action='store_true', help="JSON data of the availability API request")
jsonArg.add_argument(
"--json",
"-j",
action="store_true",
help="JSON data of the availability API request",
)
oldestArg = parser.add_argument_group("Oldest archive")
oldestArg.add_argument("--oldest", "-o", action='store_true', help="Oldest archive for the specified URL")
oldestArg.add_argument(
"--oldest",
"-o",
action="store_true",
help="Oldest archive for the specified URL",
)
newestArg = parser.add_argument_group("Newest archive")
newestArg.add_argument("--newest", "-n", action='store_true', help="Newest archive for the specified URL")
newestArg.add_argument(
"--newest",
"-n",
action="store_true",
help="Newest archive for the specified URL",
)
totalArg = parser.add_argument_group("Total number of archives")
totalArg.add_argument("--total", "-t", action='store_true', help="Total number of archives for the specified URL")
totalArg.add_argument(
"--total",
"-t",
action="store_true",
help="Total number of archives for the specified URL",
)
getArg = parser.add_argument_group("Get source code")
getArg.add_argument("--get", "-g", help="Prints the source code of the supplied url. Use '--get help' for extended usage")
getArg.add_argument(
"--get",
"-g",
help="Prints the source code of the supplied url. Use '--get help' for extended usage",
)
knownUrlArg = parser.add_argument_group("URLs known and archived to Wayback Machine for the site.")
knownUrlArg.add_argument("--known_urls", "-ku", action='store_true', help="URLs known for the domain.")
knownUrlArg = parser.add_argument_group(
"URLs known and archived to Wayback Machine for the site."
)
knownUrlArg.add_argument(
"--known_urls", "-ku", action="store_true", help="URLs known for the domain."
)
help_text = "Use with '--known_urls' to include known URLs for subdomains."
knownUrlArg.add_argument("--subdomain", "-sub", action='store_true', help=help_text)
knownUrlArg.add_argument("--subdomain", "-sub", action="store_true", help=help_text)
help_text = "Only include live URLs. Will not include dead links."
knownUrlArg.add_argument("--alive", "-a", action='store_true', help=help_text)
knownUrlArg.add_argument("--alive", "-a", action="store_true", help=help_text)
nearArg = parser.add_argument_group('Archive close to time specified')
nearArg.add_argument("--near", "-N", action='store_true', help="Archive near specified time")
nearArg = parser.add_argument_group("Archive close to time specified")
nearArg.add_argument(
"--near", "-N", action="store_true", help="Archive near specified time"
)
nearArgs = parser.add_argument_group('Arguments that are used only with --near')
nearArgs = parser.add_argument_group("Arguments that are used only with --near")
nearArgs.add_argument("--year", "-Y", type=int, help="Year in integer")
nearArgs.add_argument("--month", "-M", type=int, help="Month in integer")
nearArgs.add_argument("--day", "-D", type=int, help="Day in integer.")
nearArgs.add_argument("--hour", "-H", type=int, help="Hour in integer")
nearArgs.add_argument("--minute", "-MIN", type=int, help="Minute in integer")
parser.add_argument("--version", "-v", action='store_true', help="Waybackpy version")
parser.add_argument(
"--version", "-v", action="store_true", help="Waybackpy version"
)
return parser.parse_args(argv[1:])
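parse_args expects a full argv-style list (argv[1:] strips the program name). A quick check, assuming the function is importable as waybackpy.cli.parse_args (module path assumed) and using an illustrative URL:

from waybackpy.cli import parse_args  # module path assumed

args = parse_args(["waybackpy", "--url", "https://example.com", "--oldest"])
print(args.url, args.oldest)  # https://example.com True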

View File

@@ -1,10 +1,12 @@
# -*- coding: utf-8 -*-
class WaybackError(Exception):
"""
Raised when Wayback Machine API Service is unreachable/down.
"""
class URLError(Exception):
"""
Raised when malformed URLs are passed as arguments.
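Callers can catch both exceptions; a minimal sketch with assumed import paths and illustrative values:

from waybackpy import Url  # import path assumed
from waybackpy.exceptions import URLError, WaybackError  # module path assumed

try:
    Url("https://example.com", "my-user-agent").save()
except (WaybackError, URLError) as err:
    print(err)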

View File

@@ -14,9 +14,7 @@ default_UA = "waybackpy python package - https://github.com/akamhy/waybackpy"
def _archive_url_parser(header):
"""Parse out the archive from header."""
# Regex1
arch = re.search(
r"Content-Location: (/web/[0-9]{14}/.*)", str(header)
)
arch = re.search(r"Content-Location: (/web/[0-9]{14}/.*)", str(header))
if arch:
return "web.archive.org" + arch.group(1)
# Regex2
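A quick check of Regex1 against an illustrative header value:

import re

header = "Content-Location: /web/20201214011804/https://example.com/"  # illustrative
arch = re.search(r"Content-Location: (/web/[0-9]{14}/.*)", header)
if arch:
    print("web.archive.org" + arch.group(1))  # web.archive.org/web/20201214011804/https://example.com/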
@@ -79,11 +77,7 @@ class Url:
def __len__(self):
td_max = timedelta(
days=999999999,
hours=23,
minutes=59,
seconds=59,
microseconds=999999
days=999999999, hours=23, minutes=59, seconds=59, microseconds=999999
)
if self.timestamp == datetime.max:
return td_max.days
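td_max equals timedelta.max, so __len__ reports 999999999 days when self.timestamp is the datetime.max sentinel (no archive yet). A quick check:

from datetime import timedelta

td_max = timedelta(days=999999999, hours=23, minutes=59, seconds=59, microseconds=999999)
print(td_max == timedelta.max, td_max.days)  # True 999999999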
@@ -112,9 +106,7 @@ class Url:
else:
archive_url = data["archived_snapshots"]["closest"]["url"]
archive_url = archive_url.replace(
"http://web.archive.org/web/",
"https://web.archive.org/web/",
1
"http://web.archive.org/web/", "https://web.archive.org/web/", 1
)
return archive_url
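The replace with a count of 1 only upgrades the scheme of the leading snapshot prefix, leaving the embedded original URL untouched. For example:

archive_url = "http://web.archive.org/web/20201214011804/http://example.com/"  # illustrative
print(archive_url.replace("http://web.archive.org/web/", "https://web.archive.org/web/", 1))
# https://web.archive.org/web/20201214011804/http://example.com/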
@@ -127,10 +119,9 @@ class Url:
time = datetime.max
else:
time = datetime.strptime(data["archived_snapshots"]
["closest"]
["timestamp"],
'%Y%m%d%H%M%S')
time = datetime.strptime(
data["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
)
return time
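The availability API returns 14-digit timestamps, which strptime turns into a datetime. For example:

from datetime import datetime

print(datetime.strptime("20201214011804", "%Y%m%d%H%M%S"))  # 2020-12-14 01:18:04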
@@ -170,9 +161,9 @@ class Url:
return response.content.decode(encoding.replace("text/html", "UTF-8", 1))
def near(self, year=None, month=None, day=None, hour=None, minute=None):
""" Return the closest Wayback Machine archive to the time supplied.
Supported params are year, month, day, hour and minute.
Any non-supplied parameters default to the current time.
"""Return the closest Wayback Machine archive to the time supplied.
Supported params are year, month, day, hour and minute.
Any non-supplied parameters default to the current time.
"""
now = datetime.utcnow().timetuple()
@@ -184,10 +175,9 @@ class Url:
minute=minute if minute else now.tm_min,
)
endpoint = "https://archive.org/wayback/available"
headers = {"User-Agent": "%s" % self.user_agent}
payload = {"url": "%s" % self._clean_url(), "timestamp" : timestamp}
payload = {"url": "%s" % self._clean_url(), "timestamp": timestamp}
response = _get_response(endpoint, params=payload, headers=headers)
data = response.json()
if not data["archived_snapshots"]:
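The request that near() sends reduces to a GET with url and timestamp query parameters. A rough stand-alone equivalent, using requests in place of waybackpy's _get_response helper and illustrative values:

import requests  # stand-in for _get_response

endpoint = "https://archive.org/wayback/available"
headers = {"User-Agent": "waybackpy python package - https://github.com/akamhy/waybackpy"}
payload = {"url": "example.com", "timestamp": "20201214011804"}
data = requests.get(endpoint, params=payload, headers=headers).json()
print(data.get("archived_snapshots"))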
@@ -201,7 +191,9 @@ class Url:
)
self.archive_url = archive_url
self.timestamp = datetime.strptime(data["archived_snapshots"]["closest"]["timestamp"], '%Y%m%d%H%M%S')
self.timestamp = datetime.strptime(
data["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
)
return self
@@ -221,7 +213,11 @@ class Url:
"""Returns the total number of Wayback Machine archives for this URL."""
endpoint = "https://web.archive.org/cdx/search/cdx"
headers = {"User-Agent": "%s" % self.user_agent, "output" : "json", "fl" : "statuscode"}
headers = {
"User-Agent": "%s" % self.user_agent,
"output": "json",
"fl": "statuscode",
}
payload = {"url": "%s" % self._clean_url()}
response = _get_response(endpoint, params=payload, headers=headers)
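total_archives asks the CDX endpoint for every capture of the cleaned URL. A rough stand-alone sketch using requests instead of _get_response; counting non-empty lines of the default CDX output is only one way to tally captures and is not necessarily how the method handles the response:

import requests  # stand-in for _get_response

endpoint = "https://web.archive.org/cdx/search/cdx"
response = requests.get(
endpoint, params={"url": "example.com"}, headers={"User-Agent": "my-user-agent"}
)
print(len([line for line in response.text.splitlines() if line.strip()]))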
@@ -253,11 +249,13 @@ class Url:
if subdomain:
request_url = (
"https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey" % self._clean_url()
"https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey"
% self._clean_url()
)
else:
request_url = (
"http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey" % self._clean_url()
"http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey"
% self._clean_url()
)
headers = {"User-Agent": "%s" % self.user_agent}
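collapse=urlkey de-duplicates repeated captures of the same URL, and the two request shapes differ only in the *. prefix that pulls in subdomains. For an illustrative cleaned URL:

clean_url = "example.com"  # illustrative stand-in for self._clean_url()
with_subdomains = (
"https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey"
% clean_url
)
domain_only = (
"http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey"
% clean_url
)
print(with_subdomains)
print(domain_only)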