Fix Pylint errors pointed out by Codacy (#133)

* fix: pylint errors pointed out by codacy
* fix: line length
* fix: help text
* fix: revert https://stackoverflow.com/a/64477857, which makes the cli unusable
* fix: cli error and refactor code
2022-02-05 05:25:40 +09:00
parent 9d9cc3328b
commit 5f3cd28046
4 changed files with 119 additions and 136 deletions
@@ -26,27 +26,25 @@ class WaybackMachineCDXServerAPI(object):
        user_agent: str = DEFAULT_USER_AGENT,
        start_timestamp: Optional[str] = None,
        end_timestamp: Optional[str] = None,
-        filters: List[str] = [],
+        filters: Optional[List[str]] = None,
        match_type: Optional[str] = None,
        gzip: Optional[str] = None,
-        collapses: List[str] = [],
+        collapses: Optional[List[str]] = None,
        limit: Optional[str] = None,
        max_tries: int = 3,
    ) -> None:
        self.url = str(url).strip().replace(" ", "%20")
        self.user_agent = user_agent
-        self.start_timestamp = (
-            str(start_timestamp) if start_timestamp is not None else None
-        )
-        self.end_timestamp = str(end_timestamp) if end_timestamp is not None else None
-        self.filters = filters
+        self.start_timestamp = None if start_timestamp is None else str(start_timestamp)
+        self.end_timestamp = None if end_timestamp is None else str(end_timestamp)
+        self.filters = [] if filters is None else filters
        check_filters(self.filters)
-        self.match_type = str(match_type).strip() if match_type is not None else None
+        self.match_type = None if match_type is None else str(match_type).strip()
        check_match_type(self.match_type, self.url)
        self.gzip = gzip
-        self.collapses = collapses
+        self.collapses = [] if collapses is None else collapses
        check_collapses(self.collapses)
-        self.limit = limit if limit is not None else 5000
+        self.limit = 5000 if limit is None else limit
        self.max_tries = max_tries
        self.last_api_request_url: Optional[str] = None
        self.use_page = False
@@ -16,6 +16,52 @@ from .utils import DEFAULT_USER_AGENT
 from .wrapper import Url


+def echo_availability_api(
+    availability_api_instance: WaybackMachineAvailabilityAPI, json: bool
+) -> None:
+    click.echo("Archive URL:")
+    if not availability_api_instance.archive_url:
+        archive_url = (
+            "NO ARCHIVE FOUND - The requested URL is probably "
+            + "not yet archived or if the URL was recently archived then it is "
+            + "not yet available via the Wayback Machine's availability API "
+            + "because of database lag and should be available after some time."
+        )
+    else:
+        archive_url = availability_api_instance.archive_url
+    click.echo(archive_url)
+    if json:
+        click.echo("JSON response:")
+        click.echo(JSON.dumps(availability_api_instance.JSON))
+
+
+def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
+    domain = None
+    sys_random = random.SystemRandom()
+    uid = "".join(
+        sys_random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
+    )
+    url_count = 0
+    file_name = None
+
+    for url in url_gen:
+        url_count += 1
+        if not domain:
+            match = re.search("https?://([A-Za-z_0-9.-]+).*", url)
+            domain = "domain-unknown" if match is None else match.group(1)
+            file_name = f"{domain}-urls-{uid}.txt"
+            file_path = os.path.join(os.getcwd(), file_name)
+            with open(file_path, "a") as f:
+                f.write(f"{url}\n")
+
+        click.echo(url)
+
+    if url_count > 0 or file_name is not None:
+        click.echo(f"\n\n'{file_name}' saved in current working directory")
+    else:
+        click.echo("No known URLs found. Please try a diffrent input!")
+
+
@click.command()
@click.option(
    "-u", "--url", help="URL on which Wayback machine operations are to be performed."
@@ -30,11 +76,12 @@ from .wrapper import Url
@click.option("-v", "--version", is_flag=True, default=False, help="waybackpy version.")
@click.option(
    "-l",
-    "--showlicense",
+    "--show-license",
+    "--show_license",
    "--license",
    is_flag=True,
    default=False,
-    help="show license of Waybackpy.",
+    help="Show license of Waybackpy.",
 )
@click.option(
    "-n",
@@ -129,7 +176,8 @@ from .wrapper import Url
 )
@click.option(
    "-f",
-    "--cdxfilter",
+    "--cdx-filter",
+    "--cdx_filter",
    "--filter",
    multiple=True,
    help="Filter on a specific field or all the CDX fields.",
@@ -169,11 +217,11 @@ from .wrapper import Url
    + "if this parameter is not used then the plain text response of the CDX API "
    + "will be printed.",
 )
-def main(
+def main(  # pylint: disable=no-value-for-parameter
    url: Optional[str],
    user_agent: str,
    version: bool,
-    showlicense: bool,
+    show_license: bool,
    newest: bool,
    oldest: bool,
    json: bool,
@@ -191,7 +239,7 @@ def main(
    cdx: bool,
    start_timestamp: Optional[str],
    end_timestamp: Optional[str],
-    cdxfilter: List[str],
+    cdx_filter: List[str],
    match_type: Optional[str],
    gzip: Optional[str],
    collapse: List[str],
@@ -219,26 +267,18 @@ def main(
    Released under the MIT License. Use the flag --license for license.

    """
-
    if version:
        click.echo(f"waybackpy version {__version__}")
-        return
-
-    if showlicense:
+    elif show_license:
        click.echo(
            requests.get(
                url="https://raw.githubusercontent.com/akamhy/waybackpy/master/LICENSE"
            ).text
        )
-        return
-
-    if not url:
+    elif url is None:
        click.echo("No URL detected. Please provide an URL.")
-        return
-
-    if (
-        url
-        and not version
+    elif (
+        not version
        and not oldest
        and not newest
        and not near
@@ -250,39 +290,16 @@ def main(
            "Only URL passed, but did not specify what to do with the URL. "
            "Use --help flag for help using waybackpy."
        )
-        return
-
-    def echo_availability_api(
-        availability_api_instance: WaybackMachineAvailabilityAPI,
-    ) -> None:
-        click.echo("Archive URL:")
-        if not availability_api_instance.archive_url:
-            archive_url = (
-                "NO ARCHIVE FOUND - The requested URL is probably "
-                + "not yet archived or if the URL was recently archived then it is "
-                + "not yet available via the Wayback Machine's availability API "
-                + "because of database lag and should be available after some time."
-            )
-        else:
-            archive_url = availability_api_instance.archive_url
-        click.echo(archive_url)
-        if json:
-            click.echo("JSON response:")
-            click.echo(JSON.dumps(availability_api_instance.JSON))
-
-    availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
-
-    if oldest:
+    elif oldest:
+        availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
        availability_api.oldest()
-        echo_availability_api(availability_api)
-        return
-
-    if newest:
+        echo_availability_api(availability_api, json)
+    elif newest:
+        availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
        availability_api.newest()
-        echo_availability_api(availability_api)
-        return
-
-    if near:
+        echo_availability_api(availability_api, json)
+    elif near:
+        availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
        near_args = {}
        keys = ["year", "month", "day", "hour", "minute"]
        args_arr = [year, month, day, hour, minute]
@@ -290,10 +307,8 @@ def main(
            if arg:
                near_args[key] = arg
        availability_api.near(**near_args)
-        echo_availability_api(availability_api)
-        return
-
-    if save:
+        echo_availability_api(availability_api, json)
+    elif save:
        save_api = WaybackMachineSaveAPI(url, user_agent=user_agent)
        save_api.save()
        click.echo("Archive URL:")
@@ -303,43 +318,7 @@ def main(
        if headers:
            click.echo("Save API headers:")
            click.echo(save_api.headers)
-        return
-
-    def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
-        domain = None
-        sys_random = random.SystemRandom()
-        uid = "".join(
-            sys_random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
-        )
-        url_count = 0
-        file_name = None
-
-        for url in url_gen:
-            url_count += 1
-            if not domain:
-                match = re.search("https?://([A-Za-z_0-9.-]+).*", url)
-
-                domain = "domain-unknown"
-
-                if match is not None:
-                    domain = match.group(1)
-
-                file_name = f"{domain}-urls-{uid}.txt"
-                file_path = os.path.join(os.getcwd(), file_name)
-                if not os.path.isfile(file_path):
-                    open(file_path, "w+").close()
-
-                with open(file_path, "a") as f:
-                    f.write(f"{url}\n")
-
-            click.echo(url)
-
-        if url_count > 0 or file_name is not None:
-            click.echo(f"\n\n'{file_name}' saved in current working directory")
-        else:
-            click.echo("No known URLs found. Please try a diffrent input!")
-
-    if known_urls:
+    elif known_urls:
        wayback = Url(url, user_agent)
        url_gen = wayback.known_urls(subdomain=subdomain)

@@ -348,9 +327,8 @@ def main(
        else:
            for url in url_gen:
                click.echo(url)
-
-    if cdx:
-        filters = list(cdxfilter)
+    elif cdx:
+        filters = list(cdx_filter)
        collapses = list(collapse)
        cdx_print = list(cdx_print)

@@ -372,35 +350,36 @@ def main(
            if len(cdx_print) == 0:
                click.echo(snapshot)
            else:
-                output_string = ""
+                output_string = []
                if any(val in cdx_print for val in ["urlkey", "url-key", "url_key"]):
-                    output_string = output_string + snapshot.urlkey + " "
+                    output_string.append(snapshot.urlkey)
                if any(
                    val in cdx_print
                    for val in ["timestamp", "time-stamp", "time_stamp"]
                ):
-                    output_string = output_string + snapshot.timestamp + " "
+                    output_string.append(snapshot.timestamp)
                if "original" in cdx_print:
-                    output_string = output_string + snapshot.original + " "
+                    output_string.append(snapshot.original)
                if any(
                    val in cdx_print for val in ["mimetype", "mime-type", "mime_type"]
                ):
-                    output_string = output_string + snapshot.mimetype + " "
+                    output_string.append(snapshot.mimetype)
                if any(
                    val in cdx_print
                    for val in ["statuscode", "status-code", "status_code"]
                ):
-                    output_string = output_string + snapshot.statuscode + " "
+                    output_string.append(snapshot.statuscode)
                if "digest" in cdx_print:
-                    output_string = output_string + snapshot.digest + " "
+                    output_string.append(snapshot.digest)
                if "length" in cdx_print:
-                    output_string = output_string + snapshot.length + " "
+                    output_string.append(snapshot.length)
                if any(
                    val in cdx_print
                    for val in ["archiveurl", "archive-url", "archive_url"]
                ):
-                    output_string = output_string + snapshot.archive_url + " "
-                click.echo(output_string)
+                    output_string.append(snapshot.archive_url)
+
+                click.echo(" ".join(output_string))


 if __name__ == "__main__":
@@ -19,7 +19,10 @@ class WaybackMachineSaveAPI(object):
    """

    def __init__(
-        self, url: str, user_agent: str = DEFAULT_USER_AGENT, max_tries: int = 8
+        self,
+        url: str,
+        user_agent: str = DEFAULT_USER_AGENT,
+        max_tries: int = 8,
    ) -> None:
        self.url = str(url).strip().replace(" ", "%20")
        self.request_url = "https://web.archive.org/save/" + self.url
@@ -169,17 +172,16 @@ class WaybackMachineSaveAPI(object):
        tries = 0

        while True:
-            if self.saved_archive is None:
-                if tries >= 1:
-                    self.sleep(tries)
+            if tries >= 1:
+                self.sleep(tries)

-                self.get_save_request_headers()
-                self.saved_archive = self.archive_url_parser()
+            self.get_save_request_headers()
+            self.saved_archive = self.archive_url_parser()

-                if isinstance(self.saved_archive, str):
-                    self._archive_url = self.saved_archive
-                    self.timestamp()
-                    return self.saved_archive
+            if isinstance(self.saved_archive, str):
+                self._archive_url = self.saved_archive
+                self.timestamp()
+                return self.saved_archive

            tries += 1
            if tries >= self.max_tries:
@@ -6,21 +6,25 @@ from .cdx_api import WaybackMachineCDXServerAPI
 from .save_api import WaybackMachineSaveAPI
 from .utils import DEFAULT_USER_AGENT

-"""
-The Url class is not recommended to be used anymore, instead use the
-WaybackMachineSaveAPI, WaybackMachineAvailabilityAPI and WaybackMachineCDXServerAPI.
-
-The reason it is still in the code is backwards compatibility with 2.x.x versions.
-
-If were are using the Url before the update to version 3.x.x, your code should still be
-working fine and there is no hurry to update the interface but is recommended that you
-do not use the Url class for new code as it would be removed after 2025 also the first
-3.x.x versions was released in January 2022 and three years are more than enough to
-update the older interface code.
-"""
-

 class Url(object):
+    """
+    The Url class is not recommended to be used anymore, instead use:
+
+    - WaybackMachineSaveAPI
+    - WaybackMachineAvailabilityAPI
+    - WaybackMachineCDXServerAPI
+
+    The reason it is still in the code is backwards compatibility with 2.x.x
+    versions.
+
+    If you were using the Url class before the update to version 3.x.x, your code
+    should still work fine and there is no hurry to update the interface, but it
+    is recommended that you do not use the Url class for new code, as it will be
+    removed after 2025. The first 3.x.x version was released in January 2022, and
+    three years are more than enough to update the older interface code.
+    """
+
    def __init__(self, url: str, user_agent: str = DEFAULT_USER_AGENT) -> None:
        self.url = url
        self.user_agent = str(user_agent)