Fix Pylint errors pointed out by Codacy (#133)

* fix: pylint errors pointed out by Codacy * fix: line length * fix: help text * fix: revert https://stackoverflow.com/a/64477857, which makes the CLI unusable * fix: CLI error and refactor code
2022-02-05 05:25:40 +09:00
parent 9d9cc3328b
commit 5f3cd28046
4 changed files with 119 additions and 136 deletions
--- a/waybackpy/cdx_api.py
+++ b/waybackpy/cdx_api.py
@@ -26,27 +26,25 @@ class WaybackMachineCDXServerAPI(object):
        user_agent: str = DEFAULT_USER_AGENT,
        start_timestamp: Optional[str] = None,
        end_timestamp: Optional[str] = None,
-        filters: List[str] = [],
+        filters: Optional[List[str]] = None,
        match_type: Optional[str] = None,
        gzip: Optional[str] = None,
-        collapses: List[str] = [],
+        collapses: Optional[List[str]] = None,
        limit: Optional[str] = None,
        max_tries: int = 3,
    ) -> None:
        self.url = str(url).strip().replace(" ", "%20")
        self.user_agent = user_agent
-        self.start_timestamp = (
+        self.start_timestamp = None if start_timestamp is None else str(start_timestamp)
-            str(start_timestamp) if start_timestamp is not None else None
+        self.end_timestamp = None if end_timestamp is None else str(end_timestamp)
-        )
+        self.filters = [] if filters is None else filters
        self.end_timestamp = str(end_timestamp) if end_timestamp is not None else None
        self.filters = filters
        check_filters(self.filters)
-        self.match_type = str(match_type).strip() if match_type is not None else None
+        self.match_type = None if match_type is None else str(match_type).strip()
        check_match_type(self.match_type, self.url)
        self.gzip = gzip
-        self.collapses = collapses
+        self.collapses = [] if collapses is None else collapses
        check_collapses(self.collapses)
-        self.limit = limit if limit is not None else 5000
+        self.limit = 5000 if limit is None else limit
        self.max_tries = max_tries
        self.last_api_request_url: Optional[str] = None
        self.use_page = False
--- a/waybackpy/cli.py
+++ b/waybackpy/cli.py
@@ -16,6 +16,52 @@ from .utils import DEFAULT_USER_AGENT
 from .wrapper import Url
 def echo_availability_api(
    availability_api_instance: WaybackMachineAvailabilityAPI, json: bool
 ) -> None:
    """
    Print the archive URL held by an availability API instance.

    When the instance has no archive URL, an explanatory message is printed
    in its place. If ``json`` is true, the instance's ``JSON`` attribute is
    additionally dumped after a "JSON response:" header.
    """
    click.echo("Archive URL:")
    if availability_api_instance.archive_url:
        shown = availability_api_instance.archive_url
    else:
        shown = (
            "NO ARCHIVE FOUND - The requested URL is probably "
            "not yet archived or if the URL was recently archived then it is "
            "not yet available via the Wayback Machine's availability API "
            "because of database lag and should be available after some time."
        )
    click.echo(shown)

    # Nothing more to print unless the raw JSON response was requested.
    if not json:
        return
    click.echo("JSON response:")
    click.echo(JSON.dumps(availability_api_instance.JSON))
 def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
    """
    Echo every URL produced by ``url_gen`` and append each one to a text file.

    The file is created in the current working directory and is named
    ``<domain>-urls-<uid>.txt``. ``<domain>`` is extracted from the first URL
    ("domain-unknown" if it cannot be parsed) and ``<uid>`` is a random
    six-character string of lowercase letters and digits.

    If the generator yields nothing, no file is created and a hint is printed
    instead.
    """
    sys_random = random.SystemRandom()
    uid = "".join(
        sys_random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
    )
    file_name = None
    file_path = None
    for url in url_gen:
        if file_path is None:
            # The first URL decides the file name for the whole run.
            match = re.search("https?://([A-Za-z_0-9.-]+).*", url)
            domain = "domain-unknown" if match is None else match.group(1)
            file_name = f"{domain}-urls-{uid}.txt"
            file_path = os.path.join(os.getcwd(), file_name)
        # Append *every* URL, not only the first one; appending per URL keeps
        # already-fetched results on disk if the generator fails midway.
        with open(file_path, "a", encoding="utf-8") as file:
            file.write(f"{url}\n")
        click.echo(url)
    if file_name is not None:
        click.echo(f"\n\n'{file_name}' saved in current working directory")
    else:
        click.echo("No known URLs found. Please try a diffrent input!")
@click.command()
@click.option(
    "-u", "--url", help="URL on which Wayback machine operations are to be performed."
@@ -30,11 +76,12 @@ from .wrapper import Url
@click.option("-v", "--version", is_flag=True, default=False, help="waybackpy version.")
@click.option(
    "-l",
-    "--showlicense",
+    "--show-license",
    "--show_license",
    "--license",
    is_flag=True,
    default=False,
-    help="show license of Waybackpy.",
+    help="Show license of Waybackpy.",
 )
@click.option(
    "-n",
@@ -129,7 +176,8 @@ from .wrapper import Url
 )
@click.option(
    "-f",
-    "--cdxfilter",
+    "--cdx-filter",
    "--cdx_filter",
    "--filter",
    multiple=True,
    help="Filter on a specific field or all the CDX fields.",
@@ -169,11 +217,11 @@ from .wrapper import Url
    + "if this parameter is not used then the plain text response of the CDX API "
    + "will be printed.",
 )
-def main(
+def main(  # pylint: disable=no-value-for-parameter
    url: Optional[str],
    user_agent: str,
    version: bool,
-    showlicense: bool,
+    show_license: bool,
    newest: bool,
    oldest: bool,
    json: bool,
@@ -191,7 +239,7 @@ def main(
    cdx: bool,
    start_timestamp: Optional[str],
    end_timestamp: Optional[str],
-    cdxfilter: List[str],
+    cdx_filter: List[str],
    match_type: Optional[str],
    gzip: Optional[str],
    collapse: List[str],
@@ -219,26 +267,18 @@ def main(
    Released under the MIT License. Use the flag --license for license.
    """
    if version:
        click.echo(f"waybackpy version {__version__}")
-        return
+    elif show_license:
    if showlicense:
        click.echo(
            requests.get(
                url="https://raw.githubusercontent.com/akamhy/waybackpy/master/LICENSE"
            ).text
        )
-        return
+    elif url is None:
    if not url:
        click.echo("No URL detected. Please provide an URL.")
-        return
+    elif (
-
+        not version
    if (
        url
        and not version
        and not oldest
        and not newest
        and not near
@@ -250,39 +290,16 @@ def main(
            "Only URL passed, but did not specify what to do with the URL. "
            "Use --help flag for help using waybackpy."
        )
-        return
+    elif oldest:
-
+        availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
    def echo_availability_api(
        availability_api_instance: WaybackMachineAvailabilityAPI,
    ) -> None:
        click.echo("Archive URL:")
        if not availability_api_instance.archive_url:
            archive_url = (
                "NO ARCHIVE FOUND - The requested URL is probably "
                + "not yet archived or if the URL was recently archived then it is "
                + "not yet available via the Wayback Machine's availability API "
                + "because of database lag and should be available after some time."
            )
        else:
            archive_url = availability_api_instance.archive_url
        click.echo(archive_url)
        if json:
            click.echo("JSON response:")
            click.echo(JSON.dumps(availability_api_instance.JSON))
    availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
    if oldest:
        availability_api.oldest()
-        echo_availability_api(availability_api)
+        echo_availability_api(availability_api, json)
-        return
+    elif newest:
-
+        availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
    if newest:
        availability_api.newest()
-        echo_availability_api(availability_api)
+        echo_availability_api(availability_api, json)
-        return
+    elif near:
-
+        availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
    if near:
        near_args = {}
        keys = ["year", "month", "day", "hour", "minute"]
        args_arr = [year, month, day, hour, minute]
@@ -290,10 +307,8 @@ def main(
            if arg:
                near_args[key] = arg
        availability_api.near(**near_args)
-        echo_availability_api(availability_api)
+        echo_availability_api(availability_api, json)
-        return
+    elif save:
    if save:
        save_api = WaybackMachineSaveAPI(url, user_agent=user_agent)
        save_api.save()
        click.echo("Archive URL:")
@@ -303,43 +318,7 @@ def main(
        if headers:
            click.echo("Save API headers:")
            click.echo(save_api.headers)
-        return
+    elif known_urls:
    def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
        domain = None
        sys_random = random.SystemRandom()
        uid = "".join(
            sys_random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
        )
        url_count = 0
        file_name = None
        for url in url_gen:
            url_count += 1
            if not domain:
                match = re.search("https?://([A-Za-z_0-9.-]+).*", url)
                domain = "domain-unknown"
                if match is not None:
                    domain = match.group(1)
                file_name = f"{domain}-urls-{uid}.txt"
                file_path = os.path.join(os.getcwd(), file_name)
                if not os.path.isfile(file_path):
                    open(file_path, "w+").close()
                with open(file_path, "a") as f:
                    f.write(f"{url}\n")
            click.echo(url)
        if url_count > 0 or file_name is not None:
            click.echo(f"\n\n'{file_name}' saved in current working directory")
        else:
            click.echo("No known URLs found. Please try a diffrent input!")
    if known_urls:
        wayback = Url(url, user_agent)
        url_gen = wayback.known_urls(subdomain=subdomain)
@@ -348,9 +327,8 @@ def main(
        else:
            for url in url_gen:
                click.echo(url)
-
+    elif cdx:
-    if cdx:
+        filters = list(cdx_filter)
        filters = list(cdxfilter)
        collapses = list(collapse)
        cdx_print = list(cdx_print)
@@ -372,35 +350,36 @@ def main(
            if len(cdx_print) == 0:
                click.echo(snapshot)
            else:
-                output_string = ""
+                output_string = []
                if any(val in cdx_print for val in ["urlkey", "url-key", "url_key"]):
-                    output_string = output_string + snapshot.urlkey + " "
+                    output_string.append(snapshot.urlkey)
                if any(
                    val in cdx_print
                    for val in ["timestamp", "time-stamp", "time_stamp"]
                ):
-                    output_string = output_string + snapshot.timestamp + " "
+                    output_string.append(snapshot.timestamp)
                if "original" in cdx_print:
-                    output_string = output_string + snapshot.original + " "
+                    output_string.append(snapshot.original)
                if any(
                    val in cdx_print for val in ["mimetype", "mime-type", "mime_type"]
                ):
-                    output_string = output_string + snapshot.mimetype + " "
+                    output_string.append(snapshot.mimetype)
                if any(
                    val in cdx_print
                    for val in ["statuscode", "status-code", "status_code"]
                ):
-                    output_string = output_string + snapshot.statuscode + " "
+                    output_string.append(snapshot.statuscode)
                if "digest" in cdx_print:
-                    output_string = output_string + snapshot.digest + " "
+                    output_string.append(snapshot.digest)
                if "length" in cdx_print:
-                    output_string = output_string + snapshot.length + " "
+                    output_string.append(snapshot.length)
                if any(
                    val in cdx_print
                    for val in ["archiveurl", "archive-url", "archive_url"]
                ):
-                    output_string = output_string + snapshot.archive_url + " "
+                    output_string.append(snapshot.archive_url)
-                click.echo(output_string)
+
                click.echo(" ".join(output_string))
 if __name__ == "__main__":
--- a/waybackpy/save_api.py
+++ b/waybackpy/save_api.py
@@ -19,7 +19,10 @@ class WaybackMachineSaveAPI(object):
    """
    def __init__(
-        self, url: str, user_agent: str = DEFAULT_USER_AGENT, max_tries: int = 8
+        self,
        url: str,
        user_agent: str = DEFAULT_USER_AGENT,
        max_tries: int = 8,
    ) -> None:
        self.url = str(url).strip().replace(" ", "%20")
        self.request_url = "https://web.archive.org/save/" + self.url
@@ -169,17 +172,16 @@ class WaybackMachineSaveAPI(object):
        tries = 0
        while True:
-            if self.saved_archive is None:
+            if tries >= 1:
-                if tries >= 1:
+                self.sleep(tries)
                    self.sleep(tries)
-                self.get_save_request_headers()
+            self.get_save_request_headers()
-                self.saved_archive = self.archive_url_parser()
+            self.saved_archive = self.archive_url_parser()
-                if isinstance(self.saved_archive, str):
+            if isinstance(self.saved_archive, str):
-                    self._archive_url = self.saved_archive
+                self._archive_url = self.saved_archive
-                    self.timestamp()
+                self.timestamp()
-                    return self.saved_archive
+                return self.saved_archive
            tries += 1
            if tries >= self.max_tries:
--- a/waybackpy/wrapper.py
+++ b/waybackpy/wrapper.py
@@ -6,21 +6,25 @@ from .cdx_api import WaybackMachineCDXServerAPI
 from .save_api import WaybackMachineSaveAPI
 from .utils import DEFAULT_USER_AGENT
 """
 The Url class is not recommended to be used anymore, instead use the
 WaybackMachineSaveAPI, WaybackMachineAvailabilityAPI and WaybackMachineCDXServerAPI.
 The reason it is still in the code is backwards compatibility with 2.x.x versions.
 If you were using the Url class before the update to version 3.x.x, your code should
 still work fine and there is no hurry to update the interface, but it is recommended
 that you do not use the Url class for new code, as it will be removed after 2025. The
 first 3.x.x version was released in January 2022, and three years are more than enough
 to update the older interface code.
 """
 class Url(object):
    """
    The Url class is not recommended to be used anymore, instead use:
    - WaybackMachineSaveAPI
    - WaybackMachineAvailabilityAPI
    - WaybackMachineCDXServerAPI
    The reason it is still in the code is backwards compatibility with 2.x.x
    versions.
    If were are using the Url before the update to version 3.x.x, your code should
    still be working fine and there is no hurry to update the interface but is
    recommended that you do not use the Url class for new code as it would be
    removed after 2025 also the first 3.x.x versions was released in January 2022
    and three years are more than enough to update the older interface code.
    """
    def __init__(self, url: str, user_agent: str = DEFAULT_USER_AGENT) -> None:
        self.url = url
        self.user_agent = str(user_agent)