waybackpy/cli.py: Added help text, fix bug in the cdx_print parameter and lots of other stuff

parameter --filters is now --filter

parameter --collapses is now --collapse

added a new --license flag for fetching the license from GitHub repo and printing it.
This commit is contained in:
Akash Mahanty 2022-01-30 20:00:50 +05:30
parent 004027f73b
commit 946c28eddf

View File

@ -3,6 +3,7 @@ import re
import os import os
import json as JSON import json as JSON
import random import random
import requests
import string import string
from .__version__ import __version__ from .__version__ import __version__
from .utils import DEFAULT_USER_AGENT from .utils import DEFAULT_USER_AGENT
@ -21,10 +22,11 @@ from .wrapper import Url
"--user-agent", "--user-agent",
"--user_agent", "--user_agent",
default=DEFAULT_USER_AGENT, default=DEFAULT_USER_AGENT,
help="User agent, default user agent is '%s' " % DEFAULT_USER_AGENT, help="User agent, default value is '%s'." % DEFAULT_USER_AGENT,
) )
@click.option("-v", "--version", is_flag=True, default=False, help="waybackpy version.")
@click.option( @click.option(
"-v", "--version", is_flag=True, default=False, help="Print waybackpy version." "-l", "--license", is_flag=True, default=False, help="license of Waybackpy."
) )
@click.option( @click.option(
"-n", "-n",
@ -34,24 +36,28 @@ from .wrapper import Url
"--archive-url", "--archive-url",
default=False, default=False,
is_flag=True, is_flag=True,
help="Fetch the newest archive of the specified URL", help="Retrieve the newest archive of URL.",
) )
@click.option( @click.option(
"-o", "-o",
"--oldest", "--oldest",
default=False, default=False,
is_flag=True, is_flag=True,
help="Fetch the oldest archive of the specified URL", help="Retrieve the oldest archive of URL.",
) )
@click.option( @click.option(
"-j", "-j",
"--json", "--json",
default=False, default=False,
is_flag=True, is_flag=True,
help="Spit out the JSON data for availability_api commands.", help="JSON data returned by the availability API.",
) )
@click.option( @click.option(
"-N", "--near", default=False, is_flag=True, help="Archive near specified time." "-N",
"--near",
default=False,
is_flag=True,
help="Archive close to a specified time.",
) )
@click.option("-Y", "--year", type=click.IntRange(1994, 9999), help="Year in integer.") @click.option("-Y", "--year", type=click.IntRange(1994, 9999), help="Year in integer.")
@click.option("-M", "--month", type=click.IntRange(1, 12), help="Month in integer.") @click.option("-M", "--month", type=click.IntRange(1, 12), help="Month in integer.")
@ -70,7 +76,7 @@ from .wrapper import Url
"--headers", "--headers",
default=False, default=False,
is_flag=True, is_flag=True,
help="Spit out the headers data for save_api commands.", help="Headers data of the SavePageNow API.",
) )
@click.option( @click.option(
"-ku", "-ku",
@ -99,51 +105,66 @@ from .wrapper import Url
"--cdx", "--cdx",
default=False, default=False,
is_flag=True, is_flag=True,
help="Spit out the headers data for save_api commands.", help="Flag for using CDX API.",
) )
@click.option( @click.option(
"-st", "-st",
"--start-timestamp", "--start-timestamp",
"--start_timestamp", "--start_timestamp",
help="Start timestamp for CDX API in yyyyMMddhhmmss format.",
) )
@click.option( @click.option(
"-et", "-et",
"--end-timestamp", "--end-timestamp",
"--end_timestamp", "--end_timestamp",
help="End timestamp for CDX API in yyyyMMddhhmmss format.",
) )
@click.option( @click.option(
"-f", "-f",
"--filters", "--filter",
multiple=True, multiple=True,
help="Filter on a specific field or all the CDX fields.",
) )
@click.option( @click.option(
"-mt", "-mt",
"--match-type", "--match-type",
"--match_type", "--match_type",
help="The default behavior is to return matches for an exact URL. "
+ "However, the CDX server can also return results matching a certain prefix, "
+ "a certain host, or all sub-hosts by using the match_type",
) )
@click.option( @click.option(
"-gz", "-gz",
"--gzip", "--gzip",
help="To disable gzip compression pass false as argument to this parameter. "
+ "The default behavior is gzip compression enabled.",
) )
@click.option( @click.option(
"-c", "-c",
"--collapses", "--collapse",
multiple=True, multiple=True,
help="Filtering or 'collapse' results based on a field, or a substring of a field.",
) )
@click.option( @click.option(
"-l", "-l",
"--limit", "--limit",
help="Number of maximum record that CDX API is asked to return per API call, "
+ "default value is 500 records.",
) )
@click.option( @click.option(
"-cp", "-cp",
"--cdx-print", "--cdx-print",
"--cdx_print", "--cdx_print",
multiple=True, multiple=True,
help="Print only certain fields of the CDX API response, "
+ "if this parameter is not used then the plain text response of the CDX API "
+ "will be printed.",
) )
def main( def main(
url, url,
user_agent, user_agent,
version, version,
license,
newest, newest,
oldest, oldest,
json, json,
@ -161,15 +182,14 @@ def main(
cdx, cdx,
start_timestamp, start_timestamp,
end_timestamp, end_timestamp,
filters, filter,
match_type, match_type,
gzip, gzip,
collapses, collapse,
limit, limit,
cdx_print, cdx_print,
): ):
""" """\b
\b
_ _ _ _
| | | | | | | |
__ ____ _ _ _| |__ __ _ ___| | ___ __ _ _ __ ____ _ _ _| |__ __ _ ___| | ___ __ _ _
@ -181,15 +201,13 @@ def main(
Python package & CLI tool that interfaces the Wayback Machine APIs Python package & CLI tool that interfaces the Wayback Machine APIs
Released under the MIT License.
License: https://github.com/akamhy/waybackpy/blob/master/LICENSE
Repository: https://github.com/akamhy/waybackpy Repository: https://github.com/akamhy/waybackpy
Documentation: https://github.com/akamhy/waybackpy/wiki/CLI-docs
waybackpy - CLI usage(Demo video): https://asciinema.org/a/464367 waybackpy - CLI usage(Demo video): https://asciinema.org/a/464367
Documentation: https://github.com/akamhy/waybackpy/wiki/CLI-docs Released under the MIT License. Use the flag --license for license.
""" """
@ -197,8 +215,31 @@ def main(
click.echo("waybackpy version %s" % __version__) click.echo("waybackpy version %s" % __version__)
return return
if license:
click.echo(
requests.get(
url="https://raw.githubusercontent.com/akamhy/waybackpy/master/LICENSE"
).text
)
return
if not url: if not url:
click.echo("No URL detected. Please pass an URL.") click.echo("No URL detected. Please provide an URL.")
return
if (
url
and not version
and not oldest
and not newest
and not near
and not save
and not known_urls
and not cdx
):
click.echo(
"Only URL passed, but did not specify what to do with the URL. Use --help flag for help using waybackpy."
)
return return
def echo_availability_api(availability_api_instance): def echo_availability_api(availability_api_instance):
@ -300,8 +341,8 @@ def main(
click.echo(url) click.echo(url)
if cdx: if cdx:
filters = list(filters) filters = list(filter)
collapses = list(collapses) collapses = list(collapse)
cdx_print = list(cdx_print) cdx_print = list(cdx_print)
cdx_api = WaybackMachineCDXServerAPI( cdx_api = WaybackMachineCDXServerAPI(
@ -323,23 +364,34 @@ def main(
click.echo(snapshot) click.echo(snapshot)
else: else:
output_string = "" output_string = ""
if "urlkey" or "url-key" or "url_key" in cdx_print: if any(val in cdx_print for val in ["urlkey", "url-key", "url_key"]):
output_string = output_string + snapshot.urlkey + " " output_string = output_string + snapshot.urlkey + " "
if "timestamp" or "time-stamp" or "time_stamp" in cdx_print: if any(
val in cdx_print
for val in ["timestamp", "time-stamp", "time_stamp"]
):
output_string = output_string + snapshot.timestamp + " " output_string = output_string + snapshot.timestamp + " "
if "original" in cdx_print: if "original" in cdx_print:
output_string = output_string + snapshot.original + " " output_string = output_string + snapshot.original + " "
if "original" in cdx_print: if "original" in cdx_print:
output_string = output_string + snapshot.original + " " output_string = output_string + snapshot.original + " "
if "mimetype" or "mime-type" or "mime_type" in cdx_print: if any(
val in cdx_print for val in ["mimetype", "mime-type", "mime_type"]
):
output_string = output_string + snapshot.mimetype + " " output_string = output_string + snapshot.mimetype + " "
if "statuscode" or "status-code" or "status_code" in cdx_print: if any(
val in cdx_print
for val in ["statuscode", "status-code", "status_code"]
):
output_string = output_string + snapshot.statuscode + " " output_string = output_string + snapshot.statuscode + " "
if "digest" in cdx_print: if "digest" in cdx_print:
output_string = output_string + snapshot.digest + " " output_string = output_string + snapshot.digest + " "
if "length" in cdx_print: if "length" in cdx_print:
output_string = output_string + snapshot.length + " " output_string = output_string + snapshot.length + " "
if "archiveurl" or "archive-url" or "archive_url" in cdx_print: if any(
val in cdx_print
for val in ["archiveurl", "archive-url", "archive_url"]
):
output_string = output_string + snapshot.archive_url + " " output_string = output_string + snapshot.archive_url + " "
click.echo(output_string) click.echo(output_string)