waybackpy/cli.py: Added help text, fixed a bug in the cdx_print parameter, and lots of other stuff
The --filters parameter is now --filter, and the --collapses parameter is now --collapse. Added a new --license flag that fetches the license from the GitHub repo and prints it.
This commit is contained in:
parent
004027f73b
commit
946c28eddf
106
waybackpy/cli.py
106
waybackpy/cli.py
@ -3,6 +3,7 @@ import re
|
|||||||
import os
|
import os
|
||||||
import json as JSON
|
import json as JSON
|
||||||
import random
|
import random
|
||||||
|
import requests
|
||||||
import string
|
import string
|
||||||
from .__version__ import __version__
|
from .__version__ import __version__
|
||||||
from .utils import DEFAULT_USER_AGENT
|
from .utils import DEFAULT_USER_AGENT
|
||||||
@ -21,10 +22,11 @@ from .wrapper import Url
|
|||||||
"--user-agent",
|
"--user-agent",
|
||||||
"--user_agent",
|
"--user_agent",
|
||||||
default=DEFAULT_USER_AGENT,
|
default=DEFAULT_USER_AGENT,
|
||||||
help="User agent, default user agent is '%s' " % DEFAULT_USER_AGENT,
|
help="User agent, default value is '%s'." % DEFAULT_USER_AGENT,
|
||||||
)
|
)
|
||||||
|
@click.option("-v", "--version", is_flag=True, default=False, help="waybackpy version.")
|
||||||
@click.option(
|
@click.option(
|
||||||
"-v", "--version", is_flag=True, default=False, help="Print waybackpy version."
|
"-l", "--license", is_flag=True, default=False, help="license of Waybackpy."
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-n",
|
"-n",
|
||||||
@ -34,24 +36,28 @@ from .wrapper import Url
|
|||||||
"--archive-url",
|
"--archive-url",
|
||||||
default=False,
|
default=False,
|
||||||
is_flag=True,
|
is_flag=True,
|
||||||
help="Fetch the newest archive of the specified URL",
|
help="Retrieve the newest archive of URL.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-o",
|
"-o",
|
||||||
"--oldest",
|
"--oldest",
|
||||||
default=False,
|
default=False,
|
||||||
is_flag=True,
|
is_flag=True,
|
||||||
help="Fetch the oldest archive of the specified URL",
|
help="Retrieve the oldest archive of URL.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-j",
|
"-j",
|
||||||
"--json",
|
"--json",
|
||||||
default=False,
|
default=False,
|
||||||
is_flag=True,
|
is_flag=True,
|
||||||
help="Spit out the JSON data for availability_api commands.",
|
help="JSON data returned by the availability API.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-N", "--near", default=False, is_flag=True, help="Archive near specified time."
|
"-N",
|
||||||
|
"--near",
|
||||||
|
default=False,
|
||||||
|
is_flag=True,
|
||||||
|
help="Archive close to a specified time.",
|
||||||
)
|
)
|
||||||
@click.option("-Y", "--year", type=click.IntRange(1994, 9999), help="Year in integer.")
|
@click.option("-Y", "--year", type=click.IntRange(1994, 9999), help="Year in integer.")
|
||||||
@click.option("-M", "--month", type=click.IntRange(1, 12), help="Month in integer.")
|
@click.option("-M", "--month", type=click.IntRange(1, 12), help="Month in integer.")
|
||||||
@ -70,7 +76,7 @@ from .wrapper import Url
|
|||||||
"--headers",
|
"--headers",
|
||||||
default=False,
|
default=False,
|
||||||
is_flag=True,
|
is_flag=True,
|
||||||
help="Spit out the headers data for save_api commands.",
|
help="Headers data of the SavePageNow API.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-ku",
|
"-ku",
|
||||||
@ -99,51 +105,66 @@ from .wrapper import Url
|
|||||||
"--cdx",
|
"--cdx",
|
||||||
default=False,
|
default=False,
|
||||||
is_flag=True,
|
is_flag=True,
|
||||||
help="Spit out the headers data for save_api commands.",
|
help="Flag for using CDX API.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-st",
|
"-st",
|
||||||
"--start-timestamp",
|
"--start-timestamp",
|
||||||
"--start_timestamp",
|
"--start_timestamp",
|
||||||
|
help="Start timestamp for CDX API in yyyyMMddhhmmss format.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-et",
|
"-et",
|
||||||
"--end-timestamp",
|
"--end-timestamp",
|
||||||
"--end_timestamp",
|
"--end_timestamp",
|
||||||
|
help="End timestamp for CDX API in yyyyMMddhhmmss format.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-f",
|
"-f",
|
||||||
"--filters",
|
"--filter",
|
||||||
multiple=True,
|
multiple=True,
|
||||||
|
help="Filter on a specific field or all the CDX fields.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-mt",
|
"-mt",
|
||||||
"--match-type",
|
"--match-type",
|
||||||
"--match_type",
|
"--match_type",
|
||||||
|
help="The default behavior is to return matches for an exact URL. "
|
||||||
|
+ "However, the CDX server can also return results matching a certain prefix, "
|
||||||
|
+ "a certain host, or all sub-hosts by using the match_type",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-gz",
|
"-gz",
|
||||||
"--gzip",
|
"--gzip",
|
||||||
|
help="To disable gzip compression pass false as argument to this parameter. "
|
||||||
|
+ "The default behavior is gzip compression enabled.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-c",
|
"-c",
|
||||||
"--collapses",
|
"--collapse",
|
||||||
multiple=True,
|
multiple=True,
|
||||||
|
help="Filtering or 'collapse' results based on a field, or a substring of a field.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-l",
|
"-l",
|
||||||
"--limit",
|
"--limit",
|
||||||
|
help="Number of maximum record that CDX API is asked to return per API call, "
|
||||||
|
+ "default value is 500 records.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-cp",
|
"-cp",
|
||||||
"--cdx-print",
|
"--cdx-print",
|
||||||
"--cdx_print",
|
"--cdx_print",
|
||||||
multiple=True,
|
multiple=True,
|
||||||
|
help="Print only certain fields of the CDX API response, "
|
||||||
|
+ "if this parameter is not used then the plain text response of the CDX API "
|
||||||
|
+ "will be printed.",
|
||||||
)
|
)
|
||||||
def main(
|
def main(
|
||||||
url,
|
url,
|
||||||
user_agent,
|
user_agent,
|
||||||
version,
|
version,
|
||||||
|
license,
|
||||||
newest,
|
newest,
|
||||||
oldest,
|
oldest,
|
||||||
json,
|
json,
|
||||||
@ -161,15 +182,14 @@ def main(
|
|||||||
cdx,
|
cdx,
|
||||||
start_timestamp,
|
start_timestamp,
|
||||||
end_timestamp,
|
end_timestamp,
|
||||||
filters,
|
filter,
|
||||||
match_type,
|
match_type,
|
||||||
gzip,
|
gzip,
|
||||||
collapses,
|
collapse,
|
||||||
limit,
|
limit,
|
||||||
cdx_print,
|
cdx_print,
|
||||||
):
|
):
|
||||||
"""
|
"""\b
|
||||||
\b
|
|
||||||
_ _
|
_ _
|
||||||
| | | |
|
| | | |
|
||||||
__ ____ _ _ _| |__ __ _ ___| | ___ __ _ _
|
__ ____ _ _ _| |__ __ _ ___| | ___ __ _ _
|
||||||
@ -181,15 +201,13 @@ def main(
|
|||||||
|
|
||||||
Python package & CLI tool that interfaces the Wayback Machine APIs
|
Python package & CLI tool that interfaces the Wayback Machine APIs
|
||||||
|
|
||||||
Released under the MIT License.
|
|
||||||
|
|
||||||
License: https://github.com/akamhy/waybackpy/blob/master/LICENSE
|
|
||||||
|
|
||||||
Repository: https://github.com/akamhy/waybackpy
|
Repository: https://github.com/akamhy/waybackpy
|
||||||
|
|
||||||
|
Documentation: https://github.com/akamhy/waybackpy/wiki/CLI-docs
|
||||||
|
|
||||||
waybackpy - CLI usage(Demo video): https://asciinema.org/a/464367
|
waybackpy - CLI usage(Demo video): https://asciinema.org/a/464367
|
||||||
|
|
||||||
Documentation: https://github.com/akamhy/waybackpy/wiki/CLI-docs
|
Released under the MIT License. Use the flag --license for license.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -197,8 +215,31 @@ def main(
|
|||||||
click.echo("waybackpy version %s" % __version__)
|
click.echo("waybackpy version %s" % __version__)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if license:
|
||||||
|
click.echo(
|
||||||
|
requests.get(
|
||||||
|
url="https://raw.githubusercontent.com/akamhy/waybackpy/master/LICENSE"
|
||||||
|
).text
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
if not url:
|
if not url:
|
||||||
click.echo("No URL detected. Please pass an URL.")
|
click.echo("No URL detected. Please provide an URL.")
|
||||||
|
return
|
||||||
|
|
||||||
|
if (
|
||||||
|
url
|
||||||
|
and not version
|
||||||
|
and not oldest
|
||||||
|
and not newest
|
||||||
|
and not near
|
||||||
|
and not save
|
||||||
|
and not known_urls
|
||||||
|
and not cdx
|
||||||
|
):
|
||||||
|
click.echo(
|
||||||
|
"Only URL passed, but did not specify what to do with the URL. Use --help flag for help using waybackpy."
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
def echo_availability_api(availability_api_instance):
|
def echo_availability_api(availability_api_instance):
|
||||||
@ -300,8 +341,8 @@ def main(
|
|||||||
click.echo(url)
|
click.echo(url)
|
||||||
|
|
||||||
if cdx:
|
if cdx:
|
||||||
filters = list(filters)
|
filters = list(filter)
|
||||||
collapses = list(collapses)
|
collapses = list(collapse)
|
||||||
cdx_print = list(cdx_print)
|
cdx_print = list(cdx_print)
|
||||||
|
|
||||||
cdx_api = WaybackMachineCDXServerAPI(
|
cdx_api = WaybackMachineCDXServerAPI(
|
||||||
@ -323,23 +364,34 @@ def main(
|
|||||||
click.echo(snapshot)
|
click.echo(snapshot)
|
||||||
else:
|
else:
|
||||||
output_string = ""
|
output_string = ""
|
||||||
if "urlkey" or "url-key" or "url_key" in cdx_print:
|
if any(val in cdx_print for val in ["urlkey", "url-key", "url_key"]):
|
||||||
output_string = output_string + snapshot.urlkey + " "
|
output_string = output_string + snapshot.urlkey + " "
|
||||||
if "timestamp" or "time-stamp" or "time_stamp" in cdx_print:
|
if any(
|
||||||
|
val in cdx_print
|
||||||
|
for val in ["timestamp", "time-stamp", "time_stamp"]
|
||||||
|
):
|
||||||
output_string = output_string + snapshot.timestamp + " "
|
output_string = output_string + snapshot.timestamp + " "
|
||||||
if "original" in cdx_print:
|
if "original" in cdx_print:
|
||||||
output_string = output_string + snapshot.original + " "
|
output_string = output_string + snapshot.original + " "
|
||||||
if "original" in cdx_print:
|
if "original" in cdx_print:
|
||||||
output_string = output_string + snapshot.original + " "
|
output_string = output_string + snapshot.original + " "
|
||||||
if "mimetype" or "mime-type" or "mime_type" in cdx_print:
|
if any(
|
||||||
|
val in cdx_print for val in ["mimetype", "mime-type", "mime_type"]
|
||||||
|
):
|
||||||
output_string = output_string + snapshot.mimetype + " "
|
output_string = output_string + snapshot.mimetype + " "
|
||||||
if "statuscode" or "status-code" or "status_code" in cdx_print:
|
if any(
|
||||||
|
val in cdx_print
|
||||||
|
for val in ["statuscode", "status-code", "status_code"]
|
||||||
|
):
|
||||||
output_string = output_string + snapshot.statuscode + " "
|
output_string = output_string + snapshot.statuscode + " "
|
||||||
if "digest" in cdx_print:
|
if "digest" in cdx_print:
|
||||||
output_string = output_string + snapshot.digest + " "
|
output_string = output_string + snapshot.digest + " "
|
||||||
if "length" in cdx_print:
|
if "length" in cdx_print:
|
||||||
output_string = output_string + snapshot.length + " "
|
output_string = output_string + snapshot.length + " "
|
||||||
if "archiveurl" or "archive-url" or "archive_url" in cdx_print:
|
if any(
|
||||||
|
val in cdx_print
|
||||||
|
for val in ["archiveurl", "archive-url", "archive_url"]
|
||||||
|
):
|
||||||
output_string = output_string + snapshot.archive_url + " "
|
output_string = output_string + snapshot.archive_url + " "
|
||||||
click.echo(output_string)
|
click.echo(output_string)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user