Add tests (#149)
* Enable Codecov
* Fix save_urls_on_file
* Increase the default CDX limit from 5000 to 25000 (a 5x increase)
* Add tests for the CLI module
* Make flake8 happy
* Make mypy happy
This commit is contained in:
parent
2bbfee7b2f
commit
f8bf9c16f9
6
.github/workflows/unit-test.yml
vendored
6
.github/workflows/unit-test.yml
vendored
@ -38,6 +38,6 @@ jobs:
|
|||||||
- name: Test with pytest
|
- name: Test with pytest
|
||||||
run: |
|
run: |
|
||||||
pytest
|
pytest
|
||||||
# - name: Upload coverage to Codecov
|
- name: Upload coverage to Codecov
|
||||||
# run: |
|
run: |
|
||||||
# bash <(curl -s https://codecov.io/bash) -t ${{ secrets.CODECOV_TOKEN }}
|
bash <(curl -s https://codecov.io/bash) -t ${{ secrets.CODECOV_TOKEN }}
|
||||||
|
169
tests/test_cli.py
Normal file
169
tests/test_cli.py
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
import requests
|
||||||
|
from click.testing import CliRunner
|
||||||
|
|
||||||
|
from waybackpy.cli import main
|
||||||
|
from waybackpy import __version__
|
||||||
|
|
||||||
|
|
||||||
|
def test_oldest() -> None:
    """Check that ``--oldest`` prints the expected archive URL for github.com."""
    cli_args = ["--url", " https://github.com ", "--oldest"]
    expected_output = (
        "Archive URL:\n"
        "https://web.archive.org/web/20080514210148/http://github.com/\n"
    )

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert outcome.output == expected_output
|
||||||
|
|
||||||
|
|
||||||
|
def test_near() -> None:
    """Check that ``--near`` with an explicit date prints the expected snapshot.

    The CLI is asked for the facebook.com archive nearest to
    2010-05-10, hour 6.
    """
    # Insertion order is preserved, so the flags reach the CLI as
    # --year, --month, --day, --hour.
    date_options = {"--year": "2010", "--month": "5", "--day": "10", "--hour": "6"}
    cli_args = ["--url", " https://facebook.com ", "--near"]
    for flag, value in date_options.items():
        cli_args.extend([flag, value])

    expected_output = (
        "Archive URL:\n"
        "https://web.archive.org/web/20100510082647/http://www.facebook.com/\n"
    )

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert outcome.output == expected_output
|
||||||
|
|
||||||
|
|
||||||
|
def test_json() -> None:
    """Check that ``--near ... --json`` prints the archive URL and JSON response.

    Only the leading part of the output is compared; the text after the
    final ``"timestamp":`` key is not checked.
    """
    date_options = {"--year": "2010", "--month": "2", "--day": "8", "--hour": "12"}
    cli_args = ["--url", " https://apple.com ", "--near"]
    for flag, value in date_options.items():
        cli_args.extend([flag, value])
    cli_args.append("--json")

    expected_fragment = (
        "Archive URL:\n"
        "https://web.archive.org/web/20100208125854/http://www.apple.com/\n"
        "JSON response:\n"
        '{"url": "https://apple.com", "archived_snapshots": {"closest": '
        '{"status": "200", "available": true, "url": '
        '"http://web.archive.org/web/20100208125854/http://www.apple.com/", '
        '"timestamp": "20100208125854"}}, "timestamp":'
    )

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert expected_fragment in outcome.output
|
||||||
|
|
||||||
|
|
||||||
|
def test_newest() -> None:
    """Check that ``--newest`` prints an archive URL mentioning microsoft.com."""
    cli_args = ["--url", " https://microsoft.com ", "--newest"]

    outcome = CliRunner().invoke(main, cli_args)
    printed = outcome.output

    assert outcome.exit_code == 0
    assert "microsoft.com" in printed and "Archive URL:\n" in printed
|
||||||
|
|
||||||
|
|
||||||
|
def test_cdx() -> None:
    """Run a prefix-match CDX query for twitter.com/jack and check its size.

    The query covers 2010-2012, is collapsed on ``urlkey`` and requests
    eight CDX fields to be printed; the output is expected to span more
    than 3000 lines.
    """
    printed_fields = [
        "archiveurl",
        "length",
        "digest",
        "statuscode",
        "mimetype",
        "original",
        "timestamp",
        "urlkey",
    ]
    # Spell the arguments out as a list instead of splitting a
    # backslash-continued string on " ": that form silently injects
    # empty-string arguments as soon as a continuation line is indented.
    cli_args = [
        "--url",
        "https://twitter.com/jack",
        "--cdx",
        "--user-agent",
        "some-user-agent",
        "--start-timestamp",
        "2010",
        "--end-timestamp",
        "2012",
        "--collapse",
        "urlkey",
        "--match-type",
        "prefix",
    ]
    for field in printed_fields:
        cli_args.extend(["--cdx-print", field])

    runner = CliRunner()
    result = runner.invoke(main, cli_args)

    assert result.exit_code == 0
    assert result.output.count("\n") > 3000
|
||||||
|
|
||||||
|
|
||||||
|
def test_save() -> None:
    """Check that ``--save --headers`` reports the archive URL, cache flag and headers."""
    # Explicit list rather than splitting a backslash-continued string on
    # " ", which breaks (empty-string arguments) if a continuation line is
    # ever indented.
    # NOTE(review): this test uses the underscore spelling --user_agent;
    # presumably an accepted alias of the option - confirm against the CLI.
    cli_args = [
        "--url",
        "https://news.ycombinator.com",
        "--user_agent",
        "my-unique-user-agent",
        "--save",
        "--headers",
    ]

    runner = CliRunner()
    result = runner.invoke(main, cli_args)
    output = result.output

    assert result.exit_code == 0
    assert "Archive URL:" in output
    # Either value is acceptable; only the presence of the field is checked.
    assert "Cached save:\nTrue" in output or "Cached save:\nFalse" in output
    assert "Save API headers:\n" in output
    assert "://news.ycombinator.com" in output
|
||||||
|
|
||||||
|
|
||||||
|
def test_version() -> None:
    """Check that ``--version`` prints the package version and exits cleanly."""
    expected_output = f"waybackpy version {__version__}\n"

    outcome = CliRunner().invoke(main, ["--version"])

    assert outcome.exit_code == 0
    assert outcome.output == expected_output
|
||||||
|
|
||||||
|
|
||||||
|
def test_license() -> None:
    """Check that ``--license`` prints the LICENSE text published on GitHub.

    The reference text is downloaded from the ``master`` branch of the
    repository, so this test needs network access.
    """
    license_url = "https://raw.githubusercontent.com/akamhy/waybackpy/master/LICENSE"

    runner = CliRunner()
    result = runner.invoke(main, ["--license"])
    assert result.exit_code == 0

    # A timeout keeps the test from hanging forever on a stalled connection,
    # and raise_for_status() reports an HTTP failure as such instead of
    # comparing the CLI output against an error page.
    response = requests.get(url=license_url, timeout=30)
    response.raise_for_status()

    assert result.output == response.text + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def test_only_url() -> None:
    """Check the hint printed when ``--url`` is given without any action flag."""
    expected_output = (
        "Only URL passed, but did not specify what to do with the URL. "
        "Use --help flag for help using waybackpy.\n"
    )

    outcome = CliRunner().invoke(main, ["--url", "https://google.com"])

    assert outcome.exit_code == 0
    assert outcome.output == expected_output
|
||||||
|
|
||||||
|
|
||||||
|
def test_known_url() -> None:
    """Check ``--known-urls`` both with and without the ``--file`` option.

    In both modes the output is expected to contain more than 40 lines and
    more than 40 mentions of the queried domain.
    """
    runner = CliRunner()
    base_args = ["--url", "https://akamhy.github.io", "--known-urls"]

    # With --file the CLI writes the URL list into the current working
    # directory. Run inside a temporary directory so the test does not leave
    # a stray "<domain>-urls-<uid>.txt" file behind.
    with runner.isolated_filesystem():
        result = runner.invoke(main, base_args + ["--file"])
    assert result.exit_code == 0
    assert result.output.count("\n") > 40
    assert result.output.count("akamhy.github.io") > 40
    assert "in the current working directory.\n" in result.output

    # Without --file the URLs are only echoed.
    result = runner.invoke(main, base_args)
    assert result.exit_code == 0
    assert result.output.count("\n") > 40
    assert result.output.count("akamhy.github.io") > 40
|
@ -60,7 +60,7 @@ class WaybackMachineCDXServerAPI:
|
|||||||
self.gzip = gzip
|
self.gzip = gzip
|
||||||
self.collapses = [] if collapses is None else collapses
|
self.collapses = [] if collapses is None else collapses
|
||||||
check_collapses(self.collapses)
|
check_collapses(self.collapses)
|
||||||
self.limit = 5000 if limit is None else limit
|
self.limit = 25000 if limit is None else limit
|
||||||
self.max_tries = max_tries
|
self.max_tries = max_tries
|
||||||
self.last_api_request_url: Optional[str] = None
|
self.last_api_request_url: Optional[str] = None
|
||||||
self.use_page = False
|
self.use_page = False
|
||||||
|
@ -59,17 +59,28 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
|
|||||||
for url in url_gen:
|
for url in url_gen:
|
||||||
url_count += 1
|
url_count += 1
|
||||||
if not domain:
|
if not domain:
|
||||||
match = re.search("https?://([A-Za-z_0-9.-]+).*", url)
|
m = re.search("https?://([A-Za-z_0-9.-]+).*", url)
|
||||||
domain = "domain-unknown" if match is None else match.group(1)
|
|
||||||
file_name = f"{domain}-urls-{uid}.txt"
|
domain = "domain-unknown"
|
||||||
|
|
||||||
|
if m:
|
||||||
|
domain = m.group(1)
|
||||||
|
|
||||||
|
file_name = "{domain}-urls-{uid}.txt".format(domain=domain, uid=uid)
|
||||||
file_path = os.path.join(os.getcwd(), file_name)
|
file_path = os.path.join(os.getcwd(), file_name)
|
||||||
with open(file_path, "a", encoding="UTF-8") as file:
|
if not os.path.isfile(file_path):
|
||||||
file.write(f"{url}\n")
|
open(file_path, "w+").close()
|
||||||
|
|
||||||
|
with open(file_path, "a") as f:
|
||||||
|
f.write("{url}\n".format(url=url))
|
||||||
|
|
||||||
click.echo(url)
|
click.echo(url)
|
||||||
|
|
||||||
if url_count > 0 or file_name is not None:
|
if url_count > 0:
|
||||||
click.echo(f"\n\n'{file_name}' saved in current working directory")
|
click.echo(
|
||||||
|
f"\n\n{url_count} URLs saved inside '{file_name}' in the current "
|
||||||
|
+ "working directory."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
click.echo("No known URLs found. Please try a diffrent input!")
|
click.echo("No known URLs found. Please try a diffrent input!")
|
||||||
|
|
||||||
@ -343,10 +354,10 @@ def main( # pylint: disable=no-value-for-parameter
|
|||||||
url_gen = wayback.known_urls(subdomain=subdomain)
|
url_gen = wayback.known_urls(subdomain=subdomain)
|
||||||
|
|
||||||
if file:
|
if file:
|
||||||
return save_urls_on_file(url_gen)
|
save_urls_on_file(url_gen)
|
||||||
|
else:
|
||||||
for url_ in url_gen:
|
for url_ in url_gen:
|
||||||
click.echo(url_)
|
click.echo(url_)
|
||||||
|
|
||||||
elif cdx:
|
elif cdx:
|
||||||
filters = list(cdx_filter)
|
filters = list(cdx_filter)
|
||||||
|
Loading…
Reference in New Issue
Block a user