Add tests (#149)

* enable codecov

* fix save_urls_on_file

* increase the limit of CDX to 25000 from 5000. 5X increase.

* added test for the CLI module

* make flake8 happy

* make mypy happy
This commit is contained in:
Akash Mahanty 2022-02-08 17:46:59 +05:30 committed by GitHub
parent 2bbfee7b2f
commit f8bf9c16f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 195 additions and 15 deletions

View File

@ -38,6 +38,6 @@ jobs:
- name: Test with pytest
run: |
pytest
# - name: Upload coverage to Codecov
# run: |
# bash <(curl -s https://codecov.io/bash) -t ${{ secrets.CODECOV_TOKEN }}
- name: Upload coverage to Codecov
run: |
bash <(curl -s https://codecov.io/bash) -t ${{ secrets.CODECOV_TOKEN }}

169
tests/test_cli.py Normal file
View File

@ -0,0 +1,169 @@
import requests
from click.testing import CliRunner
from waybackpy.cli import main
from waybackpy import __version__
def test_oldest() -> None:
    """Invoke the CLI with ``--oldest`` and check the exact archive URL printed."""
    # The URL argument is passed with surrounding whitespace on purpose of the
    # original test data; the expected output below contains none.
    cli_args = ["--url", " https://github.com ", "--oldest"]
    expected_output = (
        "Archive URL:\n"
        "https://web.archive.org/web/20080514210148/http://github.com/\n"
    )

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert outcome.output == expected_output
def test_near() -> None:
    """Invoke the CLI with ``--near`` plus a date/hour and check the printed URL."""
    cli_args = ["--url", " https://facebook.com ", "--near"]
    # Point in time the lookup is anchored to, passed as separate CLI options.
    cli_args += ["--year", "2010", "--month", "5", "--day", "10", "--hour", "6"]
    expected_output = (
        "Archive URL:\n"
        "https://web.archive.org/web/20100510082647/http://www.facebook.com/\n"
    )

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert outcome.output == expected_output
def test_json() -> None:
    """Invoke the CLI with ``--near --json`` and check the start of the output.

    Only a fixed leading fragment of the output is matched: the archive URL
    line followed by the beginning of the JSON response, up to and including
    the top-level ``"timestamp":`` key (whose value is not compared).
    """
    cli_args = ["--url", " https://apple.com ", "--near"]
    cli_args += ["--year", "2010", "--month", "2", "--day", "8", "--hour", "12"]
    cli_args.append("--json")
    expected_fragment = (
        "Archive URL:\n"
        "https://web.archive.org/web/20100208125854/http://www.apple.com/\n"
        "JSON response:\n"
        '{"url": "https://apple.com", "archived_snapshots": {"closest": '
        '{"status": "200", "available": true, "url": '
        '"http://web.archive.org/web/20100208125854/http://www.apple.com/", '
        '"timestamp": "20100208125854"}}, "timestamp":'
    )

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert expected_fragment in outcome.output
def test_newest() -> None:
    """Invoke the CLI with ``--newest`` and check the shape of the output.

    The exact URL is not compared; the output only has to mention the domain
    and contain the ``Archive URL:`` heading line.
    """
    outcome = CliRunner().invoke(
        main, ["--url", " https://microsoft.com ", "--newest"]
    )
    printed = outcome.output

    assert outcome.exit_code == 0
    assert "microsoft.com" in printed
    assert "Archive URL:\n" in printed
def test_cdx() -> None:
    """Invoke the CLI in ``--cdx`` mode with every ``--cdx-print`` field.

    Passes only if the command exits with code 0 and the output contains more
    than 3000 newline characters.
    """
    # One space-separated command line; each piece ends with a single space so
    # that splitting on " " yields no empty arguments.
    command_line = (
        "--url https://twitter.com/jack --cdx --user-agent some-user-agent "
        "--start-timestamp 2010 --end-timestamp 2012 --collapse urlkey "
        "--match-type prefix --cdx-print archiveurl --cdx-print length "
        "--cdx-print digest --cdx-print statuscode --cdx-print mimetype "
        "--cdx-print original --cdx-print timestamp --cdx-print urlkey"
    )

    outcome = CliRunner().invoke(main, command_line.split(" "))

    assert outcome.exit_code == 0
    assert outcome.output.count("\n") > 3000
def test_save() -> None:
    """Invoke the CLI with ``--save --headers`` and check the output sections.

    The output must contain the archive URL heading, a ``Cached save:`` line
    followed by either ``True`` or ``False``, the save API headers heading,
    and the saved site's address.
    """
    command_line = (
        "--url https://news.ycombinator.com --user_agent my-unique-user-agent "
        "--save --headers"
    )

    outcome = CliRunner().invoke(main, command_line.split(" "))
    printed = outcome.output

    assert outcome.exit_code == 0
    assert "Archive URL:" in printed
    # Either value is acceptable; only the presence of the line is checked.
    assert any(
        marker in printed
        for marker in ("Cached save:\nTrue", "Cached save:\nFalse")
    )
    assert "Save API headers:\n" in printed
    assert "://news.ycombinator.com" in printed
def test_version() -> None:
    """``--version`` must print exactly one line built from ``__version__``."""
    expected_output = f"waybackpy version {__version__}\n"

    outcome = CliRunner().invoke(main, ["--version"])

    assert outcome.exit_code == 0
    assert outcome.output == expected_output
def test_license() -> None:
    """``--license`` must print the LICENSE file from the GitHub repository.

    The expected text is downloaded from raw.githubusercontent.com and compared
    with the CLI output, which is expected to carry one extra trailing newline.
    """
    license_url = "https://raw.githubusercontent.com/akamhy/waybackpy/master/LICENSE"

    runner = CliRunner()
    result = runner.invoke(main, ["--license"])
    assert result.exit_code == 0

    # A timeout keeps an unresponsive host from hanging the whole test run;
    # requests waits indefinitely when none is given.
    response = requests.get(url=license_url, timeout=30)
    # Fail with a clear HTTP error instead of comparing against an error page.
    response.raise_for_status()

    assert result.output == response.text + "\n"
def test_only_url() -> None:
    """Passing only ``--url`` must print the usage hint and exit with code 0."""
    expected_output = (
        "Only URL passed, but did not specify what to do with the URL. Use "
        "--help flag for help using waybackpy.\n"
    )

    outcome = CliRunner().invoke(main, ["--url", "https://google.com"])

    assert outcome.exit_code == 0
    assert outcome.output == expected_output
def test_known_url() -> None:
    """Check ``--known-urls`` both with and without the ``--file`` flag.

    In both modes the output must contain more than 40 newline characters and
    more than 40 occurrences of the domain. With ``--file`` the output must
    also contain the message saying where the URL list was saved.
    """
    url = "https://akamhy.github.io"

    # with file generator enabled
    runner = CliRunner()
    # With --file the CLI writes its URL list into the current working
    # directory, so run inside a throwaway directory that click removes
    # afterwards instead of leaving an artifact in the checkout.
    with runner.isolated_filesystem():
        result = runner.invoke(main, ["--url", url, "--known-urls", "--file"])
    assert result.exit_code == 0
    assert result.output.count("\n") > 40
    assert result.output.count("akamhy.github.io") > 40
    assert "in the current working directory.\n" in result.output

    # without file
    runner = CliRunner()
    result = runner.invoke(main, ["--url", url, "--known-urls"])
    assert result.exit_code == 0
    assert result.output.count("\n") > 40
    assert result.output.count("akamhy.github.io") > 40

View File

@ -60,7 +60,7 @@ class WaybackMachineCDXServerAPI:
self.gzip = gzip
self.collapses = [] if collapses is None else collapses
check_collapses(self.collapses)
self.limit = 5000 if limit is None else limit
self.limit = 25000 if limit is None else limit
self.max_tries = max_tries
self.last_api_request_url: Optional[str] = None
self.use_page = False

View File

@ -59,17 +59,28 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
for url in url_gen:
url_count += 1
if not domain:
match = re.search("https?://([A-Za-z_0-9.-]+).*", url)
domain = "domain-unknown" if match is None else match.group(1)
file_name = f"{domain}-urls-{uid}.txt"
m = re.search("https?://([A-Za-z_0-9.-]+).*", url)
domain = "domain-unknown"
if m:
domain = m.group(1)
file_name = "{domain}-urls-{uid}.txt".format(domain=domain, uid=uid)
file_path = os.path.join(os.getcwd(), file_name)
with open(file_path, "a", encoding="UTF-8") as file:
file.write(f"{url}\n")
if not os.path.isfile(file_path):
open(file_path, "w+").close()
with open(file_path, "a") as f:
f.write("{url}\n".format(url=url))
click.echo(url)
if url_count > 0 or file_name is not None:
click.echo(f"\n\n'{file_name}' saved in current working directory")
if url_count > 0:
click.echo(
f"\n\n{url_count} URLs saved inside '{file_name}' in the current "
+ "working directory."
)
else:
click.echo("No known URLs found. Please try a diffrent input!")
@ -343,8 +354,8 @@ def main( # pylint: disable=no-value-for-parameter
url_gen = wayback.known_urls(subdomain=subdomain)
if file:
return save_urls_on_file(url_gen)
save_urls_on_file(url_gen)
else:
for url_ in url_gen:
click.echo(url_)