Add tests (#149)
* enable codecov
* fix save_urls_on_file
* increase the default CDX limit from 5000 to 25000 (a 5x increase)
* add tests for the CLI module
* make flake8 happy
* make mypy happy
This commit is contained in:
		@@ -60,7 +60,7 @@ class WaybackMachineCDXServerAPI:
 | 
			
		||||
        self.gzip = gzip
 | 
			
		||||
        self.collapses = [] if collapses is None else collapses
 | 
			
		||||
        check_collapses(self.collapses)
 | 
			
		||||
        self.limit = 5000 if limit is None else limit
 | 
			
		||||
        self.limit = 25000 if limit is None else limit
 | 
			
		||||
        self.max_tries = max_tries
 | 
			
		||||
        self.last_api_request_url: Optional[str] = None
 | 
			
		||||
        self.use_page = False
 | 
			
		||||
 
 | 
			
		||||
@@ -59,17 +59,28 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
 | 
			
		||||
    for url in url_gen:
 | 
			
		||||
        url_count += 1
 | 
			
		||||
        if not domain:
 | 
			
		||||
            match = re.search("https?://([A-Za-z_0-9.-]+).*", url)
 | 
			
		||||
            domain = "domain-unknown" if match is None else match.group(1)
 | 
			
		||||
            file_name = f"{domain}-urls-{uid}.txt"
 | 
			
		||||
            m = re.search("https?://([A-Za-z_0-9.-]+).*", url)
 | 
			
		||||
 | 
			
		||||
            domain = "domain-unknown"
 | 
			
		||||
 | 
			
		||||
            if m:
 | 
			
		||||
                domain = m.group(1)
 | 
			
		||||
 | 
			
		||||
            file_name = "{domain}-urls-{uid}.txt".format(domain=domain, uid=uid)
 | 
			
		||||
            file_path = os.path.join(os.getcwd(), file_name)
 | 
			
		||||
            with open(file_path, "a", encoding="UTF-8") as file:
 | 
			
		||||
                file.write(f"{url}\n")
 | 
			
		||||
            if not os.path.isfile(file_path):
 | 
			
		||||
                open(file_path, "w+").close()
 | 
			
		||||
 | 
			
		||||
        with open(file_path, "a") as f:
 | 
			
		||||
            f.write("{url}\n".format(url=url))
 | 
			
		||||
 | 
			
		||||
        click.echo(url)
 | 
			
		||||
 | 
			
		||||
    if url_count > 0 or file_name is not None:
 | 
			
		||||
        click.echo(f"\n\n'{file_name}' saved in current working directory")
 | 
			
		||||
    if url_count > 0:
 | 
			
		||||
        click.echo(
 | 
			
		||||
            f"\n\n{url_count} URLs saved inside '{file_name}' in the current "
 | 
			
		||||
            + "working directory."
 | 
			
		||||
        )
 | 
			
		||||
    else:
 | 
			
		||||
        click.echo("No known URLs found. Please try a diffrent input!")
 | 
			
		||||
 | 
			
		||||
@@ -343,10 +354,10 @@ def main(  # pylint: disable=no-value-for-parameter
 | 
			
		||||
        url_gen = wayback.known_urls(subdomain=subdomain)
 | 
			
		||||
 | 
			
		||||
        if file:
 | 
			
		||||
            return save_urls_on_file(url_gen)
 | 
			
		||||
 | 
			
		||||
        for url_ in url_gen:
 | 
			
		||||
            click.echo(url_)
 | 
			
		||||
            save_urls_on_file(url_gen)
 | 
			
		||||
        else:
 | 
			
		||||
            for url_ in url_gen:
 | 
			
		||||
                click.echo(url_)
 | 
			
		||||
 | 
			
		||||
    elif cdx:
 | 
			
		||||
        filters = list(cdx_filter)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user