fix: f-string
This commit is contained in:
@@ -77,7 +77,7 @@ def test_no_archive() -> None:
|
|||||||
"""
|
"""
|
||||||
with pytest.raises(ArchiveNotInAvailabilityAPIResponse):
|
with pytest.raises(ArchiveNotInAvailabilityAPIResponse):
|
||||||
availability_api = WaybackMachineAvailabilityAPI(
|
availability_api = WaybackMachineAvailabilityAPI(
|
||||||
url="https://%s.cn" % rndstr(30), user_agent=user_agent
|
url=f"https://{rndstr(30)}.cn", user_agent=user_agent
|
||||||
)
|
)
|
||||||
_ = availability_api.archive_url
|
_ = availability_api.archive_url
|
||||||
|
|
||||||
@@ -90,7 +90,7 @@ def test_no_api_call_str_repr() -> None:
|
|||||||
str() must not return None so we return ""
|
str() must not return None so we return ""
|
||||||
"""
|
"""
|
||||||
availability_api = WaybackMachineAvailabilityAPI(
|
availability_api = WaybackMachineAvailabilityAPI(
|
||||||
url="https://%s.gov" % rndstr(30), user_agent=user_agent
|
url=f"https://{rndstr(30)}.gov", user_agent=user_agent
|
||||||
)
|
)
|
||||||
assert "" == str(availability_api)
|
assert "" == str(availability_api)
|
||||||
|
|
||||||
@@ -101,6 +101,6 @@ def test_no_call_timestamp() -> None:
|
|||||||
the datetime.max as a default value.
|
the datetime.max as a default value.
|
||||||
"""
|
"""
|
||||||
availability_api = WaybackMachineAvailabilityAPI(
|
availability_api = WaybackMachineAvailabilityAPI(
|
||||||
url="https://%s.in" % rndstr(30), user_agent=user_agent
|
url=f"https://{rndstr(30)}.in", user_agent=user_agent
|
||||||
)
|
)
|
||||||
assert datetime.max == availability_api.timestamp()
|
assert datetime.max == availability_api.timestamp()
|
||||||
|
@@ -46,7 +46,7 @@ def test_get_response() -> None:
|
|||||||
user_agent = (
|
user_agent = (
|
||||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"
|
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"
|
||||||
)
|
)
|
||||||
headers = {"User-Agent": "%s" % user_agent}
|
headers = {"User-Agent": str(user_agent)}
|
||||||
response = get_response(url, headers=headers)
|
response = get_response(url, headers=headers)
|
||||||
assert not isinstance(response, Exception) and response.status_code == 200
|
assert not isinstance(response, Exception) and response.status_code == 200
|
||||||
|
|
||||||
|
@@ -33,7 +33,7 @@ def test_save() -> None:
|
|||||||
|
|
||||||
def test_max_redirect_exceeded() -> None:
|
def test_max_redirect_exceeded() -> None:
|
||||||
with pytest.raises(MaximumSaveRetriesExceeded):
|
with pytest.raises(MaximumSaveRetriesExceeded):
|
||||||
url = "https://%s.gov" % rndstr
|
url = f"https://{rndstr}.gov"
|
||||||
user_agent = "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
|
user_agent = "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
|
||||||
save_api = WaybackMachineSaveAPI(url, user_agent, max_tries=3)
|
save_api = WaybackMachineSaveAPI(url, user_agent, max_tries=3)
|
||||||
save_api.save()
|
save_api.save()
|
||||||
@@ -64,13 +64,12 @@ def test_timestamp() -> None:
|
|||||||
url = "https://example.com"
|
url = "https://example.com"
|
||||||
user_agent = "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
|
user_agent = "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
|
||||||
save_api = WaybackMachineSaveAPI(url, user_agent)
|
save_api = WaybackMachineSaveAPI(url, user_agent)
|
||||||
now = datetime.utcnow()
|
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
|
||||||
save_api._archive_url = (
|
save_api._archive_url = f"https://web.archive.org/web/{now}{url}/"
|
||||||
"https://web.archive.org/web/%s/" % now.strftime("%Y%m%d%H%M%S") + url
|
|
||||||
)
|
|
||||||
save_api.timestamp()
|
save_api.timestamp()
|
||||||
assert save_api.cached_save is False
|
assert save_api.cached_save is False
|
||||||
save_api._archive_url = "https://web.archive.org/web/%s/" % "20100124063622" + url
|
now = "20100124063622"
|
||||||
|
save_api._archive_url = f"https://web.archive.org/web/{now}{url}/"
|
||||||
save_api.timestamp()
|
save_api.timestamp()
|
||||||
assert save_api.cached_save is True
|
assert save_api.cached_save is True
|
||||||
|
|
||||||
|
@@ -9,7 +9,7 @@ from waybackpy.utils import (
|
|||||||
def test_default_user_agent() -> None:
|
def test_default_user_agent() -> None:
|
||||||
assert (
|
assert (
|
||||||
DEFAULT_USER_AGENT
|
DEFAULT_USER_AGENT
|
||||||
== "waybackpy %s - https://github.com/akamhy/waybackpy" % __version__
|
== f"waybackpy {__version__} - https://github.com/akamhy/waybackpy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@@ -5,11 +5,7 @@ __description__ = (
|
|||||||
)
|
)
|
||||||
__url__ = "https://akamhy.github.io/waybackpy/"
|
__url__ = "https://akamhy.github.io/waybackpy/"
|
||||||
__version__ = "3.0.2"
|
__version__ = "3.0.2"
|
||||||
__download_url__ = (
|
__download_url__ = f"https://github.com/akamhy/waybackpy/archive/{__version__}.tar.gz"
|
||||||
"https://github.com/akamhy/waybackpy/archive/{version}.tar.gz".format(
|
|
||||||
version=__version__
|
|
||||||
)
|
|
||||||
)
|
|
||||||
__author__ = "Akash Mahanty"
|
__author__ = "Akash Mahanty"
|
||||||
__author_email__ = "akamhy@yahoo.com"
|
__author_email__ = "akamhy@yahoo.com"
|
||||||
__license__ = "MIT"
|
__license__ = "MIT"
|
||||||
|
@@ -25,7 +25,7 @@ class WaybackMachineAvailabilityAPI(object):
|
|||||||
self.url = str(url).strip().replace(" ", "%20")
|
self.url = str(url).strip().replace(" ", "%20")
|
||||||
self.user_agent = user_agent
|
self.user_agent = user_agent
|
||||||
self.headers: Dict[str, str] = {"User-Agent": self.user_agent}
|
self.headers: Dict[str, str] = {"User-Agent": self.user_agent}
|
||||||
self.payload = {"url": "{url}".format(url=self.url)}
|
self.payload = {"url": self.url}
|
||||||
self.endpoint = "https://archive.org/wayback/available"
|
self.endpoint = "https://archive.org/wayback/available"
|
||||||
self.max_tries = max_tries
|
self.max_tries = max_tries
|
||||||
self.tries = 0
|
self.tries = 0
|
||||||
@@ -79,7 +79,7 @@ class WaybackMachineAvailabilityAPI(object):
|
|||||||
self.JSON = self.response.json()
|
self.JSON = self.response.json()
|
||||||
except json.decoder.JSONDecodeError:
|
except json.decoder.JSONDecodeError:
|
||||||
raise InvalidJSONInAvailabilityAPIResponse(
|
raise InvalidJSONInAvailabilityAPIResponse(
|
||||||
"Response data:\n{text}".format(text=self.response.text)
|
f"Response data:\n{self.response.text}"
|
||||||
)
|
)
|
||||||
|
|
||||||
return self.JSON
|
return self.JSON
|
||||||
@@ -142,9 +142,9 @@ class WaybackMachineAvailabilityAPI(object):
|
|||||||
if not data or not data["archived_snapshots"]:
|
if not data or not data["archived_snapshots"]:
|
||||||
raise ArchiveNotInAvailabilityAPIResponse(
|
raise ArchiveNotInAvailabilityAPIResponse(
|
||||||
"Archive not found in the availability "
|
"Archive not found in the availability "
|
||||||
+ "API response, the URL you requested may not have any "
|
"API response, the URL you requested may not have any "
|
||||||
+ "archives yet. You may retry after some time or archive the webpage now."
|
"archives yet. You may retry after some time or archive the webpage now.\n"
|
||||||
+ "\nResponse data:\n{response}".format(response=self.response.text)
|
f"Response data:\n{self.response.text}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
archive_url = data["archived_snapshots"]["closest"]["url"]
|
archive_url = data["archived_snapshots"]["closest"]["url"]
|
||||||
|
@@ -177,15 +177,9 @@ class WaybackMachineCDXServerAPI(object):
|
|||||||
|
|
||||||
if prop_values_len != properties_len:
|
if prop_values_len != properties_len:
|
||||||
raise WaybackError(
|
raise WaybackError(
|
||||||
"Snapshot returned by Cdx API has {prop_values_len} properties".format(
|
f"Snapshot returned by Cdx API has {prop_values_len} properties "
|
||||||
prop_values_len=prop_values_len
|
f"instead of expected {properties_len} properties.\n"
|
||||||
)
|
f"Problematic Snapshot: {snapshot}"
|
||||||
+ " instead of expected {properties_len} ".format(
|
|
||||||
properties_len=properties_len
|
|
||||||
)
|
|
||||||
+ "properties.\nProblematic Snapshot : {snapshot}".format(
|
|
||||||
snapshot=snapshot
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
(
|
(
|
||||||
|
@@ -21,16 +21,11 @@ class CDXSnapshot(object):
|
|||||||
self.digest = properties["digest"]
|
self.digest = properties["digest"]
|
||||||
self.length = properties["length"]
|
self.length = properties["length"]
|
||||||
self.archive_url = (
|
self.archive_url = (
|
||||||
"https://web.archive.org/web/" + self.timestamp + "/" + self.original
|
f"https://web.archive.org/web/{self.timestamp}/{self.original}"
|
||||||
)
|
)
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
|
return (
|
||||||
urlkey=self.urlkey,
|
f"{self.urlkey} {self.timestamp} {self.original} "
|
||||||
timestamp=self.timestamp,
|
f"{self.mimetype} {self.statuscode} {self.digest} {self.length}"
|
||||||
original=self.original,
|
|
||||||
mimetype=self.mimetype,
|
|
||||||
statuscode=self.statuscode,
|
|
||||||
digest=self.digest,
|
|
||||||
length=self.length,
|
|
||||||
)
|
)
|
||||||
|
@@ -30,9 +30,8 @@ def full_url(endpoint: str, params: Dict[str, Any]) -> str:
|
|||||||
key = "filter" if key.startswith("filter") else key
|
key = "filter" if key.startswith("filter") else key
|
||||||
key = "collapse" if key.startswith("collapse") else key
|
key = "collapse" if key.startswith("collapse") else key
|
||||||
amp = "" if full_url.endswith("?") else "&"
|
amp = "" if full_url.endswith("?") else "&"
|
||||||
full_url = (
|
val = quote(str(val), safe="")
|
||||||
full_url + amp + "{key}={val}".format(key=key, val=quote(str(val), safe=""))
|
full_url += f"{amp}{key}={val}"
|
||||||
)
|
|
||||||
return full_url
|
return full_url
|
||||||
|
|
||||||
|
|
||||||
@@ -57,9 +56,7 @@ def get_response(
|
|||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
reason = str(e)
|
reason = str(e)
|
||||||
exc_message = "Error while retrieving {url}.\n{reason}".format(
|
exc_message = f"Error while retrieving {url}.\n{reason}"
|
||||||
url=url, reason=reason
|
|
||||||
)
|
|
||||||
exc = WaybackError(exc_message)
|
exc = WaybackError(exc_message)
|
||||||
exc.__cause__ = e
|
exc.__cause__ = e
|
||||||
raise exc
|
raise exc
|
||||||
@@ -78,11 +75,7 @@ def check_filters(filters: List[str]) -> None:
|
|||||||
|
|
||||||
if match is None or len(match.groups()) != 2:
|
if match is None or len(match.groups()) != 2:
|
||||||
|
|
||||||
exc_message = (
|
exc_message = f"Filter '{_filter}' is not following the cdx filter syntax."
|
||||||
"Filter '{_filter}' is not following the cdx filter syntax.".format(
|
|
||||||
_filter=_filter
|
|
||||||
)
|
|
||||||
)
|
|
||||||
raise WaybackError(exc_message)
|
raise WaybackError(exc_message)
|
||||||
|
|
||||||
|
|
||||||
@@ -98,9 +91,7 @@ def check_collapses(collapses: List[str]) -> bool:
|
|||||||
collapse,
|
collapse,
|
||||||
)
|
)
|
||||||
if match is None or len(match.groups()) != 2:
|
if match is None or len(match.groups()) != 2:
|
||||||
exc_message = "collapse argument '{collapse}' is not following the cdx collapse syntax.".format(
|
exc_message = f"collapse argument '{collapse}' is not following the cdx collapse syntax."
|
||||||
collapse=collapse
|
|
||||||
)
|
|
||||||
raise WaybackError(exc_message)
|
raise WaybackError(exc_message)
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
@@ -115,9 +106,7 @@ def check_match_type(match_type: Optional[str], url: str) -> bool:
|
|||||||
"Can not use wildcard in the URL along with the match_type arguments."
|
"Can not use wildcard in the URL along with the match_type arguments."
|
||||||
)
|
)
|
||||||
elif match_type not in legal_match_type:
|
elif match_type not in legal_match_type:
|
||||||
exc_message = "{match_type} is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'".format(
|
exc_message = f"{match_type} is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'"
|
||||||
match_type=match_type
|
|
||||||
)
|
|
||||||
raise WaybackError(exc_message)
|
raise WaybackError(exc_message)
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
@@ -25,7 +25,7 @@ from .wrapper import Url
|
|||||||
"--user-agent",
|
"--user-agent",
|
||||||
"--user_agent",
|
"--user_agent",
|
||||||
default=DEFAULT_USER_AGENT,
|
default=DEFAULT_USER_AGENT,
|
||||||
help="User agent, default value is '%s'." % DEFAULT_USER_AGENT,
|
help=f"User agent, default value is '{DEFAULT_USER_AGENT}'.",
|
||||||
)
|
)
|
||||||
@click.option("-v", "--version", is_flag=True, default=False, help="waybackpy version.")
|
@click.option("-v", "--version", is_flag=True, default=False, help="waybackpy version.")
|
||||||
@click.option(
|
@click.option(
|
||||||
@@ -215,7 +215,7 @@ def main(
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
if version:
|
if version:
|
||||||
click.echo("waybackpy version %s" % __version__)
|
click.echo(f"waybackpy version {__version__}")
|
||||||
return
|
return
|
||||||
|
|
||||||
if license:
|
if license:
|
||||||
@@ -317,22 +317,18 @@ def main(
|
|||||||
if match is not None:
|
if match is not None:
|
||||||
domain = match.group(1)
|
domain = match.group(1)
|
||||||
|
|
||||||
file_name = "{domain}-urls-{uid}.txt".format(domain=domain, uid=uid)
|
file_name = f"{domain}-urls-{uid}.txt"
|
||||||
file_path = os.path.join(os.getcwd(), file_name)
|
file_path = os.path.join(os.getcwd(), file_name)
|
||||||
if not os.path.isfile(file_path):
|
if not os.path.isfile(file_path):
|
||||||
open(file_path, "w+").close()
|
open(file_path, "w+").close()
|
||||||
|
|
||||||
with open(file_path, "a") as f:
|
with open(file_path, "a") as f:
|
||||||
f.write("{url}\n".format(url=url))
|
f.write(f"{url}\n")
|
||||||
|
|
||||||
click.echo(url)
|
click.echo(url)
|
||||||
|
|
||||||
if url_count > 0 or file_name is not None:
|
if url_count > 0 or file_name is not None:
|
||||||
click.echo(
|
click.echo(f"\n\n'{file_name}' saved in current working directory")
|
||||||
"\n\n'{file_name}' saved in current working directory".format(
|
|
||||||
file_name=file_name
|
|
||||||
)
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
click.echo("No known URLs found. Please try a diffrent input!")
|
click.echo("No known URLs found. Please try a diffrent input!")
|
||||||
|
|
||||||
|
@@ -182,7 +182,7 @@ class WaybackMachineSaveAPI(object):
|
|||||||
tries += 1
|
tries += 1
|
||||||
if tries >= self.max_tries:
|
if tries >= self.max_tries:
|
||||||
raise MaximumSaveRetriesExceeded(
|
raise MaximumSaveRetriesExceeded(
|
||||||
"Tried %s times but failed to save and retrieve the" % str(tries)
|
f"Tried {str(tries)} times but failed to save and retrieve the archive for {self.url}.\n"
|
||||||
+ " archive for %s.\nResponse URL:\n%s \nResponse Header:\n%s\n"
|
f"Response URL:\n{self.response_url}\n"
|
||||||
% (self.url, self.response_url, self.headers_str),
|
f"Response Header:\n{self.headers_str}"
|
||||||
)
|
)
|
||||||
|
@@ -3,7 +3,7 @@ import requests
|
|||||||
from . import __version__
|
from . import __version__
|
||||||
|
|
||||||
DEFAULT_USER_AGENT: str = (
|
DEFAULT_USER_AGENT: str = (
|
||||||
"waybackpy %s - https://github.com/akamhy/waybackpy" % __version__
|
f"waybackpy {__version__} - https://github.com/akamhy/waybackpy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user