Replace C-style (%) string formatting with str.format()

This commit is contained in:
Akash Mahanty 2021-01-14 19:30:07 +05:30
parent 76205d9cf6
commit dcd7b03302
5 changed files with 79 additions and 50 deletions

View File

@ -194,8 +194,11 @@ class Cdx:
properties_len = len(properties)
if prop_values_len != properties_len:
raise WaybackError(
"Snapshot returned by Cdx API has %s properties instead of expected %s properties.\nInvolved Snapshot : %s"
% (prop_values_len, properties_len, snapshot)
"Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(
prop_values_len=prop_values_len,
properties_len=properties_len,
snapshot=snapshot,
)
)
(

View File

@ -20,13 +20,14 @@ def _save(obj):
if "No archive URL found in the API response" in e:
return (
"\n[waybackpy] Can not save/archive your link.\n[waybackpy] This "
"could happen because either your waybackpy (%s) is likely out of "
"could happen because either your waybackpy ({version}) is likely out of "
"date or Wayback Machine is malfunctioning.\n[waybackpy] Visit "
"https://github.com/akamhy/waybackpy for the latest version of "
"waybackpy.\n[waybackpy] API response Header :\n%s"
% (__version__, header)
"waybackpy.\n[waybackpy] API response Header :\n{header}".format(
version=__version__, header=header
)
)
return WaybackError(err)
raise WaybackError(err)
def _archive_url(obj):
@ -45,11 +46,13 @@ def no_archive_handler(e, obj):
if "github.com/akamhy/waybackpy" in ua:
ua = "YOUR_USER_AGENT_HERE"
return (
"\n[Waybackpy] Can not find archive for '%s'.\n[Waybackpy] You can"
"\n[Waybackpy] Can not find archive for '{url}'.\n[Waybackpy] You can"
" save the URL using the following command:\n[Waybackpy] waybackpy --"
'user_agent "%s" --url "%s" --save' % (url, ua, url)
'user_agent "{user_agent}" --url "{url}" --save'.format(
url=url, user_agent=ua
)
)
return WaybackError(e)
raise WaybackError(e)
def _oldest(obj):
@ -96,12 +99,16 @@ def _save_urls_on_file(input_list, live_url_count):
random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
)
file_name = "%s-%d-urls-%s.txt" % (domain, live_url_count, uid)
file_name = "{domain}-{live_url_count}-urls-{uid}.txt".format(
domain=domain, live_url_count=live_url_count, uid=uid
)
file_content = "\n".join(input_list)
file_path = os.path.join(os.getcwd(), file_name)
with open(file_path, "w+") as f:
f.write(file_content)
return "%s\n\n'%s' saved in current working directory" % (file_content, file_name)
return "{file_content}\n\n'{file_name}' saved in current working directory".format(
file_content=file_content, file_name=file_name
)
def _known_urls(obj, args):
@ -147,12 +154,11 @@ def _get(obj, args):
def args_handler(args):
if args.version:
return "waybackpy version %s" % __version__
return "waybackpy version {version}".format(version=__version__)
if not args.url:
return (
"waybackpy %s \nSee 'waybackpy --help' for help using this tool."
% __version__
return "waybackpy {version} \nSee 'waybackpy --help' for help using this tool.".format(
version=__version__
)
obj = Url(args.url)

View File

@ -25,12 +25,12 @@ class CdxSnapshot:
)
def __str__(self):
return ("%s %s %s %s %s %s %s") % (
self.urlkey,
self.timestamp,
self.original,
self.mimetype,
self.statuscode,
self.digest,
self.length,
return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
urlkey=self.urlkey,
timestamp=self.timestamp,
original=self.original,
mimetype=self.mimetype,
statuscode=self.statuscode,
digest=self.digest,
length=self.length,
)

View File

@ -71,10 +71,10 @@ def _check_match_type(match_type, url):
legal_match_type = ["exact", "prefix", "host", "domain"]
if match_type not in legal_match_type:
raise WaybackError(
"%s is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'"
% match_type
exc_message = "{match_type} is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'".format(
match_type=match_type
)
raise WaybackError(exc_message)
def _check_collapses(collapses):
@ -85,11 +85,11 @@ def _check_collapses(collapses):
if len(collapses) == 0:
return
for c in collapses:
for collapse in collapses:
try:
match = re.search(
r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?",
c,
collapse,
)
field = match.group(1)
@ -98,15 +98,17 @@ def _check_collapses(collapses):
N = match.group(2)
if N:
if not (field + N == c):
if not (field + N == collapse):
raise Exception
else:
if not (field == c):
if not (field == collapse):
raise Exception
except Exception:
e = "collapse argument '%s' is not following the cdx collapse syntax." % c
raise WaybackError(e)
exc_message = "collapse argument '{collapse}' is not following the cdx collapse syntax.".format(
collapse=collapse
)
raise WaybackError(exc_message)
def _check_filters(filters):
@ -114,19 +116,23 @@ def _check_filters(filters):
raise WaybackError("filters must be a list.")
# [!]field:regex
for f in filters:
for filter in filters:
try:
match = re.search(
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
f,
filter,
)
key = match.group(1)
val = match.group(2)
except Exception:
e = "Filter '%s' not following the cdx filter syntax." % f
raise WaybackError(e)
exc_message = (
"Filter '{filter}' not following the cdx filter syntax.".format(
filter=filter
)
)
raise WaybackError(exc_message)
def _cleaned_url(url):
@ -143,7 +149,8 @@ def _url_check(url):
"""
if "." not in url:
raise URLError("'%s' is not a vaild URL." % url)
exc_message = "'{url}' is not a vaild URL.".format(url=url)
raise URLError(exc_message)
def _full_url(endpoint, params):
@ -154,7 +161,9 @@ def _full_url(endpoint, params):
key = "filter" if key.startswith("filter") else key
key = "collapse" if key.startswith("collapse") else key
amp = "" if full_url.endswith("?") else "&"
full_url = full_url + amp + "%s=%s" % (key, quote(str(val)))
full_url = (
full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
)
return full_url
@ -166,7 +175,9 @@ def _get_total_pages(url, user_agent):
This func returns number of pages of archives (type int).
"""
total_pages_url = (
"https://web.archive.org/cdx/search/cdx?url=%s&showNumPages=true" % url
"https://web.archive.org/cdx/search/cdx?url={url}&showNumPages=true".format(
url=url
)
)
headers = {"User-Agent": user_agent}
return int((_get_response(total_pages_url, headers=headers).text).strip())
@ -217,10 +228,12 @@ def _archive_url_parser(header, url):
raise WaybackError(
"No archive URL found in the API response. "
"If '%s' can be accessed via your web browser then either "
"this version of waybackpy (%s) is out of date or WayBack Machine is malfunctioning. Visit "
"If '{url}' can be accessed via your web browser then either "
"this version of waybackpy ({version}) is out of date or WayBack Machine is malfunctioning. Visit "
"'https://github.com/akamhy/waybackpy' for the latest version "
"of waybackpy.\nHeader:\n%s" % (url, __version__, str(header))
"of waybackpy.\nHeader:\n{header}".format(
url=url, version=__version__, header=header
)
)
@ -292,6 +305,7 @@ def _get_response(
return s.get(url, headers=headers)
return (url, s.get(url, headers=headers))
except Exception as e:
exc = WaybackError("Error while retrieving %s" % url)
exc_message = "Error while retrieving {url}".format(url=url)
exc = WaybackError(exc_message)
exc.__cause__ = e
raise exc

View File

@ -26,7 +26,9 @@ class Url:
self._alive_url_list = []
def __repr__(self):
return "waybackpy.Url(url=%s, user_agent=%s)" % (self.url, self.user_agent)
return "waybackpy.Url(url={url}, user_agent={user_agent})".format(
url=self.url, user_agent=self.user_agent
)
def __str__(self):
"""
@ -43,7 +45,7 @@ class Url:
if not self._archive_url:
self._archive_url = self.archive_url
return "%s" % self._archive_url
return "{archive_url}".format(archive_url=self._archive_url)
def __len__(self):
"""
@ -87,7 +89,7 @@ class Url:
endpoint = "https://archive.org/wayback/available"
headers = {"User-Agent": self.user_agent}
payload = {"url": "%s" % _cleaned_url(self.url)}
payload = {"url": "{url}".format(url=_cleaned_url(self.url))}
response = _get_response(endpoint, params=payload, headers=headers)
return response.json()
@ -217,15 +219,19 @@ class Url:
endpoint = "https://archive.org/wayback/available"
headers = {"User-Agent": self.user_agent}
payload = {"url": "%s" % _cleaned_url(self.url), "timestamp": timestamp}
payload = {
"url": "{url}".format(url=_cleaned_url(self.url)),
"timestamp": timestamp,
}
response = _get_response(endpoint, params=payload, headers=headers)
data = response.json()
if not data["archived_snapshots"]:
raise WaybackError(
"Can not find archive for '%s' try later or use wayback.Url(url, user_agent).save() "
"to create a new archive.\nAPI response:\n%s"
% (_cleaned_url(self.url), response.text)
"Can not find archive for '{url}' try later or use wayback.Url(url, user_agent).save() "
"to create a new archive.\nAPI response:\n{text}".format(
url=_cleaned_url(self.url), text=response.text
)
)
archive_url = data["archived_snapshots"]["closest"]["url"]
archive_url = archive_url.replace(