Replace C-style (%) string formatting with str.format()

This commit is contained in:
Akash Mahanty 2021-01-14 19:30:07 +05:30
parent 76205d9cf6
commit dcd7b03302
5 changed files with 79 additions and 50 deletions

View File

@ -194,8 +194,11 @@ class Cdx:
properties_len = len(properties) properties_len = len(properties)
if prop_values_len != properties_len: if prop_values_len != properties_len:
raise WaybackError( raise WaybackError(
"Snapshot returned by Cdx API has %s properties instead of expected %s properties.\nInvolved Snapshot : %s" "Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(
% (prop_values_len, properties_len, snapshot) prop_values_len=prop_values_len,
properties_len=properties_len,
snapshot=snapshot,
)
) )
( (

View File

@ -20,13 +20,14 @@ def _save(obj):
if "No archive URL found in the API response" in e: if "No archive URL found in the API response" in e:
return ( return (
"\n[waybackpy] Can not save/archive your link.\n[waybackpy] This " "\n[waybackpy] Can not save/archive your link.\n[waybackpy] This "
"could happen because either your waybackpy (%s) is likely out of " "could happen because either your waybackpy ({version}) is likely out of "
"date or Wayback Machine is malfunctioning.\n[waybackpy] Visit " "date or Wayback Machine is malfunctioning.\n[waybackpy] Visit "
"https://github.com/akamhy/waybackpy for the latest version of " "https://github.com/akamhy/waybackpy for the latest version of "
"waybackpy.\n[waybackpy] API response Header :\n%s" "waybackpy.\n[waybackpy] API response Header :\n{header}".format(
% (__version__, header) version=__version__, header=header
) )
return WaybackError(err) )
raise WaybackError(err)
def _archive_url(obj): def _archive_url(obj):
@ -45,11 +46,13 @@ def no_archive_handler(e, obj):
if "github.com/akamhy/waybackpy" in ua: if "github.com/akamhy/waybackpy" in ua:
ua = "YOUR_USER_AGENT_HERE" ua = "YOUR_USER_AGENT_HERE"
return ( return (
"\n[Waybackpy] Can not find archive for '%s'.\n[Waybackpy] You can" "\n[Waybackpy] Can not find archive for '{url}'.\n[Waybackpy] You can"
" save the URL using the following command:\n[Waybackpy] waybackpy --" " save the URL using the following command:\n[Waybackpy] waybackpy --"
'user_agent "%s" --url "%s" --save' % (url, ua, url) 'user_agent "{user_agent}" --url "{url}" --save'.format(
url=url, user_agent=ua
) )
return WaybackError(e) )
raise WaybackError(e)
def _oldest(obj): def _oldest(obj):
@ -96,12 +99,16 @@ def _save_urls_on_file(input_list, live_url_count):
random.choice(string.ascii_lowercase + string.digits) for _ in range(6) random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
) )
file_name = "%s-%d-urls-%s.txt" % (domain, live_url_count, uid) file_name = "{domain}-{live_url_count}-urls-{uid}.txt".format(
domain=domain, live_url_count=live_url_count, uid=uid
)
file_content = "\n".join(input_list) file_content = "\n".join(input_list)
file_path = os.path.join(os.getcwd(), file_name) file_path = os.path.join(os.getcwd(), file_name)
with open(file_path, "w+") as f: with open(file_path, "w+") as f:
f.write(file_content) f.write(file_content)
return "%s\n\n'%s' saved in current working directory" % (file_content, file_name) return "{file_content}\n\n'{file_name}' saved in current working directory".format(
file_content=file_content, file_name=file_name
)
def _known_urls(obj, args): def _known_urls(obj, args):
@ -147,12 +154,11 @@ def _get(obj, args):
def args_handler(args): def args_handler(args):
if args.version: if args.version:
return "waybackpy version %s" % __version__ return "waybackpy version {version}".format(version=__version__)
if not args.url: if not args.url:
return ( return "waybackpy {version} \nSee 'waybackpy --help' for help using this tool.".format(
"waybackpy %s \nSee 'waybackpy --help' for help using this tool." version=__version__
% __version__
) )
obj = Url(args.url) obj = Url(args.url)

View File

@ -25,12 +25,12 @@ class CdxSnapshot:
) )
def __str__(self): def __str__(self):
return ("%s %s %s %s %s %s %s") % ( return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
self.urlkey, urlkey=self.urlkey,
self.timestamp, timestamp=self.timestamp,
self.original, original=self.original,
self.mimetype, mimetype=self.mimetype,
self.statuscode, statuscode=self.statuscode,
self.digest, digest=self.digest,
self.length, length=self.length,
) )

View File

@ -71,10 +71,10 @@ def _check_match_type(match_type, url):
legal_match_type = ["exact", "prefix", "host", "domain"] legal_match_type = ["exact", "prefix", "host", "domain"]
if match_type not in legal_match_type: if match_type not in legal_match_type:
raise WaybackError( exc_message = "{match_type} is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'".format(
"%s is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'" match_type=match_type
% match_type
) )
raise WaybackError(exc_message)
def _check_collapses(collapses): def _check_collapses(collapses):
@ -85,11 +85,11 @@ def _check_collapses(collapses):
if len(collapses) == 0: if len(collapses) == 0:
return return
for c in collapses: for collapse in collapses:
try: try:
match = re.search( match = re.search(
r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?", r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?",
c, collapse,
) )
field = match.group(1) field = match.group(1)
@ -98,15 +98,17 @@ def _check_collapses(collapses):
N = match.group(2) N = match.group(2)
if N: if N:
if not (field + N == c): if not (field + N == collapse):
raise Exception raise Exception
else: else:
if not (field == c): if not (field == collapse):
raise Exception raise Exception
except Exception: except Exception:
e = "collapse argument '%s' is not following the cdx collapse syntax." % c exc_message = "collapse argument '{collapse}' is not following the cdx collapse syntax.".format(
raise WaybackError(e) collapse=collapse
)
raise WaybackError(exc_message)
def _check_filters(filters): def _check_filters(filters):
@ -114,19 +116,23 @@ def _check_filters(filters):
raise WaybackError("filters must be a list.") raise WaybackError("filters must be a list.")
# [!]field:regex # [!]field:regex
for f in filters: for filter in filters:
try: try:
match = re.search( match = re.search(
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)", r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
f, filter,
) )
key = match.group(1) key = match.group(1)
val = match.group(2) val = match.group(2)
except Exception: except Exception:
e = "Filter '%s' not following the cdx filter syntax." % f exc_message = (
raise WaybackError(e) "Filter '{filter}' not following the cdx filter syntax.".format(
filter=filter
)
)
raise WaybackError(exc_message)
def _cleaned_url(url): def _cleaned_url(url):
@ -143,7 +149,8 @@ def _url_check(url):
""" """
if "." not in url: if "." not in url:
raise URLError("'%s' is not a vaild URL." % url) exc_message = "'{url}' is not a vaild URL.".format(url=url)
raise URLError(exc_message)
def _full_url(endpoint, params): def _full_url(endpoint, params):
@ -154,7 +161,9 @@ def _full_url(endpoint, params):
key = "filter" if key.startswith("filter") else key key = "filter" if key.startswith("filter") else key
key = "collapse" if key.startswith("collapse") else key key = "collapse" if key.startswith("collapse") else key
amp = "" if full_url.endswith("?") else "&" amp = "" if full_url.endswith("?") else "&"
full_url = full_url + amp + "%s=%s" % (key, quote(str(val))) full_url = (
full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
)
return full_url return full_url
@ -166,7 +175,9 @@ def _get_total_pages(url, user_agent):
This func returns number of pages of archives (type int). This func returns number of pages of archives (type int).
""" """
total_pages_url = ( total_pages_url = (
"https://web.archive.org/cdx/search/cdx?url=%s&showNumPages=true" % url "https://web.archive.org/cdx/search/cdx?url={url}&showNumPages=true".format(
url=url
)
) )
headers = {"User-Agent": user_agent} headers = {"User-Agent": user_agent}
return int((_get_response(total_pages_url, headers=headers).text).strip()) return int((_get_response(total_pages_url, headers=headers).text).strip())
@ -217,10 +228,12 @@ def _archive_url_parser(header, url):
raise WaybackError( raise WaybackError(
"No archive URL found in the API response. " "No archive URL found in the API response. "
"If '%s' can be accessed via your web browser then either " "If '{url}' can be accessed via your web browser then either "
"this version of waybackpy (%s) is out of date or WayBack Machine is malfunctioning. Visit " "this version of waybackpy ({version}) is out of date or WayBack Machine is malfunctioning. Visit "
"'https://github.com/akamhy/waybackpy' for the latest version " "'https://github.com/akamhy/waybackpy' for the latest version "
"of waybackpy.\nHeader:\n%s" % (url, __version__, str(header)) "of waybackpy.\nHeader:\n{header}".format(
url=url, version=__version__, header=header
)
) )
@ -292,6 +305,7 @@ def _get_response(
return s.get(url, headers=headers) return s.get(url, headers=headers)
return (url, s.get(url, headers=headers)) return (url, s.get(url, headers=headers))
except Exception as e: except Exception as e:
exc = WaybackError("Error while retrieving %s" % url) exc_message = "Error while retrieving {url}".format(url=url)
exc = WaybackError(exc_message)
exc.__cause__ = e exc.__cause__ = e
raise exc raise exc

View File

@ -26,7 +26,9 @@ class Url:
self._alive_url_list = [] self._alive_url_list = []
def __repr__(self): def __repr__(self):
return "waybackpy.Url(url=%s, user_agent=%s)" % (self.url, self.user_agent) return "waybackpy.Url(url={url}, user_agent={user_agent})".format(
url=self.url, user_agent=self.user_agent
)
def __str__(self): def __str__(self):
""" """
@ -43,7 +45,7 @@ class Url:
if not self._archive_url: if not self._archive_url:
self._archive_url = self.archive_url self._archive_url = self.archive_url
return "%s" % self._archive_url return "{archive_url}".format(archive_url=self._archive_url)
def __len__(self): def __len__(self):
""" """
@ -87,7 +89,7 @@ class Url:
endpoint = "https://archive.org/wayback/available" endpoint = "https://archive.org/wayback/available"
headers = {"User-Agent": self.user_agent} headers = {"User-Agent": self.user_agent}
payload = {"url": "%s" % _cleaned_url(self.url)} payload = {"url": "{url}".format(url=_cleaned_url(self.url))}
response = _get_response(endpoint, params=payload, headers=headers) response = _get_response(endpoint, params=payload, headers=headers)
return response.json() return response.json()
@ -217,15 +219,19 @@ class Url:
endpoint = "https://archive.org/wayback/available" endpoint = "https://archive.org/wayback/available"
headers = {"User-Agent": self.user_agent} headers = {"User-Agent": self.user_agent}
payload = {"url": "%s" % _cleaned_url(self.url), "timestamp": timestamp} payload = {
"url": "{url}".format(url=_cleaned_url(self.url)),
"timestamp": timestamp,
}
response = _get_response(endpoint, params=payload, headers=headers) response = _get_response(endpoint, params=payload, headers=headers)
data = response.json() data = response.json()
if not data["archived_snapshots"]: if not data["archived_snapshots"]:
raise WaybackError( raise WaybackError(
"Can not find archive for '%s' try later or use wayback.Url(url, user_agent).save() " "Can not find archive for '{url}' try later or use wayback.Url(url, user_agent).save() "
"to create a new archive.\nAPI response:\n%s" "to create a new archive.\nAPI response:\n{text}".format(
% (_cleaned_url(self.url), response.text) url=_cleaned_url(self.url), text=response.text
)
) )
archive_url = data["archived_snapshots"]["closest"]["url"] archive_url = data["archived_snapshots"]["closest"]["url"]
archive_url = archive_url.replace( archive_url = archive_url.replace(