From dcd7b0330285fc0e4f54b5f912b20c3c38eb1f04 Mon Sep 17 00:00:00 2001 From: Akash Mahanty Date: Thu, 14 Jan 2021 19:30:07 +0530 Subject: [PATCH] getting rid of c style str formatting, now using .format --- waybackpy/cdx.py | 7 ++++-- waybackpy/cli.py | 32 ++++++++++++++----------- waybackpy/snapshot.py | 16 ++++++------- waybackpy/utils.py | 54 +++++++++++++++++++++++++++---------------- waybackpy/wrapper.py | 20 ++++++++++------ 5 files changed, 79 insertions(+), 50 deletions(-) diff --git a/waybackpy/cdx.py b/waybackpy/cdx.py index fc53266..3ce30bf 100644 --- a/waybackpy/cdx.py +++ b/waybackpy/cdx.py @@ -194,8 +194,11 @@ class Cdx: properties_len = len(properties) if prop_values_len != properties_len: raise WaybackError( - "Snapshot returned by Cdx API has %s properties instead of expected %s properties.\nInvolved Snapshot : %s" - % (prop_values_len, properties_len, snapshot) + "Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format( + prop_values_len=prop_values_len, + properties_len=properties_len, + snapshot=snapshot, + ) ) ( diff --git a/waybackpy/cli.py b/waybackpy/cli.py index 1128a44..370f586 100644 --- a/waybackpy/cli.py +++ b/waybackpy/cli.py @@ -20,13 +20,14 @@ def _save(obj): if "No archive URL found in the API response" in e: return ( "\n[waybackpy] Can not save/archive your link.\n[waybackpy] This " - "could happen because either your waybackpy (%s) is likely out of " + "could happen because either your waybackpy ({version}) is likely out of " "date or Wayback Machine is malfunctioning.\n[waybackpy] Visit " "https://github.com/akamhy/waybackpy for the latest version of " - "waybackpy.\n[waybackpy] API response Header :\n%s" - % (__version__, header) + "waybackpy.\n[waybackpy] API response Header :\n{header}".format( + version=__version__, header=header + ) ) - return WaybackError(err) + raise WaybackError(err) def _archive_url(obj): @@ -45,11 +46,13 @@ def no_archive_handler(e, obj): if "github.com/akamhy/waybackpy" in ua: ua = "YOUR_USER_AGENT_HERE" return ( - "\n[Waybackpy] Can not find archive for '%s'.\n[Waybackpy] You can" + "\n[Waybackpy] Can not find archive for '{url}'.\n[Waybackpy] You can" " save the URL using the following command:\n[Waybackpy] waybackpy --" - 'user_agent "%s" --url "%s" --save' % (url, ua, url) + 'user_agent "{user_agent}" --url "{url}" --save'.format( + url=url, user_agent=ua + ) ) - return WaybackError(e) + raise WaybackError(e) def _oldest(obj): @@ -96,12 +99,16 @@ def _save_urls_on_file(input_list, live_url_count): random.choice(string.ascii_lowercase + string.digits) for _ in range(6) ) - file_name = "%s-%d-urls-%s.txt" % (domain, live_url_count, uid) + file_name = "{domain}-{live_url_count}-urls-{uid}.txt".format( + domain=domain, live_url_count=live_url_count, uid=uid + ) file_content = "\n".join(input_list) file_path = os.path.join(os.getcwd(), file_name) with open(file_path, "w+") as f: f.write(file_content) - return "%s\n\n'%s' saved in current working directory" % (file_content, file_name) + return "{file_content}\n\n'{file_name}' saved in current working directory".format( + file_content=file_content, file_name=file_name + ) def _known_urls(obj, args): @@ -147,12 +154,11 @@ def _get(obj, args): def args_handler(args): if args.version: - return "waybackpy version %s" % __version__ + return "waybackpy version {version}".format(version=__version__) if not args.url: - return ( - "waybackpy %s \nSee 'waybackpy --help' for help using this tool." - % __version__ + return "waybackpy {version} \nSee 'waybackpy --help' for help using this tool.".format( + version=__version__ ) obj = Url(args.url) diff --git a/waybackpy/snapshot.py b/waybackpy/snapshot.py index d6797b2..992ad2e 100644 --- a/waybackpy/snapshot.py +++ b/waybackpy/snapshot.py @@ -25,12 +25,12 @@ class CdxSnapshot: ) def __str__(self): - return ("%s %s %s %s %s %s %s") % ( - self.urlkey, - self.timestamp, - self.original, - self.mimetype, - self.statuscode, - self.digest, - self.length, + return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format( + urlkey=self.urlkey, + timestamp=self.timestamp, + original=self.original, + mimetype=self.mimetype, + statuscode=self.statuscode, + digest=self.digest, + length=self.length, ) diff --git a/waybackpy/utils.py b/waybackpy/utils.py index 9876578..ac7102d 100644 --- a/waybackpy/utils.py +++ b/waybackpy/utils.py @@ -71,10 +71,10 @@ def _check_match_type(match_type, url): legal_match_type = ["exact", "prefix", "host", "domain"] if match_type not in legal_match_type: - raise WaybackError( - "%s is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'" - % match_type + exc_message = "{match_type} is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'".format( + match_type=match_type ) + raise WaybackError(exc_message) def _check_collapses(collapses): @@ -85,11 +85,11 @@ def _check_collapses(collapses): if len(collapses) == 0: return - for c in collapses: + for collapse in collapses: try: match = re.search( r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?", - c, + collapse, ) field = match.group(1) @@ -98,15 +98,17 @@ def _check_collapses(collapses): N = match.group(2) if N: - if not (field + N == c): + if not (field + N == collapse): raise Exception else: - if not (field == c): + if not (field == collapse): raise Exception except Exception: - e = "collapse argument '%s' is not following the cdx collapse syntax." % c - raise WaybackError(e) + exc_message = "collapse argument '{collapse}' is not following the cdx collapse syntax.".format( + collapse=collapse + ) + raise WaybackError(exc_message) def _check_filters(filters): @@ -114,19 +116,23 @@ def _check_filters(filters): raise WaybackError("filters must be a list.") # [!]field:regex - for f in filters: + for filter in filters: try: match = re.search( r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)", - f, + filter, ) key = match.group(1) val = match.group(2) except Exception: - e = "Filter '%s' not following the cdx filter syntax." % f - raise WaybackError(e) + exc_message = ( + "Filter '{filter}' not following the cdx filter syntax.".format( + filter=filter + ) + ) + raise WaybackError(exc_message) def _cleaned_url(url): @@ -143,7 +149,8 @@ def _url_check(url): """ if "." not in url: - raise URLError("'%s' is not a vaild URL." % url) + exc_message = "'{url}' is not a vaild URL.".format(url=url) + raise URLError(exc_message) def _full_url(endpoint, params): @@ -154,7 +161,9 @@ def _full_url(endpoint, params): key = "filter" if key.startswith("filter") else key key = "collapse" if key.startswith("collapse") else key amp = "" if full_url.endswith("?") else "&" - full_url = full_url + amp + "%s=%s" % (key, quote(str(val))) + full_url = ( + full_url + amp + "{key}={val}".format(key=key, val=quote(str(val))) + ) return full_url @@ -166,7 +175,9 @@ def _get_total_pages(url, user_agent): This func returns number of pages of archives (type int). """ total_pages_url = ( - "https://web.archive.org/cdx/search/cdx?url=%s&showNumPages=true" % url + "https://web.archive.org/cdx/search/cdx?url={url}&showNumPages=true".format( + url=url + ) ) headers = {"User-Agent": user_agent} return int((_get_response(total_pages_url, headers=headers).text).strip()) @@ -217,10 +228,12 @@ def _archive_url_parser(header, url): raise WaybackError( "No archive URL found in the API response. " - "If '%s' can be accessed via your web browser then either " - "this version of waybackpy (%s) is out of date or WayBack Machine is malfunctioning. Visit " + "If '{url}' can be accessed via your web browser then either " + "this version of waybackpy ({version}) is out of date or WayBack Machine is malfunctioning. Visit " "'https://github.com/akamhy/waybackpy' for the latest version " - "of waybackpy.\nHeader:\n%s" % (url, __version__, str(header)) + "of waybackpy.\nHeader:\n{header}".format( + url=url, version=__version__, header=header + ) ) @@ -292,6 +305,7 @@ def _get_response( return s.get(url, headers=headers) return (url, s.get(url, headers=headers)) except Exception as e: - exc = WaybackError("Error while retrieving %s" % url) + exc_message = "Error while retrieving {url}".format(url=url) + exc = WaybackError(exc_message) exc.__cause__ = e raise exc diff --git a/waybackpy/wrapper.py b/waybackpy/wrapper.py index cba1741..d11ebb8 100644 --- a/waybackpy/wrapper.py +++ b/waybackpy/wrapper.py @@ -26,7 +26,9 @@ class Url: self._alive_url_list = [] def __repr__(self): - return "waybackpy.Url(url=%s, user_agent=%s)" % (self.url, self.user_agent) + return "waybackpy.Url(url={url}, user_agent={user_agent})".format( + url=self.url, user_agent=self.user_agent + ) def __str__(self): """ @@ -43,7 +45,7 @@ class Url: if not self._archive_url: self._archive_url = self.archive_url - return "%s" % self._archive_url + return "{archive_url}".format(archive_url=self._archive_url) def __len__(self): """ @@ -87,7 +89,7 @@ class Url: endpoint = "https://archive.org/wayback/available" headers = {"User-Agent": self.user_agent} - payload = {"url": "%s" % _cleaned_url(self.url)} + payload = {"url": "{url}".format(url=_cleaned_url(self.url))} response = _get_response(endpoint, params=payload, headers=headers) return response.json() @@ -217,15 +219,19 @@ class Url: endpoint = "https://archive.org/wayback/available" headers = {"User-Agent": self.user_agent} - payload = {"url": "%s" % _cleaned_url(self.url), "timestamp": timestamp} + payload = { + "url": "{url}".format(url=_cleaned_url(self.url)), + "timestamp": timestamp, + } response = _get_response(endpoint, params=payload, headers=headers) data = response.json() if not data["archived_snapshots"]: raise WaybackError( - "Can not find archive for '%s' try later or use wayback.Url(url, user_agent).save() " - "to create a new archive.\nAPI response:\n%s" - % (_cleaned_url(self.url), response.text) + "Can not find archive for '{url}' try later or use wayback.Url(url, user_agent).save() " + "to create a new archive.\nAPI response:\n{text}".format( + url=_cleaned_url(self.url), text=response.text + ) ) archive_url = data["archived_snapshots"]["closest"]["url"] archive_url = archive_url.replace(