Replace C-style (%) string formatting with str.format()
This commit is contained in:
parent
76205d9cf6
commit
dcd7b03302
@ -194,8 +194,11 @@ class Cdx:
|
||||
properties_len = len(properties)
|
||||
if prop_values_len != properties_len:
|
||||
raise WaybackError(
|
||||
"Snapshot returned by Cdx API has %s properties instead of expected %s properties.\nInvolved Snapshot : %s"
|
||||
% (prop_values_len, properties_len, snapshot)
|
||||
"Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(
|
||||
prop_values_len=prop_values_len,
|
||||
properties_len=properties_len,
|
||||
snapshot=snapshot,
|
||||
)
|
||||
)
|
||||
|
||||
(
|
||||
|
@ -20,13 +20,14 @@ def _save(obj):
|
||||
if "No archive URL found in the API response" in e:
|
||||
return (
|
||||
"\n[waybackpy] Can not save/archive your link.\n[waybackpy] This "
|
||||
"could happen because either your waybackpy (%s) is likely out of "
|
||||
"could happen because either your waybackpy ({version}) is likely out of "
|
||||
"date or Wayback Machine is malfunctioning.\n[waybackpy] Visit "
|
||||
"https://github.com/akamhy/waybackpy for the latest version of "
|
||||
"waybackpy.\n[waybackpy] API response Header :\n%s"
|
||||
% (__version__, header)
|
||||
"waybackpy.\n[waybackpy] API response Header :\n{header}".format(
|
||||
version=__version__, header=header
|
||||
)
|
||||
return WaybackError(err)
|
||||
)
|
||||
raise WaybackError(err)
|
||||
|
||||
|
||||
def _archive_url(obj):
|
||||
@ -45,11 +46,13 @@ def no_archive_handler(e, obj):
|
||||
if "github.com/akamhy/waybackpy" in ua:
|
||||
ua = "YOUR_USER_AGENT_HERE"
|
||||
return (
|
||||
"\n[Waybackpy] Can not find archive for '%s'.\n[Waybackpy] You can"
|
||||
"\n[Waybackpy] Can not find archive for '{url}'.\n[Waybackpy] You can"
|
||||
" save the URL using the following command:\n[Waybackpy] waybackpy --"
|
||||
'user_agent "%s" --url "%s" --save' % (url, ua, url)
|
||||
'user_agent "{user_agent}" --url "{url}" --save'.format(
|
||||
url=url, user_agent=ua
|
||||
)
|
||||
return WaybackError(e)
|
||||
)
|
||||
raise WaybackError(e)
|
||||
|
||||
|
||||
def _oldest(obj):
|
||||
@ -96,12 +99,16 @@ def _save_urls_on_file(input_list, live_url_count):
|
||||
random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
|
||||
)
|
||||
|
||||
file_name = "%s-%d-urls-%s.txt" % (domain, live_url_count, uid)
|
||||
file_name = "{domain}-{live_url_count}-urls-{uid}.txt".format(
|
||||
domain=domain, live_url_count=live_url_count, uid=uid
|
||||
)
|
||||
file_content = "\n".join(input_list)
|
||||
file_path = os.path.join(os.getcwd(), file_name)
|
||||
with open(file_path, "w+") as f:
|
||||
f.write(file_content)
|
||||
return "%s\n\n'%s' saved in current working directory" % (file_content, file_name)
|
||||
return "{file_content}\n\n'{file_name}' saved in current working directory".format(
|
||||
file_content=file_content, file_name=file_name
|
||||
)
|
||||
|
||||
|
||||
def _known_urls(obj, args):
|
||||
@ -147,12 +154,11 @@ def _get(obj, args):
|
||||
|
||||
def args_handler(args):
|
||||
if args.version:
|
||||
return "waybackpy version %s" % __version__
|
||||
return "waybackpy version {version}".format(version=__version__)
|
||||
|
||||
if not args.url:
|
||||
return (
|
||||
"waybackpy %s \nSee 'waybackpy --help' for help using this tool."
|
||||
% __version__
|
||||
return "waybackpy {version} \nSee 'waybackpy --help' for help using this tool.".format(
|
||||
version=__version__
|
||||
)
|
||||
|
||||
obj = Url(args.url)
|
||||
|
@ -25,12 +25,12 @@ class CdxSnapshot:
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
return ("%s %s %s %s %s %s %s") % (
|
||||
self.urlkey,
|
||||
self.timestamp,
|
||||
self.original,
|
||||
self.mimetype,
|
||||
self.statuscode,
|
||||
self.digest,
|
||||
self.length,
|
||||
return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
|
||||
urlkey=self.urlkey,
|
||||
timestamp=self.timestamp,
|
||||
original=self.original,
|
||||
mimetype=self.mimetype,
|
||||
statuscode=self.statuscode,
|
||||
digest=self.digest,
|
||||
length=self.length,
|
||||
)
|
||||
|
@ -71,10 +71,10 @@ def _check_match_type(match_type, url):
|
||||
legal_match_type = ["exact", "prefix", "host", "domain"]
|
||||
|
||||
if match_type not in legal_match_type:
|
||||
raise WaybackError(
|
||||
"%s is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'"
|
||||
% match_type
|
||||
exc_message = "{match_type} is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'".format(
|
||||
match_type=match_type
|
||||
)
|
||||
raise WaybackError(exc_message)
|
||||
|
||||
|
||||
def _check_collapses(collapses):
|
||||
@ -85,11 +85,11 @@ def _check_collapses(collapses):
|
||||
if len(collapses) == 0:
|
||||
return
|
||||
|
||||
for c in collapses:
|
||||
for collapse in collapses:
|
||||
try:
|
||||
match = re.search(
|
||||
r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?",
|
||||
c,
|
||||
collapse,
|
||||
)
|
||||
field = match.group(1)
|
||||
|
||||
@ -98,15 +98,17 @@ def _check_collapses(collapses):
|
||||
N = match.group(2)
|
||||
|
||||
if N:
|
||||
if not (field + N == c):
|
||||
if not (field + N == collapse):
|
||||
raise Exception
|
||||
else:
|
||||
if not (field == c):
|
||||
if not (field == collapse):
|
||||
raise Exception
|
||||
|
||||
except Exception:
|
||||
e = "collapse argument '%s' is not following the cdx collapse syntax." % c
|
||||
raise WaybackError(e)
|
||||
exc_message = "collapse argument '{collapse}' is not following the cdx collapse syntax.".format(
|
||||
collapse=collapse
|
||||
)
|
||||
raise WaybackError(exc_message)
|
||||
|
||||
|
||||
def _check_filters(filters):
|
||||
@ -114,19 +116,23 @@ def _check_filters(filters):
|
||||
raise WaybackError("filters must be a list.")
|
||||
|
||||
# [!]field:regex
|
||||
for f in filters:
|
||||
for filter in filters:
|
||||
try:
|
||||
match = re.search(
|
||||
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
|
||||
f,
|
||||
filter,
|
||||
)
|
||||
|
||||
key = match.group(1)
|
||||
val = match.group(2)
|
||||
|
||||
except Exception:
|
||||
e = "Filter '%s' not following the cdx filter syntax." % f
|
||||
raise WaybackError(e)
|
||||
exc_message = (
|
||||
"Filter '{filter}' not following the cdx filter syntax.".format(
|
||||
filter=filter
|
||||
)
|
||||
)
|
||||
raise WaybackError(exc_message)
|
||||
|
||||
|
||||
def _cleaned_url(url):
|
||||
@ -143,7 +149,8 @@ def _url_check(url):
|
||||
"""
|
||||
|
||||
if "." not in url:
|
||||
raise URLError("'%s' is not a vaild URL." % url)
|
||||
exc_message = "'{url}' is not a vaild URL.".format(url=url)
|
||||
raise URLError(exc_message)
|
||||
|
||||
|
||||
def _full_url(endpoint, params):
|
||||
@ -154,7 +161,9 @@ def _full_url(endpoint, params):
|
||||
key = "filter" if key.startswith("filter") else key
|
||||
key = "collapse" if key.startswith("collapse") else key
|
||||
amp = "" if full_url.endswith("?") else "&"
|
||||
full_url = full_url + amp + "%s=%s" % (key, quote(str(val)))
|
||||
full_url = (
|
||||
full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
|
||||
)
|
||||
return full_url
|
||||
|
||||
|
||||
@ -166,7 +175,9 @@ def _get_total_pages(url, user_agent):
|
||||
This func returns number of pages of archives (type int).
|
||||
"""
|
||||
total_pages_url = (
|
||||
"https://web.archive.org/cdx/search/cdx?url=%s&showNumPages=true" % url
|
||||
"https://web.archive.org/cdx/search/cdx?url={url}&showNumPages=true".format(
|
||||
url=url
|
||||
)
|
||||
)
|
||||
headers = {"User-Agent": user_agent}
|
||||
return int((_get_response(total_pages_url, headers=headers).text).strip())
|
||||
@ -217,10 +228,12 @@ def _archive_url_parser(header, url):
|
||||
|
||||
raise WaybackError(
|
||||
"No archive URL found in the API response. "
|
||||
"If '%s' can be accessed via your web browser then either "
|
||||
"this version of waybackpy (%s) is out of date or WayBack Machine is malfunctioning. Visit "
|
||||
"If '{url}' can be accessed via your web browser then either "
|
||||
"this version of waybackpy ({version}) is out of date or WayBack Machine is malfunctioning. Visit "
|
||||
"'https://github.com/akamhy/waybackpy' for the latest version "
|
||||
"of waybackpy.\nHeader:\n%s" % (url, __version__, str(header))
|
||||
"of waybackpy.\nHeader:\n{header}".format(
|
||||
url=url, version=__version__, header=header
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@ -292,6 +305,7 @@ def _get_response(
|
||||
return s.get(url, headers=headers)
|
||||
return (url, s.get(url, headers=headers))
|
||||
except Exception as e:
|
||||
exc = WaybackError("Error while retrieving %s" % url)
|
||||
exc_message = "Error while retrieving {url}".format(url=url)
|
||||
exc = WaybackError(exc_message)
|
||||
exc.__cause__ = e
|
||||
raise exc
|
||||
|
@ -26,7 +26,9 @@ class Url:
|
||||
self._alive_url_list = []
|
||||
|
||||
def __repr__(self):
|
||||
return "waybackpy.Url(url=%s, user_agent=%s)" % (self.url, self.user_agent)
|
||||
return "waybackpy.Url(url={url}, user_agent={user_agent})".format(
|
||||
url=self.url, user_agent=self.user_agent
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
"""
|
||||
@ -43,7 +45,7 @@ class Url:
|
||||
|
||||
if not self._archive_url:
|
||||
self._archive_url = self.archive_url
|
||||
return "%s" % self._archive_url
|
||||
return "{archive_url}".format(archive_url=self._archive_url)
|
||||
|
||||
def __len__(self):
|
||||
"""
|
||||
@ -87,7 +89,7 @@ class Url:
|
||||
|
||||
endpoint = "https://archive.org/wayback/available"
|
||||
headers = {"User-Agent": self.user_agent}
|
||||
payload = {"url": "%s" % _cleaned_url(self.url)}
|
||||
payload = {"url": "{url}".format(url=_cleaned_url(self.url))}
|
||||
response = _get_response(endpoint, params=payload, headers=headers)
|
||||
return response.json()
|
||||
|
||||
@ -217,15 +219,19 @@ class Url:
|
||||
|
||||
endpoint = "https://archive.org/wayback/available"
|
||||
headers = {"User-Agent": self.user_agent}
|
||||
payload = {"url": "%s" % _cleaned_url(self.url), "timestamp": timestamp}
|
||||
payload = {
|
||||
"url": "{url}".format(url=_cleaned_url(self.url)),
|
||||
"timestamp": timestamp,
|
||||
}
|
||||
response = _get_response(endpoint, params=payload, headers=headers)
|
||||
data = response.json()
|
||||
|
||||
if not data["archived_snapshots"]:
|
||||
raise WaybackError(
|
||||
"Can not find archive for '%s' try later or use wayback.Url(url, user_agent).save() "
|
||||
"to create a new archive.\nAPI response:\n%s"
|
||||
% (_cleaned_url(self.url), response.text)
|
||||
"Can not find archive for '{url}' try later or use wayback.Url(url, user_agent).save() "
|
||||
"to create a new archive.\nAPI response:\n{text}".format(
|
||||
url=_cleaned_url(self.url), text=response.text
|
||||
)
|
||||
)
|
||||
archive_url = data["archived_snapshots"]["closest"]["url"]
|
||||
archive_url = archive_url.replace(
|
||||
|
Loading…
Reference in New Issue
Block a user