Replace C-style (%) string formatting with str.format()
This commit is contained in:
parent
76205d9cf6
commit
dcd7b03302
@ -194,8 +194,11 @@ class Cdx:
|
|||||||
properties_len = len(properties)
|
properties_len = len(properties)
|
||||||
if prop_values_len != properties_len:
|
if prop_values_len != properties_len:
|
||||||
raise WaybackError(
|
raise WaybackError(
|
||||||
"Snapshot returned by Cdx API has %s properties instead of expected %s properties.\nInvolved Snapshot : %s"
|
"Snapshot returned by Cdx API has {prop_values_len} properties instead of expected {properties_len} properties.\nInvolved Snapshot : {snapshot}".format(
|
||||||
% (prop_values_len, properties_len, snapshot)
|
prop_values_len=prop_values_len,
|
||||||
|
properties_len=properties_len,
|
||||||
|
snapshot=snapshot,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
(
|
(
|
||||||
|
@ -20,13 +20,14 @@ def _save(obj):
|
|||||||
if "No archive URL found in the API response" in e:
|
if "No archive URL found in the API response" in e:
|
||||||
return (
|
return (
|
||||||
"\n[waybackpy] Can not save/archive your link.\n[waybackpy] This "
|
"\n[waybackpy] Can not save/archive your link.\n[waybackpy] This "
|
||||||
"could happen because either your waybackpy (%s) is likely out of "
|
"could happen because either your waybackpy ({version}) is likely out of "
|
||||||
"date or Wayback Machine is malfunctioning.\n[waybackpy] Visit "
|
"date or Wayback Machine is malfunctioning.\n[waybackpy] Visit "
|
||||||
"https://github.com/akamhy/waybackpy for the latest version of "
|
"https://github.com/akamhy/waybackpy for the latest version of "
|
||||||
"waybackpy.\n[waybackpy] API response Header :\n%s"
|
"waybackpy.\n[waybackpy] API response Header :\n{header}".format(
|
||||||
% (__version__, header)
|
version=__version__, header=header
|
||||||
|
)
|
||||||
)
|
)
|
||||||
return WaybackError(err)
|
raise WaybackError(err)
|
||||||
|
|
||||||
|
|
||||||
def _archive_url(obj):
|
def _archive_url(obj):
|
||||||
@ -45,11 +46,13 @@ def no_archive_handler(e, obj):
|
|||||||
if "github.com/akamhy/waybackpy" in ua:
|
if "github.com/akamhy/waybackpy" in ua:
|
||||||
ua = "YOUR_USER_AGENT_HERE"
|
ua = "YOUR_USER_AGENT_HERE"
|
||||||
return (
|
return (
|
||||||
"\n[Waybackpy] Can not find archive for '%s'.\n[Waybackpy] You can"
|
"\n[Waybackpy] Can not find archive for '{url}'.\n[Waybackpy] You can"
|
||||||
" save the URL using the following command:\n[Waybackpy] waybackpy --"
|
" save the URL using the following command:\n[Waybackpy] waybackpy --"
|
||||||
'user_agent "%s" --url "%s" --save' % (url, ua, url)
|
'user_agent "{user_agent}" --url "{url}" --save'.format(
|
||||||
|
url=url, user_agent=ua
|
||||||
|
)
|
||||||
)
|
)
|
||||||
return WaybackError(e)
|
raise WaybackError(e)
|
||||||
|
|
||||||
|
|
||||||
def _oldest(obj):
|
def _oldest(obj):
|
||||||
@ -96,12 +99,16 @@ def _save_urls_on_file(input_list, live_url_count):
|
|||||||
random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
|
random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
|
||||||
)
|
)
|
||||||
|
|
||||||
file_name = "%s-%d-urls-%s.txt" % (domain, live_url_count, uid)
|
file_name = "{domain}-{live_url_count}-urls-{uid}.txt".format(
|
||||||
|
domain=domain, live_url_count=live_url_count, uid=uid
|
||||||
|
)
|
||||||
file_content = "\n".join(input_list)
|
file_content = "\n".join(input_list)
|
||||||
file_path = os.path.join(os.getcwd(), file_name)
|
file_path = os.path.join(os.getcwd(), file_name)
|
||||||
with open(file_path, "w+") as f:
|
with open(file_path, "w+") as f:
|
||||||
f.write(file_content)
|
f.write(file_content)
|
||||||
return "%s\n\n'%s' saved in current working directory" % (file_content, file_name)
|
return "{file_content}\n\n'{file_name}' saved in current working directory".format(
|
||||||
|
file_content=file_content, file_name=file_name
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _known_urls(obj, args):
|
def _known_urls(obj, args):
|
||||||
@ -147,12 +154,11 @@ def _get(obj, args):
|
|||||||
|
|
||||||
def args_handler(args):
|
def args_handler(args):
|
||||||
if args.version:
|
if args.version:
|
||||||
return "waybackpy version %s" % __version__
|
return "waybackpy version {version}".format(version=__version__)
|
||||||
|
|
||||||
if not args.url:
|
if not args.url:
|
||||||
return (
|
return "waybackpy {version} \nSee 'waybackpy --help' for help using this tool.".format(
|
||||||
"waybackpy %s \nSee 'waybackpy --help' for help using this tool."
|
version=__version__
|
||||||
% __version__
|
|
||||||
)
|
)
|
||||||
|
|
||||||
obj = Url(args.url)
|
obj = Url(args.url)
|
||||||
|
@ -25,12 +25,12 @@ class CdxSnapshot:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return ("%s %s %s %s %s %s %s") % (
|
return "{urlkey} {timestamp} {original} {mimetype} {statuscode} {digest} {length}".format(
|
||||||
self.urlkey,
|
urlkey=self.urlkey,
|
||||||
self.timestamp,
|
timestamp=self.timestamp,
|
||||||
self.original,
|
original=self.original,
|
||||||
self.mimetype,
|
mimetype=self.mimetype,
|
||||||
self.statuscode,
|
statuscode=self.statuscode,
|
||||||
self.digest,
|
digest=self.digest,
|
||||||
self.length,
|
length=self.length,
|
||||||
)
|
)
|
||||||
|
@ -71,10 +71,10 @@ def _check_match_type(match_type, url):
|
|||||||
legal_match_type = ["exact", "prefix", "host", "domain"]
|
legal_match_type = ["exact", "prefix", "host", "domain"]
|
||||||
|
|
||||||
if match_type not in legal_match_type:
|
if match_type not in legal_match_type:
|
||||||
raise WaybackError(
|
exc_message = "{match_type} is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'".format(
|
||||||
"%s is not an allowed match type.\nUse one from 'exact', 'prefix', 'host' or 'domain'"
|
match_type=match_type
|
||||||
% match_type
|
|
||||||
)
|
)
|
||||||
|
raise WaybackError(exc_message)
|
||||||
|
|
||||||
|
|
||||||
def _check_collapses(collapses):
|
def _check_collapses(collapses):
|
||||||
@ -85,11 +85,11 @@ def _check_collapses(collapses):
|
|||||||
if len(collapses) == 0:
|
if len(collapses) == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
for c in collapses:
|
for collapse in collapses:
|
||||||
try:
|
try:
|
||||||
match = re.search(
|
match = re.search(
|
||||||
r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?",
|
r"(urlkey|timestamp|original|mimetype|statuscode|digest|length)(:?[0-9]{1,99})?",
|
||||||
c,
|
collapse,
|
||||||
)
|
)
|
||||||
field = match.group(1)
|
field = match.group(1)
|
||||||
|
|
||||||
@ -98,15 +98,17 @@ def _check_collapses(collapses):
|
|||||||
N = match.group(2)
|
N = match.group(2)
|
||||||
|
|
||||||
if N:
|
if N:
|
||||||
if not (field + N == c):
|
if not (field + N == collapse):
|
||||||
raise Exception
|
raise Exception
|
||||||
else:
|
else:
|
||||||
if not (field == c):
|
if not (field == collapse):
|
||||||
raise Exception
|
raise Exception
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
e = "collapse argument '%s' is not following the cdx collapse syntax." % c
|
exc_message = "collapse argument '{collapse}' is not following the cdx collapse syntax.".format(
|
||||||
raise WaybackError(e)
|
collapse=collapse
|
||||||
|
)
|
||||||
|
raise WaybackError(exc_message)
|
||||||
|
|
||||||
|
|
||||||
def _check_filters(filters):
|
def _check_filters(filters):
|
||||||
@ -114,19 +116,23 @@ def _check_filters(filters):
|
|||||||
raise WaybackError("filters must be a list.")
|
raise WaybackError("filters must be a list.")
|
||||||
|
|
||||||
# [!]field:regex
|
# [!]field:regex
|
||||||
for f in filters:
|
for filter in filters:
|
||||||
try:
|
try:
|
||||||
match = re.search(
|
match = re.search(
|
||||||
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
|
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
|
||||||
f,
|
filter,
|
||||||
)
|
)
|
||||||
|
|
||||||
key = match.group(1)
|
key = match.group(1)
|
||||||
val = match.group(2)
|
val = match.group(2)
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
e = "Filter '%s' not following the cdx filter syntax." % f
|
exc_message = (
|
||||||
raise WaybackError(e)
|
"Filter '{filter}' not following the cdx filter syntax.".format(
|
||||||
|
filter=filter
|
||||||
|
)
|
||||||
|
)
|
||||||
|
raise WaybackError(exc_message)
|
||||||
|
|
||||||
|
|
||||||
def _cleaned_url(url):
|
def _cleaned_url(url):
|
||||||
@ -143,7 +149,8 @@ def _url_check(url):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
if "." not in url:
|
if "." not in url:
|
||||||
raise URLError("'%s' is not a vaild URL." % url)
|
exc_message = "'{url}' is not a vaild URL.".format(url=url)
|
||||||
|
raise URLError(exc_message)
|
||||||
|
|
||||||
|
|
||||||
def _full_url(endpoint, params):
|
def _full_url(endpoint, params):
|
||||||
@ -154,7 +161,9 @@ def _full_url(endpoint, params):
|
|||||||
key = "filter" if key.startswith("filter") else key
|
key = "filter" if key.startswith("filter") else key
|
||||||
key = "collapse" if key.startswith("collapse") else key
|
key = "collapse" if key.startswith("collapse") else key
|
||||||
amp = "" if full_url.endswith("?") else "&"
|
amp = "" if full_url.endswith("?") else "&"
|
||||||
full_url = full_url + amp + "%s=%s" % (key, quote(str(val)))
|
full_url = (
|
||||||
|
full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
|
||||||
|
)
|
||||||
return full_url
|
return full_url
|
||||||
|
|
||||||
|
|
||||||
@ -166,7 +175,9 @@ def _get_total_pages(url, user_agent):
|
|||||||
This func returns number of pages of archives (type int).
|
This func returns number of pages of archives (type int).
|
||||||
"""
|
"""
|
||||||
total_pages_url = (
|
total_pages_url = (
|
||||||
"https://web.archive.org/cdx/search/cdx?url=%s&showNumPages=true" % url
|
"https://web.archive.org/cdx/search/cdx?url={url}&showNumPages=true".format(
|
||||||
|
url=url
|
||||||
|
)
|
||||||
)
|
)
|
||||||
headers = {"User-Agent": user_agent}
|
headers = {"User-Agent": user_agent}
|
||||||
return int((_get_response(total_pages_url, headers=headers).text).strip())
|
return int((_get_response(total_pages_url, headers=headers).text).strip())
|
||||||
@ -217,10 +228,12 @@ def _archive_url_parser(header, url):
|
|||||||
|
|
||||||
raise WaybackError(
|
raise WaybackError(
|
||||||
"No archive URL found in the API response. "
|
"No archive URL found in the API response. "
|
||||||
"If '%s' can be accessed via your web browser then either "
|
"If '{url}' can be accessed via your web browser then either "
|
||||||
"this version of waybackpy (%s) is out of date or WayBack Machine is malfunctioning. Visit "
|
"this version of waybackpy ({version}) is out of date or WayBack Machine is malfunctioning. Visit "
|
||||||
"'https://github.com/akamhy/waybackpy' for the latest version "
|
"'https://github.com/akamhy/waybackpy' for the latest version "
|
||||||
"of waybackpy.\nHeader:\n%s" % (url, __version__, str(header))
|
"of waybackpy.\nHeader:\n{header}".format(
|
||||||
|
url=url, version=__version__, header=header
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -292,6 +305,7 @@ def _get_response(
|
|||||||
return s.get(url, headers=headers)
|
return s.get(url, headers=headers)
|
||||||
return (url, s.get(url, headers=headers))
|
return (url, s.get(url, headers=headers))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
exc = WaybackError("Error while retrieving %s" % url)
|
exc_message = "Error while retrieving {url}".format(url=url)
|
||||||
|
exc = WaybackError(exc_message)
|
||||||
exc.__cause__ = e
|
exc.__cause__ = e
|
||||||
raise exc
|
raise exc
|
||||||
|
@ -26,7 +26,9 @@ class Url:
|
|||||||
self._alive_url_list = []
|
self._alive_url_list = []
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "waybackpy.Url(url=%s, user_agent=%s)" % (self.url, self.user_agent)
|
return "waybackpy.Url(url={url}, user_agent={user_agent})".format(
|
||||||
|
url=self.url, user_agent=self.user_agent
|
||||||
|
)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
"""
|
"""
|
||||||
@ -43,7 +45,7 @@ class Url:
|
|||||||
|
|
||||||
if not self._archive_url:
|
if not self._archive_url:
|
||||||
self._archive_url = self.archive_url
|
self._archive_url = self.archive_url
|
||||||
return "%s" % self._archive_url
|
return "{archive_url}".format(archive_url=self._archive_url)
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
"""
|
"""
|
||||||
@ -87,7 +89,7 @@ class Url:
|
|||||||
|
|
||||||
endpoint = "https://archive.org/wayback/available"
|
endpoint = "https://archive.org/wayback/available"
|
||||||
headers = {"User-Agent": self.user_agent}
|
headers = {"User-Agent": self.user_agent}
|
||||||
payload = {"url": "%s" % _cleaned_url(self.url)}
|
payload = {"url": "{url}".format(url=_cleaned_url(self.url))}
|
||||||
response = _get_response(endpoint, params=payload, headers=headers)
|
response = _get_response(endpoint, params=payload, headers=headers)
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
@ -217,15 +219,19 @@ class Url:
|
|||||||
|
|
||||||
endpoint = "https://archive.org/wayback/available"
|
endpoint = "https://archive.org/wayback/available"
|
||||||
headers = {"User-Agent": self.user_agent}
|
headers = {"User-Agent": self.user_agent}
|
||||||
payload = {"url": "%s" % _cleaned_url(self.url), "timestamp": timestamp}
|
payload = {
|
||||||
|
"url": "{url}".format(url=_cleaned_url(self.url)),
|
||||||
|
"timestamp": timestamp,
|
||||||
|
}
|
||||||
response = _get_response(endpoint, params=payload, headers=headers)
|
response = _get_response(endpoint, params=payload, headers=headers)
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
||||||
if not data["archived_snapshots"]:
|
if not data["archived_snapshots"]:
|
||||||
raise WaybackError(
|
raise WaybackError(
|
||||||
"Can not find archive for '%s' try later or use wayback.Url(url, user_agent).save() "
|
"Can not find archive for '{url}' try later or use wayback.Url(url, user_agent).save() "
|
||||||
"to create a new archive.\nAPI response:\n%s"
|
"to create a new archive.\nAPI response:\n{text}".format(
|
||||||
% (_cleaned_url(self.url), response.text)
|
url=_cleaned_url(self.url), text=response.text
|
||||||
|
)
|
||||||
)
|
)
|
||||||
archive_url = data["archived_snapshots"]["closest"]["url"]
|
archive_url = data["archived_snapshots"]["closest"]["url"]
|
||||||
archive_url = archive_url.replace(
|
archive_url = archive_url.replace(
|
||||||
|
Loading…
Reference in New Issue
Block a user