Move archive_url_parser out of Url.save()
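Defining a function inside another function is generally poor form when the inner function does not need the enclosing scope, because the inner function is re-created on every call of the enclosing function. archive_url_parser does not depend on anything in Url, so it makes sense to move it out of the class and up to module level.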
This commit is contained in:
parent
b3c68add55
commit
bec26c4bae
@ -16,6 +16,26 @@ else: # For python2.x
|
||||
default_UA = "waybackpy python package - https://github.com/akamhy/waybackpy"
|
||||
|
||||
|
||||
def archive_url_parser(header):
    """Parse out the archive from header.

    ``header`` is converted with ``str()`` once and searched with two
    patterns, in order. The first capture group of the first pattern that
    matches is returned.

    Returns:
        The captured text. For Regex1 it starts at ``web.archive.org/web/``
        followed by a 14-digit stamp, so it carries no URL scheme. For
        Regex2 it is whatever sits between ``X-Cache-Key: https`` and the
        last pair of uppercase letters on that line.

    Raises:
        WaybackError: if neither pattern matches; the message includes the
            package version and the stringified header.
    """
    # Stringify once; both searches and the error message use the same text.
    header_text = str(header)

    # Regex1: a web.archive.org link that follows a rel="memento marker.
    arch = re.search(
        r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>", header_text
    )
    if arch:
        return arch.group(1)

    # Regex2: fall back to the X-Cache-Key line. The greedy group backtracks
    # just enough to leave two uppercase letters for the trailing [A-Z]{2}.
    arch = re.search(r"X-Cache-Key:\shttps(.*)[A-Z]{2}", header_text)
    if arch:
        return arch.group(1)

    raise WaybackError(
        "No archive URL found in the API response. "
        "This version of waybackpy (%s) is likely out of date. Visit "
        "https://github.com/akamhy/waybackpy for the latest version "
        "of waybackpy.\nHeader:\n%s" % (__version__, header_text)
    )
|
||||
|
||||
|
||||
class Url:
|
||||
"""waybackpy Url object"""
|
||||
|
||||
@ -59,26 +79,6 @@ class Url:
|
||||
hdr = {"User-Agent": "%s" % self.user_agent} # nosec
|
||||
req = Request(request_url, headers=hdr) # nosec
|
||||
header = self.get_response(req).headers
|
||||
|
||||
def archive_url_parser(header):
|
||||
"""Parse out the archive from header."""
|
||||
# Regex1
|
||||
arch = re.search(
|
||||
r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>", str(header)
|
||||
)
|
||||
if arch:
|
||||
return arch.group(1)
|
||||
# Regex2
|
||||
arch = re.search(r"X-Cache-Key:\shttps(.*)[A-Z]{2}", str(header))
|
||||
if arch:
|
||||
return arch.group(1)
|
||||
raise WaybackError(
|
||||
"No archive URL found in the API response. "
|
||||
"This version of waybackpy (%s) is likely out of date. Visit "
|
||||
"https://github.com/akamhy/waybackpy for the latest version "
|
||||
"of waybackpy.\nHeader:\n%s" % (__version__, str(header))
|
||||
)
|
||||
|
||||
return "https://" + archive_url_parser(header)
|
||||
|
||||
def get(self, url=None, user_agent=None, encoding=None):
|
||||
|
Loading…
Reference in New Issue
Block a user