Move archive_url_parser out of Url.save()
It's generally poor form to define a function inside another function, because the inner function is re-created every time the outer function runs. archive_url_parser does not depend on anything in Url (or in Url.save()), so it makes sense to move it out of the method and the class, up to module level.
This commit is contained in:
parent
b3c68add55
commit
bec26c4bae
@ -16,6 +16,26 @@ else: # For python2.x
|
|||||||
default_UA = "waybackpy python package - https://github.com/akamhy/waybackpy"
|
default_UA = "waybackpy python package - https://github.com/akamhy/waybackpy"
|
||||||
|
|
||||||
|
|
||||||
|
def archive_url_parser(header):
    """Extract the archive URL (without its scheme) from a response header.

    Two patterns are tried, in order, against ``str(header)``:

    1. a ``rel="memento`` entry followed by a
       ``web.archive.org/web/<14 digits>/...`` address terminated by ``>``;
    2. an ``X-Cache-Key:`` entry, from which the text between ``https`` and
       a trailing pair of uppercase letters is taken.

    The first capture that matches is returned. If neither pattern matches,
    ``WaybackError`` is raised with the installed version and the header
    text included in the message.
    """
    header_text = str(header)
    patterns = (
        # Regex1
        r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>",
        # Regex2
        r"X-Cache-Key:\shttps(.*)[A-Z]{2}",
    )
    for pattern in patterns:
        found = re.search(pattern, header_text)
        if found is not None:
            return found.group(1)

    message = (
        "No archive URL found in the API response. "
        "This version of waybackpy (%s) is likely out of date. Visit "
        "https://github.com/akamhy/waybackpy for the latest version "
        "of waybackpy.\nHeader:\n%s"
    )
    raise WaybackError(message % (__version__, header_text))
|
||||||
|
|
||||||
|
|
||||||
class Url:
|
class Url:
|
||||||
"""waybackpy Url object"""
|
"""waybackpy Url object"""
|
||||||
|
|
||||||
@ -59,26 +79,6 @@ class Url:
|
|||||||
hdr = {"User-Agent": "%s" % self.user_agent} # nosec
|
hdr = {"User-Agent": "%s" % self.user_agent} # nosec
|
||||||
req = Request(request_url, headers=hdr) # nosec
|
req = Request(request_url, headers=hdr) # nosec
|
||||||
header = self.get_response(req).headers
|
header = self.get_response(req).headers
|
||||||
|
|
||||||
def archive_url_parser(header):
|
|
||||||
"""Parse out the archive from header."""
|
|
||||||
# Regex1
|
|
||||||
arch = re.search(
|
|
||||||
r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>", str(header)
|
|
||||||
)
|
|
||||||
if arch:
|
|
||||||
return arch.group(1)
|
|
||||||
# Regex2
|
|
||||||
arch = re.search(r"X-Cache-Key:\shttps(.*)[A-Z]{2}", str(header))
|
|
||||||
if arch:
|
|
||||||
return arch.group(1)
|
|
||||||
raise WaybackError(
|
|
||||||
"No archive URL found in the API response. "
|
|
||||||
"This version of waybackpy (%s) is likely out of date. Visit "
|
|
||||||
"https://github.com/akamhy/waybackpy for the latest version "
|
|
||||||
"of waybackpy.\nHeader:\n%s" % (__version__, str(header))
|
|
||||||
)
|
|
||||||
|
|
||||||
return "https://" + archive_url_parser(header)
|
return "https://" + archive_url_parser(header)
|
||||||
|
|
||||||
def get(self, url=None, user_agent=None, encoding=None):
|
def get(self, url=None, user_agent=None, encoding=None):
|
||||||
|
Loading…
Reference in New Issue
Block a user