Use two regexes for parsing the archive URL
This commit is contained in:
@@ -74,9 +74,18 @@ class Url():
|
|||||||
header = response.headers
|
header = response.headers
|
||||||
|
|
||||||
def archive_url_parser(header):
    """Parse out the archive URL from the response header.

    The header is converted to text with ``str()`` and searched with two
    patterns, in order:

    1. A ``web.archive.org/web/<14 digits>/...`` URL that follows a
       ``rel="memento`` marker and is terminated by ``>``.
    2. Fallback: the value following ``X-Cache-Key: https``, up to the
       last pair of upper-case letters on that line.

    Args:
        header: The response headers; any object whose ``str()`` form
            contains the header text.

    Returns:
        The first capture group of whichever pattern matched. For the
        first pattern this begins at ``web.archive.org/`` (no scheme).

    Raises:
        WaybackError: If neither pattern matches the header text.
    """
    # Convert once; both patterns and the error message use the same text.
    header_text = str(header)

    # Regex1: archive URL advertised after a rel="memento marker.
    arch = re.search(
        r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>",
        header_text,
    )
    if arch:
        return arch.group(1)

    # Regex2: fall back to the X-Cache-Key value; the greedy group stops
    # before the final two upper-case letters on the line.
    # NOTE(review): presumably that suffix is a country code - confirm.
    arch = re.search(r"X-Cache-Key:\shttps(.*)[A-Z]{2}", header_text)
    if arch:
        return arch.group(1)

    raise WaybackError(
        "No archive url found in the API response. Visit https://github.com/akamhy/waybackpy for latest version of waybackpy.\nHeader:\n%s" % header_text
    )
|
||||||
|
Reference in New Issue
Block a user