Use two regexes for parsing the archive URL
This commit is contained in:
@@ -74,9 +74,18 @@ class Url():
|
||||
header = response.headers
|
||||
|
||||
def archive_url_parser(header):
    """Extract the archive URL fragment from a response header.

    ``header`` is converted with ``str()`` and searched with two regular
    expressions, in order; the first capture group of the first pattern
    that matches is returned.

    Args:
        header: Any object whose ``str()`` form contains the response
            headers.

    Returns:
        The captured text (no ``https://`` scheme prefix is included by
        either pattern).

    Raises:
        WaybackError: If neither pattern matches; the message embeds the
            stringified header.
    """
    header_text = str(header)

    patterns = (
        # A ``rel="memento`` marker followed by a
        # ``web.archive.org/web/<14 digits>/...`` URL, captured up to the
        # next ``>``.
        r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>",
        # Fallback: everything after ``X-Cache-Key: https`` up to a final
        # pair of uppercase letters (presumably a country code -- confirm).
        r"X-Cache-Key:\shttps(.*)[A-Z]{2}",
    )

    for pattern in patterns:
        found = re.search(pattern, header_text)
        if found:
            return found.group(1)

    raise WaybackError(
        "No archive url found in the API response. Visit https://github.com/akamhy/waybackpy for latest version of waybackpy.\nHeader:\n%s" % header_text
    )
|
||||
|
Reference in New Issue
Block a user