Add a new regex to parse the archive URL; the Internet Archive (IA) changed the header again :(
This commit is contained in:
parent
ec9ce92f48
commit
8a4b631c13
@ -19,12 +19,18 @@ default_UA = "waybackpy python package - https://github.com/akamhy/waybackpy"
|
|||||||
def _archive_url_parser(header):
|
def _archive_url_parser(header):
|
||||||
"""Parse out the archive from header."""
|
"""Parse out the archive from header."""
|
||||||
# Regex1
|
# Regex1
|
||||||
|
arch = re.search(
|
||||||
|
r"Content-Location: (/web/[0-9]{14}/.*)", str(header)
|
||||||
|
)
|
||||||
|
if arch:
|
||||||
|
return "web.archive.org" + arch.group(1)
|
||||||
|
# Regex2
|
||||||
arch = re.search(
|
arch = re.search(
|
||||||
r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>", str(header)
|
r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>", str(header)
|
||||||
)
|
)
|
||||||
if arch:
|
if arch:
|
||||||
return arch.group(1)
|
return arch.group(1)
|
||||||
# Regex2
|
# Regex3
|
||||||
arch = re.search(r"X-Cache-Key:\shttps(.*)[A-Z]{2}", str(header))
|
arch = re.search(r"X-Cache-Key:\shttps(.*)[A-Z]{2}", str(header))
|
||||||
if arch:
|
if arch:
|
||||||
return arch.group(1)
|
return arch.group(1)
|
||||||
|
Loading…
Reference in New Issue
Block a user