fix: revert "str(headers)->headers_str" and add "type: ignore[assignment]" where tests assign a str to the CaseInsensitiveDict-typed headers attribute
This commit is contained in:
@@ -2,8 +2,10 @@ import random
|
||||
import string
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import cast
|
||||
|
||||
import pytest
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from waybackpy.exceptions import MaximumSaveRetriesExceeded
|
||||
from waybackpy.save_api import WaybackMachineSaveAPI
|
||||
@@ -97,11 +99,11 @@ def test_archive_url_parser() -> None:
|
||||
)
|
||||
save_api = WaybackMachineSaveAPI(url, user_agent)
|
||||
|
||||
save_api.headers_str = """
|
||||
START
|
||||
Content-Location: /web/20201126185327/https://www.scribbr.com/citing-sources/et-al
|
||||
END
|
||||
"""
|
||||
save_api.headers = ( # type: ignore[assignment]
|
||||
"\nSTART\nContent-Location: "
|
||||
"/web/20201126185327/https://www.scribbr.com/citing-sources/et-al"
|
||||
"\nEND\n"
|
||||
)
|
||||
|
||||
expected_url = (
|
||||
"https://web.archive.org/web/20201126185327/"
|
||||
@@ -166,7 +168,8 @@ def test_archive_url_parser() -> None:
|
||||
"X-Archive-Screenname": "0",
|
||||
"Content-Encoding": "gzip",
|
||||
}
|
||||
save_api.headers_str = str(headers)
|
||||
|
||||
save_api.headers = cast(CaseInsensitiveDict[str], headers)
|
||||
|
||||
expected_url2 = (
|
||||
"https://web.archive.org/web/20210102094009/"
|
||||
@@ -178,7 +181,9 @@ def test_archive_url_parser() -> None:
|
||||
"https://web.archive.org/web/20171128185327/"
|
||||
"https://www.scribbr.com/citing-sources/et-al/US"
|
||||
)
|
||||
save_api.headers_str = f"START\nX-Cache-Key: {expected_url_3}\nEND\n"
|
||||
save_api.headers = ( # type: ignore[assignment]
|
||||
f"START\nX-Cache-Key: {expected_url_3}\nEND\n"
|
||||
)
|
||||
|
||||
expected_url4 = (
|
||||
"https://web.archive.org/web/20171128185327/"
|
||||
@@ -186,7 +191,7 @@ def test_archive_url_parser() -> None:
|
||||
)
|
||||
assert save_api.archive_url_parser() == expected_url4
|
||||
|
||||
save_api.headers_str = (
|
||||
save_api.headers = ( # type: ignore[assignment]
|
||||
"TEST TEST TEST AND NO MATCH - TEST FOR RESPONSE URL MATCHING"
|
||||
)
|
||||
save_api.response_url = (
|
||||
|
@@ -5,6 +5,7 @@ from typing import Dict, Optional
|
||||
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
from .exceptions import MaximumSaveRetriesExceeded
|
||||
@@ -71,8 +72,7 @@ class WaybackMachineSaveAPI(object):
|
||||
session.mount("https://", HTTPAdapter(max_retries=retries))
|
||||
self.response = session.get(self.request_url, headers=self.request_headers)
|
||||
# requests.response.headers is requests.structures.CaseInsensitiveDict
|
||||
self.headers = self.response.headers
|
||||
self.headers_str = str(self.response.headers)
|
||||
self.headers: CaseInsensitiveDict[str] = self.response.headers
|
||||
self.status_code = self.response.status_code
|
||||
self.response_url = self.response.url
|
||||
session.close()
|
||||
@@ -85,17 +85,17 @@ class WaybackMachineSaveAPI(object):
|
||||
"""
|
||||
|
||||
regex1 = r"Content-Location: (/web/[0-9]{14}/.*)"
|
||||
match = re.search(regex1, self.headers_str)
|
||||
match = re.search(regex1, str(self.headers))
|
||||
if match:
|
||||
return "https://web.archive.org" + match.group(1)
|
||||
|
||||
regex2 = r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>"
|
||||
match = re.search(regex2, self.headers_str)
|
||||
match = re.search(regex2, str(self.headers))
|
||||
if match is not None and len(match.groups()) == 1:
|
||||
return "https://" + match.group(1)
|
||||
|
||||
regex3 = r"X-Cache-Key:\shttps(.*)[A-Z]{2}"
|
||||
match = re.search(regex3, self.headers_str)
|
||||
match = re.search(regex3, str(self.headers))
|
||||
if match is not None and len(match.groups()) == 1:
|
||||
return "https" + match.group(1)
|
||||
|
||||
@@ -187,5 +187,5 @@ class WaybackMachineSaveAPI(object):
|
||||
f"Tried {tries} times but failed to save "
|
||||
f"and retrieve the archive for {self.url}.\n"
|
||||
f"Response URL:\n{self.response_url}\n"
|
||||
f"Response Header:\n{self.headers_str}"
|
||||
f"Response Header:\n{self.headers}"
|
||||
)
|
||||
|
Reference in New Issue
Block a user