fix: revert "str(headers)->headers_str" and ignore assignment CaseInsensitiveDict with str

This commit is contained in:
eggplants
2022-02-04 21:02:14 +09:00
parent 75352df296
commit d0e65d1a5e
2 changed files with 19 additions and 14 deletions

View File

@@ -5,6 +5,7 @@ from typing import Dict, Optional
import requests
from requests.adapters import HTTPAdapter
from requests.structures import CaseInsensitiveDict
from urllib3.util.retry import Retry
from .exceptions import MaximumSaveRetriesExceeded
@@ -71,8 +72,7 @@ class WaybackMachineSaveAPI(object):
session.mount("https://", HTTPAdapter(max_retries=retries))
self.response = session.get(self.request_url, headers=self.request_headers)
# requests.response.headers is requests.structures.CaseInsensitiveDict
self.headers = self.response.headers
self.headers_str = str(self.response.headers)
self.headers: CaseInsensitiveDict[str] = self.response.headers
self.status_code = self.response.status_code
self.response_url = self.response.url
session.close()
@@ -85,17 +85,17 @@ class WaybackMachineSaveAPI(object):
"""
regex1 = r"Content-Location: (/web/[0-9]{14}/.*)"
match = re.search(regex1, self.headers_str)
match = re.search(regex1, str(self.headers))
if match:
return "https://web.archive.org" + match.group(1)
regex2 = r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>"
match = re.search(regex2, self.headers_str)
match = re.search(regex2, str(self.headers))
if match is not None and len(match.groups()) == 1:
return "https://" + match.group(1)
regex3 = r"X-Cache-Key:\shttps(.*)[A-Z]{2}"
match = re.search(regex3, self.headers_str)
match = re.search(regex3, str(self.headers))
if match is not None and len(match.groups()) == 1:
return "https" + match.group(1)
@@ -187,5 +187,5 @@ class WaybackMachineSaveAPI(object):
f"Tried {tries} times but failed to save "
f"and retrieve the archive for {self.url}.\n"
f"Response URL:\n{self.response_url}\n"
f"Response Header:\n{self.headers_str}"
f"Response Header:\n{self.headers}"
)