fix: revert "str(headers)->headers_str" and add "type: ignore[assignment]" where tests assign a str to the CaseInsensitiveDict-typed headers attribute

This commit is contained in:
eggplants
2022-02-04 21:02:14 +09:00
parent 75352df296
commit d0e65d1a5e
2 changed files with 19 additions and 14 deletions

View File

@@ -2,8 +2,10 @@ import random
import string import string
import time import time
from datetime import datetime from datetime import datetime
from typing import cast
import pytest import pytest
from requests.structures import CaseInsensitiveDict
from waybackpy.exceptions import MaximumSaveRetriesExceeded from waybackpy.exceptions import MaximumSaveRetriesExceeded
from waybackpy.save_api import WaybackMachineSaveAPI from waybackpy.save_api import WaybackMachineSaveAPI
@@ -97,11 +99,11 @@ def test_archive_url_parser() -> None:
) )
save_api = WaybackMachineSaveAPI(url, user_agent) save_api = WaybackMachineSaveAPI(url, user_agent)
save_api.headers_str = """ save_api.headers = ( # type: ignore[assignment]
START "\nSTART\nContent-Location: "
Content-Location: /web/20201126185327/https://www.scribbr.com/citing-sources/et-al "/web/20201126185327/https://www.scribbr.com/citing-sources/et-al"
END "\nEND\n"
""" )
expected_url = ( expected_url = (
"https://web.archive.org/web/20201126185327/" "https://web.archive.org/web/20201126185327/"
@@ -166,7 +168,8 @@ def test_archive_url_parser() -> None:
"X-Archive-Screenname": "0", "X-Archive-Screenname": "0",
"Content-Encoding": "gzip", "Content-Encoding": "gzip",
} }
save_api.headers_str = str(headers)
save_api.headers = cast(CaseInsensitiveDict[str], headers)
expected_url2 = ( expected_url2 = (
"https://web.archive.org/web/20210102094009/" "https://web.archive.org/web/20210102094009/"
@@ -178,7 +181,9 @@ def test_archive_url_parser() -> None:
"https://web.archive.org/web/20171128185327/" "https://web.archive.org/web/20171128185327/"
"https://www.scribbr.com/citing-sources/et-al/US" "https://www.scribbr.com/citing-sources/et-al/US"
) )
save_api.headers_str = f"START\nX-Cache-Key: {expected_url_3}\nEND\n" save_api.headers = ( # type: ignore[assignment]
f"START\nX-Cache-Key: {expected_url_3}\nEND\n"
)
expected_url4 = ( expected_url4 = (
"https://web.archive.org/web/20171128185327/" "https://web.archive.org/web/20171128185327/"
@@ -186,7 +191,7 @@ def test_archive_url_parser() -> None:
) )
assert save_api.archive_url_parser() == expected_url4 assert save_api.archive_url_parser() == expected_url4
save_api.headers_str = ( save_api.headers = ( # type: ignore[assignment]
"TEST TEST TEST AND NO MATCH - TEST FOR RESPONSE URL MATCHING" "TEST TEST TEST AND NO MATCH - TEST FOR RESPONSE URL MATCHING"
) )
save_api.response_url = ( save_api.response_url = (

View File

@@ -5,6 +5,7 @@ from typing import Dict, Optional
import requests import requests
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.structures import CaseInsensitiveDict
from urllib3.util.retry import Retry from urllib3.util.retry import Retry
from .exceptions import MaximumSaveRetriesExceeded from .exceptions import MaximumSaveRetriesExceeded
@@ -71,8 +72,7 @@ class WaybackMachineSaveAPI(object):
session.mount("https://", HTTPAdapter(max_retries=retries)) session.mount("https://", HTTPAdapter(max_retries=retries))
self.response = session.get(self.request_url, headers=self.request_headers) self.response = session.get(self.request_url, headers=self.request_headers)
# requests.response.headers is requests.structures.CaseInsensitiveDict # requests.response.headers is requests.structures.CaseInsensitiveDict
self.headers = self.response.headers self.headers: CaseInsensitiveDict[str] = self.response.headers
self.headers_str = str(self.response.headers)
self.status_code = self.response.status_code self.status_code = self.response.status_code
self.response_url = self.response.url self.response_url = self.response.url
session.close() session.close()
@@ -85,17 +85,17 @@ class WaybackMachineSaveAPI(object):
""" """
regex1 = r"Content-Location: (/web/[0-9]{14}/.*)" regex1 = r"Content-Location: (/web/[0-9]{14}/.*)"
match = re.search(regex1, self.headers_str) match = re.search(regex1, str(self.headers))
if match: if match:
return "https://web.archive.org" + match.group(1) return "https://web.archive.org" + match.group(1)
regex2 = r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>" regex2 = r"rel=\"memento.*?(web\.archive\.org/web/[0-9]{14}/.*?)>"
match = re.search(regex2, self.headers_str) match = re.search(regex2, str(self.headers))
if match is not None and len(match.groups()) == 1: if match is not None and len(match.groups()) == 1:
return "https://" + match.group(1) return "https://" + match.group(1)
regex3 = r"X-Cache-Key:\shttps(.*)[A-Z]{2}" regex3 = r"X-Cache-Key:\shttps(.*)[A-Z]{2}"
match = re.search(regex3, self.headers_str) match = re.search(regex3, str(self.headers))
if match is not None and len(match.groups()) == 1: if match is not None and len(match.groups()) == 1:
return "https" + match.group(1) return "https" + match.group(1)
@@ -187,5 +187,5 @@ class WaybackMachineSaveAPI(object):
f"Tried {tries} times but failed to save " f"Tried {tries} times but failed to save "
f"and retrieve the archive for {self.url}.\n" f"and retrieve the archive for {self.url}.\n"
f"Response URL:\n{self.response_url}\n" f"Response URL:\n{self.response_url}\n"
f"Response Header:\n{self.headers_str}" f"Response Header:\n{self.headers}"
) )