fix: errors to improve pylint scores

This commit is contained in:
eggplants
2022-02-08 04:15:32 +09:00
parent 97f8b96411
commit 6876c786de
7 changed files with 52 additions and 33 deletions

View File

@@ -1,3 +1,7 @@
"""
Module initializer and provider of static information.
"""
__title__ = "waybackpy"
__description__ = (
"Python package that interfaces with the Internet Archive's Wayback Machine APIs. "

View File

@@ -37,7 +37,7 @@ from .utils import DEFAULT_USER_AGENT
ResponseJSON = Dict[str, Any]
class WaybackMachineAvailabilityAPI(object):
class WaybackMachineAvailabilityAPI:
"""
Class that interfaces the availability API of the Wayback Machine.
"""
@@ -55,7 +55,8 @@ class WaybackMachineAvailabilityAPI(object):
self.tries: int = 0
self.last_api_call_unix_time: int = int(time.time())
self.api_call_time_gap: int = 5
self.JSON: Optional[ResponseJSON] = None
self.json: Optional[ResponseJSON] = None
self.response: Optional[Response] = None
@staticmethod
def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str:
@@ -83,12 +84,12 @@ class WaybackMachineAvailabilityAPI(object):
# String should not return anything other than a string object
# So, if a string repr is asked for before making any API requests
# just return ""
if not self.JSON:
if not self.json:
return ""
return self.archive_url
def json(self) -> Optional[ResponseJSON]:
def setup_json(self) -> Optional[ResponseJSON]:
"""
Makes the API call to the availability API and sets the JSON response
to the JSON attribute of the instance and also returns the JSON
@@ -109,19 +110,19 @@ class WaybackMachineAvailabilityAPI(object):
if sleep_time > 0:
time.sleep(sleep_time)
self.response: Response = requests.get(
self.response = requests.get(
self.endpoint, params=self.payload, headers=self.headers
)
self.last_api_call_unix_time = int(time.time())
self.tries += 1
try:
self.JSON = self.response.json()
self.json = None if self.response is None else self.response.json()
except json.decoder.JSONDecodeError as json_decode_error:
raise InvalidJSONInAvailabilityAPIResponse(
f"Response data:\n{self.response.text}"
) from json_decode_error
return self.JSON
return self.json
def timestamp(self) -> datetime:
"""
@@ -136,19 +137,19 @@ class WaybackMachineAvailabilityAPI(object):
guaranteed that you can get the datetime object from the timestamp.
"""
if self.JSON is None or "archived_snapshots" not in self.JSON:
if self.json is None or "archived_snapshots" not in self.json:
return datetime.max
if (
self.JSON is not None
and "archived_snapshots" in self.JSON
and self.JSON["archived_snapshots"] is not None
and "closest" in self.JSON["archived_snapshots"]
and self.JSON["archived_snapshots"]["closest"] is not None
and "timestamp" in self.JSON["archived_snapshots"]["closest"]
self.json is not None
and "archived_snapshots" in self.json
and self.json["archived_snapshots"] is not None
and "closest" in self.json["archived_snapshots"]
and self.json["archived_snapshots"]["closest"] is not None
and "timestamp" in self.json["archived_snapshots"]["closest"]
):
return datetime.strptime(
self.JSON["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
self.json["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
)
raise ValueError("Could not get timestamp from result")
@@ -162,7 +163,7 @@ class WaybackMachineAvailabilityAPI(object):
"""
archive_url = ""
data = self.JSON
data = self.json
# If the user didn't invoke oldest, newest or near but tries to access the
# archive_url attribute then assume they are fine with any archive
@@ -176,8 +177,8 @@ class WaybackMachineAvailabilityAPI(object):
while (self.tries < self.max_tries) and (
not data or not data["archived_snapshots"]
):
self.json() # It makes a new API call
data = self.JSON # json() updated the value of JSON attribute
self.setup_json() # It makes a new API call
data = self.json # setup_json() updated the value of the json attribute
# If we exhausted the max_tries, then we give up and
# raise exception.
@@ -187,7 +188,7 @@ class WaybackMachineAvailabilityAPI(object):
"Archive not found in the availability "
"API response, the URL you requested may not have any archives "
"yet. You may retry after some time or archive the webpage now.\n"
f"Response data:\n{self.response.text}"
f"Response data:\n{None if self.response is None else self.response.text}"
)
else:
archive_url = data["archived_snapshots"]["closest"]["url"]
@@ -262,5 +263,5 @@ class WaybackMachineAvailabilityAPI(object):
)
self.payload["timestamp"] = timestamp
self.json()
self.setup_json()
return self

View File

@@ -24,7 +24,7 @@ from .exceptions import WaybackError
from .utils import DEFAULT_USER_AGENT
class WaybackMachineCDXServerAPI(object):
class WaybackMachineCDXServerAPI:
"""
Class that interfaces the CDX server API of the Wayback Machine.

View File

@@ -11,7 +11,7 @@ from datetime import datetime
from typing import Dict
class CDXSnapshot(object):
class CDXSnapshot:
"""
Class for the CDX snapshot lines('record') returned by the CDX API,
Each valid line of the CDX API is cast to a CDXSnapshot object

View File

@@ -2,11 +2,11 @@
Module that makes waybackpy a CLI tool.
"""
import json as JSON
import os
import random
import re
import string
from json import dumps
from typing import Generator, List, Optional
import click
@@ -40,7 +40,7 @@ def echo_availability_api(
click.echo(archive_url)
if json:
click.echo("JSON response:")
click.echo(JSON.dumps(availability_api_instance.JSON))
click.echo(dumps(availability_api_instance.json))
def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
@@ -63,7 +63,7 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
domain = "domain-unknown" if match is None else match.group(1)
file_name = f"{domain}-urls-{uid}.txt"
file_path = os.path.join(os.getcwd(), file_name)
with open(file_path, "a") as file:
with open(file_path, "a", encoding="UTF-8") as file:
file.write(f"{url}\n")
click.echo(url)
@@ -345,8 +345,8 @@ def main( # pylint: disable=no-value-for-parameter
if file:
return save_urls_on_file(url_gen)
for url in url_gen:
click.echo(url)
for url_ in url_gen:
click.echo(url_)
elif cdx:
filters = list(cdx_filter)

View File

@@ -12,6 +12,7 @@ from typing import Dict, Optional
import requests
from requests.adapters import HTTPAdapter
from requests.models import Response
from requests.structures import CaseInsensitiveDict
from urllib3.util.retry import Retry
@@ -19,7 +20,7 @@ from .exceptions import MaximumSaveRetriesExceeded, TooManyRequestsError, Waybac
from .utils import DEFAULT_USER_AGENT
class WaybackMachineSaveAPI(object):
class WaybackMachineSaveAPI:
"""
WaybackMachineSaveAPI class provides an interface for saving URLs on the
Wayback Machine.
@@ -43,6 +44,12 @@ class WaybackMachineSaveAPI(object):
self.status_forcelist = [500, 502, 503, 504]
self._archive_url: Optional[str] = None
self.instance_birth_time = datetime.utcnow()
self.response: Optional[Response] = None
self.headers: Optional[CaseInsensitiveDict[str]] = None
self.status_code: Optional[int] = None
self.response_url: Optional[str] = None
self.cached_save: Optional[bool] = None
self.saved_archive: Optional[str] = None
@property
def archive_url(self) -> str:
@@ -83,7 +90,7 @@ class WaybackMachineSaveAPI(object):
session.mount("https://", HTTPAdapter(max_retries=retries))
self.response = session.get(self.request_url, headers=self.request_headers)
# requests.response.headers is requests.structures.CaseInsensitiveDict
self.headers: CaseInsensitiveDict[str] = self.response.headers
self.headers = self.response.headers
self.status_code = self.response.status_code
self.response_url = self.response.url
session.close()
@@ -129,7 +136,9 @@ class WaybackMachineSaveAPI(object):
if match is not None and len(match.groups()) == 1:
return "https" + match.group(1)
self.response_url = self.response_url.strip()
self.response_url = (
"" if self.response_url is None else self.response_url.strip()
)
regex4 = r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$"
match = re.search(regex4, self.response_url)
if match is not None:

View File

@@ -7,13 +7,15 @@ the Url class.
from datetime import datetime, timedelta
from typing import Generator, Optional
from .availability_api import WaybackMachineAvailabilityAPI
from requests.structures import CaseInsensitiveDict
from .availability_api import ResponseJSON, WaybackMachineAvailabilityAPI
from .cdx_api import WaybackMachineCDXServerAPI
from .save_api import WaybackMachineSaveAPI
from .utils import DEFAULT_USER_AGENT
class Url(object):
class Url:
"""
The Url class is not recommended to be used anymore, instead use:
@@ -39,6 +41,9 @@ class Url(object):
self.wayback_machine_availability_api = WaybackMachineAvailabilityAPI(
self.url, user_agent=self.user_agent
)
self.wayback_machine_save_api: Optional[WaybackMachineSaveAPI] = None
self.headers: Optional[CaseInsensitiveDict[str]] = None
self.json: Optional[ResponseJSON] = None
def __str__(self) -> str:
if not self.archive_url:
@@ -107,7 +112,7 @@ class Url(object):
def set_availability_api_attrs(self) -> None:
"""Set the attributes for total backwards compatibility."""
self.archive_url = self.wayback_machine_availability_api.archive_url
self.JSON = self.wayback_machine_availability_api.JSON
self.json = self.wayback_machine_availability_api.json
self.timestamp = self.wayback_machine_availability_api.timestamp()
def total_archives(