Improve pylint score (#142)

* fix: errors to improve pylint scores

* fix: test

* fix

* add: flake ignore rule to pep8speaks conf

* fix

* add: test patterns to deepsource conf
This commit is contained in:
eggplants 2022-02-08 06:42:20 +09:00 committed by GitHub
parent d3a8f343f8
commit 0b631592ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 62 additions and 42 deletions

View File

@ -3,6 +3,9 @@ version = 1
[[analyzers]] [[analyzers]]
name = "python" name = "python"
enabled = true enabled = true
test_patterns = [
"tests/**",
"test_*.py"
]
[analyzers.meta] [analyzers.meta]
runtime_version = "3.x.x" runtime_version = "3.x.x"

View File

@ -4,3 +4,4 @@ scanner:
flake8: flake8:
max-line-length: 88 max-line-length: 88
extend-ignore: W503,W605

View File

@ -65,7 +65,7 @@ profile = black
[flake8] [flake8]
indent-size = 4 indent-size = 4
max-line-length = 88 max-line-length = 88
extend-ignore = W605 extend-ignore = W503,W605
[mypy] [mypy]
python_version = 3.9 python_version = 3.9
@ -84,7 +84,3 @@ addopts =
--cov-report=html --cov-report=html
testpaths = testpaths =
tests tests
[pycodestyle]
# for `license` and `filter` in `waybackpy.cli.main`
ignore = W0622

View File

@ -40,8 +40,8 @@ def test_oldest() -> None:
oldest_timestamp = oldest.timestamp() oldest_timestamp = oldest.timestamp()
assert abs(oldest_timestamp - now) > timedelta(days=7000) # More than 19 years assert abs(oldest_timestamp - now) > timedelta(days=7000) # More than 19 years
assert ( assert (
availability_api.JSON is not None availability_api.json is not None
and availability_api.JSON["archived_snapshots"]["closest"]["available"] is True and availability_api.json["archived_snapshots"]["closest"]["available"] is True
) )
assert repr(oldest).find("example.com") != -1 assert repr(oldest).find("example.com") != -1
assert "2002" in str(oldest) assert "2002" in str(oldest)

View File

@ -1,3 +1,5 @@
"""Module initializer and provider of static information."""
__title__ = "waybackpy" __title__ = "waybackpy"
__description__ = ( __description__ = (
"Python package that interfaces with the Internet Archive's Wayback Machine APIs. " "Python package that interfaces with the Internet Archive's Wayback Machine APIs. "

View File

@ -37,7 +37,7 @@ from .utils import DEFAULT_USER_AGENT
ResponseJSON = Dict[str, Any] ResponseJSON = Dict[str, Any]
class WaybackMachineAvailabilityAPI(object): class WaybackMachineAvailabilityAPI:
""" """
Class that interfaces the availability API of the Wayback Machine. Class that interfaces the availability API of the Wayback Machine.
""" """
@ -55,7 +55,8 @@ class WaybackMachineAvailabilityAPI(object):
self.tries: int = 0 self.tries: int = 0
self.last_api_call_unix_time: int = int(time.time()) self.last_api_call_unix_time: int = int(time.time())
self.api_call_time_gap: int = 5 self.api_call_time_gap: int = 5
self.JSON: Optional[ResponseJSON] = None self.json: Optional[ResponseJSON] = None
self.response: Optional[Response] = None
@staticmethod @staticmethod
def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str: def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str:
@ -83,12 +84,12 @@ class WaybackMachineAvailabilityAPI(object):
# String should not return anything other than a string object # String should not return anything other than a string object
# So, if a string repr is asked for before making any API requests # So, if a string repr is asked for before making any API requests
# just return "" # just return ""
if not self.JSON: if not self.json:
return "" return ""
return self.archive_url return self.archive_url
def json(self) -> Optional[ResponseJSON]: def setup_json(self) -> Optional[ResponseJSON]:
""" """
Makes the API call to the availability API and set the JSON response Makes the API call to the availability API and set the JSON response
to the JSON attribute of the instance and also returns the JSON to the JSON attribute of the instance and also returns the JSON
@ -109,19 +110,19 @@ class WaybackMachineAvailabilityAPI(object):
if sleep_time > 0: if sleep_time > 0:
time.sleep(sleep_time) time.sleep(sleep_time)
self.response: Response = requests.get( self.response = requests.get(
self.endpoint, params=self.payload, headers=self.headers self.endpoint, params=self.payload, headers=self.headers
) )
self.last_api_call_unix_time = int(time.time()) self.last_api_call_unix_time = int(time.time())
self.tries += 1 self.tries += 1
try: try:
self.JSON = self.response.json() self.json = None if self.response is None else self.response.json()
except json.decoder.JSONDecodeError as json_decode_error: except json.decoder.JSONDecodeError as json_decode_error:
raise InvalidJSONInAvailabilityAPIResponse( raise InvalidJSONInAvailabilityAPIResponse(
f"Response data:\n{self.response.text}" f"Response data:\n{self.response.text}"
) from json_decode_error ) from json_decode_error
return self.JSON return self.json
def timestamp(self) -> datetime: def timestamp(self) -> datetime:
""" """
@ -136,19 +137,19 @@ class WaybackMachineAvailabilityAPI(object):
guaranteed that you can get the datetime object from the timestamp. guaranteed that you can get the datetime object from the timestamp.
""" """
if self.JSON is None or "archived_snapshots" not in self.JSON: if self.json is None or "archived_snapshots" not in self.json:
return datetime.max return datetime.max
if ( if (
self.JSON is not None self.json is not None
and "archived_snapshots" in self.JSON and "archived_snapshots" in self.json
and self.JSON["archived_snapshots"] is not None and self.json["archived_snapshots"] is not None
and "closest" in self.JSON["archived_snapshots"] and "closest" in self.json["archived_snapshots"]
and self.JSON["archived_snapshots"]["closest"] is not None and self.json["archived_snapshots"]["closest"] is not None
and "timestamp" in self.JSON["archived_snapshots"]["closest"] and "timestamp" in self.json["archived_snapshots"]["closest"]
): ):
return datetime.strptime( return datetime.strptime(
self.JSON["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S" self.json["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
) )
raise ValueError("Could not get timestamp from result") raise ValueError("Could not get timestamp from result")
@ -162,7 +163,7 @@ class WaybackMachineAvailabilityAPI(object):
""" """
archive_url = "" archive_url = ""
data = self.JSON data = self.json
# If the user didn't invoke oldest, newest or near but tries to access the # If the user didn't invoke oldest, newest or near but tries to access the
# archive_url attribute then assume they are fine with any archive # archive_url attribute then assume they are fine with any archive
@ -176,8 +177,8 @@ class WaybackMachineAvailabilityAPI(object):
while (self.tries < self.max_tries) and ( while (self.tries < self.max_tries) and (
not data or not data["archived_snapshots"] not data or not data["archived_snapshots"]
): ):
self.json() # It makes a new API call self.setup_json() # It makes a new API call
data = self.JSON # json() updated the value of JSON attribute data = self.json # setup_json() updated the value of the json attribute
# If we exhausted the max_tries, then we give up and # If we exhausted the max_tries, then we give up and
# raise exception. # raise exception.
@ -187,7 +188,10 @@ class WaybackMachineAvailabilityAPI(object):
"Archive not found in the availability " "Archive not found in the availability "
"API response, the URL you requested may not have any archives " "API response, the URL you requested may not have any archives "
"yet. You may retry after some time or archive the webpage now.\n" "yet. You may retry after some time or archive the webpage now.\n"
f"Response data:\n{self.response.text}" "Response data:\n"
""
if self.response is None
else self.response.text
) )
else: else:
archive_url = data["archived_snapshots"]["closest"]["url"] archive_url = data["archived_snapshots"]["closest"]["url"]
@ -262,5 +266,5 @@ class WaybackMachineAvailabilityAPI(object):
) )
self.payload["timestamp"] = timestamp self.payload["timestamp"] = timestamp
self.json() self.setup_json()
return self return self

View File

@ -24,7 +24,7 @@ from .exceptions import WaybackError
from .utils import DEFAULT_USER_AGENT from .utils import DEFAULT_USER_AGENT
class WaybackMachineCDXServerAPI(object): class WaybackMachineCDXServerAPI:
""" """
Class that interfaces the CDX server API of the Wayback Machine. Class that interfaces the CDX server API of the Wayback Machine.

View File

@ -11,7 +11,7 @@ from datetime import datetime
from typing import Dict from typing import Dict
class CDXSnapshot(object): class CDXSnapshot:
""" """
Class for the CDX snapshot lines('record') returned by the CDX API, Class for the CDX snapshot lines('record') returned by the CDX API,
Each valid line of the CDX API is casted to an CDXSnapshot object Each valid line of the CDX API is casted to an CDXSnapshot object

View File

@ -2,11 +2,11 @@
Module that makes waybackpy a CLI tool. Module that makes waybackpy a CLI tool.
""" """
import json as JSON
import os import os
import random import random
import re import re
import string import string
from json import dumps
from typing import Generator, List, Optional from typing import Generator, List, Optional
import click import click
@ -40,7 +40,7 @@ def echo_availability_api(
click.echo(archive_url) click.echo(archive_url)
if json: if json:
click.echo("JSON response:") click.echo("JSON response:")
click.echo(JSON.dumps(availability_api_instance.JSON)) click.echo(dumps(availability_api_instance.json))
def save_urls_on_file(url_gen: Generator[str, None, None]) -> None: def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
@ -63,7 +63,7 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
domain = "domain-unknown" if match is None else match.group(1) domain = "domain-unknown" if match is None else match.group(1)
file_name = f"{domain}-urls-{uid}.txt" file_name = f"{domain}-urls-{uid}.txt"
file_path = os.path.join(os.getcwd(), file_name) file_path = os.path.join(os.getcwd(), file_name)
with open(file_path, "a") as file: with open(file_path, "a", encoding="UTF-8") as file:
file.write(f"{url}\n") file.write(f"{url}\n")
click.echo(url) click.echo(url)
@ -345,8 +345,8 @@ def main( # pylint: disable=no-value-for-parameter
if file: if file:
return save_urls_on_file(url_gen) return save_urls_on_file(url_gen)
for url in url_gen: for url_ in url_gen:
click.echo(url) click.echo(url_)
elif cdx: elif cdx:
filters = list(cdx_filter) filters = list(cdx_filter)

View File

@ -12,6 +12,7 @@ from typing import Dict, Optional
import requests import requests
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from requests.models import Response
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from urllib3.util.retry import Retry from urllib3.util.retry import Retry
@ -19,7 +20,7 @@ from .exceptions import MaximumSaveRetriesExceeded, TooManyRequestsError, Waybac
from .utils import DEFAULT_USER_AGENT from .utils import DEFAULT_USER_AGENT
class WaybackMachineSaveAPI(object): class WaybackMachineSaveAPI:
""" """
WaybackMachineSaveAPI class provides an interface for saving URLs on the WaybackMachineSaveAPI class provides an interface for saving URLs on the
Wayback Machine. Wayback Machine.
@ -43,6 +44,12 @@ class WaybackMachineSaveAPI(object):
self.status_forcelist = [500, 502, 503, 504] self.status_forcelist = [500, 502, 503, 504]
self._archive_url: Optional[str] = None self._archive_url: Optional[str] = None
self.instance_birth_time = datetime.utcnow() self.instance_birth_time = datetime.utcnow()
self.response: Optional[Response] = None
self.headers: Optional[CaseInsensitiveDict[str]] = None
self.status_code: Optional[int] = None
self.response_url: Optional[str] = None
self.cached_save: Optional[bool] = None
self.saved_archive: Optional[str] = None
@property @property
def archive_url(self) -> str: def archive_url(self) -> str:
@ -83,7 +90,7 @@ class WaybackMachineSaveAPI(object):
session.mount("https://", HTTPAdapter(max_retries=retries)) session.mount("https://", HTTPAdapter(max_retries=retries))
self.response = session.get(self.request_url, headers=self.request_headers) self.response = session.get(self.request_url, headers=self.request_headers)
# requests.response.headers is requests.structures.CaseInsensitiveDict # requests.response.headers is requests.structures.CaseInsensitiveDict
self.headers: CaseInsensitiveDict[str] = self.response.headers self.headers = self.response.headers
self.status_code = self.response.status_code self.status_code = self.response.status_code
self.response_url = self.response.url self.response_url = self.response.url
session.close() session.close()
@ -129,7 +136,9 @@ class WaybackMachineSaveAPI(object):
if match is not None and len(match.groups()) == 1: if match is not None and len(match.groups()) == 1:
return "https" + match.group(1) return "https" + match.group(1)
self.response_url = self.response_url.strip() self.response_url = (
"" if self.response_url is None else self.response_url.strip()
)
regex4 = r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$" regex4 = r"web\.archive\.org/web/(?:[0-9]*?)/(?:.*)$"
match = re.search(regex4, self.response_url) match = re.search(regex4, self.response_url)
if match is not None: if match is not None:

View File

@ -7,13 +7,15 @@ the Url class.
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Generator, Optional from typing import Generator, Optional
from .availability_api import WaybackMachineAvailabilityAPI from requests.structures import CaseInsensitiveDict
from .availability_api import ResponseJSON, WaybackMachineAvailabilityAPI
from .cdx_api import WaybackMachineCDXServerAPI from .cdx_api import WaybackMachineCDXServerAPI
from .save_api import WaybackMachineSaveAPI from .save_api import WaybackMachineSaveAPI
from .utils import DEFAULT_USER_AGENT from .utils import DEFAULT_USER_AGENT
class Url(object): class Url:
""" """
The Url class is not recommended to be used anymore, instead use: The Url class is not recommended to be used anymore, instead use:
@ -39,6 +41,9 @@ class Url(object):
self.wayback_machine_availability_api = WaybackMachineAvailabilityAPI( self.wayback_machine_availability_api = WaybackMachineAvailabilityAPI(
self.url, user_agent=self.user_agent self.url, user_agent=self.user_agent
) )
self.wayback_machine_save_api: Optional[WaybackMachineSaveAPI] = None
self.headers: Optional[CaseInsensitiveDict[str]] = None
self.json: Optional[ResponseJSON] = None
def __str__(self) -> str: def __str__(self) -> str:
if not self.archive_url: if not self.archive_url:
@ -107,7 +112,7 @@ class Url(object):
def set_availability_api_attrs(self) -> None: def set_availability_api_attrs(self) -> None:
"""Set the attributes for total backwards compatibility.""" """Set the attributes for total backwards compatibility."""
self.archive_url = self.wayback_machine_availability_api.archive_url self.archive_url = self.wayback_machine_availability_api.archive_url
self.JSON = self.wayback_machine_availability_api.JSON self.json = self.wayback_machine_availability_api.json
self.timestamp = self.wayback_machine_availability_api.timestamp() self.timestamp = self.wayback_machine_availability_api.timestamp()
def total_archives( def total_archives(