Merge branch 'master' into fix_metadata
This commit is contained in:
@@ -24,9 +24,11 @@ keywords =
|
|||||||
CDX API
|
CDX API
|
||||||
savepagenow
|
savepagenow
|
||||||
classifiers =
|
classifiers =
|
||||||
Development Status :: 4 - Beta
|
Development Status :: 5 - Production/Stable
|
||||||
Intended Audience :: Developers
|
Intended Audience :: Developers
|
||||||
|
Intended Audience :: End Users/Desktop
|
||||||
Natural Language :: English
|
Natural Language :: English
|
||||||
|
Typing :: Typed
|
||||||
License :: OSI Approved :: MIT License
|
License :: OSI Approved :: MIT License
|
||||||
Programming Language :: Python
|
Programming Language :: Python
|
||||||
Programming Language :: Python :: 3
|
Programming Language :: Python :: 3
|
||||||
|
@@ -1,9 +1,5 @@
|
|||||||
from waybackpy import __version__
|
from waybackpy import __version__
|
||||||
from waybackpy.utils import (
|
from waybackpy.utils import DEFAULT_USER_AGENT
|
||||||
DEFAULT_USER_AGENT,
|
|
||||||
latest_version_github,
|
|
||||||
latest_version_pypi,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_default_user_agent() -> None:
|
def test_default_user_agent() -> None:
|
||||||
@@ -11,8 +7,3 @@ def test_default_user_agent() -> None:
|
|||||||
DEFAULT_USER_AGENT
|
DEFAULT_USER_AGENT
|
||||||
== f"waybackpy {__version__} - https://github.com/akamhy/waybackpy"
|
== f"waybackpy {__version__} - https://github.com/akamhy/waybackpy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_latest_version() -> None:
|
|
||||||
package_name = "waybackpy"
|
|
||||||
assert latest_version_github(package_name) == latest_version_pypi(package_name)
|
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
"""Module initializer and provider of static infomation."""
|
"""Module initializer and provider of static information."""
|
||||||
|
|
||||||
__version__ = "3.0.2"
|
__version__ = "3.0.3"
|
||||||
|
|
||||||
from .availability_api import WaybackMachineAvailabilityAPI
|
from .availability_api import WaybackMachineAvailabilityAPI
|
||||||
from .cdx_api import WaybackMachineCDXServerAPI
|
from .cdx_api import WaybackMachineCDXServerAPI
|
||||||
|
@@ -1,19 +1,19 @@
|
|||||||
"""
|
"""
|
||||||
This module interfaces the Wayback Machine's availability API.
|
This module interfaces the Wayback Machine's availability API.
|
||||||
|
|
||||||
The interface could be useful for looking up archives and finding archives
|
The interface is useful for looking up archives and finding archives
|
||||||
that are close to a specific date and time.
|
that are close to a specific date and time.
|
||||||
|
|
||||||
It has a class called WaybackMachineAvailabilityAPI, and the class has
|
It has a class WaybackMachineAvailabilityAPI, and the class has
|
||||||
methods such as:
|
methods like:
|
||||||
|
|
||||||
near() for looking up archives close to a specific date and time.
|
near() for retrieving archives close to a specific date and time.
|
||||||
|
|
||||||
oldest() for retrieving the first archive URL of the webpage.
|
oldest() for retrieving the first archive URL of the webpage.
|
||||||
|
|
||||||
newest() for retrieving the latest archive of an URL.
|
newest() for retrieving the latest archive of the webpage.
|
||||||
|
|
||||||
The Wayback Machine Availability response should be a valid JSON and
|
The Wayback Machine Availability API response must be a valid JSON and
|
||||||
if it is not then an exception, InvalidJSONInAvailabilityAPIResponse is raised.
|
if it is not then an exception, InvalidJSONInAvailabilityAPIResponse is raised.
|
||||||
|
|
||||||
If the Availability API returned valid JSON but archive URL could not be found
|
If the Availability API returned valid JSON but archive URL could not be found
|
||||||
@@ -39,7 +39,7 @@ ResponseJSON = Dict[str, Any]
|
|||||||
|
|
||||||
class WaybackMachineAvailabilityAPI:
|
class WaybackMachineAvailabilityAPI:
|
||||||
"""
|
"""
|
||||||
Class that interfaces the availability API of the Wayback Machine.
|
Class that interfaces the Wayback Machine's availability API.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -61,7 +61,7 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str:
|
def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str:
|
||||||
"""
|
"""
|
||||||
Converts Unix time to wayback Machine timestamp and the Wayback Machine
|
Converts Unix time to a Wayback Machine timestamp; the Wayback Machine
|
||||||
timestamp format is yyyyMMddhhmmss.
|
timestamp format is yyyyMMddhhmmss.
|
||||||
"""
|
"""
|
||||||
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
|
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
|
||||||
@@ -76,10 +76,10 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
"""
|
"""
|
||||||
String representation of the class. If at least one API
|
String representation of the class. If at least one API
|
||||||
call was successfully made then return the archive URL
|
call was successfully made then return the archive URL
|
||||||
as a string. Else returns "".
|
as a string. Else returns "" (empty string literal).
|
||||||
"""
|
"""
|
||||||
# String should not return anything other than a string object
|
# __str__ can not return anything other than a string object
|
||||||
# So, if a string repr is asked for before making any API requests
|
# So, if a string repr is asked even before making an API request
|
||||||
# just return ""
|
# just return ""
|
||||||
if not self.json:
|
if not self.json:
|
||||||
return ""
|
return ""
|
||||||
@@ -147,7 +147,7 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
self.json["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
|
self.json["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
|
||||||
)
|
)
|
||||||
|
|
||||||
raise ValueError("Could not get timestamp from result")
|
raise ValueError("Timestamp not found in the Availability API's JSON response.")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def archive_url(self) -> str:
|
def archive_url(self) -> str:
|
||||||
@@ -159,8 +159,8 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
archive_url = ""
|
archive_url = ""
|
||||||
data = self.json
|
data = self.json
|
||||||
|
|
||||||
# If the user didn't invoke oldest, newest or near but tries to access the
|
# If the user didn't invoke oldest, newest or near but tries to access
|
||||||
# archive_url attribute then assume they are fine with any archive
|
# archive_url attribute then assume that they are fine with any archive
|
||||||
# and invoke the oldest method.
|
# and invoke the oldest method.
|
||||||
if not data:
|
if not data:
|
||||||
self.oldest()
|
self.oldest()
|
||||||
@@ -172,10 +172,10 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
not data or not data["archived_snapshots"]
|
not data or not data["archived_snapshots"]
|
||||||
):
|
):
|
||||||
self.setup_json() # It makes a new API call
|
self.setup_json() # It makes a new API call
|
||||||
data = self.json # json() updated the value of JSON attribute
|
data = self.json # setup_json() updates value of json attribute
|
||||||
|
|
||||||
# If we exhausted the max_tries, then we give up and
|
# If exhausted max_tries, then give up and
|
||||||
# raise exception.
|
# raise ArchiveNotInAvailabilityAPIResponse.
|
||||||
|
|
||||||
if not data or not data["archived_snapshots"]:
|
if not data or not data["archived_snapshots"]:
|
||||||
raise ArchiveNotInAvailabilityAPIResponse(
|
raise ArchiveNotInAvailabilityAPIResponse(
|
||||||
@@ -198,7 +198,7 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
def wayback_timestamp(**kwargs: int) -> str:
|
def wayback_timestamp(**kwargs: int) -> str:
|
||||||
"""
|
"""
|
||||||
Prepends zero before the year, month, day, hour and minute so that they
|
Prepends zero before the year, month, day, hour and minute so that they
|
||||||
are conformable with the YYYYMMDDhhmmss wayback machine timestamp format.
|
are conformable with the YYYYMMDDhhmmss Wayback Machine timestamp format.
|
||||||
"""
|
"""
|
||||||
return "".join(
|
return "".join(
|
||||||
str(kwargs[key]).zfill(2)
|
str(kwargs[key]).zfill(2)
|
||||||
@@ -218,7 +218,7 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
Passes the current UNIX time to near() for retrieving the newest archive
|
Passes the current UNIX time to near() for retrieving the newest archive
|
||||||
from the availability API.
|
from the availability API.
|
||||||
|
|
||||||
We assume that wayback machine can not archive the future of a webpage.
|
Remember UNIX time is UTC and Wayback Machine is also UTC based.
|
||||||
"""
|
"""
|
||||||
return self.near(unix_timestamp=int(time.time()))
|
return self.near(unix_timestamp=int(time.time()))
|
||||||
|
|
||||||
@@ -232,7 +232,8 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
unix_timestamp: Optional[int] = None,
|
unix_timestamp: Optional[int] = None,
|
||||||
) -> "WaybackMachineAvailabilityAPI":
|
) -> "WaybackMachineAvailabilityAPI":
|
||||||
"""
|
"""
|
||||||
The main method for the Class, oldest() and newest() are dependent on it.
|
The most important method of this Class, oldest() and newest() are
|
||||||
|
dependent on it.
|
||||||
|
|
||||||
It generates the timestamp based on the input either by calling the
|
It generates the timestamp based on the input either by calling the
|
||||||
unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
|
unix_timestamp_to_wayback_timestamp or wayback_timestamp method with
|
||||||
@@ -240,8 +241,8 @@ class WaybackMachineAvailabilityAPI:
|
|||||||
|
|
||||||
Adds the timestamp to the payload dictionary.
|
Adds the timestamp to the payload dictionary.
|
||||||
|
|
||||||
And finally invoking the json method to make the API call then returns
|
And finally invokes the setup_json method to make the API call then
|
||||||
the instance.
|
finally returns the instance.
|
||||||
"""
|
"""
|
||||||
if unix_timestamp:
|
if unix_timestamp:
|
||||||
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||||
|
@@ -173,7 +173,6 @@ class WaybackMachineCDXServerAPI:
|
|||||||
for i, collapse in enumerate(self.collapses):
|
for i, collapse in enumerate(self.collapses):
|
||||||
payload["collapse" + str(i)] = collapse
|
payload["collapse" + str(i)] = collapse
|
||||||
|
|
||||||
# Don't need to return anything as it's dictionary.
|
|
||||||
payload["url"] = self.url
|
payload["url"] = self.url
|
||||||
|
|
||||||
def snapshots(self) -> Generator[CDXSnapshot, None, None]:
|
def snapshots(self) -> Generator[CDXSnapshot, None, None]:
|
||||||
|
@@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Module that contains the CDXSnapshot class, CDX records are casted
|
Module that contains the CDXSnapshot class, CDX records/lines are cast
|
||||||
to CDXSnapshot objects for easier access.
|
to CDXSnapshot objects for easier access.
|
||||||
|
|
||||||
The CDX index format is plain text data. Each line ('record') indicates a
|
The CDX index format is plain text data. Each line ('record') indicates a
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
Utility functions required for accessing the CDX server API.
|
Utility functions required for accessing the CDX server API.
|
||||||
|
|
||||||
These are here in this module so that we don’t make any module too
|
These are here in this module so that we don’t make any module too
|
||||||
big.
|
long.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@@ -63,7 +63,7 @@ def get_response(
|
|||||||
backoff_factor: float = 0.5,
|
backoff_factor: float = 0.5,
|
||||||
) -> Union[requests.Response, Exception]:
|
) -> Union[requests.Response, Exception]:
|
||||||
"""
|
"""
|
||||||
Make get request to the CDX server and return the response.
|
Makes get request to the CDX server and returns the response.
|
||||||
"""
|
"""
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
|
|
||||||
|
151
waybackpy/cli.py
151
waybackpy/cli.py
@@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Module that makes waybackpy a CLI tool.
|
Module responsible for enabling waybackpy to function as a CLI tool.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@@ -7,7 +7,7 @@ import random
|
|||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
from json import dumps
|
from json import dumps
|
||||||
from typing import Generator, List, Optional
|
from typing import Any, Generator, List, Optional
|
||||||
|
|
||||||
import click
|
import click
|
||||||
import requests
|
import requests
|
||||||
@@ -15,6 +15,7 @@ import requests
|
|||||||
from . import __version__
|
from . import __version__
|
||||||
from .availability_api import WaybackMachineAvailabilityAPI
|
from .availability_api import WaybackMachineAvailabilityAPI
|
||||||
from .cdx_api import WaybackMachineCDXServerAPI
|
from .cdx_api import WaybackMachineCDXServerAPI
|
||||||
|
from .exceptions import ArchiveNotInAvailabilityAPIResponse
|
||||||
from .save_api import WaybackMachineSaveAPI
|
from .save_api import WaybackMachineSaveAPI
|
||||||
from .utils import DEFAULT_USER_AGENT
|
from .utils import DEFAULT_USER_AGENT
|
||||||
from .wrapper import Url
|
from .wrapper import Url
|
||||||
@@ -24,18 +25,23 @@ def echo_availability_api(
|
|||||||
availability_api_instance: WaybackMachineAvailabilityAPI, json: bool
|
availability_api_instance: WaybackMachineAvailabilityAPI, json: bool
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Output availability API depending functions.
|
Output for methods that use the availability API.
|
||||||
Near, oldest and newest output by this method.
|
Near, oldest and newest output via this function.
|
||||||
"""
|
"""
|
||||||
if not availability_api_instance.archive_url:
|
try:
|
||||||
archive_url = (
|
if availability_api_instance.archive_url:
|
||||||
|
archive_url = availability_api_instance.archive_url
|
||||||
|
except ArchiveNotInAvailabilityAPIResponse as error:
|
||||||
|
message = (
|
||||||
"NO ARCHIVE FOUND - The requested URL is probably "
|
"NO ARCHIVE FOUND - The requested URL is probably "
|
||||||
+ "not yet archived or if the URL was recently archived then it is "
|
+ "not yet archived or if the URL was recently archived then it is "
|
||||||
+ "not yet available via the Wayback Machine's availability API "
|
+ "not yet available via the Wayback Machine's availability API "
|
||||||
+ "because of database lag and should be available after some time."
|
+ "because of database lag and should be available after some time."
|
||||||
)
|
)
|
||||||
else:
|
|
||||||
archive_url = availability_api_instance.archive_url
|
click.echo(message + "\nJSON response:\n" + str(error), err=True)
|
||||||
|
return
|
||||||
|
|
||||||
click.echo("Archive URL:")
|
click.echo("Archive URL:")
|
||||||
click.echo(archive_url)
|
click.echo(archive_url)
|
||||||
if json:
|
if json:
|
||||||
@@ -43,6 +49,70 @@ def echo_availability_api(
|
|||||||
click.echo(dumps(availability_api_instance.json))
|
click.echo(dumps(availability_api_instance.json))
|
||||||
|
|
||||||
|
|
||||||
|
def handle_cdx(data: List[Any]) -> None:
|
||||||
|
"""
|
||||||
|
Handles the CDX CLI options and output format.
|
||||||
|
"""
|
||||||
|
url = data[0]
|
||||||
|
user_agent = data[1]
|
||||||
|
start_timestamp = data[2]
|
||||||
|
end_timestamp = data[3]
|
||||||
|
cdx_filter = data[4]
|
||||||
|
collapse = data[5]
|
||||||
|
cdx_print = data[6]
|
||||||
|
limit = data[7]
|
||||||
|
gzip = data[8]
|
||||||
|
match_type = data[9]
|
||||||
|
|
||||||
|
filters = list(cdx_filter)
|
||||||
|
collapses = list(collapse)
|
||||||
|
cdx_print = list(cdx_print)
|
||||||
|
|
||||||
|
cdx_api = WaybackMachineCDXServerAPI(
|
||||||
|
url,
|
||||||
|
user_agent=user_agent,
|
||||||
|
start_timestamp=start_timestamp,
|
||||||
|
end_timestamp=end_timestamp,
|
||||||
|
filters=filters,
|
||||||
|
match_type=match_type,
|
||||||
|
gzip=gzip,
|
||||||
|
collapses=collapses,
|
||||||
|
limit=limit,
|
||||||
|
)
|
||||||
|
|
||||||
|
snapshots = cdx_api.snapshots()
|
||||||
|
|
||||||
|
for snapshot in snapshots:
|
||||||
|
if len(cdx_print) == 0:
|
||||||
|
click.echo(snapshot)
|
||||||
|
else:
|
||||||
|
output_string = []
|
||||||
|
if any(val in cdx_print for val in ["urlkey", "url-key", "url_key"]):
|
||||||
|
output_string.append(snapshot.urlkey)
|
||||||
|
if any(
|
||||||
|
val in cdx_print for val in ["timestamp", "time-stamp", "time_stamp"]
|
||||||
|
):
|
||||||
|
output_string.append(snapshot.timestamp)
|
||||||
|
if "original" in cdx_print:
|
||||||
|
output_string.append(snapshot.original)
|
||||||
|
if any(val in cdx_print for val in ["mimetype", "mime-type", "mime_type"]):
|
||||||
|
output_string.append(snapshot.mimetype)
|
||||||
|
if any(
|
||||||
|
val in cdx_print for val in ["statuscode", "status-code", "status_code"]
|
||||||
|
):
|
||||||
|
output_string.append(snapshot.statuscode)
|
||||||
|
if "digest" in cdx_print:
|
||||||
|
output_string.append(snapshot.digest)
|
||||||
|
if "length" in cdx_print:
|
||||||
|
output_string.append(snapshot.length)
|
||||||
|
if any(
|
||||||
|
val in cdx_print for val in ["archiveurl", "archive-url", "archive_url"]
|
||||||
|
):
|
||||||
|
output_string.append(snapshot.archive_url)
|
||||||
|
|
||||||
|
click.echo(" ".join(output_string))
|
||||||
|
|
||||||
|
|
||||||
def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
|
def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
|
||||||
"""
|
"""
|
||||||
Save output of CDX API on file.
|
Save output of CDX API on file.
|
||||||
@@ -231,7 +301,7 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
|
|||||||
"-l",
|
"-l",
|
||||||
"--limit",
|
"--limit",
|
||||||
help="Number of maximum record that CDX API is asked to return per API call, "
|
help="Number of maximum record that CDX API is asked to return per API call, "
|
||||||
+ "default value is 500 records.",
|
+ "default value is 25000 records.",
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"-cp",
|
"-cp",
|
||||||
@@ -347,58 +417,19 @@ def main( # pylint: disable=no-value-for-parameter
|
|||||||
click.echo(url_)
|
click.echo(url_)
|
||||||
|
|
||||||
elif cdx:
|
elif cdx:
|
||||||
filters = list(cdx_filter)
|
data = [
|
||||||
collapses = list(collapse)
|
|
||||||
cdx_print = list(cdx_print)
|
|
||||||
|
|
||||||
cdx_api = WaybackMachineCDXServerAPI(
|
|
||||||
url,
|
url,
|
||||||
user_agent=user_agent,
|
user_agent,
|
||||||
start_timestamp=start_timestamp,
|
start_timestamp,
|
||||||
end_timestamp=end_timestamp,
|
end_timestamp,
|
||||||
filters=filters,
|
cdx_filter,
|
||||||
match_type=match_type,
|
collapse,
|
||||||
gzip=gzip,
|
cdx_print,
|
||||||
collapses=collapses,
|
limit,
|
||||||
limit=limit,
|
gzip,
|
||||||
)
|
match_type,
|
||||||
|
]
|
||||||
snapshots = cdx_api.snapshots()
|
handle_cdx(data)
|
||||||
|
|
||||||
for snapshot in snapshots:
|
|
||||||
if len(cdx_print) == 0:
|
|
||||||
click.echo(snapshot)
|
|
||||||
else:
|
|
||||||
output_string = []
|
|
||||||
if any(val in cdx_print for val in ["urlkey", "url-key", "url_key"]):
|
|
||||||
output_string.append(snapshot.urlkey)
|
|
||||||
if any(
|
|
||||||
val in cdx_print
|
|
||||||
for val in ["timestamp", "time-stamp", "time_stamp"]
|
|
||||||
):
|
|
||||||
output_string.append(snapshot.timestamp)
|
|
||||||
if "original" in cdx_print:
|
|
||||||
output_string.append(snapshot.original)
|
|
||||||
if any(
|
|
||||||
val in cdx_print for val in ["mimetype", "mime-type", "mime_type"]
|
|
||||||
):
|
|
||||||
output_string.append(snapshot.mimetype)
|
|
||||||
if any(
|
|
||||||
val in cdx_print
|
|
||||||
for val in ["statuscode", "status-code", "status_code"]
|
|
||||||
):
|
|
||||||
output_string.append(snapshot.statuscode)
|
|
||||||
if "digest" in cdx_print:
|
|
||||||
output_string.append(snapshot.digest)
|
|
||||||
if "length" in cdx_print:
|
|
||||||
output_string.append(snapshot.length)
|
|
||||||
if any(
|
|
||||||
val in cdx_print
|
|
||||||
for val in ["archiveurl", "archive-url", "archive_url"]
|
|
||||||
):
|
|
||||||
output_string.append(snapshot.archive_url)
|
|
||||||
|
|
||||||
click.echo(" ".join(output_string))
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
click.echo(
|
click.echo(
|
||||||
|
@@ -12,20 +12,7 @@ class WaybackError(Exception):
|
|||||||
1) Wayback Machine API Service is unreachable/down.
|
1) Wayback Machine API Service is unreachable/down.
|
||||||
2) You passed illegal arguments.
|
2) You passed illegal arguments.
|
||||||
|
|
||||||
All other exceptions are inherited from this class.
|
All other exceptions are inherited from this main exception.
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class RedirectSaveError(WaybackError):
|
|
||||||
"""
|
|
||||||
Raised when the original URL is redirected and the
|
|
||||||
redirect URL is archived but not the original URL.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class URLError(Exception):
|
|
||||||
"""
|
|
||||||
Raised when malformed URLs are passed as arguments.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -33,6 +20,8 @@ class TooManyRequestsError(WaybackError):
|
|||||||
"""
|
"""
|
||||||
Raised when you make more than 15 requests per
|
Raised when you make more than 15 requests per
|
||||||
minute and the Wayback Machine returns 429.
|
minute and the Wayback Machine returns 429.
|
||||||
|
|
||||||
|
See https://github.com/akamhy/waybackpy/issues/131
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@@ -2,49 +2,8 @@
|
|||||||
Utility functions and shared variables like DEFAULT_USER_AGENT are here.
|
Utility functions and shared variables like DEFAULT_USER_AGENT are here.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from . import __version__
|
from . import __version__
|
||||||
|
|
||||||
DEFAULT_USER_AGENT: str = (
|
DEFAULT_USER_AGENT: str = (
|
||||||
f"waybackpy {__version__} - https://github.com/akamhy/waybackpy"
|
f"waybackpy {__version__} - https://github.com/akamhy/waybackpy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def latest_version_pypi(package_name: str, user_agent: str = DEFAULT_USER_AGENT) -> str:
|
|
||||||
"""Latest waybackpy version on PyPi."""
|
|
||||||
request_url = "https://pypi.org/pypi/" + package_name + "/json"
|
|
||||||
headers = {"User-Agent": user_agent}
|
|
||||||
response = requests.get(request_url, headers=headers)
|
|
||||||
data = response.json()
|
|
||||||
if (
|
|
||||||
data is not None
|
|
||||||
and "info" in data
|
|
||||||
and data["info"] is not None
|
|
||||||
and "version" in data["info"]
|
|
||||||
and data["info"]["version"] is not None
|
|
||||||
):
|
|
||||||
return str(data["info"]["version"])
|
|
||||||
|
|
||||||
raise ValueError("Could not get latest pypi version")
|
|
||||||
|
|
||||||
|
|
||||||
def latest_version_github(
|
|
||||||
package_name: str, user_agent: str = DEFAULT_USER_AGENT
|
|
||||||
) -> str:
|
|
||||||
"""Latest waybackpy version on GitHub."""
|
|
||||||
request_url = (
|
|
||||||
"https://api.github.com/repos/akamhy/" + package_name + "/releases?per_page=1"
|
|
||||||
)
|
|
||||||
headers = {"User-Agent": user_agent}
|
|
||||||
response = requests.get(request_url, headers=headers)
|
|
||||||
data = response.json()
|
|
||||||
if (
|
|
||||||
data is not None
|
|
||||||
and len(data) > 0
|
|
||||||
and data[0] is not None
|
|
||||||
and "tag_name" in data[0]
|
|
||||||
):
|
|
||||||
return str(data[0]["tag_name"])
|
|
||||||
|
|
||||||
raise ValueError("Could not get latest github version")
|
|
||||||
|
@@ -113,6 +113,7 @@ class Url:
|
|||||||
"""Set the attributes for total backwards compatibility."""
|
"""Set the attributes for total backwards compatibility."""
|
||||||
self.archive_url = self.wayback_machine_availability_api.archive_url
|
self.archive_url = self.wayback_machine_availability_api.archive_url
|
||||||
self.json = self.wayback_machine_availability_api.json
|
self.json = self.wayback_machine_availability_api.json
|
||||||
|
self.JSON = self.json # for backwards compatibility, do not remove it.
|
||||||
self.timestamp = self.wayback_machine_availability_api.timestamp()
|
self.timestamp = self.wayback_machine_availability_api.timestamp()
|
||||||
|
|
||||||
def total_archives(
|
def total_archives(
|
||||||
|
Reference in New Issue
Block a user