added more docstrings
This commit is contained in:
@@ -31,9 +31,10 @@ def _latest_version(package_name, headers):
|
|||||||
on PyPi.
|
on PyPi.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
endpoint = "https://pypi.org/pypi/" + package_name + "/json"
|
request_url = "https://pypi.org/pypi/" + package_name + "/json"
|
||||||
json = _get_response(endpoint, headers=headers).json()
|
response = _get_response(request_url, headers=headers)
|
||||||
return json["info"]["version"]
|
data = response.json()
|
||||||
|
return data["info"]["version"]
|
||||||
|
|
||||||
|
|
||||||
def _unix_timestamp_to_wayback_timestamp(unix_timestamp):
|
def _unix_timestamp_to_wayback_timestamp(unix_timestamp):
|
||||||
@@ -121,6 +122,21 @@ def _timestamp_manager(timestamp, data):
|
|||||||
|
|
||||||
|
|
||||||
def _check_match_type(match_type, url):
|
def _check_match_type(match_type, url):
|
||||||
|
"""Checks the validity of match_type parameter of the CDX GET requests.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
match_type : list
|
||||||
|
list that may contain any or all from ["exact", "prefix", "host", "domain"]
|
||||||
|
See https://github.com/akamhy/waybackpy/wiki/Python-package-docs#url-match-scope
|
||||||
|
|
||||||
|
url : str
|
||||||
|
The URL used to create the waybackpy Url object.
|
||||||
|
|
||||||
|
Raises an Exception if match_type is not valid.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
if not match_type:
|
if not match_type:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -137,6 +153,19 @@ def _check_match_type(match_type, url):
|
|||||||
|
|
||||||
|
|
||||||
def _check_collapses(collapses):
|
def _check_collapses(collapses):
|
||||||
|
"""Checks the validity of collapse parameter of the CDX GET request.
|
||||||
|
|
||||||
|
One or more field or field:N to 'collapses=[]' where
|
||||||
|
field is one of (urlkey, timestamp, original, mimetype, statuscode,
|
||||||
|
digest and length) and N is the first N characters of field to test.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
collapses : list
|
||||||
|
|
||||||
|
Raises an Exception if collapses is not valid.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
if not isinstance(collapses, list):
|
if not isinstance(collapses, list):
|
||||||
raise WaybackError("collapses must be a list.")
|
raise WaybackError("collapses must be a list.")
|
||||||
@@ -171,12 +200,26 @@ def _check_collapses(collapses):
|
|||||||
|
|
||||||
|
|
||||||
def _check_filters(filters):
|
def _check_filters(filters):
|
||||||
|
"""Checks the validity of filter parameter of the CDX GET request.
|
||||||
|
|
||||||
|
Any number of filter params of the following form may be specified:
|
||||||
|
filters=["[!]field:regex"]
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
filters : list
|
||||||
|
|
||||||
|
Raises an Exception if filters is not valid.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
if not isinstance(filters, list):
|
if not isinstance(filters, list):
|
||||||
raise WaybackError("filters must be a list.")
|
raise WaybackError("filters must be a list.")
|
||||||
|
|
||||||
# [!]field:regex
|
# [!]field:regex
|
||||||
for _filter in filters:
|
for _filter in filters:
|
||||||
try:
|
try:
|
||||||
|
|
||||||
match = re.search(
|
match = re.search(
|
||||||
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
|
r"(\!?(?:urlkey|timestamp|original|mimetype|statuscode|digest|length)):(.*)",
|
||||||
_filter,
|
_filter,
|
||||||
@@ -186,8 +229,9 @@ def _check_filters(filters):
|
|||||||
val = match.group(2)
|
val = match.group(2)
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
||||||
exc_message = (
|
exc_message = (
|
||||||
"Filter '{_filter}' not following the cdx filter syntax.".format(
|
"Filter '{_filter}' is not following the cdx filter syntax.".format(
|
||||||
_filter=_filter
|
_filter=_filter
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -216,16 +260,29 @@ def _url_check(url):
|
|||||||
|
|
||||||
|
|
||||||
def _full_url(endpoint, params):
|
def _full_url(endpoint, params):
|
||||||
full_url = endpoint
|
"""API endpoint + GET parameters = full_url
|
||||||
if params:
|
|
||||||
full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
|
Parameters
|
||||||
for key, val in params.items():
|
----------
|
||||||
key = "filter" if key.startswith("filter") else key
|
endpoint : str
|
||||||
key = "collapse" if key.startswith("collapse") else key
|
The API endpoint
|
||||||
amp = "" if full_url.endswith("?") else "&"
|
|
||||||
full_url = (
|
params : dict
|
||||||
full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
|
Dictionary that has name-value pairs.
|
||||||
)
|
|
||||||
|
Return type is str
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not params:
|
||||||
|
return endpoint
|
||||||
|
|
||||||
|
full_url = endpoint if endpoint.endswith("?") else (endpoint + "?")
|
||||||
|
for key, val in params.items():
|
||||||
|
key = "filter" if key.startswith("filter") else key
|
||||||
|
key = "collapse" if key.startswith("collapse") else key
|
||||||
|
amp = "" if full_url.endswith("?") else "&"
|
||||||
|
full_url = full_url + amp + "{key}={val}".format(key=key, val=quote(str(val)))
|
||||||
return full_url
|
return full_url
|
||||||
|
|
||||||
|
|
||||||
@@ -246,7 +303,23 @@ def _get_total_pages(url, user_agent):
|
|||||||
|
|
||||||
|
|
||||||
def _archive_url_parser(header, url, latest_version=__version__, instance=None):
|
def _archive_url_parser(header, url, latest_version=__version__, instance=None):
|
||||||
"""
|
"""Returns the archive after parsing it from the response header.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
header : str
|
||||||
|
The response header of WayBack Machine's Save API
|
||||||
|
|
||||||
|
url : str
|
||||||
|
The input url, the one used to create the Url object.
|
||||||
|
|
||||||
|
latest_version : str
|
||||||
|
The latest version of waybackpy (default is __version__)
|
||||||
|
|
||||||
|
instance : waybackpy.wrapper.Url
|
||||||
|
Instance of Url class
|
||||||
|
|
||||||
|
|
||||||
The wayback machine's save API doesn't
|
The wayback machine's save API doesn't
|
||||||
return JSON response, we are required
|
return JSON response, we are required
|
||||||
to read the header of the API response
|
to read the header of the API response
|
||||||
@@ -358,7 +431,6 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
|
|||||||
def _wayback_timestamp(**kwargs):
|
def _wayback_timestamp(**kwargs):
|
||||||
"""Returns a valid waybackpy timestamp.
|
"""Returns a valid waybackpy timestamp.
|
||||||
|
|
||||||
|
|
||||||
The standard archive URL format is
|
The standard archive URL format is
|
||||||
https://web.archive.org/web/20191214041711/https://www.youtube.com
|
https://web.archive.org/web/20191214041711/https://www.youtube.com
|
||||||
|
|
||||||
@@ -444,20 +516,28 @@ def _get_response(
|
|||||||
|
|
||||||
s.mount("https://", HTTPAdapter(max_retries=retries))
|
s.mount("https://", HTTPAdapter(max_retries=retries))
|
||||||
|
|
||||||
|
# The URL with parameters required for the get request
|
||||||
url = _full_url(endpoint, params)
|
url = _full_url(endpoint, params)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
if not return_full_url:
|
if not return_full_url:
|
||||||
return s.get(url, headers=headers)
|
return s.get(url, headers=headers)
|
||||||
|
|
||||||
return (url, s.get(url, headers=headers))
|
return (url, s.get(url, headers=headers))
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
||||||
reason = str(e)
|
reason = str(e)
|
||||||
|
|
||||||
if no_raise_on_redirects:
|
if no_raise_on_redirects:
|
||||||
if "Exceeded 30 redirects" in reason:
|
if "Exceeded 30 redirects" in reason:
|
||||||
return
|
return
|
||||||
|
|
||||||
exc_message = "Error while retrieving {url}.\n{reason}".format(
|
exc_message = "Error while retrieving {url}.\n{reason}".format(
|
||||||
url=url, reason=reason
|
url=url, reason=reason
|
||||||
)
|
)
|
||||||
|
|
||||||
exc = WaybackError(exc_message)
|
exc = WaybackError(exc_message)
|
||||||
exc.__cause__ = e
|
exc.__cause__ = e
|
||||||
raise exc
|
raise exc
|
||||||
|
Reference in New Issue
Block a user