Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
2650943f9d | |||
4b218d35cb | |||
f990b93f8a | |||
3a44a710d3 | |||
f63c6adf79 | |||
b4d3393ef1 |
112
README.md
112
README.md
@ -84,28 +84,68 @@ False
|
||||
datetime.datetime(2022, 1, 18, 12, 52, 49)
|
||||
```
|
||||
|
||||
#### Availability API
|
||||
|
||||
```python
|
||||
>>> from waybackpy import WaybackMachineAvailabilityAPI
|
||||
>>>
|
||||
>>> url = "https://google.com"
|
||||
>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0"
|
||||
>>>
|
||||
>>> availability_api = WaybackMachineAvailabilityAPI(url, user_agent)
|
||||
>>>
|
||||
>>> availability_api.oldest()
|
||||
https://web.archive.org/web/19981111184551/http://google.com:80/
|
||||
>>>
|
||||
>>> availability_api.newest()
|
||||
https://web.archive.org/web/20220118150444/https://www.google.com/
|
||||
>>>
|
||||
>>> availability_api.near(year=2010, month=10, day=10, hour=10)
|
||||
https://web.archive.org/web/20101010101708/http://www.google.com/
|
||||
```
|
||||
|
||||
#### CDX API aka CDXServerAPI
|
||||
|
||||
```python
|
||||
>>> from waybackpy import WaybackMachineCDXServerAPI
|
||||
>>> url = "https://google.com"
|
||||
>>> user_agent = "my new app's user agent"
|
||||
>>> cdx_api = WaybackMachineCDXServerAPI(url, user_agent)
|
||||
```
|
||||
##### oldest
|
||||
```python
|
||||
>>> cdx_api.oldest()
|
||||
com,google)/ 19981111184551 http://google.com:80/ text/html 200 HOQ2TGPYAEQJPNUA6M4SMZ3NGQRBXDZ3 381
|
||||
>>> oldest = cdx_api.oldest()
|
||||
>>> oldest
|
||||
com,google)/ 19981111184551 http://google.com:80/ text/html 200 HOQ2TGPYAEQJPNUA6M4SMZ3NGQRBXDZ3 381
|
||||
>>> oldest.archive_url
|
||||
'https://web.archive.org/web/19981111184551/http://google.com:80/'
|
||||
>>> oldest.original
|
||||
'http://google.com:80/'
|
||||
>>> oldest.urlkey
|
||||
'com,google)/'
|
||||
>>> oldest.timestamp
|
||||
'19981111184551'
|
||||
>>> oldest.datetime_timestamp
|
||||
datetime.datetime(1998, 11, 11, 18, 45, 51)
|
||||
>>> oldest.statuscode
|
||||
'200'
|
||||
>>> oldest.mimetype
|
||||
'text/html'
|
||||
```
|
||||
##### newest
|
||||
```python
|
||||
>>> newest = cdx_api.newest()
|
||||
>>> newest
|
||||
com,google)/ 20220217234427 http://@google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 563
|
||||
>>> newest.archive_url
|
||||
'https://web.archive.org/web/20220217234427/http://@google.com/'
|
||||
>>> newest.timestamp
|
||||
'20220217234427'
|
||||
```
|
||||
##### near
|
||||
```python
|
||||
>>> near = cdx_api.near(year=2010, month=10, day=10, hour=10, minute=10)
|
||||
>>> near.archive_url
|
||||
'https://web.archive.org/web/20101010101435/http://google.com/'
|
||||
>>> near
|
||||
com,google)/ 20101010101435 http://google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 391
|
||||
>>> near.timestamp
|
||||
'20101010101435'
|
||||
>>> near.timestamp
|
||||
'20101010101435'
|
||||
>>> near = cdx_api.near(wayback_machine_timestamp=2008080808)
|
||||
>>> near.archive_url
|
||||
'https://web.archive.org/web/20080808051143/http://google.com/'
|
||||
>>> near = cdx_api.near(unix_timestamp=1286705410)
|
||||
>>> near
|
||||
com,google)/ 20101010101435 http://google.com/ text/html 301 Y6PVK4XWOI3BXQEXM5WLLWU5JKUVNSFZ 391
|
||||
>>> near.archive_url
|
||||
'https://web.archive.org/web/20101010101435/http://google.com/'
|
||||
>>>
|
||||
```
|
||||
##### snapshots
|
||||
```python
|
||||
>>> from waybackpy import WaybackMachineCDXServerAPI
|
||||
>>> url = "https://pypi.org"
|
||||
@ -123,13 +163,41 @@ https://web.archive.org/web/20171127171549/https://pypi.org/
|
||||
https://web.archive.org/web/20171206002737/http://pypi.org:80/
|
||||
```
|
||||
|
||||
#### Availability API
|
||||
|
||||
It is recommended not to use the availability API due to performance issues. All the methods of the availability API interface class, `WaybackMachineAvailabilityAPI`, are also implemented in the CDX server API interface class, `WaybackMachineCDXServerAPI`.
|
||||
|
||||
```python
|
||||
>>> from waybackpy import WaybackMachineAvailabilityAPI
|
||||
>>>
|
||||
>>> url = "https://google.com"
|
||||
>>> user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0"
|
||||
>>>
|
||||
>>> availability_api = WaybackMachineAvailabilityAPI(url, user_agent)
|
||||
```
|
||||
##### oldest
|
||||
```python
|
||||
>>> availability_api.oldest()
|
||||
https://web.archive.org/web/19981111184551/http://google.com:80/
|
||||
```
|
||||
##### newest
|
||||
```python
|
||||
>>> availability_api.newest()
|
||||
https://web.archive.org/web/20220118150444/https://www.google.com/
|
||||
```
|
||||
##### near
|
||||
```python
|
||||
>>> availability_api.near(year=2010, month=10, day=10, hour=10)
|
||||
https://web.archive.org/web/20101010101708/http://www.google.com/
|
||||
```
|
||||
|
||||
> Documentation is at <https://github.com/akamhy/waybackpy/wiki/Python-package-docs>.
|
||||
|
||||
### As a CLI tool
|
||||
|
||||
Demo video on [asciinema.org](https://asciinema.org), you can copy the text from video:
|
||||
Demo video on [asciinema.org](https://asciinema.org/a/469890); you can copy the text from the video:
|
||||
|
||||
[](https://asciinema.org/a/464367)
|
||||
[](https://asciinema.org/a/469890)
|
||||
|
||||
> CLI documentation is at <https://github.com/akamhy/waybackpy/wiki/CLI-docs>.
|
||||
|
||||
|
12
setup.cfg
12
setup.cfg
@ -1,14 +1,14 @@
|
||||
[metadata]
|
||||
name = waybackpy
|
||||
version = attr: waybackpy.__version__
|
||||
description = attr: waybackpy.__description__
|
||||
description = Python package that interfaces with the Internet Archive's Wayback Machine APIs. Archive pages and retrieve archived pages easily.
|
||||
long_description = file: README.md
|
||||
long_description_content_type = text/markdown
|
||||
license = attr: waybackpy.__license__
|
||||
author = attr: waybackpy.__author__
|
||||
author_email = attr: waybackpy.__author_email__
|
||||
url = attr: waybackpy.__url__
|
||||
download_url = attr: waybackpy.__download_url__
|
||||
license = MIT
|
||||
author = Akash Mahanty
|
||||
author_email = akamhy@yahoo.com
|
||||
url = https://akamhy.github.io/waybackpy/
|
||||
download_url = https://github.com/akamhy/waybackpy/releases
|
||||
project_urls =
|
||||
Documentation = https://github.com/akamhy/waybackpy/wiki
|
||||
Source = https://github.com/akamhy/waybackpy
|
||||
|
@ -1,4 +1,16 @@
|
||||
import random
|
||||
import string
|
||||
|
||||
import pytest
|
||||
|
||||
from waybackpy.cdx_api import WaybackMachineCDXServerAPI
|
||||
from waybackpy.exceptions import NoCDXRecordFound
|
||||
|
||||
|
||||
def rndstr(n: int) -> str:
    """Return a random string of ``n`` uppercase ASCII letters and digits."""
    alphabet = string.ascii_uppercase + string.digits
    picked = [random.choice(alphabet) for _ in range(n)]
    return "".join(picked)
|
||||
|
||||
|
||||
def test_a() -> None:
|
||||
@ -32,7 +44,11 @@ def test_b() -> None:
|
||||
url = "https://www.google.com"
|
||||
|
||||
wayback = WaybackMachineCDXServerAPI(
|
||||
url=url, user_agent=user_agent, start_timestamp="202101", end_timestamp="202112"
|
||||
url=url,
|
||||
user_agent=user_agent,
|
||||
start_timestamp="202101",
|
||||
end_timestamp="202112",
|
||||
collapses=["urlkey"],
|
||||
)
|
||||
# timeframe bound prefix matching enabled along with active urlkey based collapsing
|
||||
|
||||
@ -40,3 +56,123 @@ def test_b() -> None:
|
||||
|
||||
for snapshot in snapshots:
|
||||
assert snapshot.timestamp.startswith("2021")
|
||||
|
||||
|
||||
def test_c() -> None:
    """Query the CDX server for the single google.com snapshot closest to 2010-10-10 10:10."""
    user_agent = (
        "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
    )
    url = "https://www.google.com"

    cdx = WaybackMachineCDXServerAPI(
        url=url,
        user_agent=user_agent,
        closest="201010101010",
        sort="closest",
        limit="1",
    )

    # Only the first yielded record is inspected. Using a None default makes an
    # empty result fail with a clear assertion instead of an unbound-name error.
    first = next(iter(cdx.snapshots()), None)
    assert first is not None, "CDX server returned no snapshots"

    # str.find() returns -1 (truthy) on a miss, so asserting its result directly
    # can never fail for a missing substring; test membership instead.
    assert "google.com" in str(first.archive_url)
    assert "20101010" in first.timestamp
|
||||
|
||||
|
||||
def test_d() -> None:
    """Prefix-match akamhy.github.io through the pagination API and expect more than 50 hits."""
    user_agent = (
        "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
    )

    cdx = WaybackMachineCDXServerAPI(
        url="akamhy.github.io",
        user_agent=user_agent,
        match_type="prefix",
        use_pagination=True,
        filters=["statuscode:200"],
    )

    count = 0
    for snapshot in cdx.snapshots():
        count += 1
        # str.find() returns -1 (truthy) on a miss, so the bare find() assertion
        # was vacuous. Compare case-insensitively because the archived original
        # URL may differ in case from the queried host.
        assert "akamhy.github.io" in str(snapshot.archive_url).lower()
    assert count > 50
|
||||
|
||||
|
||||
def test_oldest() -> None:
    """The oldest status-200 google.com record should date from 1998."""
    agent = (
        "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
    )
    api = WaybackMachineCDXServerAPI(
        url="google.com",
        user_agent=agent,
        filters=["statuscode:200"],
    )

    record = api.oldest()

    assert "1998" in record.timestamp
    assert "google" in record.urlkey
    assert "google.com" in record.original
    assert "google.com" in record.archive_url
|
||||
|
||||
|
||||
def test_newest() -> None:
    """The newest status-200 google.com record should reference google.com."""
    agent = (
        "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
    )
    api = WaybackMachineCDXServerAPI(
        url="google.com",
        user_agent=agent,
        filters=["statuscode:200"],
    )

    record = api.newest()

    assert "google" in record.urlkey
    assert "google.com" in record.original
    assert "google.com" in record.archive_url
|
||||
|
||||
|
||||
def test_near() -> None:
    """near() accepts date parts, a Wayback timestamp or a UNIX timestamp."""
    user_agent = (
        "Mozilla/5.0 (MacBook Air; M1 Mac OS X 11_4) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/604.1"
    )

    cdx = WaybackMachineCDXServerAPI(
        url="google.com",
        user_agent=user_agent,
        filters=["statuscode:200"],
    )

    # Three ways of addressing the same moment: 2010-10-10 10:10 UTC.
    queries = (
        {"year": 2010, "month": 10, "day": 10, "hour": 10, "minute": 10},
        {"wayback_machine_timestamp": "201010101010"},
        {"unix_timestamp": 1286705410},
    )
    for query in queries:
        near = cdx.near(**query)
        assert "2010101010" in near.timestamp
        assert "google" in near.urlkey
        assert "google.com" in near.original
        assert "google.com" in near.archive_url

    # A random 30-character host is expected to have no CDX records at all.
    with pytest.raises(NoCDXRecordFound):
        dne_url = f"https://{rndstr(30)}.in"
        cdx = WaybackMachineCDXServerAPI(
            url=dne_url,
            user_agent=user_agent,
            filters=["statuscode:200"],
        )
        cdx.near(unix_timestamp=1286705410)
|
||||
|
@ -41,3 +41,4 @@ def test_CDXSnapshot() -> None:
|
||||
)
|
||||
assert archive_url == snapshot.archive_url
|
||||
assert sample_input == str(snapshot)
|
||||
assert sample_input == repr(snapshot)
|
||||
|
@ -6,6 +6,7 @@ from waybackpy.cdx_utils import (
|
||||
check_collapses,
|
||||
check_filters,
|
||||
check_match_type,
|
||||
check_sort,
|
||||
full_url,
|
||||
get_response,
|
||||
get_total_pages,
|
||||
@ -101,3 +102,12 @@ def test_check_match_type() -> None:
|
||||
|
||||
with pytest.raises(WaybackError):
|
||||
check_match_type("not a valid type", "url")
|
||||
|
||||
|
||||
def test_check_sort() -> None:
    """check_sort accepts the three legal sort values and rejects anything else."""
    for legal in ("default", "closest", "reverse"):
        assert check_sort(legal)

    with pytest.raises(WaybackError):
        check_sort("random crap")
|
||||
|
@ -42,39 +42,6 @@ def test_near() -> None:
|
||||
)
|
||||
|
||||
|
||||
def test_json() -> None:
|
||||
runner = CliRunner()
|
||||
result = runner.invoke(
|
||||
main,
|
||||
[
|
||||
"--url",
|
||||
" https://apple.com ",
|
||||
"--near",
|
||||
"--year",
|
||||
"2010",
|
||||
"--month",
|
||||
"2",
|
||||
"--day",
|
||||
"8",
|
||||
"--hour",
|
||||
"12",
|
||||
"--json",
|
||||
],
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
assert (
|
||||
result.output.find(
|
||||
"""Archive URL:\nhttps://web.archive.org/web/2010020812\
|
||||
5854/http://www.apple.com/\nJSON respons\
|
||||
e:\n{"url": "https://apple.com", "archived_snapshots": {"close\
|
||||
st": {"status": "200", "available": true, "url": "http://web.ar\
|
||||
chive.org/web/20100208125854/http://www.apple.com/", "timest\
|
||||
amp": "20100208125854"}}, "timestamp":"""
|
||||
)
|
||||
!= -1
|
||||
)
|
||||
|
||||
|
||||
def test_newest() -> None:
|
||||
runner = CliRunner()
|
||||
result = runner.invoke(main, ["--url", " https://microsoft.com ", "--newest"])
|
||||
@ -145,7 +112,7 @@ def test_only_url() -> None:
|
||||
assert result.exit_code == 0
|
||||
assert (
|
||||
result.output
|
||||
== "Only URL passed, but did not specify what to do with the URL. Use \
|
||||
== "NoCommandFound: Only URL passed, but did not specify what to do with the URL. Use \
|
||||
--help flag for help using waybackpy.\n"
|
||||
)
|
||||
|
||||
|
@ -219,4 +219,5 @@ def test_archive_url() -> None:
|
||||
save_api.saved_archive = (
|
||||
"https://web.archive.org/web/20220124063056/https://example.com/"
|
||||
)
|
||||
save_api._archive_url = save_api.saved_archive
|
||||
assert save_api.archive_url == save_api.saved_archive
|
||||
|
@ -35,4 +35,11 @@ def test_total_archives() -> None:
|
||||
|
||||
def test_known_urls() -> None:
    """known_urls() should yield more than 40 URLs, with and without subdomains."""
    wayback = Url("akamhy.github.io")

    without_subdomains = list(wayback.known_urls())
    assert len(without_subdomains) > 40

    with_subdomains = list(wayback.known_urls(subdomain=True))
    assert len(with_subdomains) > 40
|
||||
|
||||
|
||||
def test_Save() -> None:
    """Saving a page should produce an archive URL that contains the page name."""
    page = Url("https://en.wikipedia.org/wiki/Asymptotic_equipartition_property")
    page.save()
    assert "Asymptotic_equipartition_property" in str(page.archive_url)
|
||||
|
@ -1,17 +1,6 @@
|
||||
"""Module initializer and provider of static information."""
|
||||
|
||||
__title__ = "waybackpy"
|
||||
__description__ = (
|
||||
"Python package that interfaces with the Internet Archive's Wayback Machine APIs. "
|
||||
"Archive pages and retrieve archived pages easily."
|
||||
)
|
||||
__url__ = "https://akamhy.github.io/waybackpy/"
|
||||
__version__ = "3.0.3"
|
||||
__download_url__ = f"https://github.com/akamhy/waybackpy/archive/{__version__}.tar.gz"
|
||||
__author__ = "Akash Mahanty"
|
||||
__author_email__ = "akamhy@yahoo.com"
|
||||
__license__ = "MIT"
|
||||
__copyright__ = "Copyright 2020-2022 Akash Mahanty et al."
|
||||
__version__ = "3.0.4"
|
||||
|
||||
from .availability_api import WaybackMachineAvailabilityAPI
|
||||
from .cdx_api import WaybackMachineCDXServerAPI
|
||||
@ -19,14 +8,6 @@ from .save_api import WaybackMachineSaveAPI
|
||||
from .wrapper import Url
|
||||
|
||||
__all__ = [
|
||||
"__author__",
|
||||
"__author_email__",
|
||||
"__copyright__",
|
||||
"__description__",
|
||||
"__license__",
|
||||
"__title__",
|
||||
"__url__",
|
||||
"__download_url__",
|
||||
"__version__",
|
||||
"WaybackMachineAvailabilityAPI",
|
||||
"WaybackMachineCDXServerAPI",
|
||||
|
@ -32,7 +32,11 @@ from .exceptions import (
|
||||
ArchiveNotInAvailabilityAPIResponse,
|
||||
InvalidJSONInAvailabilityAPIResponse,
|
||||
)
|
||||
from .utils import DEFAULT_USER_AGENT
|
||||
from .utils import (
|
||||
DEFAULT_USER_AGENT,
|
||||
unix_timestamp_to_wayback_timestamp,
|
||||
wayback_timestamp,
|
||||
)
|
||||
|
||||
ResponseJSON = Dict[str, Any]
|
||||
|
||||
@ -58,14 +62,6 @@ class WaybackMachineAvailabilityAPI:
|
||||
self.json: Optional[ResponseJSON] = None
|
||||
self.response: Optional[Response] = None
|
||||
|
||||
@staticmethod
|
||||
def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str:
|
||||
"""
|
||||
Converts Unix time to Wayback Machine timestamp, Wayback Machine
|
||||
timestamp format is yyyyMMddhhmmss.
|
||||
"""
|
||||
return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
"""
|
||||
Same as string representation, just return the archive URL as a string.
|
||||
@ -194,17 +190,6 @@ class WaybackMachineAvailabilityAPI:
|
||||
)
|
||||
return archive_url
|
||||
|
||||
@staticmethod
|
||||
def wayback_timestamp(**kwargs: int) -> str:
|
||||
"""
|
||||
Prepends zero before the year, month, day, hour and minute so that they
|
||||
are conformable with the YYYYMMDDhhmmss Wayback Machine timestamp format.
|
||||
"""
|
||||
return "".join(
|
||||
str(kwargs[key]).zfill(2)
|
||||
for key in ["year", "month", "day", "hour", "minute"]
|
||||
)
|
||||
|
||||
def oldest(self) -> "WaybackMachineAvailabilityAPI":
|
||||
"""
|
||||
Passes the date 1994-01-01 to near which should return the oldest archive
|
||||
@ -245,10 +230,10 @@ class WaybackMachineAvailabilityAPI:
|
||||
finally returns the instance.
|
||||
"""
|
||||
if unix_timestamp:
|
||||
timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||
timestamp = unix_timestamp_to_wayback_timestamp(unix_timestamp)
|
||||
else:
|
||||
now = datetime.utcnow().timetuple()
|
||||
timestamp = self.wayback_timestamp(
|
||||
timestamp = wayback_timestamp(
|
||||
year=now.tm_year if year is None else year,
|
||||
month=now.tm_mon if month is None else month,
|
||||
day=now.tm_mday if day is None else day,
|
||||
|
@ -9,19 +9,26 @@ the snapshots are yielded as instances of the CDXSnapshot class.
|
||||
"""
|
||||
|
||||
|
||||
from typing import Dict, Generator, List, Optional, cast
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Dict, Generator, List, Optional, Union, cast
|
||||
|
||||
from .cdx_snapshot import CDXSnapshot
|
||||
from .cdx_utils import (
|
||||
check_collapses,
|
||||
check_filters,
|
||||
check_match_type,
|
||||
check_sort,
|
||||
full_url,
|
||||
get_response,
|
||||
get_total_pages,
|
||||
)
|
||||
from .exceptions import WaybackError
|
||||
from .utils import DEFAULT_USER_AGENT
|
||||
from .exceptions import NoCDXRecordFound, WaybackError
|
||||
from .utils import (
|
||||
DEFAULT_USER_AGENT,
|
||||
unix_timestamp_to_wayback_timestamp,
|
||||
wayback_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class WaybackMachineCDXServerAPI:
|
||||
@ -44,10 +51,13 @@ class WaybackMachineCDXServerAPI:
|
||||
end_timestamp: Optional[str] = None,
|
||||
filters: Optional[List[str]] = None,
|
||||
match_type: Optional[str] = None,
|
||||
sort: Optional[str] = None,
|
||||
gzip: Optional[str] = None,
|
||||
collapses: Optional[List[str]] = None,
|
||||
limit: Optional[str] = None,
|
||||
max_tries: int = 3,
|
||||
use_pagination: bool = False,
|
||||
closest: Optional[str] = None,
|
||||
) -> None:
|
||||
self.url = str(url).strip().replace(" ", "%20")
|
||||
self.user_agent = user_agent
|
||||
@ -57,65 +67,65 @@ class WaybackMachineCDXServerAPI:
|
||||
check_filters(self.filters)
|
||||
self.match_type = None if match_type is None else str(match_type).strip()
|
||||
check_match_type(self.match_type, self.url)
|
||||
self.sort = None if sort is None else str(sort).strip()
|
||||
check_sort(self.sort)
|
||||
self.gzip = gzip
|
||||
self.collapses = [] if collapses is None else collapses
|
||||
check_collapses(self.collapses)
|
||||
self.limit = 25000 if limit is None else limit
|
||||
self.max_tries = max_tries
|
||||
self.use_pagination = use_pagination
|
||||
self.closest = None if closest is None else str(closest)
|
||||
self.last_api_request_url: Optional[str] = None
|
||||
self.use_page = False
|
||||
self.endpoint = "https://web.archive.org/cdx/search/cdx"
|
||||
|
||||
def cdx_api_manager(
|
||||
self, payload: Dict[str, str], headers: Dict[str, str], use_page: bool = False
|
||||
self, payload: Dict[str, str], headers: Dict[str, str]
|
||||
) -> Generator[str, None, None]:
|
||||
"""
|
||||
Manages the API calls for the instance, it automatically selects the best
|
||||
parameters by looking as the query of the end-user. For bigger queries
|
||||
automatically use the CDX pagination API and for smaller queries use the
|
||||
normal API.
|
||||
|
||||
CDX Server API is a complex API and to make it easy for the end user to
|
||||
consume it the CDX manager(this method) handles the selection of the
|
||||
API output, whether to use the pagination API or not.
|
||||
|
||||
For doing large/bulk queries, the use of the Pagination API is
|
||||
recommended by the Wayback Machine authors. And it determines if the
|
||||
query would be large or not by using the showNumPages=true parameter,
|
||||
this tells the number of pages of CDX DATA that the pagination API
|
||||
will return.
|
||||
|
||||
If the number of page is less than 2 we use the normal non-pagination
|
||||
API as the pagination API is known to lag and for big queries it should
|
||||
not matter but for queries where the number of pages are less this
|
||||
method chooses accuracy over the pagination API.
|
||||
This method uses the pagination API of the CDX server if
|
||||
use_pagination attribute is True else uses the standard
|
||||
CDX server response data.
|
||||
"""
|
||||
# number of pages that will returned by the pagination API.
|
||||
# get_total_pages adds the showNumPages=true param to pagination API
|
||||
# requests.
|
||||
# This is a special query that will return a single number indicating
|
||||
# the number of pages.
|
||||
total_pages = get_total_pages(self.url, self.user_agent)
|
||||
|
||||
if use_page is True and total_pages >= 2:
|
||||
blank_pages = 0
|
||||
# When using the pagination API of the CDX server.
|
||||
if self.use_pagination is True:
|
||||
|
||||
total_pages = get_total_pages(self.url, self.user_agent)
|
||||
successive_blank_pages = 0
|
||||
|
||||
for i in range(total_pages):
|
||||
payload["page"] = str(i)
|
||||
|
||||
url = full_url(self.endpoint, params=payload)
|
||||
res = get_response(url, headers=headers)
|
||||
|
||||
if isinstance(res, Exception):
|
||||
raise res
|
||||
|
||||
self.last_api_request_url = url
|
||||
text = res.text
|
||||
if len(text) == 0:
|
||||
blank_pages += 1
|
||||
|
||||
if blank_pages >= 2:
|
||||
# Reset the counter if the last page was blank
|
||||
# but the current page is not.
|
||||
if successive_blank_pages == 1:
|
||||
if len(text) != 0:
|
||||
successive_blank_pages = 0
|
||||
|
||||
# Increase the successive page counter on encountering
|
||||
# blank page.
|
||||
if len(text) == 0:
|
||||
successive_blank_pages += 1
|
||||
|
||||
# If two successive pages are blank
|
||||
# then we don't have any more pages left to
|
||||
# iterate.
|
||||
if successive_blank_pages >= 2:
|
||||
break
|
||||
|
||||
yield text
|
||||
|
||||
# When not using the pagination API of the CDX server
|
||||
else:
|
||||
payload["showResumeKey"] = "true"
|
||||
payload["limit"] = str(self.limit)
|
||||
@ -162,9 +172,15 @@ class WaybackMachineCDXServerAPI:
|
||||
if self.gzip is None:
|
||||
payload["gzip"] = "false"
|
||||
|
||||
if self.closest:
|
||||
payload["closest"] = self.closest
|
||||
|
||||
if self.match_type:
|
||||
payload["matchType"] = self.match_type
|
||||
|
||||
if self.sort:
|
||||
payload["sort"] = self.sort
|
||||
|
||||
if self.filters and len(self.filters) > 0:
|
||||
for i, _filter in enumerate(self.filters):
|
||||
payload["filter" + str(i)] = _filter
|
||||
@ -175,6 +191,69 @@ class WaybackMachineCDXServerAPI:
|
||||
|
||||
payload["url"] = self.url
|
||||
|
||||
def near(
    self,
    year: Optional[int] = None,
    month: Optional[int] = None,
    day: Optional[int] = None,
    hour: Optional[int] = None,
    minute: Optional[int] = None,
    unix_timestamp: Optional[int] = None,
    wayback_machine_timestamp: Optional[Union[int, str]] = None,
) -> CDXSnapshot:
    """
    Fetch the archive closest to a datetime; only a single snapshot is
    returned. If you want more, do not use this method; use the class
    (snapshots()) instead.

    The target timestamp is chosen in this order of precedence:
    a truthy unix_timestamp, then a truthy wayback_machine_timestamp,
    then the year/month/day/hour/minute arguments, where every argument
    left as None is replaced by the matching field of the current UTC time.

    Note that this method overwrites the instance attributes closest,
    sort ("closest") and limit (1) before querying.

    Raises NoCDXRecordFound if the CDX server yields no snapshot.
    """
    if unix_timestamp:
        timestamp = unix_timestamp_to_wayback_timestamp(unix_timestamp)
    elif wayback_machine_timestamp:
        timestamp = str(wayback_machine_timestamp)
    else:
        now = datetime.utcnow().timetuple()
        timestamp = wayback_timestamp(
            year=now.tm_year if year is None else year,
            month=now.tm_mon if month is None else month,
            day=now.tm_mday if day is None else day,
            hour=now.tm_hour if hour is None else hour,
            minute=now.tm_min if minute is None else minute,
        )

    # Ask for exactly one record, sorted by proximity to the target.
    self.closest = timestamp
    self.sort = "closest"
    self.limit = 1

    # Only the first yielded snapshot is needed; None signals an empty result.
    first_snapshot = next(iter(self.snapshots()), None)

    if first_snapshot is None:
        raise NoCDXRecordFound(
            "Wayback Machine's CDX server did not return any records "
            "for the query. The URL may not have any archives "
            "on the Wayback Machine or the URL may have been recently "
            "archived and is still not available on the CDX server."
        )

    return first_snapshot
|
||||
|
||||
def newest(self) -> CDXSnapshot:
    """
    Passes the current UNIX time to near() for retrieving the newest archive
    through the CDX server API.

    Remember UNIX time is UTC and Wayback Machine is also UTC based.
    """
    return self.near(unix_timestamp=int(time.time()))
|
||||
|
||||
def oldest(self) -> CDXSnapshot:
    """
    Asks near() for the archive closest to 1994-01-01, which should be the
    oldest one: the Wayback Machine was started in May, 1996, so it is
    assumed that no archive is older than January 1, 1994.
    """
    earliest_date = {"year": 1994, "month": 1, "day": 1}
    return self.near(**earliest_date)
|
||||
|
||||
def snapshots(self) -> Generator[CDXSnapshot, None, None]:
|
||||
"""
|
||||
This function yields the CDX data lines as snapshots.
|
||||
@ -199,13 +278,7 @@ class WaybackMachineCDXServerAPI:
|
||||
|
||||
self.add_payload(payload)
|
||||
|
||||
if not self.start_timestamp or self.end_timestamp:
|
||||
self.use_page = True
|
||||
|
||||
if self.collapses != []:
|
||||
self.use_page = False
|
||||
|
||||
entries = self.cdx_api_manager(payload, headers, use_page=self.use_page)
|
||||
entries = self.cdx_api_manager(payload, headers)
|
||||
|
||||
for entry in entries:
|
||||
|
||||
|
@ -73,6 +73,12 @@ class CDXSnapshot:
|
||||
f"https://web.archive.org/web/{self.timestamp}/{self.original}"
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
    """
    The representation is identical to the string form, see __str__().
    """
    return self.__str__()
|
||||
|
||||
def __str__(self) -> str:
|
||||
"""
|
||||
The string representation is same as the line returned by the
|
||||
|
@ -13,7 +13,7 @@ import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
from .exceptions import WaybackError
|
||||
from .exceptions import BlockedSiteError, WaybackError
|
||||
from .utils import DEFAULT_USER_AGENT
|
||||
|
||||
|
||||
@ -28,12 +28,38 @@ def get_total_pages(url: str, user_agent: str = DEFAULT_USER_AGENT) -> int:
|
||||
headers = {"User-Agent": user_agent}
|
||||
request_url = full_url(endpoint, params=payload)
|
||||
response = get_response(request_url, headers=headers)
|
||||
|
||||
check_for_blocked_site(response, url)
|
||||
if isinstance(response, requests.Response):
|
||||
return int(response.text.strip())
|
||||
raise response
|
||||
|
||||
|
||||
def check_for_blocked_site(
    response: Union[requests.Response, Exception], url: Optional[str] = None
) -> None:
    """
    Raise BlockedSiteError when the response text carries the Wayback
    Machine's "Blocked Site Error" marker, i.e. the site is excluded from
    the Wayback Machine by its robots.txt policy.

    If response is an Exception it is re-raised unchanged. When url is
    empty or None, the error message refers to "The requested content".
    """
    # see https://github.com/akamhy/waybackpy/issues/157

    # Narrowing the union here also keeps mypy happy.
    if isinstance(response, Exception):
        raise response

    subject = url if url else "The requested content"
    blocked_marker = (
        "org.archive.util.io.RuntimeIOException: "
        "org.archive.wayback.exception.AdministrativeAccessControlException: "
        "Blocked Site Error"
    )

    if blocked_marker not in response.text.strip():
        return

    raise BlockedSiteError(
        f"{subject} is excluded from Wayback Machine by the site's robots.txt policy."
    )
|
||||
|
||||
|
||||
def full_url(endpoint: str, params: Dict[str, Any]) -> str:
|
||||
"""
|
||||
As the function's name already implies that it returns
|
||||
@ -76,6 +102,7 @@ def get_response(
|
||||
session.mount("https://", HTTPAdapter(max_retries=retries_))
|
||||
response = session.get(url, headers=headers)
|
||||
session.close()
|
||||
check_for_blocked_site(response)
|
||||
return response
|
||||
|
||||
|
||||
@ -151,3 +178,24 @@ def check_match_type(match_type: Optional[str], url: str) -> bool:
|
||||
raise WaybackError(exc_message)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def check_sort(sort: Optional[str]) -> bool:
    """
    Validate the sort argument supplied by the end-user.

    A falsy value (None or empty string) is accepted as "no sort requested".
    Any other value must be 'default', 'closest' or 'reverse', otherwise
    WaybackError is raised. Returns True when the value is acceptable.
    """
    if sort and sort not in ("default", "closest", "reverse"):
        raise WaybackError(
            f"{sort} is not an allowed argument for sort.\n"
            "Use one from 'default', 'closest' or 'reverse'"
        )
    return True
|
||||
|
129
waybackpy/cli.py
129
waybackpy/cli.py
@ -6,47 +6,48 @@ import os
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
from json import dumps
|
||||
from typing import Any, Generator, List, Optional
|
||||
from typing import Any, Dict, Generator, List, Optional
|
||||
|
||||
import click
|
||||
import requests
|
||||
|
||||
from . import __version__
|
||||
from .availability_api import WaybackMachineAvailabilityAPI
|
||||
from .cdx_api import WaybackMachineCDXServerAPI
|
||||
from .exceptions import ArchiveNotInAvailabilityAPIResponse
|
||||
from .exceptions import BlockedSiteError, NoCDXRecordFound
|
||||
from .save_api import WaybackMachineSaveAPI
|
||||
from .utils import DEFAULT_USER_AGENT
|
||||
from .wrapper import Url
|
||||
|
||||
|
||||
def echo_availability_api(
|
||||
availability_api_instance: WaybackMachineAvailabilityAPI, json: bool
|
||||
def handle_cdx_closest_derivative_methods(
|
||||
cdx_api: "WaybackMachineCDXServerAPI",
|
||||
oldest: bool,
|
||||
near: bool,
|
||||
newest: bool,
|
||||
near_args: Optional[Dict[str, int]] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Output for method that use the availability API.
|
||||
Near, oldest and newest output via this function.
|
||||
Handles the closest parameter derivative methods.
|
||||
|
||||
near, newest and oldest use the closest parameter with active
|
||||
closest based sorting.
|
||||
"""
|
||||
try:
|
||||
if availability_api_instance.archive_url:
|
||||
archive_url = availability_api_instance.archive_url
|
||||
except ArchiveNotInAvailabilityAPIResponse as error:
|
||||
message = (
|
||||
"NO ARCHIVE FOUND - The requested URL is probably "
|
||||
+ "not yet archived or if the URL was recently archived then it is "
|
||||
+ "not yet available via the Wayback Machine's availability API "
|
||||
+ "because of database lag and should be available after some time."
|
||||
)
|
||||
|
||||
click.echo(message + "\nJSON response:\n" + str(error), err=True)
|
||||
return
|
||||
|
||||
click.echo("Archive URL:")
|
||||
click.echo(archive_url)
|
||||
if json:
|
||||
click.echo("JSON response:")
|
||||
click.echo(dumps(availability_api_instance.json))
|
||||
if near:
|
||||
if near_args:
|
||||
archive_url = cdx_api.near(**near_args).archive_url
|
||||
else:
|
||||
archive_url = cdx_api.near().archive_url
|
||||
elif newest:
|
||||
archive_url = cdx_api.newest().archive_url
|
||||
elif oldest:
|
||||
archive_url = cdx_api.oldest().archive_url
|
||||
click.echo("Archive URL:")
|
||||
click.echo(archive_url)
|
||||
except NoCDXRecordFound as exc:
|
||||
click.echo(click.style("NoCDXRecordFound: ", fg="red") + str(exc), err=True)
|
||||
except BlockedSiteError as exc:
|
||||
click.echo(click.style("BlockedSiteError: ", fg="red") + str(exc), err=True)
|
||||
|
||||
|
||||
def handle_cdx(data: List[Any]) -> None:
|
||||
@ -63,6 +64,9 @@ def handle_cdx(data: List[Any]) -> None:
|
||||
limit = data[7]
|
||||
gzip = data[8]
|
||||
match_type = data[9]
|
||||
sort = data[10]
|
||||
use_pagination = data[11]
|
||||
closest = data[12]
|
||||
|
||||
filters = list(cdx_filter)
|
||||
collapses = list(collapse)
|
||||
@ -73,8 +77,11 @@ def handle_cdx(data: List[Any]) -> None:
|
||||
user_agent=user_agent,
|
||||
start_timestamp=start_timestamp,
|
||||
end_timestamp=end_timestamp,
|
||||
closest=closest,
|
||||
filters=filters,
|
||||
match_type=match_type,
|
||||
sort=sort,
|
||||
use_pagination=use_pagination,
|
||||
gzip=gzip,
|
||||
collapses=collapses,
|
||||
limit=limit,
|
||||
@ -139,7 +146,8 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
|
||||
file_name = f"{domain}-urls-{uid}.txt"
|
||||
file_path = os.path.join(os.getcwd(), file_name)
|
||||
if not os.path.isfile(file_path):
|
||||
open(file_path, "w+", encoding="utf-8").close()
|
||||
with open(file_path, "w+", encoding="utf-8") as file:
|
||||
file.close()
|
||||
|
||||
with open(file_path, "a", encoding="utf-8") as file:
|
||||
file.write(f"{url}\n")
|
||||
@ -193,13 +201,6 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
|
||||
is_flag=True,
|
||||
help="Retrieve the oldest archive of URL.",
|
||||
)
|
||||
@click.option(
|
||||
"-j",
|
||||
"--json",
|
||||
default=False,
|
||||
is_flag=True,
|
||||
help="JSON data returned by the availability API.",
|
||||
)
|
||||
@click.option(
|
||||
"-N",
|
||||
"--near",
|
||||
@ -249,7 +250,6 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
|
||||
help="Use with '--known_urls' to save the URLs in file at current directory.",
|
||||
)
|
||||
@click.option(
|
||||
"-c",
|
||||
"--cdx",
|
||||
default=False,
|
||||
is_flag=True,
|
||||
@ -269,6 +269,12 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
|
||||
"--to",
|
||||
help="End timestamp for CDX API in yyyyMMddhhmmss format.",
|
||||
)
|
||||
@click.option(
|
||||
"-C",
|
||||
"--closest",
|
||||
help="Archive that are closest the timestamp passed as arguments to this "
|
||||
+ "parameter.",
|
||||
)
|
||||
@click.option(
|
||||
"-f",
|
||||
"--cdx-filter",
|
||||
@ -285,6 +291,20 @@ def save_urls_on_file(url_gen: Generator[str, None, None]) -> None:
|
||||
+ "However, the CDX server can also return results matching a certain prefix, "
|
||||
+ "a certain host, or all sub-hosts by using the match_type",
|
||||
)
|
||||
@click.option(
|
||||
"-st",
|
||||
"--sort",
|
||||
help="Choose one from default, closest or reverse. It returns sorted CDX entries "
|
||||
+ "in the response.",
|
||||
)
|
||||
@click.option(
|
||||
"-up",
|
||||
"--use-pagination",
|
||||
"--use_pagination",
|
||||
default=False,
|
||||
is_flag=True,
|
||||
help="Use the pagination API of the CDX server instead of the default one.",
|
||||
)
|
||||
@click.option(
|
||||
"-gz",
|
||||
"--gzip",
|
||||
@ -318,7 +338,6 @@ def main( # pylint: disable=no-value-for-parameter
|
||||
show_license: bool,
|
||||
newest: bool,
|
||||
oldest: bool,
|
||||
json: bool,
|
||||
near: bool,
|
||||
save: bool,
|
||||
headers: bool,
|
||||
@ -326,6 +345,7 @@ def main( # pylint: disable=no-value-for-parameter
|
||||
subdomain: bool,
|
||||
file: bool,
|
||||
cdx: bool,
|
||||
use_pagination: bool,
|
||||
cdx_filter: List[str],
|
||||
collapse: List[str],
|
||||
cdx_print: List[str],
|
||||
@ -337,7 +357,9 @@ def main( # pylint: disable=no-value-for-parameter
|
||||
minute: Optional[int] = None,
|
||||
start_timestamp: Optional[str] = None,
|
||||
end_timestamp: Optional[str] = None,
|
||||
closest: Optional[str] = None,
|
||||
match_type: Optional[str] = None,
|
||||
sort: Optional[str] = None,
|
||||
gzip: Optional[str] = None,
|
||||
limit: Optional[str] = None,
|
||||
) -> None:
|
||||
@ -357,7 +379,7 @@ def main( # pylint: disable=no-value-for-parameter
|
||||
|
||||
Documentation: https://github.com/akamhy/waybackpy/wiki/CLI-docs
|
||||
|
||||
waybackpy - CLI usage(Demo video): https://asciinema.org/a/464367
|
||||
waybackpy - CLI usage(Demo video): https://asciinema.org/a/469890
|
||||
|
||||
Released under the MIT License. Use the flag --license for license.
|
||||
|
||||
@ -372,28 +394,32 @@ def main( # pylint: disable=no-value-for-parameter
|
||||
).text
|
||||
)
|
||||
elif url is None:
|
||||
click.echo("No URL detected. Please provide an URL.", err=True)
|
||||
click.echo(
|
||||
click.style("NoURLDetected: ", fg="red")
|
||||
+ "No URL detected. "
|
||||
+ "Please provide an URL.",
|
||||
err=True,
|
||||
)
|
||||
|
||||
elif oldest:
|
||||
availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
|
||||
availability_api.oldest()
|
||||
echo_availability_api(availability_api, json)
|
||||
cdx_api = WaybackMachineCDXServerAPI(url, user_agent=user_agent)
|
||||
handle_cdx_closest_derivative_methods(cdx_api, oldest, near, newest)
|
||||
|
||||
elif newest:
|
||||
availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
|
||||
availability_api.newest()
|
||||
echo_availability_api(availability_api, json)
|
||||
cdx_api = WaybackMachineCDXServerAPI(url, user_agent=user_agent)
|
||||
handle_cdx_closest_derivative_methods(cdx_api, oldest, near, newest)
|
||||
|
||||
elif near:
|
||||
availability_api = WaybackMachineAvailabilityAPI(url, user_agent=user_agent)
|
||||
cdx_api = WaybackMachineCDXServerAPI(url, user_agent=user_agent)
|
||||
near_args = {}
|
||||
keys = ["year", "month", "day", "hour", "minute"]
|
||||
args_arr = [year, month, day, hour, minute]
|
||||
for key, arg in zip(keys, args_arr):
|
||||
if arg:
|
||||
near_args[key] = arg
|
||||
availability_api.near(**near_args)
|
||||
echo_availability_api(availability_api, json)
|
||||
handle_cdx_closest_derivative_methods(
|
||||
cdx_api, oldest, near, newest, near_args=near_args
|
||||
)
|
||||
|
||||
elif save:
|
||||
save_api = WaybackMachineSaveAPI(url, user_agent=user_agent)
|
||||
@ -428,13 +454,18 @@ def main( # pylint: disable=no-value-for-parameter
|
||||
limit,
|
||||
gzip,
|
||||
match_type,
|
||||
sort,
|
||||
use_pagination,
|
||||
closest,
|
||||
]
|
||||
handle_cdx(data)
|
||||
|
||||
else:
|
||||
|
||||
click.echo(
|
||||
"Only URL passed, but did not specify what to do with the URL. "
|
||||
"Use --help flag for help using waybackpy.",
|
||||
click.style("NoCommandFound: ", fg="red")
|
||||
+ "Only URL passed, but did not specify what to do with the URL. "
|
||||
+ "Use --help flag for help using waybackpy.",
|
||||
err=True,
|
||||
)
|
||||
|
||||
|
@ -16,6 +16,21 @@ class WaybackError(Exception):
|
||||
"""
|
||||
|
||||
|
||||
class NoCDXRecordFound(WaybackError):
    """
    The CDX server returned no records for a query.

    Raised when near(), newest() or oldest() is invoked and there are
    no archives.
    """
|
||||
|
||||
|
||||
class BlockedSiteError(WaybackError):
    """
    Raised when archives of a website/URL that was excluded from the
    Wayback Machine are requested via the CDX server API.
    """
|
||||
|
||||
|
||||
class TooManyRequestsError(WaybackError):
|
||||
"""
|
||||
Raised when you make more than 15 requests per
|
||||
|
@ -2,8 +2,28 @@
|
||||
Utility functions and shared variables like DEFAULT_USER_AGENT are here.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from . import __version__
|
||||
|
||||
# Shared default User-Agent string: package name, package version and the
# project URL.
DEFAULT_USER_AGENT: str = (
    f"waybackpy {__version__}"
    " - https://github.com/akamhy/waybackpy"
)
|
||||
|
||||
|
||||
def unix_timestamp_to_wayback_timestamp(unix_timestamp: int) -> str:
    """
    Converts Unix time to Wayback Machine timestamp, Wayback Machine
    timestamp format is yyyyMMddhhmmss.

    ``unix_timestamp`` is coerced with ``int()`` before conversion, and the
    result is expressed in UTC.
    """
    # Local import keeps the module-level imports untouched.
    from datetime import timezone

    # datetime.utcfromtimestamp() is deprecated since Python 3.12; an explicit
    # UTC-aware conversion formats to the same string.
    moment = datetime.fromtimestamp(int(unix_timestamp), tz=timezone.utc)
    return moment.strftime("%Y%m%d%H%M%S")
|
||||
|
||||
|
||||
def wayback_timestamp(**kwargs: int) -> str:
    """
    Builds the YYYYMMDDhhmm prefix of a Wayback Machine timestamp
    (YYYYMMDDhhmmss) from keyword arguments.

    The ``year``, ``month``, ``day``, ``hour`` and ``minute`` keywords are all
    required; a missing one raises KeyError. The year is zero-padded to four
    digits and every other field to two, so the result is always at least
    twelve characters long. Seconds are not part of the result.
    """
    # The year needs a width of four to match the YYYY field; padding it to
    # two digits, as the other fields are, would shorten small years.
    field_widths = (
        ("year", 4),
        ("month", 2),
        ("day", 2),
        ("hour", 2),
        ("minute", 2),
    )
    return "".join(str(kwargs[key]).zfill(width) for key, width in field_widths)
|
||||
|
Reference in New Issue
Block a user