code formated with black (#47)
This commit is contained in:
parent
fde28d57aa
commit
d3e68d0e70
78
setup.py
78
setup.py
@ -1,54 +1,54 @@
|
|||||||
import os.path
|
import os.path
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
|
|
||||||
with open(os.path.join(os.path.dirname(__file__), 'README.md')) as f:
|
with open(os.path.join(os.path.dirname(__file__), "README.md")) as f:
|
||||||
long_description = f.read()
|
long_description = f.read()
|
||||||
|
|
||||||
about = {}
|
about = {}
|
||||||
with open(os.path.join(os.path.dirname(__file__), 'waybackpy', '__version__.py')) as f:
|
with open(os.path.join(os.path.dirname(__file__), "waybackpy", "__version__.py")) as f:
|
||||||
exec(f.read(), about)
|
exec(f.read(), about)
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name=about['__title__'],
|
name=about["__title__"],
|
||||||
packages=['waybackpy'],
|
packages=["waybackpy"],
|
||||||
version=about['__version__'],
|
version=about["__version__"],
|
||||||
description=about['__description__'],
|
description=about["__description__"],
|
||||||
long_description=long_description,
|
long_description=long_description,
|
||||||
long_description_content_type='text/markdown',
|
long_description_content_type="text/markdown",
|
||||||
license=about['__license__'],
|
license=about["__license__"],
|
||||||
author=about['__author__'],
|
author=about["__author__"],
|
||||||
author_email=about['__author_email__'],
|
author_email=about["__author_email__"],
|
||||||
url=about['__url__'],
|
url=about["__url__"],
|
||||||
download_url='https://github.com/akamhy/waybackpy/archive/2.3.0.tar.gz',
|
download_url="https://github.com/akamhy/waybackpy/archive/2.3.0.tar.gz",
|
||||||
keywords=['Archive It', 'Archive Website', 'Wayback Machine',
|
keywords=[
|
||||||
'waybackurls', 'Internet Archive',
|
"Archive It",
|
||||||
],
|
"Archive Website",
|
||||||
install_requires=['requests'],
|
"Wayback Machine",
|
||||||
|
"waybackurls",
|
||||||
|
"Internet Archive",
|
||||||
|
],
|
||||||
|
install_requires=["requests"],
|
||||||
python_requires=">=3.4",
|
python_requires=">=3.4",
|
||||||
classifiers=[
|
classifiers=[
|
||||||
'Development Status :: 5 - Production/Stable',
|
"Development Status :: 5 - Production/Stable",
|
||||||
'Intended Audience :: Developers',
|
"Intended Audience :: Developers",
|
||||||
'Natural Language :: English',
|
"Natural Language :: English",
|
||||||
'Topic :: Software Development :: Build Tools',
|
"Topic :: Software Development :: Build Tools",
|
||||||
'License :: OSI Approved :: MIT License',
|
"License :: OSI Approved :: MIT License",
|
||||||
'Programming Language :: Python',
|
"Programming Language :: Python",
|
||||||
'Programming Language :: Python :: 3',
|
"Programming Language :: Python :: 3",
|
||||||
'Programming Language :: Python :: 3.4',
|
"Programming Language :: Python :: 3.4",
|
||||||
'Programming Language :: Python :: 3.5',
|
"Programming Language :: Python :: 3.5",
|
||||||
'Programming Language :: Python :: 3.6',
|
"Programming Language :: Python :: 3.6",
|
||||||
'Programming Language :: Python :: 3.7',
|
"Programming Language :: Python :: 3.7",
|
||||||
'Programming Language :: Python :: 3.8',
|
"Programming Language :: Python :: 3.8",
|
||||||
'Programming Language :: Python :: 3.9',
|
"Programming Language :: Python :: 3.9",
|
||||||
'Programming Language :: Python :: Implementation :: CPython',
|
"Programming Language :: Python :: Implementation :: CPython",
|
||||||
],
|
],
|
||||||
entry_points={
|
entry_points={"console_scripts": ["waybackpy = waybackpy.cli:main"]},
|
||||||
'console_scripts': [
|
|
||||||
'waybackpy = waybackpy.cli:main'
|
|
||||||
]
|
|
||||||
},
|
|
||||||
project_urls={
|
project_urls={
|
||||||
'Documentation': 'https://akamhy.github.io/waybackpy/',
|
"Documentation": "https://akamhy.github.io/waybackpy/",
|
||||||
'Source': 'https://github.com/akamhy/waybackpy',
|
"Source": "https://github.com/akamhy/waybackpy",
|
||||||
'Tracker': 'https://github.com/akamhy/waybackpy/issues',
|
"Tracker": "https://github.com/akamhy/waybackpy/issues",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
@ -6,7 +6,7 @@ import argparse
|
|||||||
|
|
||||||
sys.path.append("..")
|
sys.path.append("..")
|
||||||
import waybackpy.cli as cli # noqa: E402
|
import waybackpy.cli as cli # noqa: E402
|
||||||
from waybackpy.wrapper import Url # noqa: E402
|
from waybackpy.wrapper import Url # noqa: E402
|
||||||
from waybackpy.__version__ import __version__
|
from waybackpy.__version__ import __version__
|
||||||
|
|
||||||
# Namespace(day=None, get=None, hour=None, minute=None, month=None, near=False,
|
# Namespace(day=None, get=None, hour=None, minute=None, month=None, near=False,
|
||||||
@ -14,88 +14,284 @@ from waybackpy.__version__ import __version__
|
|||||||
|
|
||||||
|
|
||||||
def test_save():
|
def test_save():
|
||||||
args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False,
|
args = argparse.Namespace(
|
||||||
oldest=False, save=True, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
user_agent=None,
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=True,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get=None,
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "pypi.org/user/akamhy" in str(reply)
|
assert "pypi.org/user/akamhy" in str(reply)
|
||||||
|
|
||||||
|
|
||||||
def test_json():
|
def test_json():
|
||||||
args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False,
|
args = argparse.Namespace(
|
||||||
oldest=False, save=False, json=True, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
user_agent=None,
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=True,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get=None,
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "archived_snapshots" in str(reply)
|
assert "archived_snapshots" in str(reply)
|
||||||
|
|
||||||
|
|
||||||
def test_archive_url():
|
def test_archive_url():
|
||||||
args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False,
|
args = argparse.Namespace(
|
||||||
oldest=False, save=False, json=False, archive_url=True, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
user_agent=None,
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=True,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get=None,
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "https://web.archive.org/web/" in str(reply)
|
assert "https://web.archive.org/web/" in str(reply)
|
||||||
|
|
||||||
|
|
||||||
def test_oldest():
|
def test_oldest():
|
||||||
args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False,
|
args = argparse.Namespace(
|
||||||
oldest=True, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
user_agent=None,
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=True,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get=None,
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "pypi.org/user/akamhy" in str(reply)
|
assert "pypi.org/user/akamhy" in str(reply)
|
||||||
|
|
||||||
|
|
||||||
def test_newest():
|
def test_newest():
|
||||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
args = argparse.Namespace(
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
oldest=False, save=False, json=False, archive_url=False, newest=True, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=True,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get=None,
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "pypi.org/user/akamhy" in str(reply)
|
assert "pypi.org/user/akamhy" in str(reply)
|
||||||
|
|
||||||
|
|
||||||
def test_total_archives():
|
def test_total_archives():
|
||||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
args = argparse.Namespace(
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=True, version=False,
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=True,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get=None,
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert isinstance(reply, int)
|
assert isinstance(reply, int)
|
||||||
|
|
||||||
|
|
||||||
def test_known_urls():
|
def test_known_urls():
|
||||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
args = argparse.Namespace(
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://akamhy.github.io", total=False, version=False,
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=True, subdomain=True, known_urls=True, get=None)
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
|
url="https://akamhy.github.io",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=True,
|
||||||
|
subdomain=True,
|
||||||
|
known_urls=True,
|
||||||
|
get=None,
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "github" in str(reply)
|
assert "github" in str(reply)
|
||||||
|
|
||||||
|
|
||||||
def test_near():
|
def test_near():
|
||||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
args = argparse.Namespace(
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=True, alive=False, subdomain=False, known_urls=False, get=None, year=2020, month=7, day=15, hour=1, minute=1)
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=True,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get=None,
|
||||||
|
year=2020,
|
||||||
|
month=7,
|
||||||
|
day=15,
|
||||||
|
hour=1,
|
||||||
|
minute=1,
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "202007" in str(reply)
|
assert "202007" in str(reply)
|
||||||
|
|
||||||
|
|
||||||
def test_get():
|
def test_get():
|
||||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
args = argparse.Namespace(
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="url")
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get="url",
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "waybackpy" in str(reply)
|
assert "waybackpy" in str(reply)
|
||||||
|
|
||||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
args = argparse.Namespace(
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="oldest")
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get="oldest",
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "waybackpy" in str(reply)
|
assert "waybackpy" in str(reply)
|
||||||
|
|
||||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
args = argparse.Namespace(
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="newest")
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get="newest",
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "waybackpy" in str(reply)
|
assert "waybackpy" in str(reply)
|
||||||
|
|
||||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
args = argparse.Namespace(
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="save")
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get="save",
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "waybackpy" in str(reply)
|
assert "waybackpy" in str(reply)
|
||||||
|
|
||||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
args = argparse.Namespace(
|
||||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="BullShit")
|
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||||
|
url="https://pypi.org/user/akamhy/",
|
||||||
|
total=False,
|
||||||
|
version=False,
|
||||||
|
oldest=False,
|
||||||
|
save=False,
|
||||||
|
json=False,
|
||||||
|
archive_url=False,
|
||||||
|
newest=False,
|
||||||
|
near=False,
|
||||||
|
alive=False,
|
||||||
|
subdomain=False,
|
||||||
|
known_urls=False,
|
||||||
|
get="BullShit",
|
||||||
|
)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert "get the source code of the" in str(reply)
|
assert "get the source code of the" in str(reply)
|
||||||
|
|
||||||
|
|
||||||
def test_args_handler():
|
def test_args_handler():
|
||||||
args = argparse.Namespace(version=True)
|
args = argparse.Namespace(version=True)
|
||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
@ -105,6 +301,7 @@ def test_args_handler():
|
|||||||
reply = cli.args_handler(args)
|
reply = cli.args_handler(args)
|
||||||
assert ("waybackpy %s" % (__version__)) in str(reply)
|
assert ("waybackpy %s" % (__version__)) in str(reply)
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
# This also tests the parse_args method in cli.py
|
# This also tests the parse_args method in cli.py
|
||||||
cli.main(['temp.py', '--version'])
|
cli.main(["temp.py", "--version"])
|
||||||
|
@ -3,6 +3,7 @@ import sys
|
|||||||
import pytest
|
import pytest
|
||||||
import random
|
import random
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
sys.path.append("..")
|
sys.path.append("..")
|
||||||
|
|
||||||
import waybackpy.wrapper as waybackpy # noqa: E402
|
import waybackpy.wrapper as waybackpy # noqa: E402
|
||||||
@ -18,6 +19,7 @@ def test_clean_url():
|
|||||||
test_result = target._clean_url()
|
test_result = target._clean_url()
|
||||||
assert answer == test_result
|
assert answer == test_result
|
||||||
|
|
||||||
|
|
||||||
def test_dunders():
|
def test_dunders():
|
||||||
url = "https://en.wikipedia.org/wiki/Network_security"
|
url = "https://en.wikipedia.org/wiki/Network_security"
|
||||||
user_agent = "UA"
|
user_agent = "UA"
|
||||||
@ -25,6 +27,7 @@ def test_dunders():
|
|||||||
assert "waybackpy.Url(url=%s, user_agent=%s)" % (url, user_agent) == repr(target)
|
assert "waybackpy.Url(url=%s, user_agent=%s)" % (url, user_agent) == repr(target)
|
||||||
assert "en.wikipedia.org" in str(target)
|
assert "en.wikipedia.org" in str(target)
|
||||||
|
|
||||||
|
|
||||||
def test_archive_url_parser():
|
def test_archive_url_parser():
|
||||||
endpoint = "https://amazon.com"
|
endpoint = "https://amazon.com"
|
||||||
user_agent = "Mozilla/5.0 (Windows NT 6.2; rv:20.0) Gecko/20121202 Firefox/20.0"
|
user_agent = "Mozilla/5.0 (Windows NT 6.2; rv:20.0) Gecko/20121202 Firefox/20.0"
|
||||||
@ -34,6 +37,7 @@ def test_archive_url_parser():
|
|||||||
with pytest.raises(Exception):
|
with pytest.raises(Exception):
|
||||||
waybackpy._archive_url_parser(header)
|
waybackpy._archive_url_parser(header)
|
||||||
|
|
||||||
|
|
||||||
def test_url_check():
|
def test_url_check():
|
||||||
broken_url = "http://wwwgooglecom/"
|
broken_url = "http://wwwgooglecom/"
|
||||||
with pytest.raises(Exception):
|
with pytest.raises(Exception):
|
||||||
@ -61,8 +65,6 @@ def test_save():
|
|||||||
archived_url1 = str(target.save())
|
archived_url1 = str(target.save())
|
||||||
assert url1 in archived_url1
|
assert url1 in archived_url1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Test for urls that are incorrect.
|
# Test for urls that are incorrect.
|
||||||
with pytest.raises(Exception):
|
with pytest.raises(Exception):
|
||||||
url2 = "ha ha ha ha"
|
url2 = "ha ha ha ha"
|
||||||
@ -89,7 +91,6 @@ def test_near():
|
|||||||
archive_near_year = target.near(year=2010)
|
archive_near_year = target.near(year=2010)
|
||||||
assert "2010" in str(archive_near_year)
|
assert "2010" in str(archive_near_year)
|
||||||
|
|
||||||
|
|
||||||
archive_near_month_year = str(target.near(year=2015, month=2))
|
archive_near_month_year = str(target.near(year=2015, month=2))
|
||||||
assert (
|
assert (
|
||||||
("201502" in archive_near_month_year)
|
("201502" in archive_near_month_year)
|
||||||
@ -102,9 +103,9 @@ def test_near():
|
|||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||||
"(KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246",
|
"(KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246",
|
||||||
)
|
)
|
||||||
archive_near_hour_day_month_year = str(target.near(
|
archive_near_hour_day_month_year = str(
|
||||||
year=2008, month=5, day=9, hour=15
|
target.near(year=2008, month=5, day=9, hour=15)
|
||||||
))
|
)
|
||||||
assert (
|
assert (
|
||||||
("2008050915" in archive_near_hour_day_month_year)
|
("2008050915" in archive_near_hour_day_month_year)
|
||||||
or ("2008050914" in archive_near_hour_day_month_year)
|
or ("2008050914" in archive_near_hour_day_month_year)
|
||||||
@ -119,22 +120,24 @@ def test_near():
|
|||||||
target.near(year=2010)
|
target.near(year=2010)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_oldest():
|
def test_oldest():
|
||||||
url = "github.com/akamhy/waybackpy"
|
url = "github.com/akamhy/waybackpy"
|
||||||
target = waybackpy.Url(url, user_agent)
|
target = waybackpy.Url(url, user_agent)
|
||||||
assert "20200504141153" in str(target.oldest())
|
assert "20200504141153" in str(target.oldest())
|
||||||
|
|
||||||
|
|
||||||
def test_json():
|
def test_json():
|
||||||
url = "github.com/akamhy/waybackpy"
|
url = "github.com/akamhy/waybackpy"
|
||||||
target = waybackpy.Url(url, user_agent)
|
target = waybackpy.Url(url, user_agent)
|
||||||
assert "archived_snapshots" in str(target.JSON)
|
assert "archived_snapshots" in str(target.JSON)
|
||||||
|
|
||||||
|
|
||||||
def test_archive_url():
|
def test_archive_url():
|
||||||
url = "github.com/akamhy/waybackpy"
|
url = "github.com/akamhy/waybackpy"
|
||||||
target = waybackpy.Url(url, user_agent)
|
target = waybackpy.Url(url, user_agent)
|
||||||
assert "github.com/akamhy" in str(target.archive_url)
|
assert "github.com/akamhy" in str(target.archive_url)
|
||||||
|
|
||||||
|
|
||||||
def test_newest():
|
def test_newest():
|
||||||
url = "github.com/akamhy/waybackpy"
|
url = "github.com/akamhy/waybackpy"
|
||||||
target = waybackpy.Url(url, user_agent)
|
target = waybackpy.Url(url, user_agent)
|
||||||
@ -146,17 +149,16 @@ def test_get():
|
|||||||
assert "Welcome to Google" in target.get(target.oldest())
|
assert "Welcome to Google" in target.get(target.oldest())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_wayback_timestamp():
|
def test_wayback_timestamp():
|
||||||
ts = waybackpy._wayback_timestamp(
|
ts = waybackpy._wayback_timestamp(year=2020, month=1, day=2, hour=3, minute=4)
|
||||||
year=2020, month=1, day=2, hour=3, minute=4
|
|
||||||
)
|
|
||||||
assert "202001020304" in str(ts)
|
assert "202001020304" in str(ts)
|
||||||
|
|
||||||
|
|
||||||
def test_get_response():
|
def test_get_response():
|
||||||
endpoint = "https://www.google.com"
|
endpoint = "https://www.google.com"
|
||||||
user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"
|
user_agent = (
|
||||||
|
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"
|
||||||
|
)
|
||||||
headers = {"User-Agent": "%s" % user_agent}
|
headers = {"User-Agent": "%s" % user_agent}
|
||||||
response = waybackpy._get_response(endpoint, params=None, headers=headers)
|
response = waybackpy._get_response(endpoint, params=None, headers=headers)
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
@ -172,6 +174,7 @@ def test_total_archives():
|
|||||||
)
|
)
|
||||||
assert target.total_archives() == 0
|
assert target.total_archives() == 0
|
||||||
|
|
||||||
|
|
||||||
def test_known_urls():
|
def test_known_urls():
|
||||||
|
|
||||||
target = waybackpy.Url("akamhy.github.io", user_agent)
|
target = waybackpy.Url("akamhy.github.io", user_agent)
|
||||||
|
120
waybackpy/cli.py
120
waybackpy/cli.py
@ -10,27 +10,27 @@ from waybackpy.__version__ import __version__
|
|||||||
|
|
||||||
|
|
||||||
def _save(obj):
|
def _save(obj):
|
||||||
return (obj.save())
|
return obj.save()
|
||||||
|
|
||||||
|
|
||||||
def _archive_url(obj):
|
def _archive_url(obj):
|
||||||
return (obj.archive_url)
|
return obj.archive_url
|
||||||
|
|
||||||
|
|
||||||
def _json(obj):
|
def _json(obj):
|
||||||
return (obj.JSON)
|
return obj.JSON
|
||||||
|
|
||||||
|
|
||||||
def _oldest(obj):
|
def _oldest(obj):
|
||||||
return (obj.oldest())
|
return obj.oldest()
|
||||||
|
|
||||||
|
|
||||||
def _newest(obj):
|
def _newest(obj):
|
||||||
return (obj.newest())
|
return obj.newest()
|
||||||
|
|
||||||
|
|
||||||
def _total_archives(obj):
|
def _total_archives(obj):
|
||||||
return (obj.total_archives())
|
return obj.total_archives()
|
||||||
|
|
||||||
|
|
||||||
def _near(obj, args):
|
def _near(obj, args):
|
||||||
@ -45,17 +45,19 @@ def _near(obj, args):
|
|||||||
_near_args["hour"] = args.hour
|
_near_args["hour"] = args.hour
|
||||||
if args.minute:
|
if args.minute:
|
||||||
_near_args["minute"] = args.minute
|
_near_args["minute"] = args.minute
|
||||||
return (obj.near(**_near_args))
|
return obj.near(**_near_args)
|
||||||
|
|
||||||
|
|
||||||
def _save_urls_on_file(input_list, live_url_count):
|
def _save_urls_on_file(input_list, live_url_count):
|
||||||
m = re.search('https?://([A-Za-z_0-9.-]+).*', input_list[0])
|
m = re.search("https?://([A-Za-z_0-9.-]+).*", input_list[0])
|
||||||
if m:
|
if m:
|
||||||
domain = m.group(1)
|
domain = m.group(1)
|
||||||
else:
|
else:
|
||||||
domain = "domain-unknown"
|
domain = "domain-unknown"
|
||||||
|
|
||||||
uid = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(6))
|
uid = "".join(
|
||||||
|
random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
|
||||||
|
)
|
||||||
|
|
||||||
file_name = "%s-%d-urls-%s.txt" % (domain, live_url_count, uid)
|
file_name = "%s-%d-urls-%s.txt" % (domain, live_url_count, uid)
|
||||||
file_content = "\n".join(input_list)
|
file_content = "\n".join(input_list)
|
||||||
@ -89,34 +91,37 @@ def _known_urls(obj, args):
|
|||||||
|
|
||||||
def _get(obj, args):
|
def _get(obj, args):
|
||||||
if args.get.lower() == "url":
|
if args.get.lower() == "url":
|
||||||
return (obj.get())
|
return obj.get()
|
||||||
|
|
||||||
if args.get.lower() == "archive_url":
|
if args.get.lower() == "archive_url":
|
||||||
return (obj.get(obj.archive_url))
|
return obj.get(obj.archive_url)
|
||||||
|
|
||||||
if args.get.lower() == "oldest":
|
if args.get.lower() == "oldest":
|
||||||
return (obj.get(obj.oldest()))
|
return obj.get(obj.oldest())
|
||||||
|
|
||||||
if args.get.lower() == "latest" or args.get.lower() == "newest":
|
if args.get.lower() == "latest" or args.get.lower() == "newest":
|
||||||
return (obj.get(obj.newest()))
|
return obj.get(obj.newest())
|
||||||
|
|
||||||
if args.get.lower() == "save":
|
if args.get.lower() == "save":
|
||||||
return (obj.get(obj.save()))
|
return obj.get(obj.save())
|
||||||
|
|
||||||
return ("Use get as \"--get 'source'\", 'source' can be one of the followings: \
|
return "Use get as \"--get 'source'\", 'source' can be one of the followings: \
|
||||||
\n1) url - get the source code of the url specified using --url/-u.\
|
\n1) url - get the source code of the url specified using --url/-u.\
|
||||||
\n2) archive_url - get the source code of the newest archive for the supplied url, alias of newest.\
|
\n2) archive_url - get the source code of the newest archive for the supplied url, alias of newest.\
|
||||||
\n3) oldest - get the source code of the oldest archive for the supplied url.\
|
\n3) oldest - get the source code of the oldest archive for the supplied url.\
|
||||||
\n4) newest - get the source code of the newest archive for the supplied url.\
|
\n4) newest - get the source code of the newest archive for the supplied url.\
|
||||||
\n5) save - Create a new archive and get the source code of this new archive for the supplied url.")
|
\n5) save - Create a new archive and get the source code of this new archive for the supplied url."
|
||||||
|
|
||||||
|
|
||||||
def args_handler(args):
|
def args_handler(args):
|
||||||
if args.version:
|
if args.version:
|
||||||
return ("waybackpy version %s" % __version__)
|
return "waybackpy version %s" % __version__
|
||||||
|
|
||||||
if not args.url:
|
if not args.url:
|
||||||
return ("waybackpy %s \nSee 'waybackpy --help' for help using this tool." % __version__)
|
return (
|
||||||
|
"waybackpy %s \nSee 'waybackpy --help' for help using this tool."
|
||||||
|
% __version__
|
||||||
|
)
|
||||||
|
|
||||||
if args.user_agent:
|
if args.user_agent:
|
||||||
obj = Url(args.url, args.user_agent)
|
obj = Url(args.url, args.user_agent)
|
||||||
@ -151,52 +156,93 @@ def args_handler(args):
|
|||||||
def parse_args(argv):
    """Build the waybackpy command-line parser and parse ``argv``.

    Args:
        argv: The full argument vector. The first element (the program
            name) is skipped; only ``argv[1:]`` is handed to argparse.

    Returns:
        The ``argparse.Namespace`` produced by ``parser.parse_args``. Flag
        options use ``store_true`` and therefore default to ``False``;
        value options (``--url``, ``--user_agent``, ``--get`` and the
        ``--near`` time components) default to ``None``.
    """
    parser = argparse.ArgumentParser()

    # The URL is described as required in the help output, but argparse does
    # not enforce it here (no ``required=True``).
    required_group = parser.add_argument_group("URL argument (required)")
    required_group.add_argument(
        "--url", "-u", help="URL on which Wayback machine operations would occur"
    )

    user_agent_group = parser.add_argument_group("User Agent")
    help_text = 'User agent, default user_agent is "waybackpy python package - https://github.com/akamhy/waybackpy"'
    user_agent_group.add_argument("--user_agent", "-ua", help=help_text)

    save_group = parser.add_argument_group("Create new archive/save URL")
    save_group.add_argument(
        "--save", "-s", action="store_true", help="Save the URL on the Wayback machine"
    )

    archive_url_group = parser.add_argument_group("Get the latest Archive")
    archive_url_group.add_argument(
        "--archive_url",
        "-au",
        action="store_true",
        help="Get the latest archive URL, alias for --newest",
    )

    json_group = parser.add_argument_group("Get the JSON data")
    json_group.add_argument(
        "--json",
        "-j",
        action="store_true",
        help="JSON data of the availability API request",
    )

    oldest_group = parser.add_argument_group("Oldest archive")
    oldest_group.add_argument(
        "--oldest",
        "-o",
        action="store_true",
        help="Oldest archive for the specified URL",
    )

    newest_group = parser.add_argument_group("Newest archive")
    newest_group.add_argument(
        "--newest",
        "-n",
        action="store_true",
        help="Newest archive for the specified URL",
    )

    total_group = parser.add_argument_group("Total number of archives")
    total_group.add_argument(
        "--total",
        "-t",
        action="store_true",
        help="Total number of archives for the specified URL",
    )

    get_group = parser.add_argument_group("Get source code")
    get_group.add_argument(
        "--get",
        "-g",
        help="Prints the source code of the supplied url. Use '--get help' for extended usage",
    )

    known_urls_group = parser.add_argument_group(
        "URLs known and archived to Wayback Machine for the site."
    )
    known_urls_group.add_argument(
        "--known_urls", "-ku", action="store_true", help="URLs known for the domain."
    )
    help_text = "Use with '--known_urls' to include known URLs for subdomains."
    known_urls_group.add_argument(
        "--subdomain", "-sub", action="store_true", help=help_text
    )
    help_text = "Only include live URLs. Will not include dead links."
    known_urls_group.add_argument(
        "--alive", "-a", action="store_true", help=help_text
    )

    near_group = parser.add_argument_group("Archive close to time specified")
    near_group.add_argument(
        "--near", "-N", action="store_true", help="Archive near specified time"
    )

    # Time components; the group title states they are only used with --near.
    near_time_group = parser.add_argument_group(
        "Arguments that are used only with --near"
    )
    near_time_group.add_argument("--year", "-Y", type=int, help="Year in integer")
    near_time_group.add_argument("--month", "-M", type=int, help="Month in integer")
    near_time_group.add_argument("--day", "-D", type=int, help="Day in integer.")
    near_time_group.add_argument("--hour", "-H", type=int, help="Hour in integer")
    near_time_group.add_argument(
        "--minute", "-MIN", type=int, help="Minute in integer"
    )

    parser.add_argument(
        "--version", "-v", action="store_true", help="Waybackpy version"
    )

    # argv[0] is the program name, so parse only what follows it.
    return parser.parse_args(argv[1:])
|
||||||
|
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
|
||||||
class WaybackError(Exception):
    """
    Raised when Wayback Machine API Service is unreachable/down.
    """
|
||||||
|
|
||||||
|
|
||||||
class URLError(Exception):
|
class URLError(Exception):
|
||||||
"""
|
"""
|
||||||
Raised when malformed URLs are passed as arguments.
|
Raised when malformed URLs are passed as arguments.
|
||||||
|
@ -14,9 +14,7 @@ default_UA = "waybackpy python package - https://github.com/akamhy/waybackpy"
|
|||||||
def _archive_url_parser(header):
|
def _archive_url_parser(header):
|
||||||
"""Parse out the archive from header."""
|
"""Parse out the archive from header."""
|
||||||
# Regex1
|
# Regex1
|
||||||
arch = re.search(
|
arch = re.search(r"Content-Location: (/web/[0-9]{14}/.*)", str(header))
|
||||||
r"Content-Location: (/web/[0-9]{14}/.*)", str(header)
|
|
||||||
)
|
|
||||||
if arch:
|
if arch:
|
||||||
return "web.archive.org" + arch.group(1)
|
return "web.archive.org" + arch.group(1)
|
||||||
# Regex2
|
# Regex2
|
||||||
@ -79,11 +77,7 @@ class Url:
|
|||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
td_max = timedelta(
|
td_max = timedelta(
|
||||||
days=999999999,
|
days=999999999, hours=23, minutes=59, seconds=59, microseconds=999999
|
||||||
hours=23,
|
|
||||||
minutes=59,
|
|
||||||
seconds=59,
|
|
||||||
microseconds=999999
|
|
||||||
)
|
)
|
||||||
if self.timestamp == datetime.max:
|
if self.timestamp == datetime.max:
|
||||||
return td_max.days
|
return td_max.days
|
||||||
@ -112,9 +106,7 @@ class Url:
|
|||||||
else:
|
else:
|
||||||
archive_url = data["archived_snapshots"]["closest"]["url"]
|
archive_url = data["archived_snapshots"]["closest"]["url"]
|
||||||
archive_url = archive_url.replace(
|
archive_url = archive_url.replace(
|
||||||
"http://web.archive.org/web/",
|
"http://web.archive.org/web/", "https://web.archive.org/web/", 1
|
||||||
"https://web.archive.org/web/",
|
|
||||||
1
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return archive_url
|
return archive_url
|
||||||
@ -127,10 +119,9 @@ class Url:
|
|||||||
time = datetime.max
|
time = datetime.max
|
||||||
|
|
||||||
else:
|
else:
|
||||||
time = datetime.strptime(data["archived_snapshots"]
|
time = datetime.strptime(
|
||||||
["closest"]
|
data["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
|
||||||
["timestamp"],
|
)
|
||||||
'%Y%m%d%H%M%S')
|
|
||||||
|
|
||||||
return time
|
return time
|
||||||
|
|
||||||
@ -170,9 +161,9 @@ class Url:
|
|||||||
return response.content.decode(encoding.replace("text/html", "UTF-8", 1))
|
return response.content.decode(encoding.replace("text/html", "UTF-8", 1))
|
||||||
|
|
||||||
def near(self, year=None, month=None, day=None, hour=None, minute=None):
|
def near(self, year=None, month=None, day=None, hour=None, minute=None):
|
||||||
""" Return the closest Wayback Machine archive to the time supplied.
|
"""Return the closest Wayback Machine archive to the time supplied.
|
||||||
Supported params are year, month, day, hour and minute.
|
Supported params are year, month, day, hour and minute.
|
||||||
Any non-supplied parameters default to the current time.
|
Any non-supplied parameters default to the current time.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
now = datetime.utcnow().timetuple()
|
now = datetime.utcnow().timetuple()
|
||||||
@ -184,10 +175,9 @@ class Url:
|
|||||||
minute=minute if minute else now.tm_min,
|
minute=minute if minute else now.tm_min,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
endpoint = "https://archive.org/wayback/available"
|
endpoint = "https://archive.org/wayback/available"
|
||||||
headers = {"User-Agent": "%s" % self.user_agent}
|
headers = {"User-Agent": "%s" % self.user_agent}
|
||||||
payload = {"url": "%s" % self._clean_url(), "timestamp" : timestamp}
|
payload = {"url": "%s" % self._clean_url(), "timestamp": timestamp}
|
||||||
response = _get_response(endpoint, params=payload, headers=headers)
|
response = _get_response(endpoint, params=payload, headers=headers)
|
||||||
data = response.json()
|
data = response.json()
|
||||||
if not data["archived_snapshots"]:
|
if not data["archived_snapshots"]:
|
||||||
@ -201,7 +191,9 @@ class Url:
|
|||||||
)
|
)
|
||||||
|
|
||||||
self.archive_url = archive_url
|
self.archive_url = archive_url
|
||||||
self.timestamp = datetime.strptime(data["archived_snapshots"]["closest"]["timestamp"], '%Y%m%d%H%M%S')
|
self.timestamp = datetime.strptime(
|
||||||
|
data["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
|
||||||
|
)
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@ -221,7 +213,11 @@ class Url:
|
|||||||
"""Returns the total number of Wayback Machine archives for this URL."""
|
"""Returns the total number of Wayback Machine archives for this URL."""
|
||||||
|
|
||||||
endpoint = "https://web.archive.org/cdx/search/cdx"
|
endpoint = "https://web.archive.org/cdx/search/cdx"
|
||||||
headers = {"User-Agent": "%s" % self.user_agent, "output" : "json", "fl" : "statuscode"}
|
headers = {
|
||||||
|
"User-Agent": "%s" % self.user_agent,
|
||||||
|
"output": "json",
|
||||||
|
"fl": "statuscode",
|
||||||
|
}
|
||||||
payload = {"url": "%s" % self._clean_url()}
|
payload = {"url": "%s" % self._clean_url()}
|
||||||
response = _get_response(endpoint, params=payload, headers=headers)
|
response = _get_response(endpoint, params=payload, headers=headers)
|
||||||
|
|
||||||
@ -253,11 +249,13 @@ class Url:
|
|||||||
|
|
||||||
if subdomain:
|
if subdomain:
|
||||||
request_url = (
|
request_url = (
|
||||||
"https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey" % self._clean_url()
|
"https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey"
|
||||||
|
% self._clean_url()
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
request_url = (
|
request_url = (
|
||||||
"http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey" % self._clean_url()
|
"http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey"
|
||||||
|
% self._clean_url()
|
||||||
)
|
)
|
||||||
|
|
||||||
headers = {"User-Agent": "%s" % self.user_agent}
|
headers = {"User-Agent": "%s" % self.user_agent}
|
||||||
|
Loading…
Reference in New Issue
Block a user