Code formatted with black (#47)
This commit is contained in:
parent
fde28d57aa
commit
d3e68d0e70
78
setup.py
78
setup.py
@ -1,54 +1,54 @@
|
||||
import os.path
|
||||
from setuptools import setup
|
||||
|
||||
with open(os.path.join(os.path.dirname(__file__), 'README.md')) as f:
|
||||
with open(os.path.join(os.path.dirname(__file__), "README.md")) as f:
|
||||
long_description = f.read()
|
||||
|
||||
about = {}
|
||||
with open(os.path.join(os.path.dirname(__file__), 'waybackpy', '__version__.py')) as f:
|
||||
with open(os.path.join(os.path.dirname(__file__), "waybackpy", "__version__.py")) as f:
|
||||
exec(f.read(), about)
|
||||
|
||||
setup(
|
||||
name=about['__title__'],
|
||||
packages=['waybackpy'],
|
||||
version=about['__version__'],
|
||||
description=about['__description__'],
|
||||
name=about["__title__"],
|
||||
packages=["waybackpy"],
|
||||
version=about["__version__"],
|
||||
description=about["__description__"],
|
||||
long_description=long_description,
|
||||
long_description_content_type='text/markdown',
|
||||
license=about['__license__'],
|
||||
author=about['__author__'],
|
||||
author_email=about['__author_email__'],
|
||||
url=about['__url__'],
|
||||
download_url='https://github.com/akamhy/waybackpy/archive/2.3.0.tar.gz',
|
||||
keywords=['Archive It', 'Archive Website', 'Wayback Machine',
|
||||
'waybackurls', 'Internet Archive',
|
||||
],
|
||||
install_requires=['requests'],
|
||||
long_description_content_type="text/markdown",
|
||||
license=about["__license__"],
|
||||
author=about["__author__"],
|
||||
author_email=about["__author_email__"],
|
||||
url=about["__url__"],
|
||||
download_url="https://github.com/akamhy/waybackpy/archive/2.3.0.tar.gz",
|
||||
keywords=[
|
||||
"Archive It",
|
||||
"Archive Website",
|
||||
"Wayback Machine",
|
||||
"waybackurls",
|
||||
"Internet Archive",
|
||||
],
|
||||
install_requires=["requests"],
|
||||
python_requires=">=3.4",
|
||||
classifiers=[
|
||||
'Development Status :: 5 - Production/Stable',
|
||||
'Intended Audience :: Developers',
|
||||
'Natural Language :: English',
|
||||
'Topic :: Software Development :: Build Tools',
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.4',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Programming Language :: Python :: 3.9',
|
||||
'Programming Language :: Python :: Implementation :: CPython',
|
||||
],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'waybackpy = waybackpy.cli:main'
|
||||
]
|
||||
},
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Intended Audience :: Developers",
|
||||
"Natural Language :: English",
|
||||
"Topic :: Software Development :: Build Tools",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Programming Language :: Python",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.4",
|
||||
"Programming Language :: Python :: 3.5",
|
||||
"Programming Language :: Python :: 3.6",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: Implementation :: CPython",
|
||||
],
|
||||
entry_points={"console_scripts": ["waybackpy = waybackpy.cli:main"]},
|
||||
project_urls={
|
||||
'Documentation': 'https://akamhy.github.io/waybackpy/',
|
||||
'Source': 'https://github.com/akamhy/waybackpy',
|
||||
'Tracker': 'https://github.com/akamhy/waybackpy/issues',
|
||||
"Documentation": "https://akamhy.github.io/waybackpy/",
|
||||
"Source": "https://github.com/akamhy/waybackpy",
|
||||
"Tracker": "https://github.com/akamhy/waybackpy/issues",
|
||||
},
|
||||
)
|
||||
|
@ -6,7 +6,7 @@ import argparse
|
||||
|
||||
sys.path.append("..")
|
||||
import waybackpy.cli as cli # noqa: E402
|
||||
from waybackpy.wrapper import Url # noqa: E402
|
||||
from waybackpy.wrapper import Url # noqa: E402
|
||||
from waybackpy.__version__ import __version__
|
||||
|
||||
# Namespace(day=None, get=None, hour=None, minute=None, month=None, near=False,
|
||||
@ -14,88 +14,284 @@ from waybackpy.__version__ import __version__
|
||||
|
||||
|
||||
def test_save():
|
||||
args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=False, save=True, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
||||
args = argparse.Namespace(
|
||||
user_agent=None,
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=True,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get=None,
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "pypi.org/user/akamhy" in str(reply)
|
||||
|
||||
|
||||
def test_json():
|
||||
args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=False, save=False, json=True, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
||||
args = argparse.Namespace(
|
||||
user_agent=None,
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=True,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get=None,
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "archived_snapshots" in str(reply)
|
||||
|
||||
|
||||
def test_archive_url():
|
||||
args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=False, save=False, json=False, archive_url=True, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
||||
args = argparse.Namespace(
|
||||
user_agent=None,
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=True,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get=None,
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "https://web.archive.org/web/" in str(reply)
|
||||
|
||||
|
||||
def test_oldest():
|
||||
args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=True, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
||||
args = argparse.Namespace(
|
||||
user_agent=None,
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=True,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get=None,
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "pypi.org/user/akamhy" in str(reply)
|
||||
|
||||
|
||||
def test_newest():
|
||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=False, save=False, json=False, archive_url=False, newest=True, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=True,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get=None,
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "pypi.org/user/akamhy" in str(reply)
|
||||
|
||||
|
||||
def test_total_archives():
|
||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=True, version=False,
|
||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None)
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=True,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get=None,
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert isinstance(reply, int)
|
||||
|
||||
|
||||
def test_known_urls():
|
||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://akamhy.github.io", total=False, version=False,
|
||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=True, subdomain=True, known_urls=True, get=None)
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://akamhy.github.io",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=True,
|
||||
subdomain=True,
|
||||
known_urls=True,
|
||||
get=None,
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "github" in str(reply)
|
||||
|
||||
|
||||
def test_near():
|
||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=True, alive=False, subdomain=False, known_urls=False, get=None, year=2020, month=7, day=15, hour=1, minute=1)
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=True,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get=None,
|
||||
year=2020,
|
||||
month=7,
|
||||
day=15,
|
||||
hour=1,
|
||||
minute=1,
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "202007" in str(reply)
|
||||
|
||||
|
||||
def test_get():
|
||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="url")
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get="url",
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "waybackpy" in str(reply)
|
||||
|
||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="oldest")
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get="oldest",
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "waybackpy" in str(reply)
|
||||
|
||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="newest")
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get="newest",
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "waybackpy" in str(reply)
|
||||
|
||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="save")
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get="save",
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "waybackpy" in str(reply)
|
||||
|
||||
args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False,
|
||||
oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="BullShit")
|
||||
args = argparse.Namespace(
|
||||
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \
|
||||
(KHTML, like Gecko) Version/8.0.8 Safari/600.8.9",
|
||||
url="https://pypi.org/user/akamhy/",
|
||||
total=False,
|
||||
version=False,
|
||||
oldest=False,
|
||||
save=False,
|
||||
json=False,
|
||||
archive_url=False,
|
||||
newest=False,
|
||||
near=False,
|
||||
alive=False,
|
||||
subdomain=False,
|
||||
known_urls=False,
|
||||
get="BullShit",
|
||||
)
|
||||
reply = cli.args_handler(args)
|
||||
assert "get the source code of the" in str(reply)
|
||||
|
||||
|
||||
def test_args_handler():
|
||||
args = argparse.Namespace(version=True)
|
||||
reply = cli.args_handler(args)
|
||||
@ -105,6 +301,7 @@ def test_args_handler():
|
||||
reply = cli.args_handler(args)
|
||||
assert ("waybackpy %s" % (__version__)) in str(reply)
|
||||
|
||||
|
||||
def test_main():
|
||||
# This also tests the parse_args method in cli.py
|
||||
cli.main(['temp.py', '--version'])
|
||||
cli.main(["temp.py", "--version"])
|
||||
|
@ -3,6 +3,7 @@ import sys
|
||||
import pytest
|
||||
import random
|
||||
import requests
|
||||
|
||||
sys.path.append("..")
|
||||
|
||||
import waybackpy.wrapper as waybackpy # noqa: E402
|
||||
@ -18,6 +19,7 @@ def test_clean_url():
|
||||
test_result = target._clean_url()
|
||||
assert answer == test_result
|
||||
|
||||
|
||||
def test_dunders():
|
||||
url = "https://en.wikipedia.org/wiki/Network_security"
|
||||
user_agent = "UA"
|
||||
@ -25,6 +27,7 @@ def test_dunders():
|
||||
assert "waybackpy.Url(url=%s, user_agent=%s)" % (url, user_agent) == repr(target)
|
||||
assert "en.wikipedia.org" in str(target)
|
||||
|
||||
|
||||
def test_archive_url_parser():
|
||||
endpoint = "https://amazon.com"
|
||||
user_agent = "Mozilla/5.0 (Windows NT 6.2; rv:20.0) Gecko/20121202 Firefox/20.0"
|
||||
@ -34,6 +37,7 @@ def test_archive_url_parser():
|
||||
with pytest.raises(Exception):
|
||||
waybackpy._archive_url_parser(header)
|
||||
|
||||
|
||||
def test_url_check():
|
||||
broken_url = "http://wwwgooglecom/"
|
||||
with pytest.raises(Exception):
|
||||
@ -61,8 +65,6 @@ def test_save():
|
||||
archived_url1 = str(target.save())
|
||||
assert url1 in archived_url1
|
||||
|
||||
|
||||
|
||||
# Test for urls that are incorrect.
|
||||
with pytest.raises(Exception):
|
||||
url2 = "ha ha ha ha"
|
||||
@ -89,7 +91,6 @@ def test_near():
|
||||
archive_near_year = target.near(year=2010)
|
||||
assert "2010" in str(archive_near_year)
|
||||
|
||||
|
||||
archive_near_month_year = str(target.near(year=2015, month=2))
|
||||
assert (
|
||||
("201502" in archive_near_month_year)
|
||||
@ -102,9 +103,9 @@ def test_near():
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246",
|
||||
)
|
||||
archive_near_hour_day_month_year = str(target.near(
|
||||
year=2008, month=5, day=9, hour=15
|
||||
))
|
||||
archive_near_hour_day_month_year = str(
|
||||
target.near(year=2008, month=5, day=9, hour=15)
|
||||
)
|
||||
assert (
|
||||
("2008050915" in archive_near_hour_day_month_year)
|
||||
or ("2008050914" in archive_near_hour_day_month_year)
|
||||
@ -119,22 +120,24 @@ def test_near():
|
||||
target.near(year=2010)
|
||||
|
||||
|
||||
|
||||
def test_oldest():
|
||||
url = "github.com/akamhy/waybackpy"
|
||||
target = waybackpy.Url(url, user_agent)
|
||||
assert "20200504141153" in str(target.oldest())
|
||||
|
||||
|
||||
def test_json():
|
||||
url = "github.com/akamhy/waybackpy"
|
||||
target = waybackpy.Url(url, user_agent)
|
||||
assert "archived_snapshots" in str(target.JSON)
|
||||
|
||||
|
||||
def test_archive_url():
|
||||
url = "github.com/akamhy/waybackpy"
|
||||
target = waybackpy.Url(url, user_agent)
|
||||
assert "github.com/akamhy" in str(target.archive_url)
|
||||
|
||||
|
||||
def test_newest():
|
||||
url = "github.com/akamhy/waybackpy"
|
||||
target = waybackpy.Url(url, user_agent)
|
||||
@ -146,17 +149,16 @@ def test_get():
|
||||
assert "Welcome to Google" in target.get(target.oldest())
|
||||
|
||||
|
||||
|
||||
def test_wayback_timestamp():
|
||||
ts = waybackpy._wayback_timestamp(
|
||||
year=2020, month=1, day=2, hour=3, minute=4
|
||||
)
|
||||
ts = waybackpy._wayback_timestamp(year=2020, month=1, day=2, hour=3, minute=4)
|
||||
assert "202001020304" in str(ts)
|
||||
|
||||
|
||||
def test_get_response():
|
||||
endpoint = "https://www.google.com"
|
||||
user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"
|
||||
user_agent = (
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"
|
||||
)
|
||||
headers = {"User-Agent": "%s" % user_agent}
|
||||
response = waybackpy._get_response(endpoint, params=None, headers=headers)
|
||||
assert response.status_code == 200
|
||||
@ -172,6 +174,7 @@ def test_total_archives():
|
||||
)
|
||||
assert target.total_archives() == 0
|
||||
|
||||
|
||||
def test_known_urls():
|
||||
|
||||
target = waybackpy.Url("akamhy.github.io", user_agent)
|
||||
|
120
waybackpy/cli.py
120
waybackpy/cli.py
@ -10,27 +10,27 @@ from waybackpy.__version__ import __version__
|
||||
|
||||
|
||||
def _save(obj):
|
||||
return (obj.save())
|
||||
return obj.save()
|
||||
|
||||
|
||||
def _archive_url(obj):
|
||||
return (obj.archive_url)
|
||||
return obj.archive_url
|
||||
|
||||
|
||||
def _json(obj):
|
||||
return (obj.JSON)
|
||||
return obj.JSON
|
||||
|
||||
|
||||
def _oldest(obj):
|
||||
return (obj.oldest())
|
||||
return obj.oldest()
|
||||
|
||||
|
||||
def _newest(obj):
|
||||
return (obj.newest())
|
||||
return obj.newest()
|
||||
|
||||
|
||||
def _total_archives(obj):
|
||||
return (obj.total_archives())
|
||||
return obj.total_archives()
|
||||
|
||||
|
||||
def _near(obj, args):
|
||||
@ -45,17 +45,19 @@ def _near(obj, args):
|
||||
_near_args["hour"] = args.hour
|
||||
if args.minute:
|
||||
_near_args["minute"] = args.minute
|
||||
return (obj.near(**_near_args))
|
||||
return obj.near(**_near_args)
|
||||
|
||||
|
||||
def _save_urls_on_file(input_list, live_url_count):
|
||||
m = re.search('https?://([A-Za-z_0-9.-]+).*', input_list[0])
|
||||
m = re.search("https?://([A-Za-z_0-9.-]+).*", input_list[0])
|
||||
if m:
|
||||
domain = m.group(1)
|
||||
else:
|
||||
domain = "domain-unknown"
|
||||
|
||||
uid = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(6))
|
||||
uid = "".join(
|
||||
random.choice(string.ascii_lowercase + string.digits) for _ in range(6)
|
||||
)
|
||||
|
||||
file_name = "%s-%d-urls-%s.txt" % (domain, live_url_count, uid)
|
||||
file_content = "\n".join(input_list)
|
||||
@ -89,34 +91,37 @@ def _known_urls(obj, args):
|
||||
|
||||
def _get(obj, args):
|
||||
if args.get.lower() == "url":
|
||||
return (obj.get())
|
||||
return obj.get()
|
||||
|
||||
if args.get.lower() == "archive_url":
|
||||
return (obj.get(obj.archive_url))
|
||||
return obj.get(obj.archive_url)
|
||||
|
||||
if args.get.lower() == "oldest":
|
||||
return (obj.get(obj.oldest()))
|
||||
return obj.get(obj.oldest())
|
||||
|
||||
if args.get.lower() == "latest" or args.get.lower() == "newest":
|
||||
return (obj.get(obj.newest()))
|
||||
return obj.get(obj.newest())
|
||||
|
||||
if args.get.lower() == "save":
|
||||
return (obj.get(obj.save()))
|
||||
return obj.get(obj.save())
|
||||
|
||||
return ("Use get as \"--get 'source'\", 'source' can be one of the followings: \
|
||||
return "Use get as \"--get 'source'\", 'source' can be one of the followings: \
|
||||
\n1) url - get the source code of the url specified using --url/-u.\
|
||||
\n2) archive_url - get the source code of the newest archive for the supplied url, alias of newest.\
|
||||
\n3) oldest - get the source code of the oldest archive for the supplied url.\
|
||||
\n4) newest - get the source code of the newest archive for the supplied url.\
|
||||
\n5) save - Create a new archive and get the source code of this new archive for the supplied url.")
|
||||
\n5) save - Create a new archive and get the source code of this new archive for the supplied url."
|
||||
|
||||
|
||||
def args_handler(args):
|
||||
if args.version:
|
||||
return ("waybackpy version %s" % __version__)
|
||||
return "waybackpy version %s" % __version__
|
||||
|
||||
if not args.url:
|
||||
return ("waybackpy %s \nSee 'waybackpy --help' for help using this tool." % __version__)
|
||||
return (
|
||||
"waybackpy %s \nSee 'waybackpy --help' for help using this tool."
|
||||
% __version__
|
||||
)
|
||||
|
||||
if args.user_agent:
|
||||
obj = Url(args.url, args.user_agent)
|
||||
@ -151,52 +156,93 @@ def args_handler(args):
|
||||
def parse_args(argv):
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
requiredArgs = parser.add_argument_group('URL argument (required)')
|
||||
requiredArgs.add_argument("--url", "-u", help="URL on which Wayback machine operations would occur")
|
||||
requiredArgs = parser.add_argument_group("URL argument (required)")
|
||||
requiredArgs.add_argument(
|
||||
"--url", "-u", help="URL on which Wayback machine operations would occur"
|
||||
)
|
||||
|
||||
userAgentArg = parser.add_argument_group('User Agent')
|
||||
help_text = "User agent, default user_agent is \"waybackpy python package - https://github.com/akamhy/waybackpy\""
|
||||
userAgentArg = parser.add_argument_group("User Agent")
|
||||
help_text = 'User agent, default user_agent is "waybackpy python package - https://github.com/akamhy/waybackpy"'
|
||||
userAgentArg.add_argument("--user_agent", "-ua", help=help_text)
|
||||
|
||||
saveArg = parser.add_argument_group("Create new archive/save URL")
|
||||
saveArg.add_argument("--save", "-s", action='store_true', help="Save the URL on the Wayback machine")
|
||||
saveArg.add_argument(
|
||||
"--save", "-s", action="store_true", help="Save the URL on the Wayback machine"
|
||||
)
|
||||
|
||||
auArg = parser.add_argument_group("Get the latest Archive")
|
||||
auArg.add_argument("--archive_url", "-au", action='store_true', help="Get the latest archive URL, alias for --newest")
|
||||
auArg.add_argument(
|
||||
"--archive_url",
|
||||
"-au",
|
||||
action="store_true",
|
||||
help="Get the latest archive URL, alias for --newest",
|
||||
)
|
||||
|
||||
jsonArg = parser.add_argument_group("Get the JSON data")
|
||||
jsonArg.add_argument("--json", "-j", action='store_true', help="JSON data of the availability API request")
|
||||
jsonArg.add_argument(
|
||||
"--json",
|
||||
"-j",
|
||||
action="store_true",
|
||||
help="JSON data of the availability API request",
|
||||
)
|
||||
|
||||
oldestArg = parser.add_argument_group("Oldest archive")
|
||||
oldestArg.add_argument("--oldest", "-o", action='store_true', help="Oldest archive for the specified URL")
|
||||
oldestArg.add_argument(
|
||||
"--oldest",
|
||||
"-o",
|
||||
action="store_true",
|
||||
help="Oldest archive for the specified URL",
|
||||
)
|
||||
|
||||
newestArg = parser.add_argument_group("Newest archive")
|
||||
newestArg.add_argument("--newest", "-n", action='store_true', help="Newest archive for the specified URL")
|
||||
newestArg.add_argument(
|
||||
"--newest",
|
||||
"-n",
|
||||
action="store_true",
|
||||
help="Newest archive for the specified URL",
|
||||
)
|
||||
|
||||
totalArg = parser.add_argument_group("Total number of archives")
|
||||
totalArg.add_argument("--total", "-t", action='store_true', help="Total number of archives for the specified URL")
|
||||
totalArg.add_argument(
|
||||
"--total",
|
||||
"-t",
|
||||
action="store_true",
|
||||
help="Total number of archives for the specified URL",
|
||||
)
|
||||
|
||||
getArg = parser.add_argument_group("Get source code")
|
||||
getArg.add_argument("--get", "-g", help="Prints the source code of the supplied url. Use '--get help' for extended usage")
|
||||
getArg.add_argument(
|
||||
"--get",
|
||||
"-g",
|
||||
help="Prints the source code of the supplied url. Use '--get help' for extended usage",
|
||||
)
|
||||
|
||||
knownUrlArg = parser.add_argument_group("URLs known and archived to Waybcak Machine for the site.")
|
||||
knownUrlArg.add_argument("--known_urls", "-ku", action='store_true', help="URLs known for the domain.")
|
||||
knownUrlArg = parser.add_argument_group(
|
||||
"URLs known and archived to Waybcak Machine for the site."
|
||||
)
|
||||
knownUrlArg.add_argument(
|
||||
"--known_urls", "-ku", action="store_true", help="URLs known for the domain."
|
||||
)
|
||||
help_text = "Use with '--known_urls' to include known URLs for subdomains."
|
||||
knownUrlArg.add_argument("--subdomain", "-sub", action='store_true', help=help_text)
|
||||
knownUrlArg.add_argument("--subdomain", "-sub", action="store_true", help=help_text)
|
||||
help_text = "Only include live URLs. Will not inlclude dead links."
|
||||
knownUrlArg.add_argument("--alive", "-a", action='store_true', help=help_text)
|
||||
knownUrlArg.add_argument("--alive", "-a", action="store_true", help=help_text)
|
||||
|
||||
nearArg = parser.add_argument_group('Archive close to time specified')
|
||||
nearArg.add_argument("--near", "-N", action='store_true', help="Archive near specified time")
|
||||
nearArg = parser.add_argument_group("Archive close to time specified")
|
||||
nearArg.add_argument(
|
||||
"--near", "-N", action="store_true", help="Archive near specified time"
|
||||
)
|
||||
|
||||
nearArgs = parser.add_argument_group('Arguments that are used only with --near')
|
||||
nearArgs = parser.add_argument_group("Arguments that are used only with --near")
|
||||
nearArgs.add_argument("--year", "-Y", type=int, help="Year in integer")
|
||||
nearArgs.add_argument("--month", "-M", type=int, help="Month in integer")
|
||||
nearArgs.add_argument("--day", "-D", type=int, help="Day in integer.")
|
||||
nearArgs.add_argument("--hour", "-H", type=int, help="Hour in intege")
|
||||
nearArgs.add_argument("--minute", "-MIN", type=int, help="Minute in integer")
|
||||
|
||||
parser.add_argument("--version", "-v", action='store_true', help="Waybackpy version")
|
||||
parser.add_argument(
|
||||
"--version", "-v", action="store_true", help="Waybackpy version"
|
||||
)
|
||||
|
||||
return parser.parse_args(argv[1:])
|
||||
|
||||
|
@ -1,10 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
class WaybackError(Exception):
|
||||
"""
|
||||
Raised when Wayback Machine API Service is unreachable/down.
|
||||
"""
|
||||
|
||||
|
||||
class URLError(Exception):
|
||||
"""
|
||||
Raised when malformed URLs are passed as arguments.
|
||||
|
@ -14,9 +14,7 @@ default_UA = "waybackpy python package - https://github.com/akamhy/waybackpy"
|
||||
def _archive_url_parser(header):
|
||||
"""Parse out the archive from header."""
|
||||
# Regex1
|
||||
arch = re.search(
|
||||
r"Content-Location: (/web/[0-9]{14}/.*)", str(header)
|
||||
)
|
||||
arch = re.search(r"Content-Location: (/web/[0-9]{14}/.*)", str(header))
|
||||
if arch:
|
||||
return "web.archive.org" + arch.group(1)
|
||||
# Regex2
|
||||
@ -79,11 +77,7 @@ class Url:
|
||||
|
||||
def __len__(self):
|
||||
td_max = timedelta(
|
||||
days=999999999,
|
||||
hours=23,
|
||||
minutes=59,
|
||||
seconds=59,
|
||||
microseconds=999999
|
||||
days=999999999, hours=23, minutes=59, seconds=59, microseconds=999999
|
||||
)
|
||||
if self.timestamp == datetime.max:
|
||||
return td_max.days
|
||||
@ -112,9 +106,7 @@ class Url:
|
||||
else:
|
||||
archive_url = data["archived_snapshots"]["closest"]["url"]
|
||||
archive_url = archive_url.replace(
|
||||
"http://web.archive.org/web/",
|
||||
"https://web.archive.org/web/",
|
||||
1
|
||||
"http://web.archive.org/web/", "https://web.archive.org/web/", 1
|
||||
)
|
||||
|
||||
return archive_url
|
||||
@ -127,10 +119,9 @@ class Url:
|
||||
time = datetime.max
|
||||
|
||||
else:
|
||||
time = datetime.strptime(data["archived_snapshots"]
|
||||
["closest"]
|
||||
["timestamp"],
|
||||
'%Y%m%d%H%M%S')
|
||||
time = datetime.strptime(
|
||||
data["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
|
||||
)
|
||||
|
||||
return time
|
||||
|
||||
@ -170,9 +161,9 @@ class Url:
|
||||
return response.content.decode(encoding.replace("text/html", "UTF-8", 1))
|
||||
|
||||
def near(self, year=None, month=None, day=None, hour=None, minute=None):
|
||||
""" Return the closest Wayback Machine archive to the time supplied.
|
||||
Supported params are year, month, day, hour and minute.
|
||||
Any non-supplied parameters default to the current time.
|
||||
"""Return the closest Wayback Machine archive to the time supplied.
|
||||
Supported params are year, month, day, hour and minute.
|
||||
Any non-supplied parameters default to the current time.
|
||||
|
||||
"""
|
||||
now = datetime.utcnow().timetuple()
|
||||
@ -184,10 +175,9 @@ class Url:
|
||||
minute=minute if minute else now.tm_min,
|
||||
)
|
||||
|
||||
|
||||
endpoint = "https://archive.org/wayback/available"
|
||||
headers = {"User-Agent": "%s" % self.user_agent}
|
||||
payload = {"url": "%s" % self._clean_url(), "timestamp" : timestamp}
|
||||
payload = {"url": "%s" % self._clean_url(), "timestamp": timestamp}
|
||||
response = _get_response(endpoint, params=payload, headers=headers)
|
||||
data = response.json()
|
||||
if not data["archived_snapshots"]:
|
||||
@ -201,7 +191,9 @@ class Url:
|
||||
)
|
||||
|
||||
self.archive_url = archive_url
|
||||
self.timestamp = datetime.strptime(data["archived_snapshots"]["closest"]["timestamp"], '%Y%m%d%H%M%S')
|
||||
self.timestamp = datetime.strptime(
|
||||
data["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
@ -221,7 +213,11 @@ class Url:
|
||||
"""Returns the total number of Wayback Machine archives for this URL."""
|
||||
|
||||
endpoint = "https://web.archive.org/cdx/search/cdx"
|
||||
headers = {"User-Agent": "%s" % self.user_agent, "output" : "json", "fl" : "statuscode"}
|
||||
headers = {
|
||||
"User-Agent": "%s" % self.user_agent,
|
||||
"output": "json",
|
||||
"fl": "statuscode",
|
||||
}
|
||||
payload = {"url": "%s" % self._clean_url()}
|
||||
response = _get_response(endpoint, params=payload, headers=headers)
|
||||
|
||||
@ -253,11 +249,13 @@ class Url:
|
||||
|
||||
if subdomain:
|
||||
request_url = (
|
||||
"https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey" % self._clean_url()
|
||||
"https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey"
|
||||
% self._clean_url()
|
||||
)
|
||||
else:
|
||||
request_url = (
|
||||
"http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey" % self._clean_url()
|
||||
"http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey"
|
||||
% self._clean_url()
|
||||
)
|
||||
|
||||
headers = {"User-Agent": "%s" % self.user_agent}
|
||||
|
Loading…
Reference in New Issue
Block a user