From 7f927ec7be2eece08bc8f54bf6da465575882122 Mon Sep 17 00:00:00 2001 From: Akash Mahanty Date: Fri, 16 Oct 2020 19:25:45 +0530 Subject: [PATCH] added tests for json and archive_url, updated broken tests (#34) * added tests for json and archive_url, updated broken tests * drop 2.7 support --- .travis.yml | 3 +-- tests/test_cli.py | 56 ++++++++++++++++++++++++++----------------- tests/test_wrapper.py | 37 +++++++++++++++------------- waybackpy/wrapper.py | 34 +++++++++++++------------- 4 files changed, 72 insertions(+), 58 deletions(-) diff --git a/.travis.yml b/.travis.yml index 136ec77..326fc32 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,9 +3,8 @@ os: linux dist: xenial cache: pip python: - - 2.7 - 3.6 - - 3.8 + - 3.9 before_install: - python --version - pip install -U pip diff --git a/tests/test_cli.py b/tests/test_cli.py index 91b722d..64b0516 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -19,75 +19,87 @@ if sys.version_info > (3, 7): if codecov_python: def test_save(): args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False, - oldest=False, save=True, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None) + oldest=False, save=True, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None) reply = cli.args_handler(args) - assert "pypi.org/user/akamhy" in reply + assert "pypi.org/user/akamhy" in str(reply) + +def test_json(): + args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False, + oldest=False, save=False, json=True, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None) + reply = cli.args_handler(args) + assert "archived_snapshots" in str(reply) + +def test_archive_url(): + args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False, + oldest=False, save=False, json=False, archive_url=True, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None) + reply = cli.args_handler(args) + assert "https://web.archive.org/web/" in str(reply) def test_oldest(): args = argparse.Namespace(user_agent=None, url="https://pypi.org/user/akamhy/", total=False, version=False, - oldest=True, save=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None) + oldest=True, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None) reply = cli.args_handler(args) - assert "pypi.org/user/akamhy" in reply + assert "pypi.org/user/akamhy" in str(reply) def test_newest(): args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \ (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False, - oldest=False, save=False, newest=True, near=False, alive=False, subdomain=False, known_urls=False, get=None) + oldest=False, save=False, json=False, archive_url=False, newest=True, near=False, alive=False, subdomain=False, known_urls=False, get=None) reply = cli.args_handler(args) - assert "pypi.org/user/akamhy" in reply + assert "pypi.org/user/akamhy" in str(reply) def test_total_archives(): args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \ (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=True, version=False, - oldest=False, save=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None) + oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get=None) reply = cli.args_handler(args) assert isinstance(reply, int) def test_known_urls(): args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \ (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://akamhy.github.io", total=False, version=False, - oldest=False, save=False, newest=False, near=False, alive=True, subdomain=True, known_urls=True, get=None) + oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=True, subdomain=True, known_urls=True, get=None) reply = cli.args_handler(args) - assert "github" in reply + assert "github" in str(reply) def test_near(): args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \ (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False, - oldest=False, save=False, newest=False, near=True, alive=False, subdomain=False, known_urls=False, get=None, year=2020, month=7, day=15, hour=1, minute=1) + oldest=False, save=False, json=False, archive_url=False, newest=False, near=True, alive=False, subdomain=False, known_urls=False, get=None, year=2020, month=7, day=15, hour=1, minute=1) reply = cli.args_handler(args) - assert "202007" in reply + assert "202007" in str(reply) def test_get(): args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \ (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False, - oldest=False, save=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="url") + oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="url") reply = cli.args_handler(args) - assert "waybackpy" in reply + assert "waybackpy" in str(reply) args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \ (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False, - oldest=False, save=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="oldest") + oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="oldest") reply = cli.args_handler(args) - assert "waybackpy" in reply + assert "waybackpy" in str(reply) args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \ (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False, - oldest=False, save=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="newest") + oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="newest") reply = cli.args_handler(args) - assert "waybackpy" in reply + assert "waybackpy" in str(reply) if codecov_python: args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \ (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False, - oldest=False, save=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="save") + oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="save") reply = cli.args_handler(args) - assert "waybackpy" in reply + assert "waybackpy" in str(reply) args = argparse.Namespace(user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9 \ (KHTML, like Gecko) Version/8.0.8 Safari/600.8.9", url="https://pypi.org/user/akamhy/", total=False, version=False, - oldest=False, save=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="BullShit") + oldest=False, save=False, json=False, archive_url=False, newest=False, near=False, alive=False, subdomain=False, known_urls=False, get="BullShit") reply = cli.args_handler(args) - assert "get the source code of the" in reply + assert "get the source code of the" in str(reply) def test_args_handler(): args = argparse.Namespace(version=True) @@ -96,7 +108,7 @@ def test_args_handler(): args = argparse.Namespace(url=None, version=False) reply = cli.args_handler(args) - assert ("waybackpy %s" % (__version__)) in reply + assert ("waybackpy %s" % (__version__)) in str(reply) def test_main(): # This also tests the parse_args method in cli.py diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py index ef00378..c50ef35 100644 --- a/tests/test_wrapper.py +++ b/tests/test_wrapper.py @@ -2,7 +2,7 @@ import sys import pytest import random -import time + sys.path.append("..") import waybackpy.wrapper as waybackpy # noqa: E402 @@ -28,8 +28,7 @@ def test_dunders(): user_agent = "UA" target = waybackpy.Url(url, user_agent) assert "waybackpy.Url(url=%s, user_agent=%s)" % (url, user_agent) == repr(target) - assert len(target) == len(url) - assert str(target) == url + assert "en.wikipedia.org" in str(target) def test_archive_url_parser(): request_url = "https://amazon.com" @@ -47,7 +46,6 @@ def test_url_check(): def test_save(): # Test for urls that exist and can be archived. - time.sleep(10) url_list = [ "en.wikipedia.org", @@ -64,7 +62,7 @@ def test_save(): "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36", ) - archived_url1 = target.save() + archived_url1 = str(target.save()) assert url1 in archived_url1 if sys.version_info > (3, 6): @@ -73,18 +71,16 @@ def test_save(): with pytest.raises(Exception): url2 = "ha ha ha ha" waybackpy.Url(url2, user_agent) - time.sleep(5) url3 = "http://www.archive.is/faq.html" # Test for urls not allowed to archive by robot.txt. Doesn't works anymore. Find alternatives. # with pytest.raises(Exception): -# +# # target = waybackpy.Url( # url3, # "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) " # "Gecko/20100101 Firefox/25.0", # ) # target.save() -# time.sleep(5) # Non existent urls, test with pytest.raises(Exception): target = waybackpy.Url( @@ -100,7 +96,6 @@ def test_save(): def test_near(): - time.sleep(10) url = "google.com" target = waybackpy.Url( url, @@ -108,11 +103,10 @@ def test_near(): "(KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", ) archive_near_year = target.near(year=2010) - assert "2010" in archive_near_year + assert "2010" in str(archive_near_year) if sys.version_info > (3, 6): - time.sleep(5) - archive_near_month_year = target.near(year=2015, month=2) + archive_near_month_year = str(target.near(year=2015, month=2)) assert ( ("201502" in archive_near_month_year) or ("201501" in archive_near_month_year) @@ -124,9 +118,9 @@ def test_near(): "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246", ) - archive_near_hour_day_month_year = target.near( + archive_near_hour_day_month_year = str(target.near( year=2008, month=5, day=9, hour=15 - ) + )) assert ( ("2008050915" in archive_near_hour_day_month_year) or ("2008050914" in archive_near_hour_day_month_year) @@ -146,13 +140,22 @@ def test_near(): def test_oldest(): url = "github.com/akamhy/waybackpy" target = waybackpy.Url(url, user_agent) - assert "20200504141153" in target.oldest() + assert "20200504141153" in str(target.oldest()) +def test_json(): + url = "github.com/akamhy/waybackpy" + target = waybackpy.Url(url, user_agent) + assert "archived_snapshots" in str(target.JSON) + +def test_archive_url(): + url = "github.com/akamhy/waybackpy" + target = waybackpy.Url(url, user_agent) + assert "github.com/akamhy" in str(target.archive_url) def test_newest(): url = "github.com/akamhy/waybackpy" target = waybackpy.Url(url, user_agent) - assert url in target.newest() + assert url in str(target.newest()) def test_get(): @@ -195,4 +198,4 @@ def test_known_urls(): assert len(target.known_urls(alive=True, subdomain=True)) > 2 target = waybackpy.Url("akamhy.github.io", user_agent) - assert len(target.known_urls()) > 3 \ No newline at end of file + assert len(target.known_urls()) > 3 diff --git a/waybackpy/wrapper.py b/waybackpy/wrapper.py index 3c02409..711b061 100644 --- a/waybackpy/wrapper.py +++ b/waybackpy/wrapper.py @@ -72,7 +72,7 @@ class Url: self.JSON = self._JSON() # JSON of most recent archive self.archive_url = self._archive_url() # URL of archive self.timestamp = self._archive_timestamp() # timestamp for last archive - + def __repr__(self): return "waybackpy.Url(url=%s, user_agent=%s)" % (self.url, self.user_agent) @@ -82,20 +82,20 @@ class Url: def __len__(self): td_max = timedelta(days=999999999, hours=23, - minutes=59, - seconds=59, + minutes=59, + seconds=59, microseconds=999999) if self.timestamp == datetime.max: return td_max.days else: diff = datetime.utcnow() - self.timestamp return diff.days - + def _url_check(self): """Check for common URL problems.""" if "." not in self.url: raise URLError("'%s' is not a vaild URL." % self.url) - + def _JSON(self): request_url = "https://archive.org/wayback/available?url=%s" % ( self._clean_url(), @@ -106,13 +106,13 @@ class Url: response = _get_response(req) data_string = response.read().decode("UTF-8") data = json.loads(data_string) - + return data - + def _archive_url(self): """Get URL of archive.""" data = self.JSON - + if not data["archived_snapshots"]: archive_url = None else: @@ -122,13 +122,13 @@ class Url: "https://web.archive.org/web/", 1 ) - + return archive_url - + def _archive_timestamp(self): """Get timestamp of last archive.""" data = self.JSON - + if not data["archived_snapshots"]: time = datetime.max @@ -137,7 +137,7 @@ class Url: ["closest"] ["timestamp"], '%Y%m%d%H%M%S') - + return time def _clean_url(self): @@ -207,15 +207,15 @@ class Url: archive_url = archive_url.replace( "http://web.archive.org/web/", "https://web.archive.org/web/", 1 ) - + self.archive_url = archive_url self.timestamp = datetime.strptime(data["archived_snapshots"] ["closest"] - ["timestamp"], + ["timestamp"], '%Y%m%d%H%M%S') - + return self - + def oldest(self, year=1994): """Return the oldest Wayback Machine archive for this URL.""" @@ -285,5 +285,5 @@ class Url: tmp_url_list.append(url) url_list = tmp_url_list - + return url_list