removed JSON from init; this was resulting in too much unnecessary traffic. Some users with thousands of URLs were blocked by IA (#53)

closes #52
This commit is contained in:
Akash Mahanty 2021-01-01 16:38:57 +05:30 committed by GitHub
parent da390ee8a3
commit 1b499a7594
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 2 deletions

3
.gitignore vendored
View File

@ -1,3 +1,6 @@
# Files generated while testing
*-urls-*.txt
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/
*.py[cod] *.py[cod]

View File

@ -64,7 +64,6 @@ class Url:
self.url = url self.url = url
self.user_agent = user_agent self.user_agent = user_agent
self._url_check() # checks url validity on init. self._url_check() # checks url validity on init.
self.JSON = self._JSON() # JSON of most recent archive
self.archive_url = self._archive_url() # URL of archive self.archive_url = self._archive_url() # URL of archive
self.timestamp = self._archive_timestamp() # timestamp for last archive self.timestamp = self._archive_timestamp() # timestamp for last archive
self._alive_url_list = [] self._alive_url_list = []
@ -90,7 +89,8 @@ class Url:
if "." not in self.url: if "." not in self.url:
raise URLError("'%s' is not a vaild URL." % self.url) raise URLError("'%s' is not a vaild URL." % self.url)
def _JSON(self): @property
def JSON(self):
endpoint = "https://archive.org/wayback/available" endpoint = "https://archive.org/wayback/available"
headers = {"User-Agent": "%s" % self.user_agent} headers = {"User-Agent": "%s" % self.user_agent}
payload = {"url": "%s" % self._clean_url()} payload = {"url": "%s" % self._clean_url()}