Update wrapper.py

This commit is contained in:
Akash Mahanty
2021-01-02 01:10:21 +05:30
committed by GitHub
parent 5a81309f7f
commit 3b941f7311

View File

@@ -64,22 +64,30 @@ class Url:
self.url = url self.url = url
self.user_agent = user_agent self.user_agent = user_agent
self._url_check() # checks url validity on init. self._url_check() # checks url validity on init.
self._archive_url = None # URL of archive
self._timestamp = None # timestamp for last archive
self._alive_url_list = [] self._alive_url_list = []
def __repr__(self): def __repr__(self):
return "waybackpy.Url(url=%s, user_agent=%s)" % (self.url, self.user_agent) return "waybackpy.Url(url=%s, user_agent=%s)" % (self.url, self.user_agent)
def __str__(self): def __str__(self):
return "%s" % self.archive_url if not self._archive_url:
self._archive_url = self.archive_url
return "%s" % self._archive_url
def __len__(self): def __len__(self):
td_max = timedelta( td_max = timedelta(
days=999999999, hours=23, minutes=59, seconds=59, microseconds=999999 days=999999999, hours=23, minutes=59, seconds=59, microseconds=999999
) )
if self.timestamp == datetime.max:
if not self._timestamp:
self._timestamp = self.timestamp
if self._timestamp == datetime.max:
return td_max.days return td_max.days
diff = datetime.utcnow() - self.timestamp diff = datetime.utcnow() - self._timestamp
return diff.days return diff.days
def _url_check(self): def _url_check(self):
@@ -107,7 +115,7 @@ class Url:
archive_url = archive_url.replace( archive_url = archive_url.replace(
"http://web.archive.org/web/", "https://web.archive.org/web/", 1 "http://web.archive.org/web/", "https://web.archive.org/web/", 1
) )
self._archive_url = archive_url
return archive_url return archive_url
@property @property
@@ -116,14 +124,14 @@ class Url:
data = self.JSON data = self.JSON
if not data["archived_snapshots"]: if not data["archived_snapshots"]:
time = datetime.max ts = datetime.max
else: else:
time = datetime.strptime( ts = datetime.strptime(
data["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S" data["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
) )
self._timestamp = ts
return time return ts
def _clean_url(self): def _clean_url(self):
"""Fix the URL, if possible.""" """Fix the URL, if possible."""
@@ -134,8 +142,8 @@ class Url:
request_url = "https://web.archive.org/save/" + self._clean_url() request_url = "https://web.archive.org/save/" + self._clean_url()
headers = {"User-Agent": "%s" % self.user_agent} headers = {"User-Agent": "%s" % self.user_agent}
response = _get_response(request_url, params=None, headers=headers) response = _get_response(request_url, params=None, headers=headers)
self.archive_url = "https://" + _archive_url_parser(response.headers) self._archive_url = "https://" + _archive_url_parser(response.headers)
self.timestamp = datetime.utcnow() self._timestamp = datetime.utcnow()
return self return self
def get(self, url="", user_agent="", encoding=""): def get(self, url="", user_agent="", encoding=""):
@@ -190,8 +198,8 @@ class Url:
"http://web.archive.org/web/", "https://web.archive.org/web/", 1 "http://web.archive.org/web/", "https://web.archive.org/web/", 1
) )
self.archive_url = archive_url self._archive_url = archive_url
self.timestamp = datetime.strptime( self._timestamp = datetime.strptime(
data["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S" data["archived_snapshots"]["closest"]["timestamp"], "%Y%m%d%H%M%S"
) )