Add cached_save flag to check whether the saved archive is more than 30 minutes old

This commit is contained in:
Akash Mahanty 2021-01-16 12:06:08 +05:30
parent 40233eb115
commit ffe0810b12
2 changed files with 16 additions and 14 deletions

View File

@@ -22,28 +22,28 @@ def _unix_ts_to_wayback_ts(unix_ts):
return datetime.utcfromtimestamp(int(unix_ts)).strftime("%Y%m%d%H%M%S") return datetime.utcfromtimestamp(int(unix_ts)).strftime("%Y%m%d%H%M%S")
def _add_payload(self, payload): def _add_payload(instance, payload):
if self.start_timestamp: if instance.start_timestamp:
payload["from"] = self.start_timestamp payload["from"] = instance.start_timestamp
if self.end_timestamp: if instance.end_timestamp:
payload["to"] = self.end_timestamp payload["to"] = instance.end_timestamp
if self.gzip != True: if instance.gzip != True:
payload["gzip"] = "false" payload["gzip"] = "false"
if self.match_type: if instance.match_type:
payload["matchType"] = self.match_type payload["matchType"] = instance.match_type
if self.filters and len(self.filters) > 0: if instance.filters and len(instance.filters) > 0:
for i, f in enumerate(self.filters): for i, f in enumerate(instance.filters):
payload["filter" + str(i)] = f payload["filter" + str(i)] = f
if self.collapses and len(self.collapses) > 0: if instance.collapses and len(instance.collapses) > 0:
for i, f in enumerate(self.collapses): for i, f in enumerate(instance.collapses):
payload["collapse" + str(i)] = f payload["collapse" + str(i)] = f
payload["url"] = self.url payload["url"] = instance.url
def _ts(timestamp, data): def _ts(timestamp, data):
@@ -220,7 +220,7 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
WaybackError with an error message. WaybackError with an error message.
""" """
if "save redirected" in header: if "save redirected" in header and instance:
time.sleep(60) # makeup for archive time time.sleep(60) # makeup for archive time
now = datetime.utcnow().timetuple() now = datetime.utcnow().timetuple()
@@ -279,6 +279,7 @@ def _archive_url_parser(header, url, latest_version=__version__, instance=None):
archive_url = newest_archive.archive_url archive_url = newest_archive.archive_url
m = re.search(r"web\.archive\.org/web/[0-9]{14}/.*", archive_url) m = re.search(r"web\.archive\.org/web/[0-9]{14}/.*", archive_url)
if m: if m:
instance.cached_save = True
return m.group(0) return m.group(0)
if __version__ == latest_version: if __version__ == latest_version:

View File

@@ -23,6 +23,7 @@ class Url:
self.timestamp = None self.timestamp = None
self._JSON = None self._JSON = None
self.latest_version = None self.latest_version = None
self.cached_save = False
def __repr__(self): def __repr__(self):
return "waybackpy.Url(url={url}, user_agent={user_agent})".format( return "waybackpy.Url(url={url}, user_agent={user_agent})".format(