diff --git a/waybackpy/availability_api.py b/waybackpy/availability_api.py index 3864da4..324dae4 100644 --- a/waybackpy/availability_api.py +++ b/waybackpy/availability_api.py @@ -64,14 +64,12 @@ class WaybackMachineAvailabilityAPI: Converts Unix time to wayback Machine timestamp and the Wayback Machine timestamp format is yyyyMMddhhmmss. """ - return datetime.utcfromtimestamp(int(unix_timestamp)).strftime("%Y%m%d%H%M%S") def __repr__(self) -> str: """ Same as string representation, just return the archive URL as a string. """ - return str(self) def __str__(self) -> str: @@ -80,7 +78,6 @@ class WaybackMachineAvailabilityAPI: call was successfully made then return the archive URL as a string. Else returns "". """ - # String should not return anything other than a string object # So, if a string repr is asked for before making any API requests # just return "" @@ -103,7 +100,6 @@ class WaybackMachineAvailabilityAPI: to increase or decrease the default time gap between two successive API calls, but it is not recommended to increase it. """ - time_diff = int(time.time()) - self.last_api_call_unix_time sleep_time = self.api_call_time_gap - time_diff @@ -136,7 +132,6 @@ class WaybackMachineAvailabilityAPI: If you get an URL as a response form the availability API it is guaranteed that you can get the datetime object from the timestamp. """ - if self.json is None or "archived_snapshots" not in self.json: return datetime.max @@ -161,7 +156,6 @@ class WaybackMachineAvailabilityAPI: the timestamp if found and if not found raises ArchiveNotInAvailabilityAPIResponse. """ - archive_url = "" data = self.json @@ -206,7 +200,6 @@ class WaybackMachineAvailabilityAPI: Prepends zero before the year, month, day, hour and minute so that they are conformable with the YYYYMMDDhhmmss wayback machine timestamp format. """ - return "".join( str(kwargs[key]).zfill(2) for key in ["year", "month", "day", "hour", "minute"] @@ -218,7 +211,6 @@ class WaybackMachineAvailabilityAPI: because Wayback Machine was started in May, 1996 and it is assumed that there would be no archive older than January 1, 1994. """ - return self.near(year=1994, month=1, day=1) def newest(self) -> "WaybackMachineAvailabilityAPI": @@ -228,7 +220,6 @@ class WaybackMachineAvailabilityAPI: We assume that wayback machine can not archive the future of a webpage. """ - return self.near(unix_timestamp=int(time.time())) def near( @@ -252,7 +243,6 @@ class WaybackMachineAvailabilityAPI: And finally invoking the json method to make the API call then returns the instance. """ - if unix_timestamp: timestamp = self.unix_timestamp_to_wayback_timestamp(unix_timestamp) else: diff --git a/waybackpy/cdx_api.py b/waybackpy/cdx_api.py index bfb474e..7f8b2a4 100644 --- a/waybackpy/cdx_api.py +++ b/waybackpy/cdx_api.py @@ -90,7 +90,6 @@ class WaybackMachineCDXServerAPI: not matter but for queries where the number of pages are less this method chooses accuracy over the pagination API. """ - # number of pages that will returned by the pagination API. # get_total_pages adds the showNumPages=true param to pagination API # requests. diff --git a/waybackpy/cdx_utils.py b/waybackpy/cdx_utils.py index 3585a2a..8826b21 100644 --- a/waybackpy/cdx_utils.py +++ b/waybackpy/cdx_utils.py @@ -23,7 +23,6 @@ def get_total_pages(url: str, user_agent: str = DEFAULT_USER_AGENT) -> int: URL makes the CDX server return an integer which is the number of pages of CDX pages available for us to query using the pagination API. """ - endpoint = "https://web.archive.org/cdx/search/cdx?" payload = {"showNumPages": "true", "url": str(url)} headers = {"User-Agent": user_agent} @@ -43,7 +42,6 @@ def full_url(endpoint: str, params: Dict[str, Any]) -> str: such as filter and collapse and this function adds them without overwriting earlier added arguments. """ - if not params: return endpoint _full_url = endpoint if endpoint.endswith("?") else (endpoint + "?") @@ -67,7 +65,6 @@ def get_response( """ Make get request to the CDX server and return the response. """ - session = requests.Session() retries_ = Retry( @@ -87,7 +84,6 @@ def check_filters(filters: List[str]) -> None: Check that the filter arguments passed by the end-user are valid. If not valid then raise WaybackError. """ - if not isinstance(filters, list): raise WaybackError("filters must be a list.") @@ -110,7 +106,6 @@ def check_collapses(collapses: List[str]) -> bool: Check that the collapse arguments passed by the end-user are valid. If not valid then raise WaybackError. """ - if not isinstance(collapses, list): raise WaybackError("collapses must be a list.") @@ -138,7 +133,6 @@ def check_match_type(match_type: Optional[str], url: str) -> bool: Check that the match_type argument passed by the end-user is valid. If not valid then raise WaybackError. """ - legal_match_type = ["exact", "prefix", "host", "domain"] if not match_type: diff --git a/waybackpy/save_api.py b/waybackpy/save_api.py index d80f477..aeefacd 100644 --- a/waybackpy/save_api.py +++ b/waybackpy/save_api.py @@ -58,7 +58,6 @@ class WaybackMachineSaveAPI: else invoke the save method to save the archive which returns the archive thus we return the methods return value. """ - if self._archive_url: return self._archive_url @@ -80,7 +79,6 @@ class WaybackMachineSaveAPI: to be very unreliable thus if it fails first check opening the response URL yourself in the browser. """ - session = requests.Session() retries = Retry( total=self.total_save_retries, @@ -120,7 +118,6 @@ class WaybackMachineSaveAPI: archive URL in the headers and finally look in the response URL for the archive URL. """ - regex1 = r"Content-Location: (/web/[0-9]{14}/.*)" match = re.search(regex1, str(self.headers)) if match: @@ -156,7 +153,6 @@ class WaybackMachineSaveAPI: If tries are multiple of 3 sleep 10 seconds else sleep 5 seconds. """ - sleep_seconds = 5 if tries % 3 == 0: sleep_seconds = 10 @@ -176,7 +172,6 @@ class WaybackMachineSaveAPI: the Wayback Machine to serve cached archive if last archive was captured before last 45 minutes. """ - regex = r"https?://web\.archive.org/web/([0-9]{14})/http" match = re.search(regex, str(self._archive_url)) @@ -205,7 +200,6 @@ class WaybackMachineSaveAPI: Raises MaximumSaveRetriesExceeded is maximum retries are exhausted but still we were unable to retrieve the archive from the Wayback Machine. """ - self.saved_archive = None tries = 0