From 53561ec9c4187ff52e4903ae2af6dbd825072fa5 Mon Sep 17 00:00:00 2001 From: akamhy <64683866+akamhy@users.noreply.github.com> Date: Mon, 4 May 2020 08:56:01 +0530 Subject: [PATCH] Update wrapper.py --- waybackpy/wrapper.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/waybackpy/wrapper.py b/waybackpy/wrapper.py index 850cf32..b1e3050 100644 --- a/waybackpy/wrapper.py +++ b/waybackpy/wrapper.py @@ -30,9 +30,12 @@ class InvalidUrlError(Exception): Wayback machine respects these file, will not archive. """ +def clean_url(url): + return str(url).strip().replace(" ","_") + def save(url,UA=default_UA): base_save_url = "https://web.archive.org/save/" - request_url = base_save_url + url + request_url = base_save_url + clean_url(url) hdr = { 'User-Agent' : '%s' % UA } req = Request(request_url, headers=hdr) if "." not in url: @@ -62,7 +65,7 @@ def near( UA=default_UA, ): timestamp = str(year)+str(month)+str(day)+str(hour)+str(minute) - request_url = "https://archive.org/wayback/available?url=%s&timestamp=%s" % (str(url).strip(), str(timestamp)) + request_url = "https://archive.org/wayback/available?url=%s&timestamp=%s" % (clean_url(url), str(timestamp)) hdr = { 'User-Agent' : '%s' % UA } req = Request(request_url, headers=hdr) response = urlopen(req) #nosec