This commit is contained in:
Akash Mahanty 2020-10-03 16:58:11 +05:30
parent 6b3b2e2a7d
commit 1a81eb97fb
2 changed files with 7 additions and 6 deletions

View File

@@ -48,7 +48,8 @@ def _known_urls(obj, args):
if m:
domain = m.group(1)
else:
domain = "waybackpy-known"
domain = "domain-unknown"
dir_path = os.path.abspath(os.getcwd())
file_name = dir_path + "/%s-%d-urls.txt" % (domain, total_urls)
text = "\n".join(url_list) + "\n"
@@ -118,7 +119,7 @@ def parse_args(argv):
userAgentArg = parser.add_argument_group('User Agent')
userAgentArg.add_argument("--user_agent", "-ua", help="User agent, default user_agent is \"waybackpy python package - https://github.com/akamhy/waybackpy\"")
saveArg = parser.add_argument_group("Create new archive/save URL")
saveArg.add_argument("--save", "-s", action='store_true', help="Save the URL on the Wayback machine")

View File

@@ -100,7 +100,7 @@ class Url:
"""Return the source code of the supplied URL.
If encoding is not supplied, it is auto-detected from the response.
"""
if not url:
url = self._clean_url()
@@ -190,13 +190,13 @@ class Url:
if subdomain:
request_url = (
"https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey"
"https://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey"
% self._clean_url()
)
else:
request_url = (
"http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey"
"http://web.archive.org/cdx/search/cdx?url=%s/*&output=json&fl=original&collapse=urlkey"
% self._clean_url()
)
@@ -213,7 +213,7 @@ class Url:
for url in url_list:
try:
urlopen(url)
urlopen(url) # nosec
except:
continue