26 lines
701 B
Python
26 lines
701 B
Python
|
import xml.etree.ElementTree as ET

import requests
|
||
|
|
||
|
def getContentFromURL(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole crawl.

    Returns:
        The response body decoded to ``str`` (``Response.text``). The HTTP
        status code is not checked, so the body of an error page is returned
        like any other response.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
    """
    return requests.get(url, timeout=timeout).text
|
||
|
|
||
|
# Site being crawled; sitemap and book-page URLs are both built from this prefix.
BASE_URL = 'https://ww9.bookys-ebooks.com'
BOOK_URL_PREFIX = f'{BASE_URL}/livres/'


def _local_name(tag):
    """Return an element tag without its ``{namespace}`` prefix."""
    return tag.rsplit('}', 1)[-1]


def _sitemap_locs(sitemap_xml):
    """Return the ``<loc>`` text of each ``<url>`` element directly under the root.

    Tags are matched by local name so the lookup works whether or not the
    document declares an XML namespace. Entries without a ``<loc>`` are
    omitted. Raises ``xml.etree.ElementTree.ParseError`` on malformed XML.
    """
    root = ET.fromstring(sitemap_xml)
    locs = []
    for url_node in root:
        if _local_name(url_node.tag) != 'url':
            continue
        for child in url_node:
            if _local_name(child.tag) == 'loc' and child.text:
                locs.append(child.text.strip())
                break
    return locs


def _extract_title(html):
    """Return the first line of the page's ``<title>`` text, or ``None`` if absent.

    The text is cut at ``</title>`` so a closing tag on the same line is not
    included in the result.
    """
    _, marker, rest = html.partition('<title>')
    if not marker:
        return None
    text = rest.partition('</title>')[0].strip()
    return text.splitlines()[0] if text else ''


c = 0  # running count of book pages visited so far

# Sitemap files are numbered; this run covers items36.xml .. items202.xml.
# NOTE(review): the original line carried a commented-out "1, 203", so the
# start of 36 looks like a manual resume point - confirm before a full crawl.
for i in range(36, 203):
    print(i)
    url = f'{BASE_URL}/sitemaps/items{i}.xml'
    sitemapContent = getContentFromURL(url)
    # The first <url> entry of every sitemap is skipped, as in the original script.
    for loc in _sitemap_locs(sitemapContent)[1:]:
        if not loc.startswith(BOOK_URL_PREFIX):
            continue
        print(i, c)
        print(loc)
        title = _extract_title(getContentFromURL(loc))
        # A page without a <title> no longer aborts the crawl with IndexError.
        print(title if title is not None else '(no <title> found)')
        print()
        c += 1
|
||
|
|