"""Walk the bookys-ebooks sitemaps and print the title of every book page.

Sitemaps ``items1.xml`` .. ``items202.xml`` are fetched in order.  Every
entry whose ``<loc>`` starts with the ``/livres/`` prefix is downloaded and
the text of its main ``<h1>`` heading is printed to stdout.
"""
import sys
from xml.etree.ElementTree import fromstring

import requests
from lxml import html

BASE_URL = 'https://ww9.bookys-ebooks.com'
BOOK_URL_PREFIX = f'{BASE_URL}/livres/'
# Absolute path to the title heading in the book page layout.
TITLE_XPATH = '/html/body/div/div/div[2]/div[1]/div/div[2]/div[2]/h1'
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30


def getContentFromURL(url, timeout=REQUEST_TIMEOUT):
    """Return the decoded body of a GET request to *url*.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server; without it a stalled
            connection would block the whole crawl forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were real content.
    response.raise_for_status()
    return response.text


def _extract_title(page_source):
    """Return the stripped ``<h1>`` title of a book page, or None if absent."""
    headings = html.fromstring(page_source).xpath(TITLE_XPATH)
    if not headings or headings[0].text is None:
        return None
    return headings[0].text.strip()


# Running count of book pages seen so far.
c = 0
# 36 is the first `i` value containing `livres`.
for i in range(1, 203):
    print(i)
    url = f'{BASE_URL}/sitemaps/items{i}.xml'
    sitemapContent = getContentFromURL(url)
    sitemapXML = fromstring(sitemapContent)
    for entry in sitemapXML:
        # `{*}` matches the <loc> child regardless of the sitemap namespace.
        loc = entry.findtext('{*}loc')
        # findtext() yields None when the entry has no <loc> child.
        if loc is None or not loc.startswith(BOOK_URL_PREFIX):
            continue
        print(i, c)
        print(loc)
        title = _extract_title(getContentFromURL(loc))
        if title is None:
            # A page with an unexpected layout should not abort the crawl.
            print(f'no title found at {loc}', file=sys.stderr)
        else:
            print(title)
        print()
        c += 1