Add resumable scraping: reuse responses already saved in the requests folder instead of re-fetching them

This commit is contained in:
Benjamin Loison 2024-04-02 02:04:28 +02:00
parent b7386c1ad8
commit 7c376a65a1
No known key found for this signature in database

12
main.py
View File

@ -3,6 +3,7 @@
import requests import requests
from lxml import html from lxml import html
import string import string
from pathlib import Path
charset = string.ascii_lowercase charset = string.ascii_lowercase
url = 'https://www.linguee.fr/francais-anglais/search' url = 'https://www.linguee.fr/francais-anglais/search'
@ -14,15 +15,24 @@ MAXIMUM_SUGGESTIONS = 4
entries = set() entries = set()
REQUESTS_FOLDER_PATH = 'requests'
Path(REQUESTS_FOLDER_PATH).mkdir(exist_ok = True)
def treatSuffixes(prefix): def treatSuffixes(prefix):
#print(prefix) #print(prefix)
for char in charset: for char in charset:
base = prefix + char base = prefix + char
print(base) print(base)
baseFilePath = f'{REQUESTS_FOLDER_PATH}/{base}.html'
try:
with open(baseFilePath) as requestFile:
text = requestFile.read()
print('From file')
except:
params['qe'] = base params['qe'] = base
text = requests.get(url, params = params).text text = requests.get(url, params = params).text
# Be careful when changing how `base` is built: it must never allow writing files outside the requests folder. # Be careful when changing how `base` is built: it must never allow writing files outside the requests folder.
with open(f'requests/{base}.html', 'w') as requestFile: with open(baseFilePath, 'w') as requestFile:
requestFile.write(text) requestFile.write(text)
tree = html.fromstring(text) tree = html.fromstring(text)
rows = tree.xpath('//div[@class="main_row"]') rows = tree.xpath('//div[@class="main_row"]')