Add the resumable part of the code
This commit is contained in:
parent
b7386c1ad8
commit
7c376a65a1
20
main.py
20
main.py
@ -3,6 +3,7 @@
|
||||
import requests
|
||||
from lxml import html
|
||||
import string
|
||||
from pathlib import Path
|
||||
|
||||
charset = string.ascii_lowercase
|
||||
url = 'https://www.linguee.fr/francais-anglais/search'
|
||||
@ -14,16 +15,25 @@ MAXIMUM_SUGGESTIONS = 4
|
||||
|
||||
entries = set()
|
||||
|
||||
REQUESTS_FOLDER_PATH = 'requests'
|
||||
Path(REQUESTS_FOLDER_PATH).mkdir(exist_ok = True)
|
||||
|
||||
def treatSuffixes(prefix):
|
||||
#print(prefix)
|
||||
for char in charset:
|
||||
base = prefix + char
|
||||
print(base)
|
||||
params['qe'] = base
|
||||
text = requests.get(url, params = params).text
|
||||
# Caution: `base` is used as a file name; if the way it is built ever changes, make sure it cannot write outside the requests folder.
|
||||
with open(f'requests/{base}.html', 'w') as requestFile:
|
||||
requestFile.write(text)
|
||||
baseFilePath = f'{REQUESTS_FOLDER_PATH}/{base}.html'
|
||||
try:
|
||||
with open(baseFilePath) as requestFile:
|
||||
text = requestFile.read()
|
||||
print('From file')
|
||||
except:
|
||||
params['qe'] = base
|
||||
text = requests.get(url, params = params).text
|
||||
# Caution: `base` is used as a file name; if the way it is built ever changes, make sure it cannot write outside the requests folder.
|
||||
with open(baseFilePath, 'w') as requestFile:
|
||||
requestFile.write(text)
|
||||
tree = html.fromstring(text)
|
||||
rows = tree.xpath('//div[@class="main_row"]')
|
||||
rowsLen = len(rows)
|
||||
|
Loading…
Reference in New Issue
Block a user