Add the resumable part of the code
This commit is contained in:
parent
b7386c1ad8
commit
7c376a65a1
20
main.py
20
main.py
@ -3,6 +3,7 @@
|
|||||||
import requests
|
import requests
|
||||||
from lxml import html
|
from lxml import html
|
||||||
import string
|
import string
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
charset = string.ascii_lowercase
|
charset = string.ascii_lowercase
|
||||||
url = 'https://www.linguee.fr/francais-anglais/search'
|
url = 'https://www.linguee.fr/francais-anglais/search'
|
||||||
@ -14,16 +15,25 @@ MAXIMUM_SUGGESTIONS = 4
|
|||||||
|
|
||||||
entries = set()
|
entries = set()
|
||||||
|
|
||||||
|
REQUESTS_FOLDER_PATH = 'requests'
|
||||||
|
Path(REQUESTS_FOLDER_PATH).mkdir(exist_ok = True)
|
||||||
|
|
||||||
def treatSuffixes(prefix):
|
def treatSuffixes(prefix):
|
||||||
#print(prefix)
|
#print(prefix)
|
||||||
for char in charset:
|
for char in charset:
|
||||||
base = prefix + char
|
base = prefix + char
|
||||||
print(base)
|
print(base)
|
||||||
params['qe'] = base
|
baseFilePath = f'{REQUESTS_FOLDER_PATH}/{base}.html'
|
||||||
text = requests.get(url, params = params).text
|
try:
|
||||||
# Be careful when changing how `base` is built: it is used in the file path, so it must not allow writing files into unintended folders.
|
with open(baseFilePath) as requestFile:
|
||||||
with open(f'requests/{base}.html', 'w') as requestFile:
|
text = requestFile.read()
|
||||||
requestFile.write(text)
|
print('From file')
|
||||||
|
except:
|
||||||
|
params['qe'] = base
|
||||||
|
text = requests.get(url, params = params).text
|
||||||
|
# Be careful when changing how `base` is built: it is used in the file path, so it must not allow writing files into unintended folders.
|
||||||
|
with open(baseFilePath, 'w') as requestFile:
|
||||||
|
requestFile.write(text)
|
||||||
tree = html.fromstring(text)
|
tree = html.fromstring(text)
|
||||||
rows = tree.xpath('//div[@class="main_row"]')
|
rows = tree.xpath('//div[@class="main_row"]')
|
||||||
rowsLen = len(rows)
|
rowsLen = len(rows)
|
||||||
|
Loading…
Reference in New Issue
Block a user