import string

import requests
from lxml import html

# Characters appended to a prefix when exploring longer search queries.
charset = string.ascii_lowercase
url = 'https://www.linguee.fr/francais-anglais/search'
# Query parameters shared by every request; the per-request 'qe' (the search
# text) is added on a copy inside treatSuffixes.
params = {
    'ch': 0
}

# Largest number of "main_item" results a single response is expected to hold.
# A response holding exactly this many is treated as possibly truncated, which
# is what triggers exploration of longer prefixes.
MAXIMUM_SUGGESTIONS = 4

# Seconds to wait on the server before giving up on a request. Without a
# timeout, one stalled connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 10

# Every distinct suggestion text seen so far, across all recursive calls.
entries = set()


def treatSuffixes(prefix):
    """Collect suggestions for every one-character extension of ``prefix``.

    For each character in ``charset``, query ``url`` with ``prefix + char``
    as the ``qe`` parameter, extract the text of every
    ``<div class="main_item">`` element, and record unseen texts in the
    module-level ``entries`` set (printing each new one with its index).
    When a response contains exactly ``MAXIMUM_SUGGESTIONS`` items, recurse
    on that extended prefix.

    Args:
        prefix: Query text to extend; the empty string starts from scratch.

    Raises:
        AssertionError: If a response holds more than
            ``MAXIMUM_SUGGESTIONS`` items.
        requests.exceptions.RequestException: On network failure or timeout.

    NOTE(review): HTTP error statuses are not checked; an error page without
    any "main_item" element is indistinguishable from "no suggestions".
    """
    print(prefix)
    for char in charset:
        base = prefix + char
        # Build the query per request rather than mutating the shared
        # module-level ``params`` dict.
        query = {**params, 'qe': base}
        text = requests.get(url, params=query, timeout=REQUEST_TIMEOUT).text
        # lxml raises ParserError on an empty document, so an empty body is
        # handled as "no suggestions" instead of aborting the crawl.
        if text.strip():
            items = html.fromstring(text).xpath('//div[@class="main_item"]')
        else:
            items = []
        itemsLen = len(items)
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O``; the exception type and message are unchanged.
        if itemsLen > MAXIMUM_SUGGESTIONS:
            raise AssertionError(f'More than {MAXIMUM_SUGGESTIONS} items!')
        for item in items:
            entry = item.text_content()
            if entry not in entries:
                print(len(entries), entry)
                entries.add(entry)
        # A full page may hide further suggestions: refine the prefix.
        if itemsLen == MAXIMUM_SUGGESTIONS:
            treatSuffixes(base)


treatSuffixes('')