Limit recursion to bases whose entries having a word type all contain the base

This commit is contained in:
Benjamin Loison 2024-04-01 01:41:09 +02:00
parent c90beee5fc
commit fca2788dd4
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8

11
main.py
View File

@@ -19,18 +19,25 @@ def treatSuffixes(prefix):
print(base) print(base)
params['qe'] = base params['qe'] = base
text = requests.get(url, params = params).text text = requests.get(url, params = params).text
# Be careful when changing how `base` is built: it is interpolated into the file path below, so it must never allow writing to an unintended folder or file.
with open(f'requests/{base}.html', 'w') as requestFile:
requestFile.write(text)
tree = html.fromstring(text) tree = html.fromstring(text)
rows = tree.xpath('//div[@class="main_row"]') rows = tree.xpath('//div[@class="main_row"]')
rowsLen = len(rows) rowsLen = len(rows)
assert rowsLen <= MAXIMUM_SUGGESTIONS, f'More than {MAXIMUM_SUGGESTIONS} rows!' assert rowsLen <= MAXIMUM_SUGGESTIONS, f'More than {MAXIMUM_SUGGESTIONS} rows!'
interestingEntries = True
for row in rows: for row in rows:
item = row.xpath('div[@class="main_item"]')[0] item = row.xpath('div[@class="main_item"]')[0]
entry = item.text_content() entry = item.text_content()
wordType = row.xpath('div[@class="main_wordtype"]') wordType = row.xpath('div[@class="main_wordtype"]')
if wordType != [] and item.attrib['lc'] == 'FR' and not entry in entries: if wordType != []:
if item.attrib['lc'] == 'FR' and not entry in entries:
print(len(entries), entry, wordType[0].text_content()) print(len(entries), entry, wordType[0].text_content())
entries.add(entry) entries.add(entry)
if rowsLen == MAXIMUM_SUGGESTIONS: if not base in entry:
interestingEntries = False
if rowsLen == MAXIMUM_SUGGESTIONS and interestingEntries:
treatSuffixes(base) treatSuffixes(base)
treatSuffixes('') treatSuffixes('')