Add initialAmount parsing

This commit is contained in:
Benjamin Loison 2023-06-21 00:57:16 +02:00
parent 6ced1c0db0
commit 5f439c0e61
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8

View File

@@ -28,11 +28,12 @@ def getTextFromPdf(pdfPath):
firstLineOfPaymentRegex = re.compile('\d{2}\.\d{2} \d{2}\.\d{2} \d+,\d{2}') firstLineOfPaymentRegex = re.compile('\d{2}\.\d{2} \d{2}\.\d{2} \d+,\d{2}')
endPageAfterTheFirstOneRegex = re.compile('P\. \d+/\d+') endPageAfterTheFirstOneRegex = re.compile('P\. \d+/\d+')
soldeCrediteurAuRegex = re.compile('SOLDE CREDITEUR AU \d{2}\.\d{2}\.\d{4}')
for folder in os.listdir(): for folder in os.listdir():
for file in os.listdir(folder): for file in os.listdir(folder):
folder = '2022' #folder = '2022'
file = '20220321.pdf' #file = '20220321.pdf'
print(folder, file) print(folder, file)
filePath = f'{folder}/{file}' filePath = f'{folder}/{file}'
content = getTextFromPdf(filePath) content = getTextFromPdf(filePath)
@@ -40,10 +41,14 @@ for folder in os.listdir():
started = False started = False
firstPage = True firstPage = True
payment = [] payment = []
initialAmount = None
for line in lines: for line in lines:
if not started: if not started:
# We are interested in the content after this line: # We are interested in the content after this line:
if line.startswith('SOLDE CREDITEUR AU') or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage): if soldeCrediteurAuRegex.match(line) != None or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage):
if soldeCrediteurAuRegex.match(line):
initialAmount = float(soldeCrediteurAuRegex.sub('', line).replace(',', '.').replace(' ', ''))
print(initialAmount)
started = True started = True
continue continue
else: else: