From 5f439c0e610a4b6f8ce4f2739302b253614845ef Mon Sep 17 00:00:00 2001
From: Benjamin Loison <benjamin_loison@users.noreply.gitea.lemnoslife.com>
Date: Wed, 21 Jun 2023 00:57:16 +0200
Subject: [PATCH] Add `initialAmount` parsing

---
 bnp_pdf_statement_parser.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/bnp_pdf_statement_parser.py b/bnp_pdf_statement_parser.py
index c37839a..8ee6879 100755
--- a/bnp_pdf_statement_parser.py
+++ b/bnp_pdf_statement_parser.py
@@ -28,11 +28,12 @@ def getTextFromPdf(pdfPath):
 
 firstLineOfPaymentRegex = re.compile('\d{2}\.\d{2} \d{2}\.\d{2} \d+,\d{2}')
 endPageAfterTheFirstOneRegex = re.compile('P\. \d+/\d+')
+soldeCrediteurAuRegex = re.compile(r'SOLDE CREDITEUR AU \d{2}\.\d{2}\.\d{4}')  # raw string: \d and \. are regex escapes, not string escapes
 
 for folder in os.listdir():
     for file in os.listdir(folder):
-        folder = '2022'
-        file = '20220321.pdf'
+        #folder = '2022'
+        #file = '20220321.pdf'
         print(folder, file)
         filePath = f'{folder}/{file}'
         content = getTextFromPdf(filePath)
@@ -40,10 +41,14 @@ for folder in os.listdir():
         started = False
         firstPage = True
         payment = []
+        initialAmount = None
         for line in lines:
             if not started:
                 # We are interested in the content after this line:
-                if line.startswith('SOLDE CREDITEUR AU') or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage):
+                if soldeCrediteurAuRegex.match(line) is not None or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage):
+                    if soldeCrediteurAuRegex.match(line):
+                        initialAmount = float(soldeCrediteurAuRegex.sub('', line).replace(',', '.').replace(' ', ''))
+                        print(initialAmount)
                     started = True
                     continue
             else: