diff --git a/bnp_pdf_statement_parser.py b/bnp_pdf_statement_parser.py index 8ee6879..3f79f7e 100755 --- a/bnp_pdf_statement_parser.py +++ b/bnp_pdf_statement_parser.py @@ -34,21 +34,25 @@ for folder in os.listdir(): for file in os.listdir(folder): #folder = '2022' #file = '20220321.pdf' - print(folder, file) filePath = f'{folder}/{file}' + print(filePath) content = getTextFromPdf(filePath) lines = content.splitlines() started = False firstPage = True - payment = [] initialAmount = None + currentAmount = None + date = None + comment = [] for line in lines: if not started: # We are interested in the content after this line: if soldeCrediteurAuRegex.match(line) != None or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage): if soldeCrediteurAuRegex.match(line): initialAmount = float(soldeCrediteurAuRegex.sub('', line).replace(',', '.').replace(' ', '')) - print(initialAmount) + currentAmount = initialAmount + print('Initial amount', initialAmount) + print() started = True continue else: @@ -61,8 +65,16 @@ for folder in os.listdir(): elif line.startswith('TOTAL DES OPERATIONS'): break if firstLineOfPaymentRegex.match(line) != None: - print() - print(line) + if date != None: + print(date, valeur, amount, currentAmount) + print("\n".join(comment)) + print() + date, valeur, amount = line.split() + amount = float(amount.replace(',', '.')) + currentAmount -= amount + comment = [] + else: + comment += [line] break break