From b75d5fc86fc3e63d0e7500c2c9547a6ac1a7b6c0 Mon Sep 17 00:00:00 2001 From: Benjamin Loison Date: Tue, 1 Oct 2024 20:35:43 +0200 Subject: [PATCH] Add WIP monthly debit and credit plot --- bnp_pdf_statement_parser.py | 44 +++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/bnp_pdf_statement_parser.py b/bnp_pdf_statement_parser.py index e036db4..d4f0273 100755 --- a/bnp_pdf_statement_parser.py +++ b/bnp_pdf_statement_parser.py @@ -1,10 +1,12 @@ -#!/usr/bin/python3 +#!/usr/bin/env python # Depends on `pdftotext`. import os import subprocess import re +import matplotlib.pyplot as plt +import matplotlib.ticker as ticker path = f'/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/' @@ -27,12 +29,17 @@ def execute(command): def getTextFromPdf(pdfPath): return execute(['pdftotext', '-raw', pdfPath, '-']) -firstLineOfPaymentRegex = re.compile('\d{2}\.\d{2} \d{2}\.\d{2} \d+,\d{2}') -endPageAfterTheFirstOneRegex = re.compile('P\. \d+/\d+') -soldeCrediteurAuRegex = re.compile('SOLDE CREDITEUR AU \d{2}\.\d{2}\.\d{4}') +firstLineOfPaymentRegex = re.compile('\\d{2}\\.\\d{2} \\d{2}\\.\\d{2} \\d+,\\d{2}') +endPageAfterTheFirstOneRegex = re.compile('P\\. \\d+/\\d+') +soldeCrediteurAuRegex = re.compile('SOLDE CREDITEUR AU \\d{2}\\.\\d{2}\\.\\d{4}') +totalDesOperationsRegex = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([0-9 ]+,\\d{2})\\ ([0-9 ]+,\\d{2})') -for folder in os.listdir(): - for file in os.listdir(folder): +PRINT_TRANSACTIONS = False + +totalMonthlyDebits = [] +totalMonthlyCredits = [] +for folder in sorted(os.listdir()): + for file in sorted(os.listdir(folder)): #folder = '2022' #file = '20220321.pdf' filePath = f'{folder}/{file}' @@ -64,9 +71,15 @@ for folder in os.listdir(): continue # We aren't interested in the content after this line elif line.startswith('TOTAL DES OPERATIONS'): + totalDesOperationsRegexMatch = totalDesOperationsRegex.match(line) + totalMonthlyDebit, totalMonthlyCredit = [float(group.replace(',', '.').replace(' ', '')) for group in totalDesOperationsRegexMatch.groups()] + print(f'Total monthly debit: {totalMonthlyDebit}') + print(f'Total monthly credit: {totalMonthlyCredit}') + totalMonthlyDebits += [totalMonthlyDebit] + totalMonthlyCredits += [totalMonthlyCredit] break if firstLineOfPaymentRegex.match(line) is not None: - if date is not None: + if date is not None and PRINT_TRANSACTIONS: print(date, valeur, amount, currentAmount) print('\n'.join(comment)) print() @@ -76,6 +89,19 @@ for folder in os.listdir(): comment = [] else: comment += [line] - break - break + #break + #break +fig, ax = plt.subplots() +plt.title('Monthly debits and credits') +plt.xlabel('Date') +plt.ylabel('€') +ALPHA = 0.5 +xTicks = range(len(totalMonthlyDebits)) +plt.bar(xTicks, totalMonthlyDebits, alpha = ALPHA, label = 'Debit') +plt.bar(xTicks, totalMonthlyCredits, alpha = ALPHA, label = 'Credit') +plt.legend() + +ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,}')) + +plt.show() \ No newline at end of file