#!/usr/bin/env python
"""Plot monthly differences and opening balances read from bank statement PDFs.

The script changes directory to ``PATH``, lists the statements of every
account folder other than ``MAIN_BANK_ACCOUNT`` (printing only), then walks
the statements of the main account. For each statement it reads the opening
balance from the line matched by ``SOLDE_CREDITEUR_AU_REGEX`` and the monthly
debit/credit totals from the ``TOTAL DES OPERATIONS`` line, and finally draws
a bar chart of the monthly difference and of the opening balance.

NOTE(review): the helpers and regexes imported from ``utils`` are not visible
here; the comments below describe only how this script uses them.
"""

import os
from datetime import datetime

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

from utils import (
    getTextFromPdf,
    getDatetimeFromFileName,
    getMonthIndexSinceEpoch,
    getMonthNameFromMonthIndex,
    FIRST_LINE_OF_PAYMENT_REGEX,
    END_PAGE_AFTER_THE_FIRST_ONE_REGEX,
    SOLDE_CREDITEUR_AU_REGEX,
    TOTAL_DES_OPERATIONS_REGEX,
    TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX,
)

PATH = '/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/'

os.chdir(PATH)

PRINT_TRANSACTIONS = False
MAIN_BANK_ACCOUNT = 'compte_de_cheques'


def toFloat(text):
    """Convert an amount written with a decimal comma and space separators to a float."""
    return float(text.replace(',', '.').replace(' ', ''))


# As far as I know there was no debit yet.
otherBankAccountsCredits = {}

for folder in os.listdir():
    if folder != MAIN_BANK_ACCOUNT:
        print(folder)
        for file in os.listdir(folder):
            print(file)
            fileDatetime = getDatetimeFromFileName(file)
            print(fileDatetime)
            # TODO: accumulate the credits of the other accounts per statement
            # date in `otherBankAccountsCredits`; until then it stays empty.

os.chdir(f'{MAIN_BANK_ACCOUNT}/')

# The working directory no longer changes, so this check is evaluated once.
inMainBankAccount = os.getcwd().endswith(f'/{MAIN_BANK_ACCOUNT}')

totalMonthlyDebits = []
totalMonthlyCredits = []
totalMonthlyDifferences = []
totals = []

firstDatetime = None
lastDatetime = None

for folder in sorted(os.listdir()):
    for file in sorted(os.listdir(folder)):
        filePath = f'{folder}/{file}'
        print(filePath)
        fileDatetime = getDatetimeFromFileName(file)
        if firstDatetime is None:
            firstDatetime = fileDatetime
        # Tracked here rather than read from the loop variable after the loop,
        # which would be undefined (or stale) when no statement is found.
        lastDatetime = fileDatetime

        content = getTextFromPdf(filePath)
        lines = content.splitlines()

        # `started` is True while the current line belongs to a transaction table.
        started = False
        firstPage = True
        initialAmount = None
        currentAmount = None
        date = None
        comment = []

        for line in lines:
            if not started:
                # We are interested in the content after this line:
                soldeMatch = SOLDE_CREDITEUR_AU_REGEX.match(line)
                isTableHeader = (
                    line.startswith('Date Nature des opérations Valeur Débit Crédit')
                    and not firstPage
                )
                if soldeMatch is not None or isTableHeader:
                    if soldeMatch is not None:
                        # What remains once the matched prefix is removed is the amount.
                        initialAmount = toFloat(SOLDE_CREDITEUR_AU_REGEX.sub('', line))
                        currentAmount = initialAmount
                        print('Initial amount', initialAmount)
                        print()
                        totals += [initialAmount]
                    started = True
                continue

            # We aren't interested in the content after this line:
            if line.startswith('BNP PARIBAS SA au capital de') or END_PAGE_AFTER_THE_FIRST_ONE_REGEX.match(line) is not None:
                firstPage = False
                started = False
                continue

            # We aren't interested in the content after this line
            if line.startswith('TOTAL DES OPERATIONS'):
                totalDesOperationsRegexMatch = TOTAL_DES_OPERATIONS_REGEX.match(line)
                # Note that transfers between accounts are counted in both debits
                # and credits, as trying to cancel them would make benefits show
                # as a negative debit, which does not make sense.
                # Cannot just consider January as benefits only, as `20240122.pdf`
                # also contains an additional transfer between my accounts.
                if totalDesOperationsRegexMatch is not None:
                    totalMonthlyDebit, totalMonthlyCredit = [toFloat(group) for group in totalDesOperationsRegexMatch.groups()]
                else:
                    # Statement whose totals line only carries a credit amount.
                    totalMonthlyCredit = toFloat(TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX.match(line).group(1))
                    totalMonthlyDebit = 0
                if inMainBankAccount:
                    # Default to 0: no credit has been recorded for the other
                    # accounts at this date (a plain lookup raised KeyError).
                    totalMonthlyCredit += otherBankAccountsCredits.get(fileDatetime, 0)
                print(f'Total monthly debit: {totalMonthlyDebit}')
                print(f'Total monthly credit: {totalMonthlyCredit}')
                totalMonthlyDebits += [totalMonthlyDebit]
                totalMonthlyCredits += [totalMonthlyCredit]
                totalMonthlyDifference = totalMonthlyCredit - totalMonthlyDebit
                totalMonthlyDifferences += [totalMonthlyDifference]
                # Nothing of interest follows the totals line in this statement.
                break

            if FIRST_LINE_OF_PAYMENT_REGEX.match(line) is not None:
                # A new transaction starts: print the previous one with the
                # comment lines gathered since its first line.
                if date is not None and PRINT_TRANSACTIONS:
                    print(date, valeur, amount, currentAmount)
                    print('\n'.join(comment))
                    print()
                date, valeur, amount = line.split()
                amount = toFloat(amount)
                # The running balance is decreased by every transaction amount.
                currentAmount -= amount
                comment = []
            else:
                comment += [line]

if firstDatetime is None:
    raise SystemExit(f'No bank statement found in {os.getcwd()}')

fig, ax = plt.subplots()
plt.title('Monthly debits and credits')
plt.xlabel('Date')
plt.ylabel('€')

ALPHA = 0.5

# One tick per month from the first to the last statement, both included.
xTicks = range(getMonthIndexSinceEpoch(firstDatetime), getMonthIndexSinceEpoch(lastDatetime) + 1)

totalMonthlyAmountAndLabel = (
    #(totalMonthlyDebits, 'Debit'),
    #(totalMonthlyCredits, 'Credit'),
    (totalMonthlyDifferences, 'Difference'),
    (totals, 'Total'),
)
for totalMonthlyAmount, totalMonthlyLabel in totalMonthlyAmountAndLabel:
    plt.bar(xTicks, totalMonthlyAmount, alpha=ALPHA, label=totalMonthlyLabel)

plt.legend()
#plt.yscale('symlog')
# Thousands separators on the y axis.
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,}'))

ticksLabels = [getMonthNameFromMonthIndex(monthIndex) for monthIndex in xTicks]
plt.xticks(xTicks, ticksLabels, rotation=90)
#plt.tight_layout()

# How to show the horizontal lines for subticks?
plt.grid(axis='y')

plt.show()