2024-10-01 20:35:43 +02:00
#!/usr/bin/env python
2023-06-21 00:36:27 +02:00
2023-11-05 21:10:45 +01:00
import os
2024-10-01 20:35:43 +02:00
import matplotlib . pyplot as plt
import matplotlib . ticker as ticker
2024-10-01 20:57:36 +02:00
from datetime import datetime
2024-10-03 19:33:54 +02:00
from utils import getTextFromPdf , getDatetimeFromFileName , getMonthIndexSinceEpoch , getMonthNameFromMonthIndex , FIRST_LINE_OF_PAYMENT_REGEX , END_PAGE_AFTER_THE_FIRST_ONE_REGEX , SOLDE_CREDITEUR_AU_REGEX , TOTAL_DES_OPERATIONS_REGEX , TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX
2023-06-21 00:36:27 +02:00
2024-10-03 19:33:54 +02:00
# Root folder of the bank statements, holding one sub-folder per bank account.
# Every relative path used afterwards is resolved from this folder.
PATH = '/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/'

os.chdir(PATH)
2023-06-21 00:36:27 +02:00
2024-10-01 20:35:43 +02:00
# When enabled, every parsed transaction of the main account is printed.
PRINT_TRANSACTIONS = False
# Folder name of the account whose statements are parsed and plotted.
MAIN_BANK_ACCOUNT = 'compte_de_cheques'
# Credits of the other bank accounts, intended to be keyed by statement datetime.
# As far as I know there was no debit yet.
# NOTE: nothing is stored yet, the statements of the other accounts are only listed.
otherBankAccountsCredits = {}
for folder in os.listdir():
    # Skip the main account and stray plain files, on which `os.listdir(folder)` would raise.
    if folder == MAIN_BANK_ACCOUNT or not os.path.isdir(folder):
        continue
    print(folder)
    for file in os.listdir(folder):
        print(file)
        fileDatetime = getDatetimeFromFileName(file)
        print(fileDatetime)
        # TODO: accumulate the credit of this statement:
        #otherBankAccountsCredits[fileDatetime] = otherBankAccountsCredits.get(fileDatetime, 0) +
# The remaining of the script works relatively to the main account folder.
os.chdir(f'{MAIN_BANK_ACCOUNT}/')
2024-10-01 20:35:43 +02:00
# Series filled while parsing the statements, in sorted folder then file name order:
# debits, credits and (credit - debit) come from the `TOTAL DES OPERATIONS` line,
# `totals` holds the balance read at the beginning of each statement.
totalMonthlyDebits = []
totalMonthlyCredits = []
totalMonthlyDifferences = []
totals = []
# Datetimes of the first and last parsed statements, used to build the plot x axis.
firstDatetime = None
lastDatetime = None


def toFloat(group):
    """Convert an amount written with a decimal comma and space separators (`1 234,56`) to a float."""
    return float(group.replace(',', '.').replace(' ', ''))


for folder in sorted(os.listdir()):
    # Skip stray plain files, on which `os.listdir(folder)` would raise.
    if not os.path.isdir(folder):
        continue
    for file in sorted(os.listdir(folder)):
        filePath = f'{folder}/{file}'
        print(filePath)
        fileDatetime = getDatetimeFromFileName(file)
        if firstDatetime is None:
            firstDatetime = fileDatetime
        # Tracked here instead of relying on the loop variable leaking out of the loops.
        lastDatetime = fileDatetime
        content = getTextFromPdf(filePath)
        lines = content.splitlines()
        # `started` tells whether the current line belongs to the transactions table.
        started = False
        firstPage = True
        initialAmount = None
        currentAmount = None
        # Transaction being read: its first line fields and its following description lines.
        date = None
        comment = []
        for line in lines:
            if not started:
                # We are interested in the content after the balance line or, on the
                # pages following the first one, after the table header.
                soldeCrediteurMatch = SOLDE_CREDITEUR_AU_REGEX.match(line)
                isTableHeader = line.startswith('Date Nature des opérations Valeur Débit Crédit')
                if soldeCrediteurMatch is not None or (isTableHeader and not firstPage):
                    if soldeCrediteurMatch is not None:
                        initialAmount = toFloat(SOLDE_CREDITEUR_AU_REGEX.sub('', line))
                        currentAmount = initialAmount
                        print('Initial amount', initialAmount)
                        print()
                        totals.append(initialAmount)
                    started = True
                continue
            # We aren't interested in the content after this line, until the next table header:
            if line.startswith('BNP PARIBAS SA au capital de') or END_PAGE_AFTER_THE_FIRST_ONE_REGEX.match(line) is not None:
                firstPage = False
                started = False
                continue
            # We aren't interested in the content after this line, the statement is over:
            if line.startswith('TOTAL DES OPERATIONS'):
                totalDesOperationsRegexMatch = TOTAL_DES_OPERATIONS_REGEX.match(line)
                # Note that transfer between accounts will be noted in both debits and credits, as trying to cancel would make benefits show as negative debit which does not make sense.
                # Cannot just consider January as benefits only as `20240122.pdf` also contains an additional transfer between my accounts.
                if totalDesOperationsRegexMatch is not None:
                    totalMonthlyDebit, totalMonthlyCredit = [toFloat(group) for group in totalDesOperationsRegexMatch.groups()]
                else:
                    # The line only contains a credit amount.
                    totalMonthlyCredit = toFloat(TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX.match(line).group(1))
                    totalMonthlyDebit = 0
                if os.getcwd().endswith(f'/{MAIN_BANK_ACCOUNT}'):
                    # No credit is known for months without any other account statement.
                    totalMonthlyCredit += otherBankAccountsCredits.get(fileDatetime, 0)
                print(f'Total monthly debit: {totalMonthlyDebit}')
                print(f'Total monthly credit: {totalMonthlyCredit}')
                totalMonthlyDebits.append(totalMonthlyDebit)
                totalMonthlyCredits.append(totalMonthlyCredit)
                totalMonthlyDifference = totalMonthlyCredit - totalMonthlyDebit
                totalMonthlyDifferences.append(totalMonthlyDifference)
                break
            if FIRST_LINE_OF_PAYMENT_REGEX.match(line) is not None:
                # A new transaction starts: print the previous one now that its comment is complete.
                if date is not None and PRINT_TRANSACTIONS:
                    print(date, valeur, amount, currentAmount)
                    print('\n'.join(comment))
                    print()
                date, valeur, amount = line.split()
                amount = toFloat(amount)
                currentAmount -= amount
                comment = []
            else:
                comment.append(line)

if lastDatetime is None:
    raise SystemExit(f'No bank statement found in {os.getcwd()}')
2024-10-01 20:35:43 +02:00
# Draw the series gathered from the statements as overlaid, semi-transparent bars.
fig, ax = plt.subplots()
plt.title('Monthly debits and credits')
plt.xlabel('Date')
plt.ylabel('€')
ALPHA = 0.5

# One x position per month index, from the first statement to the last one included.
firstMonthIndex = getMonthIndexSinceEpoch(firstDatetime)
lastMonthIndex = getMonthIndexSinceEpoch(lastDatetime)
xTicks = range(firstMonthIndex, lastMonthIndex + 1)

# Series to plot with their legend label, uncomment to also show debits and credits.
totalMonthlyAmountAndLabel = (
    #(totalMonthlyDebits, 'Debit'),
    #(totalMonthlyCredits, 'Credit'),
    (totalMonthlyDifferences, 'Difference'),
    (totals, 'Total'),
)
for amounts, label in totalMonthlyAmountAndLabel:
    plt.bar(xTicks, amounts, alpha=ALPHA, label=label)
plt.legend()
#plt.yscale('symlog')
# Show the y axis values with thousands separators.
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,}'))

# Label each month position with its name, written vertically.
ticksLabels = list(map(getMonthNameFromMonthIndex, xTicks))
plt.xticks(xTicks, ticksLabels, rotation=90)
#plt.tight_layout()
# How to show the horizontal lines for subticks?
plt.grid(axis='y')
plt.show()