diff --git a/bnp_pdf_statement_parser.py b/bnp_pdf_statement_parser.py index ea01264..56dd167 100755 --- a/bnp_pdf_statement_parser.py +++ b/bnp_pdf_statement_parser.py @@ -4,14 +4,31 @@ import os import matplotlib.pyplot as plt import matplotlib.ticker as ticker from datetime import datetime -from utils import getTextFromPdf, getDatetimeFromFileName, getMonthIndexSinceEpoch, getMonthNameFromMonthIndex, FIRST_LINE_OF_PAYMENT_REGEX, END_PAGE_AFTER_THE_FIRST_ONE_REGEX, SOLDE_CREDITEUR_AU_REGEX, TOTAL_DES_OPERATIONS_REGEX +from utils import getTextFromPdf, getDatetimeFromFileName, getMonthIndexSinceEpoch, getMonthNameFromMonthIndex, FIRST_LINE_OF_PAYMENT_REGEX, END_PAGE_AFTER_THE_FIRST_ONE_REGEX, SOLDE_CREDITEUR_AU_REGEX, TOTAL_DES_OPERATIONS_REGEX, TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX -PATH = f'/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/compte_de_cheques/' +PATH = '/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/' os.chdir(PATH) PRINT_TRANSACTIONS = False +MAIN_BANK_ACCOUNT = 'compte_de_cheques' + +# As far as I know, the other bank accounts have had no debits so far, hence a credits-only mapping.
+otherBankAccountsCredits = {} + +for folder in os.listdir(): + if folder != MAIN_BANK_ACCOUNT: + print(folder) + for file in os.listdir(folder): + print(file) + fileDatetime = getDatetimeFromFileName(file) + print(fileDatetime) + + # TODO: not implemented yet, otherBankAccountsCredits is never filled. Intended: otherBankAccountsCredits[fileDatetime] = otherBankAccountsCredits.get(fileDatetime, 0) plus the credit total of this statement. + + +os.chdir(f'{MAIN_BANK_ACCOUNT}/') + totalMonthlyDebits = [] totalMonthlyCredits = [] totalMonthlyDifferences = [] @@ -22,9 +39,9 @@ for folder in sorted(os.listdir()): for file in sorted(os.listdir(folder)): filePath = f'{folder}/{file}' print(filePath) - currentDatetime = getDatetimeFromFileName(file) + fileDatetime = getDatetimeFromFileName(file) if firstDatetime is None: - firstDatetime = currentDatetime + firstDatetime = fileDatetime content = getTextFromPdf(filePath) lines = content.splitlines() started = False @@ -54,7 +71,16 @@ for folder in sorted(os.listdir()): # We aren't interested in the content after this line elif line.startswith('TOTAL DES OPERATIONS'): totalDesOperationsRegexMatch = TOTAL_DES_OPERATIONS_REGEX.match(line) - totalMonthlyDebit, totalMonthlyCredit = [float(group.replace(',', '.').replace(' ', '')) for group in totalDesOperationsRegexMatch.groups()] + # Note that transfers between accounts are counted in both debits and credits: cancelling them out would make benefits show up as a negative debit, which does not make sense. + # January cannot simply be treated as benefits only, because `20240122.pdf` also contains an additional transfer between my accounts.
+ toFloat = lambda group: float(group.replace(',', '.').replace(' ', '')) + if totalDesOperationsRegexMatch is not None: + totalMonthlyDebit, totalMonthlyCredit = [toFloat(group) for group in totalDesOperationsRegexMatch.groups()] + else: + totalMonthlyCredit = toFloat(TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX.match(line).group(1)) + totalMonthlyDebit = 0 + if os.getcwd().endswith(f'/{MAIN_BANK_ACCOUNT}'): + totalMonthlyCredit += otherBankAccountsCredits.get(fileDatetime, 0) print(f'Total monthly debit: {totalMonthlyDebit}') print(f'Total monthly credit: {totalMonthlyCredit}') totalMonthlyDebits += [totalMonthlyDebit] diff --git a/utils.py b/utils.py index 83ca5dd..08537d3 100644 --- a/utils.py +++ b/utils.py @@ -6,6 +6,7 @@ FIRST_LINE_OF_PAYMENT_REGEX = re.compile('\\d{2}\\.\\d{2} \\d{2}\\.\\d{2} \\d+,\ END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+') SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU \\d{2}\\.\\d{2}\\.\\d{4}') TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([0-9 ]+,\\d{2})\\ ([0-9 ]+,\\d{2})') +TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([0-9 ]+,\\d{2})') def execute(command): return subprocess.check_output(command).decode('utf-8') @@ -14,7 +15,10 @@ def getTextFromPdf(pdfPath): return execute(['pdftotext', '-raw', pdfPath, '-']) def getDatetimeFromFileName(aDatetimeStr): - return datetime.strptime(aDatetimeStr, '%Y%m%d.pdf') + #aDatetime = datetime.strptime(aDatetimeStr, '%Y%m%d.pdf') + #aDatetime = datetime.strptime(aDatetimeStr[:6], '%Y%m') + aDatetime = datetime(int(aDatetimeStr[:4]), int(aDatetimeStr[4:6]), 1) + return aDatetime def getMonthIndexSinceEpoch(aDatetime): return aDatetime.year * 12 + aDatetime.month