BNP_PDF_statement_parser/bnp_pdf_statement_parser.py

107 lines
4.1 KiB
Python
Raw Permalink Normal View History

2024-10-01 20:35:43 +02:00
#!/usr/bin/env python
2023-06-21 00:36:27 +02:00
2023-11-05 21:10:45 +01:00
import os
2024-10-01 20:35:43 +02:00
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
2024-10-01 20:57:36 +02:00
from datetime import datetime
2024-10-03 19:51:54 +02:00
from utils import getDatetimeFromFileName, getMonthIndexSinceEpoch, getMonthNameFromMonthIndex, readPdfBankStatement
import operator
from pprint import pprint
2023-06-21 00:36:27 +02:00
2024-10-03 19:33:54 +02:00
# Root folder of the bank statements; each of its entries is treated as one
# bank account folder by the loading code below.
PATH = '/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/'

# Every statement path used afterwards is relative to this folder.
os.chdir(PATH)

# Whether to pretty-print the parsed transactions while loading statements.
PRINT_TRANSACTIONS = False

# Name of the account folder that is loaded separately from the others.
MAIN_BANK_ACCOUNT = 'compte_de_cheques'
2024-10-03 21:22:30 +02:00
# Every loaded transaction, in loading order.
allTransactions = []
# Transactions grouped by month; each key is the transaction date moved to the
# first day of its month.
monthlyTransactions = {}


def appendTransactions(transactions, bankAccount):
    """Register transactions in the global and the per-month collections.

    Each transaction is a dict that is mutated in place: its 'bank account'
    entry is set to `bankAccount`. Its 'date' entry must support
    `.replace(day = 1)`, which yields the key used in `monthlyTransactions`.

    Args:
        transactions: iterable of transaction dicts.
        bankAccount: label stored under the 'bank account' key.
    """
    for transaction in transactions:
        transaction['bank account'] = bankAccount
        allTransactions.append(transaction)
        month = transaction['date'].replace(day = 1)
        # Append in place instead of rebuilding the month's list on every insert.
        monthlyTransactions.setdefault(month, []).append(transaction)
2024-10-03 21:22:30 +02:00
# Load the statements of every account other than the main one. Their
# statements sit directly inside the account folder.
# `os.listdir` returns entries in arbitrary order, so both listings are sorted
# to make the loading order (and everything derived from it) deterministic.
for folder in sorted(os.listdir()):
    # The main account is loaded separately; stray non-directory entries
    # cannot be listed and are skipped.
    if folder == MAIN_BANK_ACCOUNT or not os.path.isdir(folder):
        continue
    for file in sorted(os.listdir(folder)):
        filePath = f'{folder}/{file}'
        print(filePath)
        # Element 3 of the parser result is used as the list of transactions.
        transactions = readPdfBankStatement(filePath)[3]
        # Honor the same switch as the main account loading.
        if PRINT_TRANSACTIONS:
            pprint(transactions)
        appendTransactions(transactions, folder)
2024-10-03 19:33:54 +02:00
# Load the main account. Its statements are stored one folder level deeper
# than the other accounts, so the working directory moves into its folder and
# each sub-folder is walked in sorted order.
os.chdir(f'{MAIN_BANK_ACCOUNT}/')
for subFolder in sorted(os.listdir()):
    for statementName in sorted(os.listdir(subFolder)):
        statementPath = f'{subFolder}/{statementName}'
        print(statementPath)
        # Element 3 of the parser result is used as the list of transactions.
        parsedTransactions = readPdfBankStatement(statementPath)[3]
        appendTransactions(parsedTransactions, MAIN_BANK_ACCOUNT)
        if not PRINT_TRANSACTIONS:
            continue
        pprint(parsedTransactions)

# Order all transactions by date, then report how many were loaded.
allTransactions.sort(key = operator.itemgetter('date'))
print(len(allTransactions))
import re

# Whole-comment pattern: the fixed header line, a newline, one of the account
# codes CEL, LEP, LVJ, L.A or LDD (presumably savings account types - to be
# confirmed) and exactly 23 digits.
# Raw string so that `\.` and `\d` reach the regex engine without relying on
# invalid string escapes.
VIRT_A_CPTE_EMIS_SUR_LE_REGEX = re.compile(r'VIRT CPTE A CPTE EMIS SUR LE\n(CEL|LEP|LVJ|L\.A|LDD)\d{23}')

# Comment prefixes that mark a transaction as coming from the owner's accounts.
_OWN_ACCOUNT_COMMENT_PREFIXES = (
    'DEPOT INITIAL DU COMPTE\n',
    'VIR CPTE A CPTE EMIS /MOTIF ',
)


# TODO: the bank account could also be given, to restrict which comments
# count as own-account ones.
def isTransactionFromOwnAccounts(comment):
    """Return whether a transaction comment matches an own-account pattern.

    A comment matches when it starts with 'DEPOT INITIAL DU COMPTE\\n' or
    'VIR CPTE A CPTE EMIS /MOTIF ', or when it entirely matches
    `VIRT_A_CPTE_EMIS_SUR_LE_REGEX`.

    Comments starting with 'VIR CPTE A CPTE RECU /DE ' are currently not
    matched (that check is disabled).

    Args:
        comment: the transaction comment, as a string.

    Returns:
        True if the comment matches one of the patterns above, else False.
    """
    if comment.startswith(_OWN_ACCOUNT_COMMENT_PREFIXES):
        return True
    # `fullmatch` yields a Match object or None; expose a real boolean.
    return VIRT_A_CPTE_EMIS_SUR_LE_REGEX.fullmatch(comment) is not None
#allTransactions = [transaction for transaction in allTransactions if not isTransactionFromOwnAccounts(transaction['comment'])]
sortedMonths = sorted(monthlyTransactions)

# Keep, for every month, only the transactions whose comment is not
# recognized as an own-account one.
for monthKey in sortedMonths:
    keptTransactions = []
    for monthTransaction in monthlyTransactions[monthKey]:
        if isTransactionFromOwnAccounts(monthTransaction['comment']):
            continue
        keptTransactions.append(monthTransaction)
    monthlyTransactions[monthKey] = keptTransactions

# Per-month sums, in the order of `sortedMonths`: negative amounts only
# (debits), positive amounts only (credits) and all amounts (differences).
totalMonthlyDebits = []
totalMonthlyCredits = []
totalMonthlyDifferences = []
for monthKey in sortedMonths:
    monthAmounts = [monthTransaction['amount'] for monthTransaction in monthlyTransactions[monthKey]]
    totalMonthlyDebits.append(sum(min(amount, 0) for amount in monthAmounts))
    totalMonthlyCredits.append(sum(max(amount, 0) for amount in monthAmounts))
    totalMonthlyDifferences.append(sum(monthAmounts))

# Running total per month: the 'current amount' of the first remaining
# transaction of the first month, plus the differences of all months up to
# and including the current one.
totals = []
if sortedMonths:
    startingAmount = monthlyTransactions[sortedMonths[0]][0]['current amount']
    for monthCount in range(1, len(sortedMonths) + 1):
        totals.append(startingAmount + sum(totalMonthlyDifferences[:monthCount]))
2024-10-01 20:35:43 +02:00
# Draw the monthly debits, credits, differences and running totals as
# overlapping semi-transparent bars, one group per month.
fig, ax = plt.subplots()
plt.title('BNP accounts monthly debits and credits')
plt.xlabel('Date')
plt.ylabel('')
ALPHA = 0.5

# One tick per month from the first to the last month with transactions,
# including months in between that have none.
xTicks = range(getMonthIndexSinceEpoch(sortedMonths[0]), getMonthIndexSinceEpoch(sortedMonths[-1]) + 1)
# The series hold one value per month that has transactions, so each bar is
# placed at its own month index. Placing them on `xTicks` fails as soon as a
# month of the range has no transaction, because the lengths then differ.
# Assumes, as the tick range already does, that consecutive months map to
# consecutive integers.
barPositions = [getMonthIndexSinceEpoch(month) for month in sortedMonths]

totalMonthlyAmountAndLabel = (
    (totalMonthlyDebits, 'Debit'),
    (totalMonthlyCredits, 'Credit'),
    (totalMonthlyDifferences, 'Difference'),
    (totals, 'Total'),
)
for totalMonthlyAmount, totalMonthlyLabel in totalMonthlyAmountAndLabel:
    plt.bar(barPositions, totalMonthlyAmount, alpha = ALPHA, label = totalMonthlyLabel)
plt.legend()
# Optional logarithmic-like scale, disabled for now:
#plt.yscale('symlog')
# Thousands separators on the amounts axis.
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,}'))

ticksLabels = [getMonthNameFromMonthIndex(monthIndex) for monthIndex in xTicks]
plt.xticks(xTicks, ticksLabels, rotation = 90)
#plt.tight_layout()
# TODO: also show horizontal grid lines for the minor ticks.
plt.grid(axis = 'y')
plt.show()