Move stuff from main.py to README.md and utils.py

This commit is contained in:
Benjamin Loison 2024-10-03 18:07:03 +02:00
parent 17acc478b7
commit bbe93d0939
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8
3 changed files with 47 additions and 40 deletions

19
README.md Normal file
View File

@ -0,0 +1,19 @@
# BNP PDF statement parser
Requires the `pdftotext` command-line tool to be installed.

Assumes a file hierarchy like:
```
.
├── compte_de_cheques/
│   ├── 2022/
│   │   ├── 20221121.pdf
│   │   └── 20221221.pdf
│   └── 2023/
│       ├── 20230123.pdf
│       └── 20230221.pdf
└── livret_a/
    ├── 20230721.pdf
    └── 20240122.pdf
```

View File

@ -1,40 +1,15 @@
#!/usr/bin/env python #!/usr/bin/env python
# Depends on `pdftotext`.
import os import os
import subprocess
import re
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.ticker as ticker import matplotlib.ticker as ticker
from datetime import datetime from datetime import datetime
from utils import getTextFromPdf, getDatetimeFromFileName, getMonthIndexSinceEpoch, getMonthNameFromMonthIndex, FIRST_LINE_OF_PAYMENT_REGEX, END_PAGE_AFTER_THE_FIRST_ONE_REGEX, SOLDE_CREDITEUR_AU_REGEX, TOTAL_DES_OPERATIONS_REGEX
PATH = f'/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/compte_de_cheques/' PATH = f'/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/compte_de_cheques/'
os.chdir(PATH) os.chdir(PATH)
'''
Assuming file hierarchy like:
2022
20221121.pdf
20221221.pdf
2023
20230123.pdf
20230221.pdf
'''
def execute(command):
return subprocess.check_output(command).decode('utf-8')
def getTextFromPdf(pdfPath):
return execute(['pdftotext', '-raw', pdfPath, '-'])
FIRST_LINE_OF_PAYMENT_REGEX = re.compile('\\d{2}\\.\\d{2} \\d{2}\\.\\d{2} \\d+,\\d{2}')
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+')
SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU \\d{2}\\.\\d{2}\\.\\d{4}')
TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([0-9 ]+,\\d{2})\\ ([0-9 ]+,\\d{2})')
PRINT_TRANSACTIONS = False PRINT_TRANSACTIONS = False
totalMonthlyDebits = [] totalMonthlyDebits = []
@ -47,7 +22,7 @@ for folder in sorted(os.listdir()):
for file in sorted(os.listdir(folder)): for file in sorted(os.listdir(folder)):
filePath = f'{folder}/{file}' filePath = f'{folder}/{file}'
print(filePath) print(filePath)
currentDatetime = getDatetime(file) currentDatetime = getDatetimeFromFileName(file)
if firstDatetime is None: if firstDatetime is None:
firstDatetime = currentDatetime firstDatetime = currentDatetime
content = getTextFromPdf(filePath) content = getTextFromPdf(filePath)
@ -100,7 +75,7 @@ for folder in sorted(os.listdir()):
comment += [line] comment += [line]
#break #break
#break #break
lastDatetime = getDatetime(file) lastDatetime = getDatetimeFromFileName(file)
fig, ax = plt.subplots() fig, ax = plt.subplots()
plt.title('Monthly debits and credits') plt.title('Monthly debits and credits')
@ -108,14 +83,7 @@ plt.xlabel('Date')
plt.ylabel('') plt.ylabel('')
ALPHA = 0.5 ALPHA = 0.5
def getDatetime(aDatetimeStr): xTicks = range(getMonthIndexSinceEpoch(firstDatetime), getMonthIndexSinceEpoch(lastDatetime) + 1)
return datetime.strptime(aDatetimeStr, '%Y%m%d.pdf')
def getMonthIndex(aDatetime):
return aDatetime.year * 12 + aDatetime.month
xTicks = range(getMonthIndex(firstDatetime), getMonthIndex(lastDatetime) + 1)
# sign does not seem respected for `totalMonthlyDifferences`.
totalMonthlyAmountAndLabel = ( totalMonthlyAmountAndLabel = (
#(totalMonthlyDebits, 'Debit'), #(totalMonthlyDebits, 'Debit'),
#(totalMonthlyCredits, 'Credit'), #(totalMonthlyCredits, 'Credit'),
@ -129,10 +97,7 @@ plt.legend()
#plt.yscale('symlog') #plt.yscale('symlog')
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,}')) ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,}'))
def getMonthName(monthIndex): ticksLabels = [getMonthNameFromMonthIndex(monthIndex) for monthIndex in xTicks]
return datetime((monthIndex - 1) // 12, 1 + (monthIndex - 1) % 12, 1).strftime('%b %Y')
ticksLabels = [getMonthName(monthIndex) for monthIndex in xTicks]
plt.xticks(xTicks, ticksLabels, rotation = 90) plt.xticks(xTicks, ticksLabels, rotation = 90)
#plt.tight_layout() #plt.tight_layout()
# How to show the horizontal lines for subticks? # How to show the horizontal lines for subticks?

23
utils.py Normal file
View File

@ -0,0 +1,23 @@
import subprocess
from datetime import datetime
import re
# Patterns used to recognise lines in the text extracted from a statement.
# Raw strings are used so that every backslash reaches the regex engine as is.

# Two `NN.NN` tokens followed by an amount written with a comma as decimal
# separator (presumably operation date, value date and amount - to confirm
# against a real statement).
FIRST_LINE_OF_PAYMENT_REGEX = re.compile(r'\d{2}\.\d{2} \d{2}\.\d{2} \d+,\d{2}')

# A page marker of the form `P. <number>/<number>`.
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile(r'P\. \d+/\d+')

# The literal text `SOLDE CREDITEUR AU ` followed by a `NN.NN.NNNN` token.
SOLDE_CREDITEUR_AU_REGEX = re.compile(r'SOLDE CREDITEUR AU \d{2}\.\d{2}\.\d{4}')

# The literal text `TOTAL DES OPERATIONS` followed by two captured amounts;
# each amount may contain spaces between digits and ends with `,NN`.
TOTAL_DES_OPERATIONS_REGEX = re.compile(
    r'TOTAL\ DES\ OPERATIONS\ ([0-9 ]+,\d{2})\ ([0-9 ]+,\d{2})'
)
def execute(command):
    """Run `command` and return what it wrote to standard output.

    `command` is passed unchanged to `subprocess.check_output`, so a
    non-zero exit status raises `subprocess.CalledProcessError`.
    The captured bytes are decoded as UTF-8 before being returned.
    """
    rawOutput = subprocess.check_output(command)
    return rawOutput.decode('utf-8')
def getTextFromPdf(pdfPath):
    """Return the text of the PDF at `pdfPath` as produced by `pdftotext`.

    `pdftotext` is invoked with the `-raw` option and `-` as output file,
    so the extracted text is read back from its standard output.
    """
    pdfToTextCommand = ['pdftotext', '-raw', pdfPath, '-']
    return execute(pdfToTextCommand)
def getDatetimeFromFileName(aDatetimeStr):
    """Parse a statement file name such as `20221121.pdf` into a `datetime`.

    The name must be exactly `YYYYMMDD.pdf`; anything else makes
    `datetime.strptime` raise `ValueError`.
    """
    fileNameFormat = '%Y%m%d.pdf'
    return datetime.strptime(aDatetimeStr, fileNameFormat)
def getMonthIndexSinceEpoch(aDatetime):
    """Map `aDatetime` to a single integer that grows by one every month.

    The value is `year * 12 + month` (month being 1-12), so it counts from
    year 0 rather than from the Unix epoch; the day and time are ignored.
    """
    monthsFromFullYears = 12 * aDatetime.year
    return monthsFromFullYears + aDatetime.month
def getMonthNameFromMonthIndex(monthIndex):
    """Format a month index (`year * 12 + month`) as e.g. `Jan 2023`.

    The index is shifted by one before splitting because months are
    numbered 1-12: without the shift, December would roll into the next year.
    The label is built with `strftime('%b %Y')`.
    """
    year, zeroBasedMonth = divmod(monthIndex - 1, 12)
    firstDayOfMonth = datetime(year, zeroBasedMonth + 1, 1)
    return firstDayOfMonth.strftime('%b %Y')