Move stuff from main.py
to README.md
and utils.py
This commit is contained in:
parent
17acc478b7
commit
bbe93d0939
19
README.md
Normal file
19
README.md
Normal file
@ -0,0 +1,19 @@
|
||||
# BNP PDF statement parser
|
||||
|
||||
Depends on `pdftotext`.
|
||||
|
||||
This assumes a file hierarchy like:
|
||||
|
||||
```
|
||||
.
|
||||
├── compte_de_cheques/
|
||||
│ ├── 2022/
|
||||
│ │ ├── 20221121.pdf
|
||||
│ │ └── 20221221.pdf
|
||||
│ └── 2023/
|
||||
│ ├── 20230123.pdf
|
||||
│ └── 20230221.pdf
|
||||
└── livret_a/
|
||||
├── 20230721.pdf
|
||||
└── 20240122.pdf
|
||||
```
|
@ -1,40 +1,15 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Depends on `pdftotext`.
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import re
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.ticker as ticker
|
||||
from datetime import datetime
|
||||
from utils import getTextFromPdf, getDatetimeFromFileName, getMonthIndexSinceEpoch, getMonthNameFromMonthIndex, FIRST_LINE_OF_PAYMENT_REGEX, END_PAGE_AFTER_THE_FIRST_ONE_REGEX, SOLDE_CREDITEUR_AU_REGEX, TOTAL_DES_OPERATIONS_REGEX
|
||||
|
||||
PATH = f'/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/compte_de_cheques/'
|
||||
|
||||
os.chdir(PATH)
|
||||
|
||||
'''
|
||||
Assuming file hierarchy like:
|
||||
|
||||
2022
|
||||
├── 20221121.pdf
|
||||
└── 20221221.pdf
|
||||
2023
|
||||
├── 20230123.pdf
|
||||
└── 20230221.pdf
|
||||
'''
|
||||
|
||||
def execute(command):
    '''
    Run `command` and return what it wrote to standard output, decoded as UTF-8.

    `command` is handed unchanged to `subprocess.check_output`, which raises
    `subprocess.CalledProcessError` when the process exits with a non-zero
    status.
    '''
    return subprocess.check_output(command).decode('utf-8')
|
||||
|
||||
def getTextFromPdf(pdfPath):
    '''
    Return the text of the PDF at `pdfPath` as produced by `pdftotext -raw`.

    The trailing `-` argument makes `pdftotext` write the text to standard
    output instead of a text file, so that `execute` can capture it.
    '''
    return execute(['pdftotext', '-raw', pdfPath, '-'])
|
||||
|
||||
# Two `NN.NN` fields followed by an amount written with a decimal comma,
# e.g. `21.11 21.11 12,34`.
FIRST_LINE_OF_PAYMENT_REGEX = re.compile('\\d{2}\\.\\d{2} \\d{2}\\.\\d{2} \\d+,\\d{2}')
# A page counter such as `P. 2/3`.
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+')
# The literal text `SOLDE CREDITEUR AU ` followed by a `NN.NN.NNNN` field.
SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU \\d{2}\\.\\d{2}\\.\\d{4}')
# The literal text `TOTAL DES OPERATIONS ` followed by two captured amounts;
# each amount may contain spaces (thousands separators) and uses a decimal comma.
TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([0-9 ]+,\\d{2})\\ ([0-9 ]+,\\d{2})')
|
||||
|
||||
PRINT_TRANSACTIONS = False
|
||||
|
||||
totalMonthlyDebits = []
|
||||
@ -47,7 +22,7 @@ for folder in sorted(os.listdir()):
|
||||
for file in sorted(os.listdir(folder)):
|
||||
filePath = f'{folder}/{file}'
|
||||
print(filePath)
|
||||
currentDatetime = getDatetime(file)
|
||||
currentDatetime = getDatetimeFromFileName(file)
|
||||
if firstDatetime is None:
|
||||
firstDatetime = currentDatetime
|
||||
content = getTextFromPdf(filePath)
|
||||
@ -100,7 +75,7 @@ for folder in sorted(os.listdir()):
|
||||
comment += [line]
|
||||
#break
|
||||
#break
|
||||
lastDatetime = getDatetime(file)
|
||||
lastDatetime = getDatetimeFromFileName(file)
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
plt.title('Monthly debits and credits')
|
||||
@ -108,14 +83,7 @@ plt.xlabel('Date')
|
||||
plt.ylabel('€')
|
||||
ALPHA = 0.5
|
||||
|
||||
def getDatetime(aDatetimeStr):
    '''
    Parse a file name of the form `YYYYMMDD.pdf` into a `datetime`.

    The `.pdf` suffix is part of the expected format; `datetime.strptime`
    raises `ValueError` for any string that does not match it.
    '''
    return datetime.strptime(aDatetimeStr, '%Y%m%d.pdf')
|
||||
|
||||
def getMonthIndex(aDatetime):
    '''
    Return a single integer identifying the month of `aDatetime`.

    The index is `year * 12 + month` with `month` in 1..12, so consecutive
    calendar months map to consecutive integers, including across a year
    boundary.
    '''
    return aDatetime.year * 12 + aDatetime.month
|
||||
|
||||
xTicks = range(getMonthIndex(firstDatetime), getMonthIndex(lastDatetime) + 1)
|
||||
# The sign does not seem to be respected for `totalMonthlyDifferences`.
|
||||
xTicks = range(getMonthIndexSinceEpoch(firstDatetime), getMonthIndexSinceEpoch(lastDatetime) + 1)
|
||||
totalMonthlyAmountAndLabel = (
|
||||
#(totalMonthlyDebits, 'Debit'),
|
||||
#(totalMonthlyCredits, 'Credit'),
|
||||
@ -129,10 +97,7 @@ plt.legend()
|
||||
#plt.yscale('symlog')
|
||||
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,}'))
|
||||
|
||||
def getMonthName(monthIndex):
    '''
    Format a month index (`year * 12 + month`, `month` in 1..12) as e.g. `Nov 2022`.

    Subtracting 1 before the division and modulo makes December
    (`month == 12`) stay in its own year instead of rolling over to the next
    one. The month abbreviation comes from `strftime('%b')` and therefore
    follows the current locale.
    '''
    return datetime((monthIndex - 1) // 12, 1 + (monthIndex - 1) % 12, 1).strftime('%b %Y')
|
||||
|
||||
ticksLabels = [getMonthName(monthIndex) for monthIndex in xTicks]
|
||||
ticksLabels = [getMonthNameFromMonthIndex(monthIndex) for monthIndex in xTicks]
|
||||
plt.xticks(xTicks, ticksLabels, rotation = 90)
|
||||
#plt.tight_layout()
|
||||
# How to show the horizontal lines for subticks?
|
||||
|
23
utils.py
Normal file
23
utils.py
Normal file
@ -0,0 +1,23 @@
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
# Two `NN.NN` fields followed by an amount written with a decimal comma,
# e.g. `21.11 21.11 12,34`.
FIRST_LINE_OF_PAYMENT_REGEX = re.compile('\\d{2}\\.\\d{2} \\d{2}\\.\\d{2} \\d+,\\d{2}')
# A page counter such as `P. 2/3`.
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+')
# The literal text `SOLDE CREDITEUR AU ` followed by a `NN.NN.NNNN` field.
SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU \\d{2}\\.\\d{2}\\.\\d{4}')
# The literal text `TOTAL DES OPERATIONS ` followed by two captured amounts;
# each amount may contain spaces (thousands separators) and uses a decimal comma.
TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([0-9 ]+,\\d{2})\\ ([0-9 ]+,\\d{2})')
|
||||
|
||||
def execute(command):
    '''
    Run `command` and return what it wrote to standard output, decoded as UTF-8.

    `command` is handed unchanged to `subprocess.check_output`, which raises
    `subprocess.CalledProcessError` when the process exits with a non-zero
    status.
    '''
    return subprocess.check_output(command).decode('utf-8')
|
||||
|
||||
def getTextFromPdf(pdfPath):
    '''
    Return the text of the PDF at `pdfPath` as produced by `pdftotext -raw`.

    The trailing `-` argument makes `pdftotext` write the text to standard
    output instead of a text file, so that `execute` can capture it.
    '''
    return execute(['pdftotext', '-raw', pdfPath, '-'])
|
||||
|
||||
def getDatetimeFromFileName(aDatetimeStr):
    '''
    Parse a file name of the form `YYYYMMDD.pdf` into a `datetime`.

    The `.pdf` suffix is part of the expected format; `datetime.strptime`
    raises `ValueError` for any string that does not match it (for instance
    a name that still carries its folder prefix).
    '''
    return datetime.strptime(aDatetimeStr, '%Y%m%d.pdf')
|
||||
|
||||
def getMonthIndexSinceEpoch(aDatetime):
    '''
    Return a single integer identifying the month of `aDatetime`.

    The index is `year * 12 + month` with `month` in 1..12, i.e. it counts
    months from year 0 rather than from the Unix epoch. Consecutive calendar
    months map to consecutive integers, including across a year boundary.
    '''
    return aDatetime.year * 12 + aDatetime.month
|
||||
|
||||
def getMonthNameFromMonthIndex(monthIndex):
    '''
    Format a month index (`year * 12 + month`, `month` in 1..12) as e.g. `Nov 2022`.

    Subtracting 1 before the division and modulo makes December
    (`month == 12`) stay in its own year instead of rolling over to the next
    one. The month abbreviation comes from `strftime('%b')` and therefore
    follows the current locale.
    '''
    return datetime((monthIndex - 1) // 12, 1 + (monthIndex - 1) % 12, 1).strftime('%b %Y')
|
Loading…
x
Reference in New Issue
Block a user