Move usage notes from main.py
to README.md
and helper functions and regexes to utils.py
This commit is contained in:
parent
17acc478b7
commit
bbe93d0939
19
README.md
Normal file
19
README.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# BNP PDF statement parser
|
||||||
|
|
||||||
|
Depends on `pdftotext`.
|
||||||
|
|
||||||
|
Assumes a file hierarchy like:
|
||||||
|
|
||||||
|
```
|
||||||
|
.
|
||||||
|
├── compte_de_cheques/
|
||||||
|
│ ├── 2022/
|
||||||
|
│ │ ├── 20221121.pdf
|
||||||
|
│ │ └── 20221221.pdf
|
||||||
|
│ └── 2023/
|
||||||
|
│ ├── 20230123.pdf
|
||||||
|
│ └── 20230221.pdf
|
||||||
|
└── livret_a/
|
||||||
|
├── 20230721.pdf
|
||||||
|
└── 20240122.pdf
|
||||||
|
```
|
@ -1,40 +1,15 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
# Depends on `pdftotext`.
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import subprocess
|
|
||||||
import re
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import matplotlib.ticker as ticker
|
import matplotlib.ticker as ticker
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from utils import getTextFromPdf, getDatetimeFromFileName, getMonthIndexSinceEpoch, getMonthNameFromMonthIndex, FIRST_LINE_OF_PAYMENT_REGEX, END_PAGE_AFTER_THE_FIRST_ONE_REGEX, SOLDE_CREDITEUR_AU_REGEX, TOTAL_DES_OPERATIONS_REGEX
|
||||||
|
|
||||||
PATH = f'/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/compte_de_cheques/'
|
PATH = f'/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements/compte_de_cheques/'
|
||||||
|
|
||||||
os.chdir(PATH)
|
os.chdir(PATH)
|
||||||
|
|
||||||
'''
|
|
||||||
Assuming file hierarchy like:
|
|
||||||
|
|
||||||
2022
|
|
||||||
├── 20221121.pdf
|
|
||||||
└── 20221221.pdf
|
|
||||||
2023
|
|
||||||
├── 20230123.pdf
|
|
||||||
└── 20230221.pdf
|
|
||||||
'''
|
|
||||||
|
|
||||||
def execute(command):
|
|
||||||
return subprocess.check_output(command).decode('utf-8')
|
|
||||||
|
|
||||||
def getTextFromPdf(pdfPath):
|
|
||||||
return execute(['pdftotext', '-raw', pdfPath, '-'])
|
|
||||||
|
|
||||||
FIRST_LINE_OF_PAYMENT_REGEX = re.compile('\\d{2}\\.\\d{2} \\d{2}\\.\\d{2} \\d+,\\d{2}')
|
|
||||||
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+')
|
|
||||||
SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU \\d{2}\\.\\d{2}\\.\\d{4}')
|
|
||||||
TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([0-9 ]+,\\d{2})\\ ([0-9 ]+,\\d{2})')
|
|
||||||
|
|
||||||
PRINT_TRANSACTIONS = False
|
PRINT_TRANSACTIONS = False
|
||||||
|
|
||||||
totalMonthlyDebits = []
|
totalMonthlyDebits = []
|
||||||
@ -47,7 +22,7 @@ for folder in sorted(os.listdir()):
|
|||||||
for file in sorted(os.listdir(folder)):
|
for file in sorted(os.listdir(folder)):
|
||||||
filePath = f'{folder}/{file}'
|
filePath = f'{folder}/{file}'
|
||||||
print(filePath)
|
print(filePath)
|
||||||
currentDatetime = getDatetime(file)
|
currentDatetime = getDatetimeFromFileName(file)
|
||||||
if firstDatetime is None:
|
if firstDatetime is None:
|
||||||
firstDatetime = currentDatetime
|
firstDatetime = currentDatetime
|
||||||
content = getTextFromPdf(filePath)
|
content = getTextFromPdf(filePath)
|
||||||
@ -100,7 +75,7 @@ for folder in sorted(os.listdir()):
|
|||||||
comment += [line]
|
comment += [line]
|
||||||
#break
|
#break
|
||||||
#break
|
#break
|
||||||
lastDatetime = getDatetime(file)
|
lastDatetime = getDatetimeFromFileName(file)
|
||||||
|
|
||||||
fig, ax = plt.subplots()
|
fig, ax = plt.subplots()
|
||||||
plt.title('Monthly debits and credits')
|
plt.title('Monthly debits and credits')
|
||||||
@ -108,14 +83,7 @@ plt.xlabel('Date')
|
|||||||
plt.ylabel('€')
|
plt.ylabel('€')
|
||||||
ALPHA = 0.5
|
ALPHA = 0.5
|
||||||
|
|
||||||
def getDatetime(aDatetimeStr):
|
xTicks = range(getMonthIndexSinceEpoch(firstDatetime), getMonthIndexSinceEpoch(lastDatetime) + 1)
|
||||||
return datetime.strptime(aDatetimeStr, '%Y%m%d.pdf')
|
|
||||||
|
|
||||||
def getMonthIndex(aDatetime):
|
|
||||||
return aDatetime.year * 12 + aDatetime.month
|
|
||||||
|
|
||||||
xTicks = range(getMonthIndex(firstDatetime), getMonthIndex(lastDatetime) + 1)
|
|
||||||
# sign does not seem respected for `totalMonthlyDifferences`.
|
|
||||||
totalMonthlyAmountAndLabel = (
|
totalMonthlyAmountAndLabel = (
|
||||||
#(totalMonthlyDebits, 'Debit'),
|
#(totalMonthlyDebits, 'Debit'),
|
||||||
#(totalMonthlyCredits, 'Credit'),
|
#(totalMonthlyCredits, 'Credit'),
|
||||||
@ -129,10 +97,7 @@ plt.legend()
|
|||||||
#plt.yscale('symlog')
|
#plt.yscale('symlog')
|
||||||
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,}'))
|
ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,}'))
|
||||||
|
|
||||||
def getMonthName(monthIndex):
|
ticksLabels = [getMonthNameFromMonthIndex(monthIndex) for monthIndex in xTicks]
|
||||||
return datetime((monthIndex - 1) // 12, 1 + (monthIndex - 1) % 12, 1).strftime('%b %Y')
|
|
||||||
|
|
||||||
ticksLabels = [getMonthName(monthIndex) for monthIndex in xTicks]
|
|
||||||
plt.xticks(xTicks, ticksLabels, rotation = 90)
|
plt.xticks(xTicks, ticksLabels, rotation = 90)
|
||||||
#plt.tight_layout()
|
#plt.tight_layout()
|
||||||
# How to show the horizontal lines for subticks?
|
# How to show the horizontal lines for subticks?
|
||||||
|
23
utils.py
Normal file
23
utils.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
import subprocess
|
||||||
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
|
FIRST_LINE_OF_PAYMENT_REGEX = re.compile('\\d{2}\\.\\d{2} \\d{2}\\.\\d{2} \\d+,\\d{2}')
|
||||||
|
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+')
|
||||||
|
SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU \\d{2}\\.\\d{2}\\.\\d{4}')
|
||||||
|
TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([0-9 ]+,\\d{2})\\ ([0-9 ]+,\\d{2})')
|
||||||
|
|
||||||
|
def execute(command):
|
||||||
|
return subprocess.check_output(command).decode('utf-8')
|
||||||
|
|
||||||
|
def getTextFromPdf(pdfPath):
|
||||||
|
return execute(['pdftotext', '-raw', pdfPath, '-'])
|
||||||
|
|
||||||
|
def getDatetimeFromFileName(aDatetimeStr):
|
||||||
|
return datetime.strptime(aDatetimeStr, '%Y%m%d.pdf')
|
||||||
|
|
||||||
|
def getMonthIndexSinceEpoch(aDatetime):
|
||||||
|
return aDatetime.year * 12 + aDatetime.month
|
||||||
|
|
||||||
|
def getMonthNameFromMonthIndex(monthIndex):
|
||||||
|
return datetime((monthIndex - 1) // 12, 1 + (monthIndex - 1) % 12, 1).strftime('%b %Y')
|
Loading…
Reference in New Issue
Block a user