Make date and valeur of transaction be datetimes

This commit is contained in:
Benjamin Loison 2024-10-03 21:20:07 +02:00
parent c425ebcb00
commit 31b81fa33e
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8

View File

@ -4,7 +4,7 @@ import re
FIRST_LINE_OF_PAYMENT_REGEX = re.compile('(\\d{2}\\.\\d{2}) (\\d{2}\\.\\d{2}) ([\\d ]+,\\d{2})') FIRST_LINE_OF_PAYMENT_REGEX = re.compile('(\\d{2}\\.\\d{2}) (\\d{2}\\.\\d{2}) ([\\d ]+,\\d{2})')
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+') END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+')
SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU \\d{2}\\.\\d{2}\\.\\d{4}') SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU (\\d{2}\\.\\d{2}\\.\\d{4}) ([\\d ]+,\\d{2})')
TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([\\d ]+,\\d{2})\\ ([\\d ]+,\\d{2})') TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([\\d ]+,\\d{2})\\ ([\\d ]+,\\d{2})')
TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([\\d ]+,\\d{2})') TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([\\d ]+,\\d{2})')
@ -28,25 +28,38 @@ def getMonthNameFromMonthIndex(monthIndex):
def toFloat(group): def toFloat(group):
return float(group.replace(',', '.').replace(' ', '')) return float(group.replace(',', '.').replace(' ', ''))
def getDateFollowing(date, initialDate):
    """Resolve a year-less 'DD.MM' string to the first such date on or after `initialDate`.

    The year of `initialDate` is tried first; if the resulting date falls
    before `initialDate`, the following year is used instead (this is what
    lets a statement spanning New Year resolve '02.01' after a December
    start).

    Args:
        date: Day and month formatted as 'DD.MM'.
        initialDate: `datetime` the returned date must not precede.

    Returns:
        A `datetime` (midnight) for the resolved day.

    Raises:
        ValueError: If `date` is not a valid 'DD.MM' day in either the year of
            `initialDate` or the following one, on or after `initialDate`.
    """
    # Parse with an explicit year: a bare '%d.%m' is interpreted in the default
    # year 1900, which is not a leap year, so '29.02' would always be rejected.
    for year in (initialDate.year, initialDate.year + 1):
        try:
            candidate = datetime.strptime(f'{date}.{year}', '%d.%m.%Y')
        except ValueError:
            # Day does not exist in this year (e.g. 29.02 in a non-leap year)
            # or the string is malformed; the next year may still be valid.
            continue
        if candidate >= initialDate:
            return candidate
    raise ValueError(f'Cannot place {date!r} within the year following {initialDate:%d.%m.%Y}')
def readPdfBankStatement(filePath): def readPdfBankStatement(filePath):
file = filePath.split('/')[-1] file = filePath.split('/')[-1]
fileDatetime = getDatetimeFromFileName(file) fileDatetime = getDatetimeFromFileName(file)
content = getTextFromPdf(filePath) content = getTextFromPdf(filePath)
lines = content.splitlines() lines = content.splitlines()
#print('\n'.join(lines))
started = False started = False
firstPage = True firstPage = True
initialAmount = None initialAmount = None
initialDate = None
#currentAmount = None #currentAmount = None
date = None date = None
comment = [] comment = []
transactions = [] transactions = []
for line in lines: for line in lines:
if not started: if not started:
# We are interested in the content after this line: # We are interested in the content after this line:)
if SOLDE_CREDITEUR_AU_REGEX.match(line) is not None or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage): soldeCrediteurAuRegexMatch = SOLDE_CREDITEUR_AU_REGEX.match(line)
if SOLDE_CREDITEUR_AU_REGEX.match(line): if soldeCrediteurAuRegexMatch is not None or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage):
initialAmount = float(SOLDE_CREDITEUR_AU_REGEX.sub('', line).replace(',', '.').replace(' ', '')) if soldeCrediteurAuRegexMatch is not None:
initialDate = datetime.strptime(soldeCrediteurAuRegexMatch.group(1), '%d.%m.%Y')
print(f'{initialDate=}')
#exit(1)
initialAmount = toFloat(soldeCrediteurAuRegexMatch.group(2))
#currentAmount = initialAmount #currentAmount = initialAmount
started = True started = True
continue continue
@ -71,8 +84,8 @@ def readPdfBankStatement(filePath):
if firstLineOfPaymentRegexMatch is not None: if firstLineOfPaymentRegexMatch is not None:
if date is not None: if date is not None:
transactions += [{ transactions += [{
'date': date, 'date': getDateFollowing(date, initialDate),
'valeur': valeur, 'valeur': getDateFollowing(valeur, initialDate),
'amount': amount, 'amount': amount,
#'currentAmount': currentAmount, #'currentAmount': currentAmount,
'comment': '\n'.join(comment) 'comment': '\n'.join(comment)
@ -86,8 +99,8 @@ def readPdfBankStatement(filePath):
comment += [line] comment += [line]
if date is not None: if date is not None:
transactions += [{ transactions += [{
'date': date, 'date': getDateFollowing(date, initialDate),
'valeur': valeur, 'valeur': getDateFollowing(valeur, initialDate),
'amount': amount, 'amount': amount,
#'currentAmount': currentAmount, #'currentAmount': currentAmount,
'comment': '\n'.join(comment) 'comment': '\n'.join(comment)