Make date and valeur of transaction be datetimes

This commit is contained in:
Benjamin Loison 2024-10-03 21:20:07 +02:00
parent c425ebcb00
commit 31b81fa33e
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8

View File

@ -4,7 +4,7 @@ import re
# Matches two `dd.dd` tokens followed by an amount written with spaces between
# digit groups and a comma before the two decimals (for instance `1 234,56`).
# Presumably the groups are the operation date, the value date and the amount
# - confirm against the caller.
FIRST_LINE_OF_PAYMENT_REGEX = re.compile('(\\d{2}\\.\\d{2}) (\\d{2}\\.\\d{2}) ([\\d ]+,\\d{2})')
# Matches a `P. <number>/<number>` marker.
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+')
# Group-less form of the `SOLDE CREDITEUR AU dd.mm.yyyy` pattern; the name is
# rebound by the assignment right below, which is the one in effect.
SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU \\d{2}\\.\\d{2}\\.\\d{4}')
# Captures the `dd.mm.yyyy` date (group 1) and the amount (group 2) of a
# `SOLDE CREDITEUR AU` line.
SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU (\\d{2}\\.\\d{2}\\.\\d{4}) ([\\d ]+,\\d{2})')
# Captures the two amounts that follow `TOTAL DES OPERATIONS`.
TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([\\d ]+,\\d{2})\\ ([\\d ]+,\\d{2})')
# Captures the single amount that follows `TOTAL DES OPERATIONS`.
TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([\\d ]+,\\d{2})')
@ -28,25 +28,38 @@ def getMonthNameFromMonthIndex(monthIndex):
def toFloat(group):
    """Convert an amount such as `1 234,56` to the float `1234.56`.

    The comma decimal separator becomes a dot and the spaces between digit
    groups are dropped before the text is handed to `float`.
    """
    withDecimalPoint = group.replace(',', '.')
    withoutSpaces = withDecimalPoint.replace(' ', '')
    return float(withoutSpaces)
def getDateFollowing(date, initialDate):
    """Return the first `dd.mm` date that is not before `initialDate`.

    Parameters:
        date: day and month as a `dd.mm` string, without a year.
        initialDate: `datetime` used as the lower bound and as the source of
            the year.

    Returns:
        A `datetime` at midnight, in the year of `initialDate` or, when that
        would fall before `initialDate`, in the following year.

    Raises:
        ValueError: if `date` is malformed or exists in neither of the two
            candidate years.
    """
    lastError = None
    # The second candidate year supports statements that cross a new year.
    for year in (initialDate.year, initialDate.year + 1):
        try:
            # The year is parsed together with the day and month: parsing
            # `29.02` on its own is checked against the non-leap default year
            # 1900 and is rejected.
            candidate = datetime.strptime(f'{date}.{year}', '%d.%m.%Y')
        except ValueError as error:
            # Either malformed input or a 29.02 in a non-leap year.
            lastError = error
            continue
        if candidate >= initialDate:
            return candidate
    raise ValueError(f'Unable to find a date matching {date!r} from {initialDate}') from lastError
def readPdfBankStatement(filePath):
file = filePath.split('/')[-1]
fileDatetime = getDatetimeFromFileName(file)
content = getTextFromPdf(filePath)
lines = content.splitlines()
#print('\n'.join(lines))
started = False
firstPage = True
initialAmount = None
initialDate = None
#currentAmount = None
date = None
comment = []
transactions = []
for line in lines:
if not started:
# We are interested in the content after this line:
if SOLDE_CREDITEUR_AU_REGEX.match(line) is not None or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage):
if SOLDE_CREDITEUR_AU_REGEX.match(line):
initialAmount = float(SOLDE_CREDITEUR_AU_REGEX.sub('', line).replace(',', '.').replace(' ', ''))
# We are interested in the content after this line:
soldeCrediteurAuRegexMatch = SOLDE_CREDITEUR_AU_REGEX.match(line)
if soldeCrediteurAuRegexMatch is not None or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage):
if soldeCrediteurAuRegexMatch is not None:
initialDate = datetime.strptime(soldeCrediteurAuRegexMatch.group(1), '%d.%m.%Y')
print(f'{initialDate=}')
#exit(1)
initialAmount = toFloat(soldeCrediteurAuRegexMatch.group(2))
#currentAmount = initialAmount
started = True
continue
@ -71,8 +84,8 @@ def readPdfBankStatement(filePath):
if firstLineOfPaymentRegexMatch is not None:
if date is not None:
transactions += [{
'date': date,
'valeur': valeur,
'date': getDateFollowing(date, initialDate),
'valeur': getDateFollowing(valeur, initialDate),
'amount': amount,
#'currentAmount': currentAmount,
'comment': '\n'.join(comment)
@ -86,8 +99,8 @@ def readPdfBankStatement(filePath):
comment += [line]
if date is not None:
transactions += [{
'date': date,
'valeur': valeur,
'date': getDateFollowing(date, initialDate),
'valeur': getDateFollowing(valeur, initialDate),
'amount': amount,
#'currentAmount': currentAmount,
'comment': '\n'.join(comment)