Make `date` and `valeur` of transaction be `datetime`s
This commit is contained in:
parent
c425ebcb00
commit
31b81fa33e
33
utils.py
33
utils.py
@ -4,7 +4,7 @@ import re
|
|||||||
|
|
||||||
FIRST_LINE_OF_PAYMENT_REGEX = re.compile('(\\d{2}\\.\\d{2}) (\\d{2}\\.\\d{2}) ([\\d ]+,\\d{2})')
|
FIRST_LINE_OF_PAYMENT_REGEX = re.compile('(\\d{2}\\.\\d{2}) (\\d{2}\\.\\d{2}) ([\\d ]+,\\d{2})')
|
||||||
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+')
|
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+')
|
||||||
SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU \\d{2}\\.\\d{2}\\.\\d{4}')
|
SOLDE_CREDITEUR_AU_REGEX = re.compile('SOLDE CREDITEUR AU (\\d{2}\\.\\d{2}\\.\\d{4}) ([\\d ]+,\\d{2})')
|
||||||
TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([\\d ]+,\\d{2})\\ ([\\d ]+,\\d{2})')
|
TOTAL_DES_OPERATIONS_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([\\d ]+,\\d{2})\\ ([\\d ]+,\\d{2})')
|
||||||
TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([\\d ]+,\\d{2})')
|
TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX = re.compile('TOTAL\\ DES\\ OPERATIONS\\ ([\\d ]+,\\d{2})')
|
||||||
|
|
||||||
@ -28,25 +28,38 @@ def getMonthNameFromMonthIndex(monthIndex):
|
|||||||
def toFloat(group):
    """Convert a French-formatted amount string to a float.

    Args:
        group: Amount text using a comma as the decimal separator and
            optional whitespace as thousands separators, e.g. '1 234,56'.

    Returns:
        The amount as a float.

    Raises:
        ValueError: If what remains after removing whitespace is not a
            valid number.
    """
    # str.split() without arguments splits on every Unicode whitespace
    # character, so no-break (U+00A0) and narrow no-break (U+202F) spaces
    # are removed together with plain spaces.
    return float(''.join(group.split()).replace(',', '.'))
|
||||||
|
|
||||||
|
def getDateFollowing(date, initialDate):
    """Return the first occurrence of a day/month on or after `initialDate`.

    Args:
        date: Day and month as a 'DD.MM' string, e.g. '03.01'.
        initialDate: `datetime` that the result must not precede; its year is
            the first year tried.

    Returns:
        A `datetime` at midnight for `date` in `initialDate`'s year, or in the
        first later year where the day exists and the result is not before
        `initialDate` (this handles statements spanning a new year).

    Raises:
        ValueError: If `date` is not a valid 'DD.MM' day/month.
    """
    # Parse against a leap year: parsing '%d.%m' alone assumes 1900, which
    # rejects '29.02' (and day-without-year parsing is deprecated since 3.13).
    parsed = datetime.strptime(f'{date}.2000', '%d.%m.%Y')
    # Consecutive leap years are at most 8 years apart, so 9 candidate years
    # always include one in which the day exists.
    for year in range(initialDate.year, initialDate.year + 9):
        try:
            candidate = parsed.replace(year=year)
        except ValueError:
            # 29 February in a non-leap year (or year out of range): skip it.
            continue
        # To support new year: a date before the initial one belongs to a
        # following year.
        if candidate >= initialDate:
            return candidate
    raise ValueError(f'no valid year found for {date!r} after {initialDate}')
|
||||||
|
|
||||||
def readPdfBankStatement(filePath):
|
def readPdfBankStatement(filePath):
|
||||||
file = filePath.split('/')[-1]
|
file = filePath.split('/')[-1]
|
||||||
fileDatetime = getDatetimeFromFileName(file)
|
fileDatetime = getDatetimeFromFileName(file)
|
||||||
content = getTextFromPdf(filePath)
|
content = getTextFromPdf(filePath)
|
||||||
lines = content.splitlines()
|
lines = content.splitlines()
|
||||||
#print('\n'.join(lines))
|
|
||||||
started = False
|
started = False
|
||||||
firstPage = True
|
firstPage = True
|
||||||
initialAmount = None
|
initialAmount = None
|
||||||
|
initialDate = None
|
||||||
#currentAmount = None
|
#currentAmount = None
|
||||||
date = None
|
date = None
|
||||||
comment = []
|
comment = []
|
||||||
transactions = []
|
transactions = []
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if not started:
|
if not started:
|
||||||
# We are interested in the content after this line:
|
# We are interested in the content after this line:
|
||||||
if SOLDE_CREDITEUR_AU_REGEX.match(line) is not None or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage):
|
soldeCrediteurAuRegexMatch = SOLDE_CREDITEUR_AU_REGEX.match(line)
|
||||||
if SOLDE_CREDITEUR_AU_REGEX.match(line):
|
if soldeCrediteurAuRegexMatch is not None or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage):
|
||||||
initialAmount = float(SOLDE_CREDITEUR_AU_REGEX.sub('', line).replace(',', '.').replace(' ', ''))
|
if soldeCrediteurAuRegexMatch is not None:
|
||||||
|
initialDate = datetime.strptime(soldeCrediteurAuRegexMatch.group(1), '%d.%m.%Y')
|
||||||
|
print(f'{initialDate=}')
|
||||||
|
#exit(1)
|
||||||
|
initialAmount = toFloat(soldeCrediteurAuRegexMatch.group(2))
|
||||||
#currentAmount = initialAmount
|
#currentAmount = initialAmount
|
||||||
started = True
|
started = True
|
||||||
continue
|
continue
|
||||||
@ -71,8 +84,8 @@ def readPdfBankStatement(filePath):
|
|||||||
if firstLineOfPaymentRegexMatch is not None:
|
if firstLineOfPaymentRegexMatch is not None:
|
||||||
if date is not None:
|
if date is not None:
|
||||||
transactions += [{
|
transactions += [{
|
||||||
'date': date,
|
'date': getDateFollowing(date, initialDate),
|
||||||
'valeur': valeur,
|
'valeur': getDateFollowing(valeur, initialDate),
|
||||||
'amount': amount,
|
'amount': amount,
|
||||||
#'currentAmount': currentAmount,
|
#'currentAmount': currentAmount,
|
||||||
'comment': '\n'.join(comment)
|
'comment': '\n'.join(comment)
|
||||||
@ -86,8 +99,8 @@ def readPdfBankStatement(filePath):
|
|||||||
comment += [line]
|
comment += [line]
|
||||||
if date is not None:
|
if date is not None:
|
||||||
transactions += [{
|
transactions += [{
|
||||||
'date': date,
|
'date': getDateFollowing(date, initialDate),
|
||||||
'valeur': valeur,
|
'valeur': getDateFollowing(valeur, initialDate),
|
||||||
'amount': amount,
|
'amount': amount,
|
||||||
#'currentAmount': currentAmount,
|
#'currentAmount': currentAmount,
|
||||||
'comment': '\n'.join(comment)
|
'comment': '\n'.join(comment)
|
||||||
|
Loading…
Reference in New Issue
Block a user