diff --git a/bnp_pdf_statement_parser.py b/bnp_pdf_statement_parser.py index b61aadd..d77fc6a 100755 --- a/bnp_pdf_statement_parser.py +++ b/bnp_pdf_statement_parser.py @@ -12,7 +12,7 @@ PATH = f'/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/bank_statements os.chdir(PATH) -PRINT_TRANSACTIONS = False +PRINT_TRANSACTIONS = True MAIN_BANK_ACCOUNT = 'compte_de_cheques' @@ -34,13 +34,13 @@ for folder in os.listdir(): for file in os.listdir(folder): filePath = f'{folder}/{file}' print(filePath) - print(readPdfBankStatement(filePath)) + #print(readPdfBankStatement(filePath)) #exit(0) transactions = readPdfBankStatement(filePath)[3] pprint(transactions) appendTransactions(transactions, folder) -exit(1) +#exit(1) os.chdir(f'{MAIN_BANK_ACCOUNT}/') for folder in sorted(os.listdir()): @@ -50,11 +50,11 @@ for folder in sorted(os.listdir()): transactions = readPdfBankStatement(filePath)[3] appendTransactions(transactions, MAIN_BANK_ACCOUNT) if PRINT_TRANSACTIONS: - for transaction in transactions: - printTransaction(transaction) + pprint(transactions) #break #break +exit(1) allTransactions.sort(key = operator.itemgetter('date')) print(len(allTransactions)) pprint(allTransactions) diff --git a/utils.py b/utils.py index d793850..6f9b8e4 100644 --- a/utils.py +++ b/utils.py @@ -4,10 +4,11 @@ import re # Source: [the Stack Overflow answer 766377](https://stackoverflow.com/a/766377) FIRST_LINE_OF_PAYMENT_REGEX = re.compile('\\ +(\\d{2}\\.\\d{2})\\ +([A-Z\\d /.()*]+?)\\ +(\\d{2}\\.\\d{2})\\ +([\\d ]+,\\d{2})') -END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile('P\\. \\d+/\\d+') +END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile(' +RELEVE ((DE (COMPTE (CHEQUES|D\'EPARGNE LOGEMENT|LEP))|LIVRET (A|JEUNE))|LIVRET DEV. DURABLE ET SOLIDAIRE) +P\\. \\d+/\\d+') SOLDE_CREDITEUR_AU_REGEX = re.compile('\\ +SOLDE CREDITEUR AU (\\d{2}\\.\\d{2}\\.\\d{4})\\ +([\\d ]+,\\d{2})') TOTAL_DES_OPERATIONS_REGEX = re.compile('\\ +TOTAL\\ DES\\ OPERATIONS\\ +([\\d ]+,\\d{2})\\ +([\\d ]+,\\d{2})') TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX = re.compile('\\ +TOTAL\\ DES\\ OPERATIONS\\ +([\\d ]+,\\d{2})') +COLUMNS_HEADER = re.compile(' +Date +Nature des opérations +Valeur +Débit +Crédit') def execute(command): return subprocess.check_output(command).decode('utf-8') @@ -53,7 +54,7 @@ def readPdfBankStatement(filePath): if not started: # We are interested in the content after this line:) soldeCrediteurAuRegexMatch = SOLDE_CREDITEUR_AU_REGEX.match(line) - if soldeCrediteurAuRegexMatch is not None or (line.startswith('Date Nature des opérations Valeur Débit Crédit') and not firstPage): + if soldeCrediteurAuRegexMatch is not None or (COLUMNS_HEADER.match(line) and not firstPage): if soldeCrediteurAuRegexMatch is not None: initialDate = datetime.strptime(soldeCrediteurAuRegexMatch.group(1), '%d.%m.%Y') initialAmount = toFloat(soldeCrediteurAuRegexMatch.group(2)) @@ -69,6 +70,7 @@ def readPdfBankStatement(filePath): continue # We aren't interested in the content after this line else: + #print('hey', line) totalDesOperationsRegexMatch = TOTAL_DES_OPERATIONS_REGEX.match(line) totalDesOperationsCreditOnlyRegexMatch = TOTAL_DES_OPERATIONS_CREDIT_ONLY_REGEX.match(line) if totalDesOperationsRegexMatch is not None or totalDesOperationsCreditOnlyRegexMatch is not None: @@ -84,7 +86,7 @@ def readPdfBankStatement(filePath): break firstLineOfPaymentRegexMatch = FIRST_LINE_OF_PAYMENT_REGEX.match(line) if firstLineOfPaymentRegexMatch is not None: - print(line) + #print(line) if date is not None: transactions += [{ 'date': getDateFollowing(date, initialDate), @@ -99,8 +101,8 @@ def readPdfBankStatement(filePath): #currentAmount -= amount comment = [firstCommentLine] elif line != '': - print(f'comment: {line}') - comment += [line] + #print(f'comment: {line}') + comment += [line.strip()] if date is not None: transactions += [{ 'date': getDateFollowing(date, initialDate), @@ -109,10 +111,4 @@ def readPdfBankStatement(filePath): #'currentAmount': currentAmount, 'comment': '\n'.join(comment) }] - return initialAmount, totalMonthlyDebit, totalMonthlyCredit, transactions, fileDatetime - -def printTransaction(transaction): - # , transaction['currentAmount'] - print(transaction['date'], transaction['valeur'], transaction['amount']) - print(transaction['comment']) - print() \ No newline at end of file + return initialAmount, totalMonthlyDebit, totalMonthlyCredit, transactions, fileDatetime \ No newline at end of file