#!/usr/bin/python3

# Depends on `pdftotext`.

import os, subprocess, re, config

# Root directory containing the PDFs to parse. The suffix comes from
# config.RLV_CHQ (presumably an account identifier — verify in config).
path = f'/home/benjamin/Desktop/bens_folder/bazaar/documents/bnp/RLV_CHQ_{config.RLV_CHQ}'

# Make it the working directory: the rest of the script lists and opens
# folders/files using paths relative to it.
os.chdir(path)

'''
Assumes a file hierarchy, relative to the working directory set above, like:

2022
├── 20221121.pdf
└── 20221221.pdf
2023
├── 20230123.pdf
└── 20230221.pdf
'''

def execute(command):
    """Run `command` and return its standard output decoded as UTF-8.

    `command` is an argument list (program followed by its arguments); it is
    executed directly, without going through a shell, so arguments such as
    file paths need no quoting.

    Raises subprocess.CalledProcessError if the command exits with a non-zero
    status.
    """
    # The previous version passed an undefined name `shell` here, which raised
    # NameError on every call. A list command does not need a shell at all.
    return subprocess.check_output(command).decode('utf-8')

def getTextFromPdf(pdfPath):
    """Return the text of the PDF at `pdfPath`, as produced by `pdftotext -raw`.

    The output file argument is '-', so the text is taken from the command's
    standard output (captured by `execute`) rather than written to disk.
    """
    pdftotextCommand = ['pdftotext', '-raw', pdfPath, '-']
    return execute(pdftotextCommand)

# Raw strings are used so that `\d` and `\.` reach the regex engine untouched
# instead of being (invalid) string escape sequences.

# First line of a payment: two `dd.dd` tokens followed by an amount written
# with a decimal comma, e.g. `21.11 21.11 12,34`.
firstLineOfPaymentRegex = re.compile(r'\d{2}\.\d{2} \d{2}\.\d{2} \d+,\d{2}')
# Page marker of the form `P. <n>/<m>`.
endPageAfterTheFirstOneRegex = re.compile(r'P\. \d+/\d+')
# Prefix of the balance line: `SOLDE CREDITEUR AU dd.dd.dddd`.
soldeCrediteurAuRegex = re.compile(r'SOLDE CREDITEUR AU \d{2}\.\d{2}\.\d{4}')

def _printPayment(date, valeur, amount, balance, comment):
    # Print one payment: its summary line, its comment lines, then a blank line.
    print(date, valeur, amount, balance)
    print('\n'.join(comment))
    print()


# Walk every folder of the working directory and every file inside it, extract
# the text of each PDF and print the payments found together with a running
# balance (opening balance minus each payment amount).
for folder in os.listdir():
    for file in os.listdir(folder):
        # To debug a single statement, override the loop variables here, e.g.:
        #folder = '2022'
        #file = '20220321.pdf'
        filePath = f'{folder}/{file}'
        print(filePath)
        lines = getTextFromPdf(filePath).splitlines()

        started = False        # True while inside a block of operations.
        firstPage = True       # Becomes False once the first page end is seen.
        initialAmount = None   # Balance read from the `SOLDE CREDITEUR AU` line.
        currentAmount = None   # Running balance after the payments seen so far.
        date = None            # None until a first payment line has been met.
        comment = []           # Lines following the current payment's first line.

        for line in lines:
            if not started:
                # Operations start after the opening balance line or, on pages
                # after the first one, after the table header.
                isOpeningBalance = soldeCrediteurAuRegex.match(line) is not None
                isTableHeader = not firstPage and line.startswith('Date Nature des opérations Valeur Débit Crédit')
                if isOpeningBalance:
                    # Strip the label and date, then turn `1 234,56` into 1234.56.
                    initialAmount = float(soldeCrediteurAuRegex.sub('', line).replace(',', '.').replace(' ', ''))
                    currentAmount = initialAmount
                    print('Initial amount', initialAmount)
                    print()
                if isOpeningBalance or isTableHeader:
                    started = True
                continue

            # End of the current page: skip lines until the next block starts.
            if line.startswith('BNP PARIBAS SA au capital de') or endPageAfterTheFirstOneRegex.match(line) is not None:
                firstPage = False
                started = False
                continue

            # End of the operations: nothing of interest follows.
            if line.startswith('TOTAL DES OPERATIONS'):
                break

            if firstLineOfPaymentRegex.match(line) is None:
                # Continuation line belonging to the current payment.
                comment.append(line)
                continue

            # A new payment starts: the previous one is now complete.
            if date is not None:
                _printPayment(date, valeur, amount, currentAmount, comment)
            date, valeur, amount = line.split()
            amount = float(amount.replace(',', '.'))
            currentAmount -= amount
            comment = []

        # The last payment has no following payment line to trigger its
        # printing, so flush it here; otherwise it would be silently dropped.
        if date is not None:
            _printPayment(date, valeur, amount, currentAmount, comment)

        # NOTE(review): this stops after the first file of the folder; it looks
        # like a debugging leftover — confirm before removing.
        break
    # NOTE(review): likewise, this stops after the first folder.
    break