import os
import re
import subprocess
from datetime import datetime

# Two `NN.NN` tokens followed by an amount written with a comma as decimal
# separator, e.g. "01.02 03.02 12,34" (presumably operation date, value date
# and amount -- verify against a real statement).
FIRST_LINE_OF_PAYMENT_REGEX = re.compile(r'\d{2}\.\d{2} \d{2}\.\d{2} \d+,\d{2}')

# Page marker of the form "P. 2/5".
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile(r'P\. \d+/\d+')

# Literal "SOLDE CREDITEUR AU" followed by a DD.MM.YYYY-shaped date.
SOLDE_CREDITEUR_AU_REGEX = re.compile(r'SOLDE CREDITEUR AU \d{2}\.\d{2}\.\d{4}')

# Literal "TOTAL DES OPERATIONS" followed by two amounts; each amount may use
# spaces as digit-group separators and is captured in its own group.
TOTAL_DES_OPERATIONS_REGEX = re.compile(
    r'TOTAL\ DES\ OPERATIONS\ ([0-9 ]+,\d{2})\ ([0-9 ]+,\d{2})'
)


def execute(command) -> str:
    """Run *command* and return its standard output decoded as UTF-8.

    Args:
        command: The argument vector handed to ``subprocess.check_output``.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    raw_output = subprocess.check_output(command)
    return raw_output.decode('utf-8')


def getTextFromPdf(pdfPath) -> str:
    """Return the text of the PDF at *pdfPath* as extracted by ``pdftotext``.

    The external ``pdftotext`` tool is invoked with ``-raw``; the trailing
    ``-`` asks it to write the text to standard output instead of a file.
    """
    return execute(['pdftotext', '-raw', pdfPath, '-'])


def getDatetimeFromFileName(aDatetimeStr) -> datetime:
    """Parse the date encoded in a ``YYYYMMDD.pdf`` file name.

    Args:
        aDatetimeStr: Either a bare file name such as ``20200131.pdf`` or a
            path (``str`` or path-like) whose last component has that form.

    Raises:
        ValueError: If the last path component is not ``YYYYMMDD.pdf``.
    """
    # Only the final component carries the date; dropping any directory part
    # lets callers pass the same path they hand to getTextFromPdf.
    file_name = os.path.basename(aDatetimeStr)
    return datetime.strptime(file_name, '%Y%m%d.pdf')


def getMonthIndexSinceEpoch(aDatetime: datetime) -> int:
    """Return ``year * 12 + month`` for *aDatetime*.

    Consecutive calendar months map to consecutive integers.
    """
    return aDatetime.year * 12 + aDatetime.month


def getMonthNameFromMonthIndex(monthIndex: int) -> str:
    """Format a month index from ``getMonthIndexSinceEpoch`` as ``'%b %Y'``.

    For example ``24241`` (January 2020) becomes ``'Jan 2020'`` under the
    default locale.
    """
    # Months are 1-based inside the index, so shift to 0-based before
    # splitting and back to 1-based afterwards.
    year, zero_based_month = divmod(monthIndex - 1, 12)
    return datetime(year, zero_based_month + 1, 1).strftime('%b %Y')