BNP_PDF_statement_parser/utils.py

23 lines
900 B
Python
Raw Normal View History

import subprocess
from datetime import datetime
import re
# Patterns used to recognise lines in the text extracted from a statement PDF.
# They are written as raw strings so the regex escapes read as-is.

# Two "dd.dd" fields followed by an amount that uses a comma as decimal
# separator (presumably operation date, value date and amount -- confirm
# against the statement layout).
FIRST_LINE_OF_PAYMENT_REGEX = re.compile(
    r"\d{2}\.\d{2} \d{2}\.\d{2} \d+,\d{2}"
)

# A "P. <number>/<number>" marker.
END_PAGE_AFTER_THE_FIRST_ONE_REGEX = re.compile(r"P\. \d+/\d+")

# The literal text "SOLDE CREDITEUR AU" followed by a dd.mm.yyyy-shaped date.
SOLDE_CREDITEUR_AU_REGEX = re.compile(
    r"SOLDE CREDITEUR AU \d{2}\.\d{2}\.\d{4}"
)

# The literal text "TOTAL DES OPERATIONS" followed by two captured amounts;
# each amount may contain spaces between digits and ends with ",dd".
TOTAL_DES_OPERATIONS_REGEX = re.compile(
    r"TOTAL\ DES\ OPERATIONS\ ([0-9 ]+,\d{2})\ ([0-9 ]+,\d{2})"
)
def execute(command):
    """Run *command* and return everything it wrote to stdout as text.

    Args:
        command: The argument vector handed to ``subprocess.check_output``.

    Returns:
        The captured standard output, decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status.
        UnicodeDecodeError: If the output is not valid UTF-8.
    """
    raw_output = subprocess.check_output(command)
    return raw_output.decode('utf-8')
def getTextFromPdf(pdfPath):
    """Return the text of the PDF at *pdfPath* as produced by ``pdftotext``.

    The external ``pdftotext`` tool is invoked with ``-raw`` and with ``-``
    as the output argument, and its standard output is returned via
    ``execute``.

    Args:
        pdfPath: Path of the PDF file to convert.

    Returns:
        The text emitted by ``pdftotext``.
    """
    command = ['pdftotext', '-raw', pdfPath, '-']
    return execute(command)
def getDatetimeFromFileName(aDatetimeStr):
    """Parse a ``YYYYMMDD.pdf`` file name into a ``datetime``.

    Args:
        aDatetimeStr: File name of the exact form ``YYYYMMDD.pdf``.

    Returns:
        A ``datetime`` for that date (time fields left at midnight).

    Raises:
        ValueError: If the string does not match the expected format.
    """
    file_name_format = '%Y%m%d.pdf'
    return datetime.strptime(aDatetimeStr, file_name_format)
def getMonthIndexSinceEpoch(aDatetime):
    """Map a date to a single integer that increases by one each month.

    The value is ``12 * year + month``, so it counts months from year 0
    rather than from the Unix epoch, despite the function's name.

    Args:
        aDatetime: Any object exposing integer ``year`` and ``month``.

    Returns:
        The month index as an ``int``.
    """
    return 12 * aDatetime.year + aDatetime.month
def getMonthNameFromMonthIndex(monthIndex):
    """Format a ``12 * year + month`` index as a label such as ``Jan 2020``.

    Args:
        monthIndex: Month index equal to ``12 * year + month`` with
            ``month`` in 1..12.

    Returns:
        The abbreviated month name and year, formatted with ``'%b %Y'``.
    """
    # Shift to a zero-based month so December stays in its own year.
    year, zero_based_month = divmod(monthIndex - 1, 12)
    first_of_month = datetime(year, zero_based_month + 1, 1)
    return first_of_month.strftime('%b %Y')