Use compiled regex in the for loops for the website

This commit is contained in:
Benjamin Loison 2023-02-26 16:01:56 +01:00
parent e493eaeb49
commit 20025e02b4
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8

View File

@@ -10,6 +10,9 @@ pathSearchMessageParts = sys.argv[2].split(' ')
pathSearch = pathSearchMessageParts[1] pathSearch = pathSearchMessageParts[1]
message = ' '.join(pathSearchMessageParts[2:]) message = ' '.join(pathSearchMessageParts[2:])
pathSearchRegex = re.compile(pathSearch)
messageRegex = re.compile(message)
isPathSearchAChannelId = re.match(r'[a-zA-Z0-9-_]{24}', pathSearch) isPathSearchAChannelId = re.match(r'[a-zA-Z0-9-_]{24}', pathSearch)
searchOnlyCaptions = pathSearchMessageParts[0] == 'search-only-captions' searchOnlyCaptions = pathSearchMessageParts[0] == 'search-only-captions'
@@ -52,14 +55,14 @@ for fileIndex, file in enumerate(files):
if searchOnlyCaptions and not endsWithVtt: if searchOnlyCaptions and not endsWithVtt:
continue continue
toWrite = f'{file}/{fileInZip}' toWrite = f'{file}/{fileInZip}'
if not bool(re.search(pathSearch, toWrite)): if not bool(pathSearchRegex.search(toWrite)):
continue continue
with zip.open(fileInZip) as f: with zip.open(fileInZip) as f:
if endsWithVtt: if endsWithVtt:
content = f.read().decode('utf-8') content = f.read().decode('utf-8')
stringIOf = StringIO(content) stringIOf = StringIO(content)
wholeCaption = ' '.join([cleanCaption(caption.text) for caption in webvtt.read_buffer(stringIOf)]) wholeCaption = ' '.join([cleanCaption(caption.text) for caption in webvtt.read_buffer(stringIOf)])
messagePositions = [m.start() for m in re.finditer(message, wholeCaption)] messagePositions = [m.start() for m in messageRegex.finditer(wholeCaption)]
if messagePositions != []: if messagePositions != []:
timestamps = [] timestamps = []
for messagePosition in messagePositions: for messagePosition in messagePositions: