From 20025e02b41762eeb62a1a4eb6bbbe0ff0251c32 Mon Sep 17 00:00:00 2001 From: Benjamin Loison Date: Sun, 26 Feb 2023 16:01:56 +0100 Subject: [PATCH] Use compiled regex in the `for` loops for the website --- website/search.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/website/search.py b/website/search.py index 812948b..6d7319a 100755 --- a/website/search.py +++ b/website/search.py @@ -10,6 +10,9 @@ pathSearchMessageParts = sys.argv[2].split(' ') pathSearch = pathSearchMessageParts[1] message = ' '.join(pathSearchMessageParts[2:]) +pathSearchRegex = re.compile(pathSearch) +messageRegex = re.compile(message) + isPathSearchAChannelId = re.match(r'[a-zA-Z0-9-_]{24}', pathSearch) searchOnlyCaptions = pathSearchMessageParts[0] == 'search-only-captions' @@ -52,14 +55,14 @@ for fileIndex, file in enumerate(files): if searchOnlyCaptions and not endsWithVtt: continue toWrite = f'{file}/{fileInZip}' - if not bool(re.search(pathSearch, toWrite)): + if not bool(pathSearchRegex.search(toWrite)): continue with zip.open(fileInZip) as f: if endsWithVtt: content = f.read().decode('utf-8') stringIOf = StringIO(content) wholeCaption = ' '.join([cleanCaption(caption.text) for caption in webvtt.read_buffer(stringIOf)]) - messagePositions = [m.start() for m in re.finditer(message, wholeCaption)] + messagePositions = [m.start() for m in messageRegex.finditer(wholeCaption)] if messagePositions != []: timestamps = [] for messagePosition in messagePositions: