Make the website support regex for both search and path filtering

This commit is contained in:
Benjamin Loison 2023-02-24 15:38:51 +01:00
parent 4a0bd6fce5
commit 884ce22ff8
2 changed files with 4 additions and 4 deletions

View File

@@ -13,8 +13,8 @@ Access raw data with: <?php echoUrl('channels/'); ?>.<br/>
Access found channels with: <?php echoUrl('channels.txt'); ?>. Access found channels with: <?php echoUrl('channels.txt'); ?>.
<form id="form"> <form id="form">
<input type="text" autofocus id="search" size="23" placeholder="Your search"></input><br/> <input type="text" autofocus id="search" size="23" placeholder="Your search regex"></input><br/>
<input type="text" autofocus id="path-search" size="23" placeholder="In path containing (default: *empty*)"></input><br/> <input type="text" autofocus id="path-search" size="23" placeholder="Your path regex (default: *empty*)"></input><br/>
<input type="submit" id="search" value="Search"> <input type="submit" id="search" value="Search">
<input type="submit" id="search-only-captions" value="Search only captions"> <input type="submit" id="search-only-captions" value="Search only captions">
</form> </form>

View File

@@ -42,14 +42,14 @@ for fileIndex, file in enumerate(files):
if searchOnlyCaptions and not endsWithVtt: if searchOnlyCaptions and not endsWithVtt:
continue continue
toWrite = f'{file}/{fileInZip}' toWrite = f'{file}/{fileInZip}'
if not pathSearch in toWrite: if not bool(re.search(pathSearch, toWrite)):
continue continue
with zip.open(fileInZip) as f: with zip.open(fileInZip) as f:
if endsWithVtt: if endsWithVtt:
content = f.read().decode('utf-8') content = f.read().decode('utf-8')
stringIOf = StringIO(content) stringIOf = StringIO(content)
wholeCaption = ' '.join([cleanCaption(caption.text) for caption in webvtt.read_buffer(stringIOf)]) wholeCaption = ' '.join([cleanCaption(caption.text) for caption in webvtt.read_buffer(stringIOf)])
messagePositions = [m.start() for m in re.finditer(f'(?={message})', wholeCaption)] messagePositions = [m.start() for m in re.finditer(message, wholeCaption)]
if messagePositions != []: if messagePositions != []:
timestamps = [] timestamps = []
for messagePosition in messagePositions: for messagePosition in messagePositions: