#31: Make search within captions not limited by line wrapping

This commit is contained in:
2023-02-14 01:32:36 +01:00
parent e0faf053a1
commit 57572c6d6c
2 changed files with 22 additions and 8 deletions

View File

@@ -1,6 +1,7 @@
#!/usr/bin/python3
-import sys, time, fcntl, os, zipfile
+import sys, time, fcntl, os, zipfile, webvtt
+from io import StringIO
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
@@ -34,14 +35,21 @@ for fileIndex, file in enumerate(files):
write(f'progress:{fileIndex + 1} / {len(files)}')
zip = zipfile.ZipFile(path + file)
for fileInZip in zip.namelist():
-if searchOnlyCaptions and not fileInZip.endswith('.vtt'):
+endsWithVtt = fileInZip.endswith('.vtt')
+if searchOnlyCaptions and not endsWithVtt:
continue
-f = zip.open(fileInZip)
-for line in f.readlines():
-if message in str(line):
-write(f'{file}/{fileInZip}')
-break
-f.close()
+with zip.open(fileInZip) as f:
+toWrite = f'{file}/{fileInZip}'
+if endsWithVtt:
+content = StringIO(f.read().decode('utf-8'))
+wholeCaption = ' '.join([caption.text for caption in webvtt.read_buffer(content)])
+if message in wholeCaption:
+write(toWrite)
+else:
+for line in f.readlines():
+if message in str(line):
+write(toWrite)
+break
f = open(clientFilePath)
while True: