#31: Make search within captions not limited by line wrapping

This commit is contained in:
Benjamin Loison 2023-02-14 01:32:36 +01:00
parent 4449d488c9
commit 09f7675bf7
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8
2 changed files with 22 additions and 8 deletions

View File

@ -20,6 +20,12 @@ make
./youtubeCaptionsSearchEngine -h
```
If you plan to use the front-end website, also run:
```sh
pip install webvtt-py
```
Unless you provide the argument `--youtube-operational-api-instance-url https://yt.lemnoslife.com`, you have [to host your own instance of the YouTube operational API](https://github.com/Benjamin-Loison/YouTube-operational-API/#install-your-own-instance-of-the-api).
Unless you provide the argument `--no-keys`, you have to provide at least one [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started) in `keys.txt`.

View File

@ -1,6 +1,7 @@
#!/usr/bin/python3
import sys, time, fcntl, os, zipfile
import sys, time, fcntl, os, zipfile, webvtt
from io import StringIO
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
@ -34,14 +35,21 @@ for fileIndex, file in enumerate(files):
write(f'progress:{fileIndex + 1} / {len(files)}')
zip = zipfile.ZipFile(path + file)
for fileInZip in zip.namelist():
if searchOnlyCaptions and not fileInZip.endswith('.vtt'):
endsWithVtt = fileInZip.endswith('.vtt')
if searchOnlyCaptions and not endsWithVtt:
continue
f = zip.open(fileInZip)
with zip.open(fileInZip) as f:
toWrite = f'{file}/{fileInZip}'
if endsWithVtt:
content = StringIO(f.read().decode('utf-8'))
wholeCaption = ' '.join([caption.text for caption in webvtt.read_buffer(content)])
if message in wholeCaption:
write(toWrite)
else:
for line in f.readlines():
if message in str(line):
write(f'{file}/{fileInZip}')
write(toWrite)
break
f.close()
f = open(clientFilePath)
while True: