#31: Make search within captions not limited by line wrapping
This commit is contained in:
parent
4449d488c9
commit
09f7675bf7
@ -20,6 +20,12 @@ make
|
||||
./youtubeCaptionsSearchEngine -h
|
||||
```
|
||||
|
||||
If you plan to use the front-end website, also run:
|
||||
|
||||
```sh
|
||||
pip install webvtt-py
|
||||
```
|
||||
|
||||
Unless you provide the argument `--youtube-operational-api-instance-url https://yt.lemnoslife.com`, you have to [host your own instance of the YouTube operational API](https://github.com/Benjamin-Loison/YouTube-operational-API/#install-your-own-instance-of-the-api).
|
||||
|
||||
Unless you provide the argument `--no-keys`, you have to provide at least one [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started) in `keys.txt`.
|
||||
|
@ -1,6 +1,7 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import sys, time, fcntl, os, zipfile
|
||||
import sys, time, fcntl, os, zipfile, webvtt
|
||||
from io import StringIO
|
||||
|
||||
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
|
||||
|
||||
@ -34,14 +35,21 @@ for fileIndex, file in enumerate(files):
|
||||
write(f'progress:{fileIndex + 1} / {len(files)}')
|
||||
zip = zipfile.ZipFile(path + file)
|
||||
for fileInZip in zip.namelist():
|
||||
if searchOnlyCaptions and not fileInZip.endswith('.vtt'):
|
||||
endsWithVtt = fileInZip.endswith('.vtt')
|
||||
if searchOnlyCaptions and not endsWithVtt:
|
||||
continue
|
||||
f = zip.open(fileInZip)
|
||||
for line in f.readlines():
|
||||
if message in str(line):
|
||||
write(f'{file}/{fileInZip}')
|
||||
break
|
||||
f.close()
|
||||
with zip.open(fileInZip) as f:
|
||||
toWrite = f'{file}/{fileInZip}'
|
||||
if endsWithVtt:
|
||||
content = StringIO(f.read().decode('utf-8'))
|
||||
wholeCaption = ' '.join([caption.text for caption in webvtt.read_buffer(content)])
|
||||
if message in wholeCaption:
|
||||
write(toWrite)
|
||||
else:
|
||||
for line in f.readlines():
|
||||
if message in str(line):
|
||||
write(toWrite)
|
||||
break
|
||||
|
||||
f = open(clientFilePath)
|
||||
while True:
|
||||
|
Loading…
x
Reference in New Issue
Block a user