#31: Make search within captions not limited by line wrapping
This commit is contained in:
parent
4449d488c9
commit
09f7675bf7
@ -20,6 +20,12 @@ make
|
|||||||
./youtubeCaptionsSearchEngine -h
|
./youtubeCaptionsSearchEngine -h
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you plan to use the front-end website, also run:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
pip install webvtt-py
|
||||||
|
```
|
||||||
|
|
||||||
Unless you provide the argument `--youtube-operational-api-instance-url https://yt.lemnoslife.com`, you have [to host your own instance of the YouTube operational API](https://github.com/Benjamin-Loison/YouTube-operational-API/#install-your-own-instance-of-the-api).
|
Unless you provide the argument `--youtube-operational-api-instance-url https://yt.lemnoslife.com`, you have [to host your own instance of the YouTube operational API](https://github.com/Benjamin-Loison/YouTube-operational-API/#install-your-own-instance-of-the-api).
|
||||||
|
|
||||||
Unless you provide the argument `--no-keys`, you have to provide at least one [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started) in `keys.txt`.
|
Unless you provide the argument `--no-keys`, you have to provide at least one [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started) in `keys.txt`.
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
import sys, time, fcntl, os, zipfile
|
import sys, time, fcntl, os, zipfile, webvtt
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
|
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
|
||||||
|
|
||||||
@ -34,14 +35,21 @@ for fileIndex, file in enumerate(files):
|
|||||||
write(f'progress:{fileIndex + 1} / {len(files)}')
|
write(f'progress:{fileIndex + 1} / {len(files)}')
|
||||||
zip = zipfile.ZipFile(path + file)
|
zip = zipfile.ZipFile(path + file)
|
||||||
for fileInZip in zip.namelist():
|
for fileInZip in zip.namelist():
|
||||||
if searchOnlyCaptions and not fileInZip.endswith('.vtt'):
|
endsWithVtt = fileInZip.endswith('.vtt')
|
||||||
|
if searchOnlyCaptions and not endsWithVtt:
|
||||||
continue
|
continue
|
||||||
f = zip.open(fileInZip)
|
with zip.open(fileInZip) as f:
|
||||||
for line in f.readlines():
|
toWrite = f'{file}/{fileInZip}'
|
||||||
if message in str(line):
|
if endsWithVtt:
|
||||||
write(f'{file}/{fileInZip}')
|
content = StringIO(f.read().decode('utf-8'))
|
||||||
break
|
wholeCaption = ' '.join([caption.text for caption in webvtt.read_buffer(content)])
|
||||||
f.close()
|
if message in wholeCaption:
|
||||||
|
write(toWrite)
|
||||||
|
else:
|
||||||
|
for line in f.readlines():
|
||||||
|
if message in str(line):
|
||||||
|
write(toWrite)
|
||||||
|
break
|
||||||
|
|
||||||
f = open(clientFilePath)
|
f = open(clientFilePath)
|
||||||
while True:
|
while True:
|
||||||
|
Loading…
Reference in New Issue
Block a user