Fix #31: List all occurrences of the search term within video captions
This commit is contained in:
parent
8d34cf33ae
commit
e166fdb4e5
@ -42,10 +42,10 @@ Progress: <span id="progress"></span> channels
|
|||||||
document.getElementById('progress').innerHTML = line.replace('progress:', '');
|
document.getElementById('progress').innerHTML = line.replace('progress:', '');
|
||||||
} else {
|
} else {
|
||||||
var channelsDom = document.getElementById('channels');
|
var channelsDom = document.getElementById('channels');
|
||||||
var timestamp = null;
|
var timestamp = [];
|
||||||
if (line.includes('|')) {
|
const lineParts = line.split('|');
|
||||||
const lineParts = line.split('|');
|
if (lineParts.length > 0) {
|
||||||
timestamp = parseInt(lineParts[1]);
|
timestamps = lineParts.slice(1).map(linePart => parseInt(linePart));
|
||||||
line = lineParts[0];
|
line = lineParts[0];
|
||||||
}
|
}
|
||||||
const channelFileParts = line.split('/');
|
const channelFileParts = line.split('/');
|
||||||
@ -66,10 +66,11 @@ Progress: <span id="progress"></span> channels
|
|||||||
var channelFileDom = document.createElement('li');
|
var channelFileDom = document.createElement('li');
|
||||||
var a = createA(channelFile, `${channelHref}/${channelFile}`);
|
var a = createA(channelFile, `${channelHref}/${channelFile}`);
|
||||||
channelFileDom.appendChild(a);
|
channelFileDom.appendChild(a);
|
||||||
if (timestamp != null) {
|
const id = channelFileParts[2];
|
||||||
|
for(var timestampsIndex = 0; timestampsIndex < timestamps.length; timestampsIndex++) {
|
||||||
const space = document.createTextNode('\u00A0');
|
const space = document.createTextNode('\u00A0');
|
||||||
channelFileDom.appendChild(space);
|
channelFileDom.appendChild(space);
|
||||||
const id = channelFileParts[2];
|
const timestamp = timestamps[timestampsIndex];
|
||||||
var a = createA(`${timestamp} s`, `https://www.youtube.com/watch?v=${id}&t=${timestamp}`);
|
var a = createA(`${timestamp} s`, `https://www.youtube.com/watch?v=${id}&t=${timestamp}`);
|
||||||
channelFileDom.appendChild(a);
|
channelFileDom.appendChild(a);
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
import sys, time, fcntl, os, zipfile, webvtt
|
import sys, time, fcntl, os, zipfile, webvtt, re
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
|
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
|
||||||
@ -44,15 +44,19 @@ for fileIndex, file in enumerate(files):
|
|||||||
content = f.read().decode('utf-8')
|
content = f.read().decode('utf-8')
|
||||||
stringIOf = StringIO(content)
|
stringIOf = StringIO(content)
|
||||||
wholeCaption = ' '.join([caption.text for caption in webvtt.read_buffer(stringIOf)])
|
wholeCaption = ' '.join([caption.text for caption in webvtt.read_buffer(stringIOf)])
|
||||||
messagePosition = wholeCaption.find(message)
|
messagePositions = [m.start() for m in re.finditer(f'(?={message})', wholeCaption)]
|
||||||
if messagePosition != -1:
|
if messagePositions != []:
|
||||||
stringIOf = StringIO(content)
|
timestamps = []
|
||||||
for caption in webvtt.read_buffer(stringIOf):
|
for messagePosition in messagePositions:
|
||||||
text = caption.text
|
stringIOf = StringIO(content)
|
||||||
if messagePosition <= len(text):
|
for caption in webvtt.read_buffer(stringIOf):
|
||||||
write(f'{toWrite}|{int(caption.start_in_seconds)}')
|
text = caption.text
|
||||||
break
|
if messagePosition <= len(text):
|
||||||
messagePosition -= len(text) + 1
|
timestamp = str(int(caption.start_in_seconds))
|
||||||
|
timestamps += [timestamp]
|
||||||
|
break
|
||||||
|
messagePosition -= len(text) + 1
|
||||||
|
write(f'{toWrite}|{"|".join(timestamps)}')
|
||||||
else:
|
else:
|
||||||
for line in f.readlines():
|
for line in f.readlines():
|
||||||
if message in str(line):
|
if message in str(line):
|
||||||
|
Loading…
Reference in New Issue
Block a user