diff --git a/website/index.php b/website/index.php
index 6c79161..21eb0b6 100644
--- a/website/index.php
+++ b/website/index.php
@@ -42,10 +42,10 @@ Progress: channels
document.getElementById('progress').innerHTML = line.replace('progress:', '');
} else {
var channelsDom = document.getElementById('channels');
- var timestamp = null;
- if (line.includes('|')) {
- const lineParts = line.split('|');
- timestamp = parseInt(lineParts[1]);
+ var timestamps = [];
+ const lineParts = line.split('|');
+ if (lineParts.length > 0) {
+ timestamps = lineParts.slice(1).map(linePart => parseInt(linePart));
line = lineParts[0];
}
const channelFileParts = line.split('/');
@@ -66,10 +66,11 @@ Progress: channels
var channelFileDom = document.createElement('li');
var a = createA(channelFile, `${channelHref}/${channelFile}`);
channelFileDom.appendChild(a);
- if (timestamp != null) {
+ const id = channelFileParts[2];
+ for(var timestampsIndex = 0; timestampsIndex < timestamps.length; timestampsIndex++) {
const space = document.createTextNode('\u00A0');
channelFileDom.appendChild(space);
- const id = channelFileParts[2];
+ const timestamp = timestamps[timestampsIndex];
var a = createA(`${timestamp} s`, `https://www.youtube.com/watch?v=${id}&t=${timestamp}`);
channelFileDom.appendChild(a);
}
diff --git a/website/search.py b/website/search.py
index d65cd0c..710720c 100755
--- a/website/search.py
+++ b/website/search.py
@@ -1,6 +1,6 @@
#!/usr/bin/python3
-import sys, time, fcntl, os, zipfile, webvtt
+import sys, time, fcntl, os, zipfile, webvtt, re
from io import StringIO
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
@@ -44,15 +44,19 @@ for fileIndex, file in enumerate(files):
content = f.read().decode('utf-8')
stringIOf = StringIO(content)
wholeCaption = ' '.join([caption.text for caption in webvtt.read_buffer(stringIOf)])
- messagePosition = wholeCaption.find(message)
- if messagePosition != -1:
- stringIOf = StringIO(content)
- for caption in webvtt.read_buffer(stringIOf):
- text = caption.text
- if messagePosition <= len(text):
- write(f'{toWrite}|{int(caption.start_in_seconds)}')
- break
- messagePosition -= len(text) + 1
+ messagePositions = [m.start() for m in re.finditer(f'(?={re.escape(message)})', wholeCaption)]
+ if messagePositions != []:
+ timestamps = []
+ for messagePosition in messagePositions:
+ stringIOf = StringIO(content)
+ for caption in webvtt.read_buffer(stringIOf):
+ text = caption.text
+ if messagePosition <= len(text):
+ timestamp = str(int(caption.start_in_seconds))
+ timestamps += [timestamp]
+ break
+ messagePosition -= len(text) + 1
+ write(f'{toWrite}|{"|".join(timestamps)}')
else:
for line in f.readlines():
if message in str(line):