2023-01-30 22:14:05 +01:00
|
|
|
<?php
|
|
|
|
|
|
|
|
/**
 * Print an HTML link whose target and visible label are both `$url`.
 *
 * The value is HTML-escaped before being written, so quotes, angle brackets
 * or ampersands in `$url` cannot break out of the `href` attribute or be
 * interpreted as markup.
 *
 * @param string $url Absolute or relative URL to link to.
 * @return void The link is written to the output, nothing is returned.
 */
function echoUrl($url)
{
    $escapedUrl = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
    echo "<a href=\"$escapedUrl\">$escapedUrl</a>";
}
|
|
|
|
|
|
|
|
?>
|
|
|
|
|
|
|
|
See <?php echoUrl('https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions_search_engine'); ?> for more information.<br/>
|
|
|
|
|
2023-02-23 23:12:18 +01:00
|
|
|
Access raw data with: <?php echoUrl('channels/'); ?>.<br/>
|
|
|
|
Access found channels with: <?php echoUrl('channels.txt'); ?>.
|
2023-02-13 05:55:44 +01:00
|
|
|
|
2023-02-07 17:25:17 +01:00
|
|
|
<form id="form">
|
2023-02-24 15:38:51 +01:00
|
|
|
<input type="text" autofocus id="search" size="23" placeholder="Your search regex"><br/>
|
|
|
|
<input type="text" id="path-search" size="23" placeholder="Your path regex (default: *empty*)"><br/>
|
2023-02-14 00:59:37 +01:00
|
|
|
<input type="submit" id="search" value="Search">
|
|
|
|
<input type="submit" id="search-only-captions" value="Search only captions">
|
2023-02-07 17:25:17 +01:00
|
|
|
</form>
|
2023-01-30 22:19:04 +01:00
|
|
|
|
2023-02-14 02:00:23 +01:00
|
|
|
Progress: <span id="progress"></span> channels
|
2023-02-14 01:08:05 +01:00
|
|
|
|
2023-02-07 20:15:36 +01:00
|
|
|
<ul id="channels">
|
|
|
|
</ul>
|
|
|
|
|
2023-01-30 22:19:04 +01:00
|
|
|
<script>
|
2023-02-07 17:25:17 +01:00
|
|
|
// Names of the channels already listed in the `channels` list.
// Could parse DOM instead of using following variable.
var channels = [];
// WebSocket used to talk to the search back end; assigned by `search`.
var conn;
// Stays true until `search` has run once and created the WebSocket.
var firstRun = true;
|
|
|
|
|
|
|
|
/**
 * Build an `<a>` element.
 *
 * @param {string} text - Visible label of the link.
 * @param {string} href - Target assigned to the link's `href` property.
 * @returns {HTMLAnchorElement} The new, not yet attached, anchor element.
 */
function createA(text, href) {
    const a = document.createElement('a');
    // The label goes in through a text node, so it is never parsed as markup.
    const label = document.createTextNode(text);
    a.appendChild(label);
    a.href = href;
    return a;
}
|
|
|
|
|
|
|
|
/**
 * Render one line received from the search WebSocket.
 *
 * Two kinds of lines are handled:
 * - `progress:<text>`: `<text>` is shown in the `progress` element;
 * - `<channel>/<file path>[|<timestamp>...]`: the file is listed below its
 *   channel in the `channels` list, followed by one YouTube link per
 *   timestamp. The video id used for these links is the third `/`-separated
 *   component of the path.
 *
 * Empty lines are ignored.
 *
 * @param {string} line - A single line, without its trailing newline.
 */
function treatLine(line) {
    console.log(line);
    // Splitting a message on '\n' can yield empty segments; there is nothing to render for them.
    if (line === '') {
        return;
    }
    if (line.startsWith('progress:')) {
        // `textContent` so that text coming from the server is never parsed as HTML.
        document.getElementById('progress').textContent = line.replace('progress:', '');
        return;
    }

    const channelsDom = document.getElementById('channels');
    // Everything after the first `|` is a list of timestamps for that file.
    const [path, ...rawTimestamps] = line.split('|');
    const timestamps = rawTimestamps.map((rawTimestamp) => parseInt(rawTimestamp, 10));

    const channelFileParts = path.split('/');
    const channel = channelFileParts[0];
    const channelFile = channelFileParts.slice(1).join('/');
    const channelHref = `channels/${channel}`;

    let channelIndex = channels.indexOf(channel);
    if (channelIndex === -1) {
        // First file of this channel: add the channel entry and its (empty) list of files.
        channelIndex = channels.push(channel) - 1;
        const newChannelDom = document.createElement('li');
        newChannelDom.appendChild(createA(channel, channelHref));
        newChannelDom.appendChild(document.createElement('ul'));
        channelsDom.appendChild(newChannelDom);
    }
    // `channels` and the element children of the list are appended to and
    // cleared together, so they share indexes. Looking the entry up by index,
    // rather than taking the last one, keeps files under the right channel
    // even when lines of different channels are interleaved.
    const channelFilesDom = channelsDom.children[channelIndex].lastChild;

    const channelFileDom = document.createElement('li');
    channelFileDom.appendChild(createA(channelFile, `${channelHref}/${channelFile}`));
    const id = channelFileParts[2];
    for (const timestamp of timestamps) {
        // Non-breaking space between consecutive links.
        channelFileDom.appendChild(document.createTextNode('\u00A0'));
        channelFileDom.appendChild(createA(`${timestamp} s`, `https://www.youtube.com/watch?v=${id}&t=${timestamp}`));
    }
    channelFilesDom.appendChild(channelFileDom);
}
|
2023-02-07 17:25:17 +01:00
|
|
|
|
|
|
|
/**
 * `submit` handler of the search form: send the query over the WebSocket.
 *
 * The query is `<id of the clicked submit button> <path regex> <search regex>`.
 * The WebSocket is created on the first search and reused afterwards; it is
 * recreated if it has been closed in the meantime. Results of the previous
 * search are removed before a new query is sent.
 *
 * NOTE: `getElementById('search')` is expected to return the text input. The
 * first submit button carries the same id, and the lookup returns the first
 * match in document order.
 *
 * @param {SubmitEvent} event - The form submission event.
 */
function search(event) {
    // We don't want to refresh the webpage which is the default behavior.
    event.preventDefault();
    const pathRegex = document.getElementById('path-search').value;
    const searchRegex = document.getElementById('search').value;
    const query = `${event.submitter.id} ${pathRegex} ${searchRegex}`;

    if (!firstRun) {
        // Forget the results of the previous search.
        channels = [];
        document.getElementById('channels').innerHTML = '';
    }

    const isConnecting = !firstRun && conn.readyState === WebSocket.CONNECTING;
    const isOpen = !firstRun && conn.readyState === WebSocket.OPEN;

    if (isOpen) {
        conn.send(query);
        return;
    }
    if (isConnecting) {
        // `send` throws while the socket is still connecting: replace the
        // pending query so that only the latest one is sent once it opens.
        conn.onopen = function(e) { conn.send(query); };
        return;
    }

    // First search, or the previous socket is closing/closed (`send` would
    // silently discard the query): open a new connection.
    firstRun = false;
    conn = new WebSocket('wss://crawler.yt.lemnoslife.com/websocket');
    conn.onmessage = function(e) {
        e.data.split('\n').forEach(treatLine);
    };
    // We can't directly proceed with `conn.send`, as the connection may not be already established.
    conn.onopen = function(e) { conn.send(query); };
}
|
|
|
|
|
|
|
|
// Route every submission of the search form through `search`.
var form = document.getElementById('form');
form.addEventListener('submit', search);
|
2023-01-30 22:19:04 +01:00
|
|
|
</script>
|