YouTube_captions_search_engine/website/index.php

106 lines
4.1 KiB
PHP

<?php
/**
 * Prints an HTML anchor whose target and visible text are both the given URL.
 *
 * The URL is HTML-escaped before being written, so characters such as `"`,
 * `<` or `&` cannot terminate the `href` attribute or inject markup.
 *
 * @param string $url URL (absolute or relative) to link to.
 */
function echoUrl(string $url): void
{
    $escapedUrl = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    echo "<a href=\"{$escapedUrl}\">{$escapedUrl}</a>";
}
?>
See <?php echoUrl('https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions_search_engine'); ?> for more information.<br/>
Access raw data with: <?php echoUrl('channels/'); ?>.<br/>
Access found channels with: <?php echoUrl('channels.txt'); ?>.
<!-- Search form: its submit event is handled by the `search` function in the script of this page. -->
<form id="form">
<!-- NOTE(review): this text field and the first submit button share id="search". The script reads the field with getElementById('search') and sends the clicked button's id as part of the query, so neither id can be renamed in the markup alone. -->
<input type="text" autofocus id="search" size="23" placeholder="Your search"><br/>
<input type="text" id="path-search" size="23" placeholder="In path containing (default: *empty*)"><br/>
<input type="submit" id="search" value="Search">
<input type="submit" id="search-only-captions" value="Search only captions">
</form>
Progress: <span id="progress"></span> channels
<ul id="channels">
</ul>
<script>
// True until the first search is submitted; `search` checks it to decide whether the WebSocket still has to be opened.
var firstRun = true;
// WebSocket connection to the search backend; assigned by `search` on the first submission.
var conn;
// Could parse DOM instead of using following variable.
// Names of the channels that already have an entry in the `channels` list; emptied by `search` on later submissions.
var channels = [];
/**
 * Builds an `<a>` element.
 *
 * @param {string} text Visible label, inserted as a text node (never parsed as HTML).
 * @param {string} href Link target assigned to the anchor's `href`.
 * @returns {HTMLAnchorElement} The new anchor, not yet attached to the document.
 */
function createA(text, href) {
    const anchor = document.createElement('a');
    anchor.appendChild(document.createTextNode(text));
    anchor.href = href;
    return anchor;
}
/**
 * Handles one line received from the search backend and updates the page.
 *
 * Two line shapes are recognised:
 *  - `progress:<text>`: `<text>` is shown in the `progress` element;
 *  - `<channel>/<file path>[|<timestamp>]*`: a result. The file is listed under
 *    its channel (the channel entry is created on first sight) and every
 *    timestamp becomes a link to that moment of the video on YouTube.
 *
 * Relies on the page-level `channels` array staying in step with the `<li>`
 * children of the `channels` list: the n-th name matches the n-th `<li>`.
 *
 * @param {string} line A single line, without its trailing newline.
 */
function treatLine(line) {
    console.log(line);
    // Splitting a message on '\n' yields an empty string when the message ends
    // with a newline; such a line carries no result and must not create an entry.
    if (line === '') {
        return;
    }
    if (line.startsWith('progress:')) {
        // `textContent` rather than `innerHTML`: the progress text is data, not markup.
        document.getElementById('progress').textContent = line.replace('progress:', '');
        return;
    }

    const channelsDom = document.getElementById('channels');
    // Everything after the first '|' is a list of '|'-separated timestamps (may be empty).
    const lineParts = line.split('|');
    const path = lineParts[0];
    const timestamps = lineParts.slice(1).map(linePart => parseInt(linePart, 10));

    const channelFileParts = path.split('/');
    const channel = channelFileParts[0];
    const channelFile = channelFileParts.slice(1).join('/');
    const channelHref = `channels/${channel}`;

    let channelIndex = channels.indexOf(channel);
    if (channelIndex === -1) {
        // First result for this channel: create `<li><a>channel</a><ul></ul></li>`.
        channelIndex = channels.push(channel) - 1;
        const newChannelDom = document.createElement('li');
        newChannelDom.appendChild(createA(channel, channelHref));
        newChannelDom.appendChild(document.createElement('ul'));
        channelsDom.appendChild(newChannelDom);
    }
    // Look the channel entry up by position instead of taking the last one, so
    // results stay under the right channel even when channels are interleaved.
    // `children` only counts elements, so stray whitespace text nodes are ignored.
    const channelFilesDom = channelsDom.children[channelIndex].lastChild;

    const channelFileDom = document.createElement('li');
    channelFileDom.appendChild(createA(channelFile, `${channelHref}/${channelFile}`));
    // NOTE(review): assumes result paths look like `<channel>/<dir>/<videoId>/...`,
    // i.e. the third path component is the YouTube video id; confirm with the backend.
    const id = channelFileParts[2];
    for (const timestamp of timestamps) {
        // Non-breaking space keeps each timestamp link visually separated.
        channelFileDom.appendChild(document.createTextNode('\u00A0'));
        channelFileDom.appendChild(createA(`${timestamp} s`, `https://www.youtube.com/watch?v=${id}&t=${timestamp}`));
    }
    channelFilesDom.appendChild(channelFileDom);
}
/**
 * Submit handler of the search form: sends the query over the WebSocket.
 *
 * The query is `<id of the clicked submit button> <path filter> <search terms>`.
 * The connection is opened on the first search and reopened if it has since
 * been closed. If it is not open yet, the query is sent as soon as it opens;
 * a newer query submitted in the meantime replaces the pending one.
 *
 * @param {SubmitEvent} event Submit event of the form.
 */
function search(event) {
    // We don't want to refresh the webpage which is the default behavior.
    event.preventDefault();
    const query = event.submitter.id + ' ' + document.getElementById('path-search').value + ' ' + document.getElementById('search').value;

    // Forget the results of any previous search.
    channels = [];
    document.getElementById('channels').innerHTML = '';

    // `send` on a closing or closed socket silently drops the data, so reconnect instead.
    const needsConnection = firstRun || conn.readyState === WebSocket.CLOSING || conn.readyState === WebSocket.CLOSED;
    if (needsConnection) {
        firstRun = false;
        conn = new WebSocket('wss://crawler.yt.lemnoslife.com/websocket');
        conn.onmessage = function(e) {
            e.data.split('\n').forEach(treatLine);
        };
    }

    if (conn.readyState === WebSocket.OPEN) {
        conn.send(query);
    } else {
        // We can't directly proceed with `conn.send`, as the connection may not be already established
        // (`send` throws while the socket is still connecting). Overwriting `onopen` keeps only the latest query.
        conn.onopen = function() { conn.send(query); };
    }
}
// Route submissions of the search form (from either submit button) to `search`, which cancels the default page reload.
var form = document.getElementById('form');
form.addEventListener('submit', search);
</script>