#31: Add zip files search
This commit is contained in:
parent
82e597f205
commit
fda8fc728e
@ -14,9 +14,50 @@ See <?php echoUrl('https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions
|
||||
<input type="submit" value="Search">
|
||||
</form>
|
||||
|
||||
<ul id="channels">
|
||||
</ul>
|
||||
|
||||
<script>
|
||||
var firstRun = true;
|
||||
var conn;
|
||||
// Could parse DOM instead of using following variable.
|
||||
var channels = [];
|
||||
|
||||
/**
 * Build an anchor element showing `text` and linking to `href`.
 *
 * @param {string} text - Label displayed inside the link.
 * @param {string} href - Target assigned to the anchor's `href`.
 * @returns {HTMLAnchorElement} The newly created, not yet attached, anchor.
 */
function createA(text, href) {
    const anchor = document.createElement('a');
    // The label goes through a text node, so it is never parsed as HTML.
    anchor.appendChild(document.createTextNode(text));
    anchor.href = href;
    return anchor;
}
|
||||
|
||||
/**
 * Handle one line received from the search websocket.
 *
 * A line starting with `alert:` is shown to the user through `alert`.
 * Any other line is expected to look like `<channel>/<file>` and is added to
 * the `#channels` list, grouped under its channel entry.
 *
 * @param {string} line - A single line of a websocket message.
 */
function treatLine(line) {
    console.log(line);
    if (line.startsWith('alert:')) {
        alert(line.replace('alert:', ''));
        return;
    }
    // Splitting a message on '\n' can yield empty strings; they carry no result.
    if (line.trim() === '') {
        return;
    }
    // Only the first '/' separates the channel from the file: the file path
    // inside the archive may itself contain '/'.
    const separatorIndex = line.indexOf('/');
    if (separatorIndex === -1) {
        // Not a `<channel>/<file>` result, so there is nothing to display.
        return;
    }
    const channel = line.slice(0, separatorIndex);
    const channelFile = line.slice(separatorIndex + 1);
    const channelHref = `channels/${channel}`;

    const channelsDom = document.getElementById('channels');
    var channelIndex = channels.indexOf(channel);
    if (channelIndex === -1) {
        // First result for this channel: create its entry and its files sub-list.
        channelIndex = channels.push(channel) - 1;
        const newChannelDom = document.createElement('li');
        newChannelDom.appendChild(createA(channel, channelHref));
        newChannelDom.appendChild(document.createElement('ul'));
        channelsDom.appendChild(newChannelDom);
    }
    // `channels` and the element children of `#channels` are filled in
    // lockstep here, so they share indexes (assumes nothing else adds
    // children to `#channels`). Looking the entry up by index, instead of
    // taking the last child, keeps files under the right channel even if
    // results of different channels are interleaved.
    const channelFilesDom = channelsDom.children[channelIndex].lastChild;
    const channelFileDom = document.createElement('li');
    channelFileDom.appendChild(createA(channelFile, `${channelHref}/${channelFile}`));
    channelFilesDom.appendChild(channelFileDom);
}
|
||||
|
||||
function search(event) {
|
||||
// We don't want to refresh the webpage which is the default behavior.
|
||||
@ -25,11 +66,15 @@ See <?php echoUrl('https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions
|
||||
if (firstRun) {
|
||||
firstRun = false;
|
||||
conn = new WebSocket('wss://crawler.yt.lemnoslife.com/websocket');
|
||||
conn.onmessage = function(e) { console.log(e.data); };
|
||||
conn.onmessage = function(e) {
|
||||
e.data.split('\n').forEach(treatLine);
|
||||
};
|
||||
// We can't call `conn.send` directly, as the connection may not be established yet.
|
||||
conn.onopen = function(e) { conn.send(query); };
|
||||
} else {
|
||||
// We assume at this point that the connection is established.
|
||||
channels = [];
|
||||
document.getElementById('channels').innerHTML = '';
|
||||
conn.send(query);
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import sys, time, fcntl, os
|
||||
import sys, time, fcntl, os, zipfile
|
||||
|
||||
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
|
||||
|
||||
clientId = sys.argv[1]
|
||||
message = sys.argv[2]
|
||||
@ -8,21 +10,35 @@ message = sys.argv[2]
|
||||
clientFilePath = f'users/{clientId}.txt'
|
||||
|
||||
def write(s):
|
||||
f = open(clientFilePath, 'w+')
|
||||
f = open(clientFilePath, 'r+')
|
||||
try:
|
||||
fcntl.flock(f, fcntl.LOCK_EX)
|
||||
# If the output file is empty, then `websocket.php` has already read it. Either way, we don't wait for it: we append what we want to output.
|
||||
read = f.read()
|
||||
f.write(f"{read}\n{s}")
|
||||
# We are appending content, as reading moved the in-file cursor to the end.
|
||||
if read != '':
|
||||
f.write("\n")
|
||||
f.write(s)
|
||||
f.flush()
|
||||
fcntl.flock(f, fcntl.LOCK_UN)
|
||||
f.close()
|
||||
except Exception as e:
|
||||
sys.exit(e)
|
||||
f.close()
|
||||
|
||||
for i in range(10):
|
||||
write(f'{i}: {message}')
|
||||
time.sleep(2)
|
||||
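# Note: `os.listdir` returns a complete list (it is not a generator), so it may take some time on a huge directory; `os.scandir` iterates lazily if that becomes an issue.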
|
||||
# As `zipgrep` doesn't support arguments to stop on first match for each file, we proceed manually to keep a good theoretical complexity.
|
||||
# Report every file, inside every zip archive of `path`, that contains `message`.
# Each match is written as `<archive name>/<file name in archive>`.
for file in os.listdir(path):
    if not file.endswith('.zip'):
        continue
    # `with` closes the archive (and each member below) even if reading fails.
    with zipfile.ZipFile(path + file) as archive:
        for fileInZip in archive.namelist():
            with archive.open(fileInZip) as f:
                # Iterate lazily so that we stop reading at the first match.
                for line in f:
                    # Lines are bytes: decode them instead of using `str(line)`,
                    # whose `b'...'` representation contains escape sequences
                    # (`\n`, `\xc3`...) that produce false matches.
                    # Assumes the archived files are UTF-8 encoded - TODO confirm.
                    if message in line.decode('utf-8', errors='replace'):
                        write(f'{file}/{fileInZip}')
                        break
|
||||
|
||||
f = open(clientFilePath, 'r')
|
||||
f = open(clientFilePath)
|
||||
while True:
|
||||
try:
|
||||
fcntl.flock(f, fcntl.LOCK_EX)
|
||||
@ -30,6 +46,7 @@ while True:
|
||||
os.remove(clientFilePath)
|
||||
break
|
||||
else:
|
||||
fcntl.flock(f, fcntl.LOCK_UN)
|
||||
time.sleep(1)
|
||||
except Exception as e:
|
||||
sys.exit(e)
|
||||
|
@ -96,6 +96,7 @@ class MyProcess implements MessageComponentInterface
|
||||
if (preg_match("/^[a-zA-Z0-9-_ ]+$/", $msg) !== 1) {
|
||||
return;
|
||||
}
|
||||
$from->send('alert:Started searching...');
|
||||
$client = $this->clients->offsetGet($from);
|
||||
// If a previous request was received, we execute the new one with another client for simplicity; otherwise, with the current file-deletion approach, we couldn't tell the worker `search.py` that we don't care about its execution anymore.
|
||||
if ($client->pid !== null) {
|
||||
@ -137,6 +138,7 @@ class MyProcess implements MessageComponentInterface
|
||||
} else {
|
||||
// We don't need the periodic timer anymore, as the worker finished its work and acknowledged that `websocket.php` completely read its output.
|
||||
$this->loop->cancelTimer($client->timer);
|
||||
$from->send('alert:Search finished!');
|
||||
}
|
||||
});
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user