#31: Add zip files search
This commit is contained in:
parent
82e597f205
commit
fda8fc728e
@ -14,9 +14,50 @@ See <?php echoUrl('https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions
|
|||||||
<input type="submit" value="Search">
|
<input type="submit" value="Search">
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
|
<ul id="channels">
|
||||||
|
</ul>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
var firstRun = true;
|
var firstRun = true;
|
||||||
var conn;
|
var conn;
|
||||||
|
// Could parse the DOM instead of using the following variable.
|
||||||
|
var channels = [];
|
||||||
|
|
||||||
|
function createA(text, href) {
|
||||||
|
var a = document.createElement('a');
|
||||||
|
var text = document.createTextNode(text);
|
||||||
|
a.appendChild(text);
|
||||||
|
a.href = href;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
function treatLine(line) {
|
||||||
|
console.log(line);
|
||||||
|
if (line.startsWith('alert:')) {
|
||||||
|
alert(line.replace('alert:', ''));
|
||||||
|
} else {
|
||||||
|
var channelsDom = document.getElementById('channels');
|
||||||
|
const channelFileParts = line.split('/');
|
||||||
|
const channel = channelFileParts[0];
|
||||||
|
const channelFile = channelFileParts[1];
|
||||||
|
const channelHref = `channels/${channel}`;
|
||||||
|
if (!channels.includes(channel)) {
|
||||||
|
channels.push(channel);
|
||||||
|
channelDom = document.createElement('li');
|
||||||
|
var a = createA(channel, channelHref);
|
||||||
|
channelDom.appendChild(a);
|
||||||
|
var channelFilesDom = document.createElement('ul');
|
||||||
|
channelDom.appendChild(channelFilesDom);
|
||||||
|
channelsDom.appendChild(channelDom);
|
||||||
|
}
|
||||||
|
var channelDom = channelsDom.lastChild;
|
||||||
|
var channelFilesDom = channelDom.lastChild;
|
||||||
|
var channelFileDom = document.createElement('li');
|
||||||
|
var a = createA(channelFile, `${channelHref}/${channelFile}`);
|
||||||
|
channelFileDom.appendChild(a);
|
||||||
|
channelFilesDom.appendChild(channelFileDom);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
function search(event) {
|
function search(event) {
|
||||||
// We don't want to refresh the webpage, which is the default behavior.
|
// We don't want to refresh the webpage, which is the default behavior.
|
||||||
@ -25,11 +66,15 @@ See <?php echoUrl('https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions
|
|||||||
if (firstRun) {
|
if (firstRun) {
|
||||||
firstRun = false;
|
firstRun = false;
|
||||||
conn = new WebSocket('wss://crawler.yt.lemnoslife.com/websocket');
|
conn = new WebSocket('wss://crawler.yt.lemnoslife.com/websocket');
|
||||||
conn.onmessage = function(e) { console.log(e.data); };
|
conn.onmessage = function(e) {
|
||||||
|
e.data.split('\n').forEach(treatLine);
|
||||||
|
};
|
||||||
// We can't directly proceed with `conn.send`, as the connection may not be established yet.
|
// We can't directly proceed with `conn.send`, as the connection may not be established yet.
|
||||||
conn.onopen = function(e) { conn.send(query); };
|
conn.onopen = function(e) { conn.send(query); };
|
||||||
} else {
|
} else {
|
||||||
// We assume at this point that the connection is established.
|
// We assume at this point that the connection is established.
|
||||||
|
channels = [];
|
||||||
|
document.getElementById('channels').innerHTML = '';
|
||||||
conn.send(query);
|
conn.send(query);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
import sys, time, fcntl, os
|
import sys, time, fcntl, os, zipfile
|
||||||
|
|
||||||
|
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
|
||||||
|
|
||||||
clientId = sys.argv[1]
|
clientId = sys.argv[1]
|
||||||
message = sys.argv[2]
|
message = sys.argv[2]
|
||||||
@ -8,21 +10,35 @@ message = sys.argv[2]
|
|||||||
clientFilePath = f'users/{clientId}.txt'
|
clientFilePath = f'users/{clientId}.txt'
|
||||||
|
|
||||||
def write(s):
|
def write(s):
|
||||||
f = open(clientFilePath, 'w+')
|
f = open(clientFilePath, 'r+')
|
||||||
try:
|
try:
|
||||||
fcntl.flock(f, fcntl.LOCK_EX)
|
fcntl.flock(f, fcntl.LOCK_EX)
|
||||||
# If the output file is empty, then it means that `websocket.php` read it. Either way, we don't wait for it and we append what we want to output.
|
# If the output file is empty, then it means that `websocket.php` read it. Either way, we don't wait for it and we append what we want to output.
|
||||||
read = f.read()
|
read = f.read()
|
||||||
f.write(f"{read}\n{s}")
|
# We are appending content, as we moved the in-file cursor.
|
||||||
|
if read != '':
|
||||||
|
f.write("\n")
|
||||||
|
f.write(s)
|
||||||
|
f.flush()
|
||||||
|
fcntl.flock(f, fcntl.LOCK_UN)
|
||||||
|
f.close()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
sys.exit(e)
|
sys.exit(e)
|
||||||
f.close()
|
|
||||||
|
|
||||||
for i in range(10):
|
# Unclear if `os.listdir` takes a lot of time, as it's a generator.
|
||||||
write(f'{i}: {message}')
|
# As `zipgrep` doesn't support arguments to stop on first match for each file, we proceed manually to keep a good theoretical complexity.
|
||||||
time.sleep(2)
|
for file in os.listdir(path):
|
||||||
|
if file.endswith('.zip'):
|
||||||
|
zip = zipfile.ZipFile(path + file)
|
||||||
|
for fileInZip in zip.namelist():
|
||||||
|
f = zip.open(fileInZip)
|
||||||
|
for line in f.readlines():
|
||||||
|
if message in str(line):
|
||||||
|
write(f'{file}/{fileInZip}')
|
||||||
|
break
|
||||||
|
f.close()
|
||||||
|
|
||||||
f = open(clientFilePath, 'r')
|
f = open(clientFilePath)
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
fcntl.flock(f, fcntl.LOCK_EX)
|
fcntl.flock(f, fcntl.LOCK_EX)
|
||||||
@ -30,6 +46,7 @@ while True:
|
|||||||
os.remove(clientFilePath)
|
os.remove(clientFilePath)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
fcntl.flock(f, fcntl.LOCK_UN)
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
sys.exit(e)
|
sys.exit(e)
|
||||||
|
@ -96,6 +96,7 @@ class MyProcess implements MessageComponentInterface
|
|||||||
if (preg_match("/^[a-zA-Z0-9-_ ]+$/", $msg) !== 1) {
|
if (preg_match("/^[a-zA-Z0-9-_ ]+$/", $msg) !== 1) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
$from->send('alert:Started searching...');
|
||||||
$client = $this->clients->offsetGet($from);
|
$client = $this->clients->offsetGet($from);
|
||||||
// If a previous request was received, we execute the new one with another client for simplicity; otherwise, with the current file deletion approach, we can't tell the worker `search.py` that we don't care about its execution anymore.
|
// If a previous request was received, we execute the new one with another client for simplicity; otherwise, with the current file deletion approach, we can't tell the worker `search.py` that we don't care about its execution anymore.
|
||||||
if ($client->pid !== null) {
|
if ($client->pid !== null) {
|
||||||
@ -137,6 +138,7 @@ class MyProcess implements MessageComponentInterface
|
|||||||
} else {
|
} else {
|
||||||
// We don't need the periodic timer anymore, as the worker finished its work and acknowledged that `websocket.php` completely read its output.
|
// We don't need the periodic timer anymore, as the worker finished its work and acknowledged that `websocket.php` completely read its output.
|
||||||
$this->loop->cancelTimer($client->timer);
|
$this->loop->cancelTimer($client->timer);
|
||||||
|
$from->send('alert:Search finished!');
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user