Comment WebSocket mechanism to work with an arbitrary number of independent sends

This commit is contained in:
Benjamin Loison 2023-02-07 18:14:49 +01:00
parent 126cc75dc6
commit b45384bab7
3 changed files with 45 additions and 28 deletions

View File

@ -10,7 +10,7 @@
See <?php echoUrl('https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions_search_engine'); ?> for more information.<br/>
<form id="form">
<input type="text" autofocus id="search" pattern="[A-Za-z0-9-_ ]+" placeholder="Your alphanumeric search"></input>
<input type="text" autofocus id="search" pattern="[A-Za-z0-9-_ ]+" placeholder="Your [A-Za-z0-9-_ ]+ search"></input>
<input type="submit" value="Search">
</form>
@ -19,14 +19,17 @@ See <?php echoUrl('https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions
var conn;
/**
 * Handle a search submission: send the content of the `search` input to the WebSocket server.
 * The connection is opened lazily on the first call and reused by the following ones.
 *
 * @param {Event} event - The triggering event, cancelled to keep the webpage from refreshing.
 */
function search(event) {
    // We don't want to refresh the webpage which is the default behavior.
    event.preventDefault();
    const query = document.getElementById('search').value;
    if (firstRun) {
        firstRun = false;
        conn = new WebSocket('wss://crawler.yt.lemnoslife.com/websocket');
        conn.onmessage = function(e) { console.log(e.data); };
    }
    if (conn.readyState === WebSocket.CONNECTING) {
        // We can't directly proceed with `conn.send`, as it throws `InvalidStateError` while the connection isn't established yet.
        // One listener per query (instead of a single `onopen`) makes sure that every search submitted in the meantime is sent.
        conn.addEventListener('open', function() { conn.send(query); }, { once: true });
    } else {
        conn.send(query);
    }
}

View File

@ -11,18 +11,27 @@ def write(s):
f = open(clientFilePath, 'w+')
try:
fcntl.flock(f, fcntl.LOCK_EX)
if f.read() == '':
f.write(s)
else:
f.close()
time.sleep(1)
write(s)
# If the output file is empty, then it means that `websocket.php` read it. Either way, we don't wait for that and we append what we want to output.
read = f.read()
f.write(f"{read}\n{s}")
except Exception as e:
print(e)
sys.exit(e)
f.close()
# Emit ten messages, one every two seconds.
for i in range(10):
    write(f'{i}: {message}')
    time.sleep(2)

# Wait for `websocket.php` to empty the output file, then delete it to acknowledge that everything has been read.
# The file must not be removed before this point, otherwise it can't be reopened here.
with open(clientFilePath, 'r') as f:
    while True:
        try:
            fcntl.flock(f, fcntl.LOCK_EX)
            try:
                # Rewind, as the previous iteration left the position at the end of the file, which would make `read` return '' even if unread output remains.
                f.seek(0)
                isEmpty = f.read() == ''
                if isEmpty:
                    os.remove(clientFilePath)
            finally:
                # Release the lock before sleeping, otherwise the reader can never acquire it to consume the output.
                fcntl.flock(f, fcntl.LOCK_UN)
        except Exception as e:
            sys.exit(e)
        if isEmpty:
            break
        time.sleep(1)

View File

@ -19,6 +19,7 @@ class Client
$this->id = $id;
}
// `__destruct` can't take arguments.
public function free($loop)
{
$loop->cancelTimer($this->timer);
@ -38,10 +39,13 @@ class Client
}
}
// Need to be passed as a reference to `flock`.
$WAIT_IF_LOCKED = 1;

define('USERS_FOLDER', 'users/');

// Delete users outputs of previous `websocket.php` execution.
// We skip `.`, `..` and `.gitignore` by name instead of by position, as their rank in the `scandir` listing depends on the other file names and on `.gitignore` being present.
// `scandir` returns `false` on failure, in which case there is nothing to delete.
foreach (array_diff(scandir(USERS_FOLDER) ?: [], ['.', '..', '.gitignore']) as $file) {
    unlink(USERS_FOLDER . $file);
}
@ -69,12 +73,14 @@ class MyProcess implements MessageComponentInterface
/**
 * Allocate the next client identifier and wrap it in a new `Client`.
 *
 * Exits the whole process if the semaphore protecting the identifier counter can't be acquired.
 *
 * @return Client
 */
private function newClient()
{
    // If `onOpen` and `onMessage` can't be called at the same time, then this semaphore is useless.
    if (!sem_acquire($this->newClientIdSem)) {
        exit('`newClient` error');
    }
    // Note that we don't re-use ids except on `websockets.php` restart, but as the maximal int in PHP is a very large number we are fine for a while (https://www.php.net/manual/en/reserved.constants.php#constant.php-int-max)
    $clientId = $this->newClientId++;
    sem_release($this->newClientIdSem);
    return new Client($clientId);
}
@ -86,38 +92,50 @@ class MyProcess implements MessageComponentInterface
public function onMessage(ConnectionInterface $from, $msg)
{
// As we are going to use this argument in a shell command, we verify a limited set of characters that are safe once quoted.
if (preg_match("/^[a-zA-Z0-9-_ ]+$/", $msg) !== 1) {
return;
}
$client = $this->clients->offsetGet($from);
// If a previous request was received, we execute the new one with another client for simplicity.
// If a previous request was received, we execute the new one with another client for simplicity otherwise with current file deletion approach, we can't tell the worker `search.py` that we don't care about its execution anymore.
if ($client->pid !== null) {
// As `$this->clients->detach` doesn't call `__destruct` for unknown reason, we clean manually the previous request.
$client->free($this->loop);
$client = $this->newClient();
}
$clientId = $client->id;
$clientFilePath = getClientFilePath($clientId);
// Create the worker output file otherwise it would believe that we don't need this worker anymore.
file_put_contents($clientFilePath, '');
// Start the independent worker.
// Redirecting `stdout` is mandatory otherwise `exec` is blocking.
$client->pid = exec("./search.py $clientId '$msg' > /dev/null & echo $!");
// `addTimer` doesn't enable us to use independently `$from->send` multiple times with blocking instructions between.
$client->timer = $this->loop->addPeriodicTimer(1, function () use ($from, $clientId, $clientFilePath, $client) {
echo "Checking news from $clientId\n";
// If the worker output file doesn't exist anymore, then it means that the worker has finished its work and acknowledged that `websocket.php` completely read its output.
if (file_exists($clientFilePath)) {
// `flock` requires `r`eading permission and we need `w`riting one due to `ftruncate` usage.
$fp = fopen($clientFilePath, "r+");
$read = null;
if (flock($fp, LOCK_EX, $WAIT_IF_LOCKED)) { // acquire an exclusive lock
// We assume that the temporary output is less than 1 MB long.
$read = fread($fp, 1_000_000);
ftruncate($fp, 0); // truncate file
fflush($fp); // flush output before releasing the lock
flock($fp, LOCK_UN); // release the lock
} else {
echo "Couldn't get the lock!";
// We `die` instead of `echo`ing to force the developer to investigate the reason.
die("Couldn't get the lock!");
}
fclose($fp);
// Assume that empty output doesn't need to be forwarded to the end-user.
if ($read !== null && $read !== '') {
$from->send($read);
}
} else {
// We don't need the periodic timer anymore, as the worker finished its work and acknowledged that `websocket.php` completely read its output.
$this->loop->cancelTimer($client->timer);
}
});
@ -129,33 +147,20 @@ class MyProcess implements MessageComponentInterface
$clientId = $client->id;
$client->free($this->loop);
echo "$clientId disconnected\n";
/*$this->loop->cancelTimer($client->timer);
// Should in theory verify that the pid wasn't re-assigned.
posix_kill($client->pid, SIGTERM);
$clientFilePath = getClientFilePath($clientId);
if (file_exists($clientFilePath)) {
$fp = fopen($clientFilePath, "r+");
if (flock($fp, LOCK_EX, $WAIT_IF_LOCKED)) { // acquire an exclusive lock
unlink($clientFilePath); // delete file
flock($fp, LOCK_UN); // release the lock
} else {
echo "Couldn't get the lock!";
}
fclose($fp);
}*/
$this->clients->detach($conn);
}
/**
 * Error callback: close the faulty connection and stop the server.
 *
 * @param ConnectionInterface $conn the connection on which the error occurred
 * @param \Exception          $e    the error, whose message is reported before exiting
 */
public function onError(ConnectionInterface $conn, \Exception $e)
{
    $conn->close();
    // We `die` instead of only `echo`ing to force the developer to investigate, and we include the exception message so that the cause is visible.
    die('`onError`: ' . $e->getMessage());
}
}
// The event loop is created explicitly so that it can be handed to both the application and `MyProcess`.
$loop = \React\EventLoop\Factory::create();

// Run the server application through the WebSocket protocol on port 4430.
// Arguments are positional, as named arguments come with PHP 8 which isn't current Debian one.
$app = new \Ratchet\App('crawler.yt.lemnoslife.com', 4430, '127.0.0.1', $loop);

// `*` accepts connections from any origin.
$allowedOrigins = ['*'];
$app->route('/websocket', new MyProcess($loop), $allowedOrigins);

$app->run();