#31: Add zip files search
This commit is contained in:
		| @@ -14,9 +14,50 @@ See <?php echoUrl('https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions | ||||
|     <input type="submit" value="Search"> | ||||
| </form> | ||||
|  | ||||
| <ul id="channels"> | ||||
| </ul> | ||||
|  | ||||
| <script> | ||||
|     var firstRun = true; | ||||
|     var conn; | ||||
|     // We could parse the DOM instead of using the following variable. | ||||
|     var channels = []; | ||||
|  | ||||
|     function createA(text, href) { | ||||
|         var a = document.createElement('a'); | ||||
|         var text = document.createTextNode(text); | ||||
|         a.appendChild(text); | ||||
|         a.href = href; | ||||
|         return a; | ||||
|     } | ||||
|  | ||||
|     function treatLine(line) { | ||||
|         console.log(line); | ||||
|         if (line.startsWith('alert:')) { | ||||
|             alert(line.replace('alert:', '')); | ||||
|         } else { | ||||
|             var channelsDom = document.getElementById('channels'); | ||||
|             const channelFileParts = line.split('/'); | ||||
|             const channel = channelFileParts[0]; | ||||
|             const channelFile = channelFileParts[1]; | ||||
|             const channelHref = `channels/${channel}`; | ||||
|             if (!channels.includes(channel)) { | ||||
|                 channels.push(channel); | ||||
|                 channelDom = document.createElement('li'); | ||||
|                 var a = createA(channel, channelHref); | ||||
|                 channelDom.appendChild(a); | ||||
|                 var channelFilesDom = document.createElement('ul'); | ||||
|                 channelDom.appendChild(channelFilesDom); | ||||
|                 channelsDom.appendChild(channelDom); | ||||
|             } | ||||
|             var channelDom = channelsDom.lastChild; | ||||
|             var channelFilesDom = channelDom.lastChild; | ||||
|             var channelFileDom = document.createElement('li'); | ||||
|             var a = createA(channelFile, `${channelHref}/${channelFile}`); | ||||
|             channelFileDom.appendChild(a); | ||||
|             channelFilesDom.appendChild(channelFileDom); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     function search(event) { | ||||
|         // We don't want to refresh the webpage which is the default behavior. | ||||
| @@ -25,11 +66,15 @@ See <?php echoUrl('https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions | ||||
|         if (firstRun) { | ||||
|             firstRun = false; | ||||
|             conn = new WebSocket('wss://crawler.yt.lemnoslife.com/websocket'); | ||||
|             conn.onmessage = function(e) { console.log(e.data); }; | ||||
|             conn.onmessage = function(e) { | ||||
|                 e.data.split('\n').forEach(treatLine); | ||||
|             }; | ||||
|             // We can't directly proceed with `conn.send`, as the connection may not be established yet. | ||||
|             conn.onopen = function(e) { conn.send(query); }; | ||||
|         } else { | ||||
|             // We assume at this point that the connection is established. | ||||
|             channels = []; | ||||
|             document.getElementById('channels').innerHTML = ''; | ||||
|             conn.send(query); | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import sys, time, fcntl, os | ||||
| import sys, time, fcntl, os, zipfile | ||||
|  | ||||
| path = '/mnt/HDD0/YouTube_captions_search_engine/channels/' | ||||
|  | ||||
| clientId = sys.argv[1] | ||||
| message = sys.argv[2] | ||||
| @@ -8,21 +10,35 @@ message = sys.argv[2] | ||||
| clientFilePath = f'users/{clientId}.txt' | ||||
|  | ||||
| def write(s): | ||||
|     f = open(clientFilePath, 'w+') | ||||
|     f = open(clientFilePath, 'r+') | ||||
|     try: | ||||
|         fcntl.flock(f, fcntl.LOCK_EX) | ||||
|         # If the output file is empty, then it means that `websocket.php` read it. Either way, we don't wait for it and simply append what we want to output. | ||||
|         read = f.read() | ||||
|         f.write(f"{read}\n{s}") | ||||
|         # We are appending content, as reading moved the in-file cursor to the end. | ||||
|         if read != '': | ||||
|             f.write("\n") | ||||
|         f.write(s) | ||||
|         f.flush() | ||||
|         fcntl.flock(f, fcntl.LOCK_UN) | ||||
|         f.close() | ||||
|     except Exception as e: | ||||
|         sys.exit(e) | ||||
|     f.close() | ||||
|  | ||||
| for i in range(10): | ||||
|     write(f'{i}: {message}') | ||||
|     time.sleep(2) | ||||
| # Unclear if `os.listdir` takes a lot of time, as it's a generator. | ||||
| # As `zipgrep` doesn't support arguments to stop on first match for each file, we proceed manually to keep a good theoretical complexity. | ||||
| for file in os.listdir(path): | ||||
|     if file.endswith('.zip'): | ||||
|         zip = zipfile.ZipFile(path + file) | ||||
|         for fileInZip in zip.namelist(): | ||||
|             f = zip.open(fileInZip) | ||||
|             for line in f.readlines(): | ||||
|                 if message in str(line): | ||||
|                     write(f'{file}/{fileInZip}') | ||||
|                     break | ||||
|             f.close() | ||||
|  | ||||
| f = open(clientFilePath, 'r') | ||||
| f = open(clientFilePath) | ||||
| while True: | ||||
|     try: | ||||
|         fcntl.flock(f, fcntl.LOCK_EX) | ||||
| @@ -30,6 +46,7 @@ while True: | ||||
|             os.remove(clientFilePath) | ||||
|             break | ||||
|         else: | ||||
|             fcntl.flock(f, fcntl.LOCK_UN) | ||||
|             time.sleep(1) | ||||
|     except Exception as e: | ||||
|         sys.exit(e) | ||||
|   | ||||
| @@ -96,6 +96,7 @@ class MyProcess implements MessageComponentInterface | ||||
|         if (preg_match("/^[a-zA-Z0-9-_ ]+$/", $msg) !== 1) { | ||||
|             return; | ||||
|         } | ||||
|         $from->send('alert:Started searching...'); | ||||
|         $client = $this->clients->offsetGet($from); | ||||
|         // If a previous request was received, we execute the new one with another client for simplicity; otherwise, with the current file deletion approach, we can't tell the worker `search.py` that we don't care about its execution anymore. | ||||
|         if ($client->pid !== null) { | ||||
| @@ -137,6 +138,7 @@ class MyProcess implements MessageComponentInterface | ||||
|             } else { | ||||
|                 // We don't need the periodic timer anymore, as the worker finished its work and acknowledged that `websocket.php` completely read its output. | ||||
|                 $this->loop->cancelTimer($client->timer); | ||||
|                 $from->send('alert:Search finished!'); | ||||
|             } | ||||
|         }); | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user