#!/usr/bin/python3
"""Search the channel zip archives for a string and report matches to a client.

Usage: <script> CLIENT_ID MESSAGE

MESSAGE has the form `<command> <query>`: everything up to and including the
first space is dropped and the remainder is the text searched for. When
MESSAGE starts with `search-only-captions `, only archive members whose name
ends with `.vtt` are searched.

Progress and results are appended, one per line, to `users/<CLIENT_ID>.txt`.
That file must already exist. According to the comments of the original
script, `websocket.php` empties the file once it has read it; this script
waits for that to happen and then removes the file.
"""

import fcntl
import os
import sys
import time
import zipfile

# Directory holding one `.zip` archive per channel.
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'

# Output file of the current client; assigned by `main`.
clientFilePath = None


def write(s, filePath=None):
    """Append `s` to the client output file while holding an exclusive lock.

    A newline is written first when the file already has content, so every
    message ends up on its own line without a trailing newline.

    Args:
        s: Text to append.
        filePath: File to append to; defaults to the module-level
            `clientFilePath`. The file must already exist (`r+` mode).

    Raises:
        SystemExit: If locking, reading or writing fails.
    """
    if filePath is None:
        filePath = clientFilePath
    with open(filePath, 'r+') as f:
        try:
            fcntl.flock(f, fcntl.LOCK_EX)
            # An empty file means the reader already consumed the previous
            # output. Either way we do not wait for it: we simply append.
            # Reading also moves the cursor to the end of the file, which
            # is what turns the writes below into an append.
            pending = f.read()
            if pending != '':
                f.write('\n')
            f.write(s)
            # Flush before unlocking so the reader never sees a partial write.
            f.flush()
            fcntl.flock(f, fcntl.LOCK_UN)
        except (OSError, ValueError) as e:
            sys.exit(e)


def searchArchive(zipPath, needle, onlyCaptions=False):
    """Yield the name of each member of a zip archive containing `needle`.

    `zipgrep` cannot stop at the first match of each file, so the scan is
    done manually: a member is abandoned as soon as one line matches.

    Args:
        zipPath: Path of the zip archive to scan.
        needle: Bytes to look for. Members are read as raw bytes, so the
            query must be encoded rather than compared with `str(line)`,
            which would test against the escaped `b'...'` representation.
        onlyCaptions: When true, skip members not ending with `.vtt`.

    Yields:
        Member names, in `namelist()` order, that have at least one line
        containing `needle`.
    """
    with zipfile.ZipFile(zipPath) as archive:
        for memberName in archive.namelist():
            if onlyCaptions and not memberName.endswith('.vtt'):
                continue
            with archive.open(memberName) as member:
                # Iterating the member streams it line by line and `any`
                # stops at the first hit.
                found = any(needle in line for line in member)
            if found:
                yield memberName


def waitUntilConsumed(filePath, pollInterval=1):
    """Block until `filePath` is empty, then delete it.

    Args:
        filePath: File to watch and finally remove.
        pollInterval: Seconds to sleep between two checks.

    Raises:
        SystemExit: If locking, inspecting or removing the file fails.
    """
    with open(filePath) as f:
        while True:
            try:
                fcntl.flock(f, fcntl.LOCK_EX)
                # Check the size instead of calling `f.read()` repeatedly:
                # after a first read the cursor sits at end of file, so any
                # later read returns '' even when the content is still there.
                if os.fstat(f.fileno()).st_size == 0:
                    os.remove(filePath)
                    break
                fcntl.flock(f, fcntl.LOCK_UN)
            except OSError as e:
                sys.exit(e)
            time.sleep(pollInterval)


def main(argv):
    """Run the search described by `argv` (`[script, clientId, message]`)."""
    global clientFilePath

    clientId = argv[1]
    message = argv[2]

    searchOnlyCaptions = message.startswith('search-only-captions ')
    # Drop the leading command word; a message without any space is kept whole.
    message = message[message.find(' ') + 1:]

    # NOTE(review): `clientId` is used verbatim in a file path; presumably the
    # caller only passes trusted identifiers - confirm.
    clientFilePath = f'users/{clientId}.txt'

    # `surrogateescape` round-trips command-line bytes that were not valid
    # in the locale encoding.
    needle = message.encode('utf-8', 'surrogateescape')

    files = [file for file in os.listdir(path) if file.endswith('.zip')]
    for fileIndex, file in enumerate(files):
        write(f'progress:{fileIndex + 1} / {len(files)}')
        for fileInZip in searchArchive(path + file, needle, searchOnlyCaptions):
            write(f'{file}/{fileInZip}')

    waitUntilConsumed(clientFilePath)


if __name__ == '__main__':
    main(sys.argv)