2023-02-07 17:25:17 +01:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
2023-02-07 20:15:36 +01:00
|
|
|
import sys, time, fcntl, os, zipfile
|
|
|
|
|
|
|
|
# Directory that is scanned for channel `.zip` archives.
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'

# Command line: <clientId> <message>.
clientId, message = sys.argv[1], sys.argv[2]

# The message starts with a command word followed by a space; only the
# `search-only-captions` command restricts the search to `.vtt` entries.
searchOnlyCaptions = message.startswith('search-only-captions ')
# Keep what follows the first space (the whole message if there is none).
message = message.split(' ', 1)[-1]

# File through which results are handed over to this client.
clientFilePath = f'users/{clientId}.txt'
|
|
|
|
|
|
|
|
def write(s):
    """Append `s` to the client's output file while holding an exclusive lock.

    A newline separator is written first when the file already has content.
    The file is opened with `r+`, so it must already exist and is never
    truncated here. Any exception raised while locking, reading or writing
    ends the process through `sys.exit`.
    """
    with open(clientFilePath, 'r+') as f:
        try:
            fcntl.flock(f, fcntl.LOCK_EX)
            try:
                # If the output file is empty, then it means that `websocket.php` read it.
                # Either way we don't wait for it and append what we want to output.
                # Reading moves the in-file cursor to the end, so the writes below append.
                if f.read() != '':
                    f.write("\n")
                f.write(s)
                # Push the data to the file before the lock is released.
                f.flush()
            finally:
                # Release the lock even if reading or writing failed.
                fcntl.flock(f, fcntl.LOCK_UN)
        except Exception as e:
            sys.exit(e)
|
2023-02-07 17:25:17 +01:00
|
|
|
|
2023-02-07 20:15:36 +01:00
|
|
|
# Note: `os.listdir` returns the complete list of entries before the loop starts.
# As `zipgrep` doesn't support arguments to stop on first match for each file, we proceed manually to keep a good theoretical complexity.

# Archive members are read as bytes, so the query is encoded once and compared
# bytes to bytes. Searching in `str(line)` would search the `repr` of the bytes
# (`b'...\n'`), which never matches non-ASCII text and matches stray characters
# such as `n`, `b`, `'` or `\`.
# `surrogateescape` keeps the encoding from failing on an argument that was
# not valid text when it was decoded into `sys.argv`.
messageBytes = message.encode('utf-8', errors='surrogateescape')

for file in os.listdir(path):
    if not file.endswith('.zip'):
        continue
    # The context managers close the archive and each member even if `write` exits.
    with zipfile.ZipFile(path + file) as archive:
        for fileInZip in archive.namelist():
            if searchOnlyCaptions and not fileInZip.endswith('.vtt'):
                continue
            with archive.open(fileInZip) as member:
                # Iterating the member yields one line at a time and `any` stops
                # at the first matching line, so each file is reported at most once.
                if any(messageBytes in line for line in member):
                    write(f'{file}/{fileInZip}')
|
2023-02-07 17:25:17 +01:00
|
|
|
|
2023-02-07 20:15:36 +01:00
|
|
|
# Once the search is over, poll the client's output file every second and
# remove it as soon as it is found empty (presumably emptied by its consumer,
# `websocket.php` - verify against that script).
with open(clientFilePath) as f:
    while True:
        try:
            fcntl.flock(f, fcntl.LOCK_EX)
            try:
                # Rewind before each check: after a first non-empty read the cursor
                # sits at the end of the file, so a second `read()` would return ''
                # even though the content has not been consumed.
                f.seek(0)
                isEmpty = f.read() == ''
                if isEmpty:
                    os.remove(clientFilePath)
            finally:
                # Always give the lock back so the consumer can access the file.
                fcntl.flock(f, fcntl.LOCK_UN)
        except Exception as e:
            sys.exit(e)
        if isEmpty:
            break
        time.sleep(1)
|