YouTube_captions_search_engine/website/search.py

55 lines
1.6 KiB
Python
Raw Normal View History

#!/usr/bin/python3
2023-02-07 20:15:36 +01:00
import sys, time, fcntl, os, zipfile
# Directory that is scanned for `.zip` archives to search (presumably one archive per channel, going by the directory name — TODO confirm).
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
# First command-line argument: identifier of the client, used below to name its output file.
clientId = sys.argv[1]
# Second command-line argument: the text looked for in every line of every file of the archives.
message = sys.argv[2]
# Per-client output file, relative to the current working directory.
# NOTE(review): `clientId` is interpolated unchecked; if it can come from an untrusted client it should be validated against path traversal — confirm with the caller.
clientFilePath = f'users/{clientId}.txt'
def write(s):
    """Append `s` to the client output file while holding an exclusive lock.

    A newline separator is written first when the file already has content,
    so that each call adds one entry per line.

    Args:
        s: Text to append to `clientFilePath`.

    Raises:
        SystemExit: If locking, reading or writing fails; the underlying
            exception is used as the exit message.
    """
    # `with` guarantees that the file is closed, even when we leave through `sys.exit`.
    with open(clientFilePath, 'r+') as f:
        try:
            fcntl.flock(f, fcntl.LOCK_EX)
            try:
                # If the output file is empty, then it means that `websocket.php` read it. Anyway we don't wait for it and we append what we want to output.
                # Reading the whole file also moves the in-file cursor to its end, so the writes below are appended.
                if f.read() != '':
                    f.write("\n")
                f.write(s)
                # Flush before unlocking so that the next lock holder sees the complete entry.
                f.flush()
            finally:
                fcntl.flock(f, fcntl.LOCK_UN)
        except Exception as e:
            sys.exit(e)
2023-02-07 20:15:36 +01:00
# `os.listdir` returns the complete listing as a list (it is not a generator); `os.scandir` is the lazy alternative if the listing ever becomes a bottleneck.
# As `zipgrep` doesn't support arguments to stop on first match for each file, we proceed manually to keep a good theoretical complexity.
# The search is done on raw bytes: `str(line)` on a `bytes` line yields its `b'...'` repr, in which non-ASCII characters are escaped and therefore never match.
# `os.fsencode` gives back the bytes that were passed on the command line.
# NOTE(review): this assumes that the archived files use the same encoding as the command line (typically UTF-8) — confirm.
needle = os.fsencode(message)
for file in os.listdir(path):
    if not file.endswith('.zip'):
        continue
    # `with` closes the archive and each of its members, even if reading or `write` fails.
    with zipfile.ZipFile(path + file) as archive:
        for fileInZip in archive.namelist():
            with archive.open(fileInZip) as member:
                # Iterating lazily avoids loading the whole member in memory and `any` stops at the first matching line.
                if any(needle in line for line in member):
                    write(f'{file}/{fileInZip}')
2023-02-07 20:15:36 +01:00
# Once the search is over, wait until the output file is empty and then remove it.
# Going by the comment in `write`, an empty file presumably means that `websocket.php` has read the pending output — verify against the reader.
with open(clientFilePath) as f:
    while True:
        try:
            fcntl.flock(f, fcntl.LOCK_EX)
            try:
                # Rewind before each check: after a first `read` the cursor stays at the end of the file,
                # so without it every later `read` returns '' whether or not the file has been emptied.
                f.seek(0)
                if f.read() == '':
                    os.remove(clientFilePath)
                    break
            finally:
                # Always release the lock so that other processes using this file can take it while we sleep.
                fcntl.flock(f, fcntl.LOCK_UN)
        except Exception as e:
            sys.exit(e)
        time.sleep(1)