Add an optimization to the website when providing a channel id as a file path filter

This commit is contained in:
Benjamin Loison 2023-02-26 15:56:16 +01:00
parent e1aff6f469
commit e493eaeb49
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8

View File

@ -10,6 +10,8 @@ pathSearchMessageParts = sys.argv[2].split(' ')
pathSearch = pathSearchMessageParts[1] pathSearch = pathSearchMessageParts[1]
message = ' '.join(pathSearchMessageParts[2:]) message = ' '.join(pathSearchMessageParts[2:])
isPathSearchAChannelId = re.match(r'[a-zA-Z0-9-_]{24}', pathSearch)
searchOnlyCaptions = pathSearchMessageParts[0] == 'search-only-captions' searchOnlyCaptions = pathSearchMessageParts[0] == 'search-only-captions'
clientFilePath = f'users/{clientId}.txt' clientFilePath = f'users/{clientId}.txt'
@ -33,9 +35,17 @@ def cleanCaption(caption):
return caption.replace('\n', ' ') return caption.replace('\n', ' ')
# As `zipgrep` doesn't support arguments to stop on first match for each file, we proceed manually to keep a good theoretical complexity. # As `zipgrep` doesn't support arguments to stop on first match for each file, we proceed manually to keep a good theoretical complexity.
files = [file for file in os.listdir(path) if file.endswith('.zip')] if isPathSearchAChannelId:
file = pathSearch + '.zip'
if os.path.isfile(path + file):
files = [file]
else:
write(f'progress:0 / 0')
else:
files = [file for file in os.listdir(path) if file.endswith('.zip')]
for fileIndex, file in enumerate(files): for fileIndex, file in enumerate(files):
write(f'progress:{fileIndex + 1} / {len(files)}') write(f'progress:{fileIndex} / {len(files)}')
zip = zipfile.ZipFile(path + file) zip = zipfile.ZipFile(path + file)
for fileInZip in zip.namelist(): for fileInZip in zip.namelist():
endsWithVtt = fileInZip.endswith('.vtt') endsWithVtt = fileInZip.endswith('.vtt')
@ -67,6 +77,7 @@ for fileIndex, file in enumerate(files):
if message in str(line): if message in str(line):
write(toWrite) write(toWrite)
break break
write(f'progress:{fileIndex + 1} / {len(files)}')
with open(clientFilePath) as f: with open(clientFilePath) as f:
while True: while True: