Add an optimization to the website when providing a channel id as a file path filter
This commit is contained in:
parent
e1aff6f469
commit
e493eaeb49
@ -10,6 +10,8 @@ pathSearchMessageParts = sys.argv[2].split(' ')
|
||||
pathSearch = pathSearchMessageParts[1]
|
||||
message = ' '.join(pathSearchMessageParts[2:])
|
||||
|
||||
isPathSearchAChannelId = re.match(r'[a-zA-Z0-9-_]{24}', pathSearch)
|
||||
|
||||
searchOnlyCaptions = pathSearchMessageParts[0] == 'search-only-captions'
|
||||
|
||||
clientFilePath = f'users/{clientId}.txt'
|
||||
@ -33,9 +35,17 @@ def cleanCaption(caption):
|
||||
return caption.replace('\n', ' ')
|
||||
|
||||
# As `zipgrep` has no option to stop at the first match in each file, we search manually to keep good theoretical complexity.
|
||||
files = [file for file in os.listdir(path) if file.endswith('.zip')]
|
||||
if isPathSearchAChannelId:
|
||||
file = pathSearch + '.zip'
|
||||
if os.path.isfile(path + file):
|
||||
files = [file]
|
||||
else:
|
||||
write(f'progress:0 / 0')
|
||||
else:
|
||||
files = [file for file in os.listdir(path) if file.endswith('.zip')]
|
||||
|
||||
for fileIndex, file in enumerate(files):
|
||||
write(f'progress:{fileIndex + 1} / {len(files)}')
|
||||
write(f'progress:{fileIndex} / {len(files)}')
|
||||
zip = zipfile.ZipFile(path + file)
|
||||
for fileInZip in zip.namelist():
|
||||
endsWithVtt = fileInZip.endswith('.vtt')
|
||||
@ -67,6 +77,7 @@ for fileIndex, file in enumerate(files):
|
||||
if message in str(line):
|
||||
write(toWrite)
|
||||
break
|
||||
write(f'progress:{fileIndex + 1} / {len(files)}')
|
||||
|
||||
with open(clientFilePath) as f:
|
||||
while True:
|
||||
|
Loading…
Reference in New Issue
Block a user