#44: Enable end-users to filter searches by path

This commit is contained in:
Benjamin Loison 2023-02-24 15:12:07 +01:00
parent 3bba97e90c
commit 58f25a114e
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8
2 changed files with 10 additions and 6 deletions

View File

@ -13,7 +13,8 @@ Access raw data with: <?php echoUrl('channels/'); ?>.<br/>
Access found channels with: <?php echoUrl('channels.txt'); ?>.
<form id="form">
<input type="text" autofocus id="search" placeholder="Your search"></input>
<input type="text" autofocus id="search" size="23" placeholder="Your search"></input><br/>
<input type="text" autofocus id="path-search" size="23" placeholder="In path containing (default: *empty*)"></input><br/>
<input type="submit" id="search" value="Search">
<input type="submit" id="search-only-captions" value="Search only captions">
</form>
@ -82,7 +83,7 @@ Progress: <span id="progress"></span> channels
function search(event) {
// We don't want to refresh the webpage which is the default behavior.
event.preventDefault();
const query = event.submitter.id + ' ' + document.getElementById('search').value;
const query = event.submitter.id + ' ' + document.getElementById('path-search').value + ' ' + document.getElementById('search').value;
if (firstRun) {
firstRun = false;
conn = new WebSocket('wss://crawler.yt.lemnoslife.com/websocket');

View File

@ -6,10 +6,11 @@ from io import StringIO
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
clientId = sys.argv[1]
message = sys.argv[2]
pathSearchMessageParts = sys.argv[2].split(' ')
pathSearch = pathSearchMessageParts[1]
message = ' '.join(pathSearchMessageParts[2:])
searchOnlyCaptions = message.startswith('search-only-captions ')
message = message[message.find(' ') + 1:]
searchOnlyCaptions = pathSearchMessageParts[0] == 'search-only-captions'
clientFilePath = f'users/{clientId}.txt'
@ -41,8 +42,10 @@ for fileIndex, file in enumerate(files):
endsWithVtt = fileInZip.endswith('.vtt')
if searchOnlyCaptions and not endsWithVtt:
continue
toWrite = f'{file}/{fileInZip}'
if not pathSearch in toWrite:
continue
with zip.open(fileInZip) as f:
toWrite = f'{file}/{fileInZip}'
if endsWithVtt:
content = f.read().decode('utf-8')
stringIOf = StringIO(content)