#44: Enable end-users to filter searches by path

This commit is contained in:
Benjamin Loison 2023-02-24 15:12:07 +01:00
parent cba2535d97
commit 9f79c988d1
2 changed files with 10 additions and 6 deletions

View File

@ -13,7 +13,8 @@ Access raw data with: <?php echoUrl('channels/'); ?>.<br/>
Access found channels with: <?php echoUrl('channels.txt'); ?>. Access found channels with: <?php echoUrl('channels.txt'); ?>.
<form id="form"> <form id="form">
<input type="text" autofocus id="search" placeholder="Your search"></input> <input type="text" autofocus id="search" size="23" placeholder="Your search"></input><br/>
<input type="text" autofocus id="path-search" size="23" placeholder="In path containing (default: *empty*)"></input><br/>
<input type="submit" id="search" value="Search"> <input type="submit" id="search" value="Search">
<input type="submit" id="search-only-captions" value="Search only captions"> <input type="submit" id="search-only-captions" value="Search only captions">
</form> </form>
@ -82,7 +83,7 @@ Progress: <span id="progress"></span> channels
function search(event) { function search(event) {
// We don't want to refresh the webpage which is the default behavior. // We don't want to refresh the webpage which is the default behavior.
event.preventDefault(); event.preventDefault();
const query = event.submitter.id + ' ' + document.getElementById('search').value; const query = event.submitter.id + ' ' + document.getElementById('path-search').value + ' ' + document.getElementById('search').value;
if (firstRun) { if (firstRun) {
firstRun = false; firstRun = false;
conn = new WebSocket('wss://crawler.yt.lemnoslife.com/websocket'); conn = new WebSocket('wss://crawler.yt.lemnoslife.com/websocket');

View File

@ -6,10 +6,11 @@ from io import StringIO
path = '/mnt/HDD0/YouTube_captions_search_engine/channels/' path = '/mnt/HDD0/YouTube_captions_search_engine/channels/'
clientId = sys.argv[1] clientId = sys.argv[1]
message = sys.argv[2] pathSearchMessageParts = sys.argv[2].split(' ')
pathSearch = pathSearchMessageParts[1]
message = ' '.join(pathSearchMessageParts[2:])
searchOnlyCaptions = message.startswith('search-only-captions ') searchOnlyCaptions = pathSearchMessageParts[0] == 'search-only-captions'
message = message[message.find(' ') + 1:]
clientFilePath = f'users/{clientId}.txt' clientFilePath = f'users/{clientId}.txt'
@ -41,8 +42,10 @@ for fileIndex, file in enumerate(files):
endsWithVtt = fileInZip.endswith('.vtt') endsWithVtt = fileInZip.endswith('.vtt')
if searchOnlyCaptions and not endsWithVtt: if searchOnlyCaptions and not endsWithVtt:
continue continue
with zip.open(fileInZip) as f:
toWrite = f'{file}/{fileInZip}' toWrite = f'{file}/{fileInZip}'
if not pathSearch in toWrite:
continue
with zip.open(fileInZip) as f:
if endsWithVtt: if endsWithVtt:
content = f.read().decode('utf-8') content = f.read().decode('utf-8')
stringIOf = StringIO(content) stringIOf = StringIO(content)