2 Home
Benjamin_Loison edited this page 2023-02-20 14:17:26 +01:00

Check the consistency (which is actually very poor) of the YouTube Audio Library data using this Python script:

import os, json, subprocess

# Directory containing `sound_effects.json`; it becomes the working
# directory so that the file can be opened by its bare name.
path = '/home/benjamin/Desktop/bens_folder/dev/yt/audio_library'

os.chdir(path)

# Decode explicitly as UTF-8 instead of relying on the locale's default
# encoding, which would make parsing depend on the machine's configuration.
with open('sound_effects.json', encoding='utf-8') as json_file:
    tracks = json.load(json_file)

def execute(command):
    """Run `command` through the shell and return its standard output as text.

    The captured output is decoded as UTF-8.
    `subprocess.CalledProcessError` is raised when the command exits with a
    non-zero status.

    NOTE: `command` is interpreted by the shell, so any value interpolated
    into it must be quoted by the caller.
    """
    completed = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE)
    return completed.stdout.decode('utf-8')

# Work from the downloads directory: the `cmp` command built below names
# MP3 files relative to the current directory (presumably the downloaded
# tracks -- confirm).
path = '/home/benjamin/Downloads'
os.chdir(path)

# Maps an identity key (artist, title, duration, category) to the first
# track seen with that key.
ids = {}

for track in tracks:
    raw_duration = track['duration']
    # `nanos` is used as-is when present (`seconds` is then not read);
    # otherwise `seconds` is converted to nanoseconds.
    # NOTE(review): if a track can carry both fields, `seconds` is ignored
    # in the key -- confirm this is intended.
    if 'nanos' in raw_duration:
        duration = raw_duration['nanos']
    else:
        duration = int(raw_duration['seconds']) * (10 ** 9)

    # A missing category is represented as `None` in the key.
    category = track['attributes'].get('category')

    artist_name = track['artist']['name']
    title = track['title']
    key = f"{artist_name}_{title}_{duration}_{category}"

    # First occurrence: remember it and move on.
    if key not in ids:
        ids[key] = track
        continue

    # A track with the same key was already registered: report the key.
    print(key)

    # Shell command comparing `<title> - <artist>.mp3` with its `(1)` copy.
    # It is only built here; pass it to `execute` to actually run it.
    file_name = f"{title} - {artist_name}"
    command = f'cmp "{file_name}.mp3" "{file_name}(1).mp3"'

    first_seen = ids[key]

    # Overwrite these fields on the current track (in place, so the entry in
    # `tracks` is modified) with the first-seen values, so that the equality
    # check below does not report them as differences.
    for field in ('trackId', 'viperId'):
        track[field] = first_seen[field]
    # NOTE(review): raises KeyError if the first-seen track has no `seconds`
    # under `duration` or `publishTime` -- confirm every duplicate has both.
    for field in ('duration', 'publishTime'):
        track[field]['seconds'] = first_seen[field]['seconds']

    # Anything still different after the overwrite above is reported.
    if track != first_seen:
        print('different')
        print(track)
        print(first_seen)

Computing MAXIMAL_NUMBER_OF_RESULTS

The updated version of the algorithm, initially published in commit 588b20409b5acc2e4412427ccd1d333847157400, is:

import os, json

# Directory containing `music.json`; it becomes the working directory so
# that the file can be opened by its bare name.
path = '/home/benjamin/Desktop/bens_folder/dev/yt/audio_library'

os.chdir(path)

# Decode explicitly as UTF-8 instead of relying on the locale's default
# encoding, which would make parsing depend on the machine's configuration.
with open('music.json', encoding='utf-8') as json_file:
    tracks = json.load(json_file)

# Find the track title that, used as a search query, matches the most tracks.
# A query matches a track when the query (exclamation marks removed, compared
# case-insensitively) is a substring of that track's title.

# Lower-case every title once up front instead of once per comparison.
loweredTitles = [track['title'].lower() for track in tracks]

mostResults = 0
mostResultsTitle = None

for track in tracks:
    # Earlier versions did not apply `replace` and `lower`; they were added
    # because that seems to be what the YouTube UI title search does.
    # Both are computed once per query rather than inside the inner scan.
    title = track['title'].replace('!', '')
    query = title.lower()
    results = sum(query in loweredTitle for loweredTitle in loweredTitles)
    # Strict comparison: on a tie the earliest title is kept.
    if results > mostResults:
        mostResults = results
        mostResultsTitle = title

# Prints the highest match count and the (exclamation-mark-free) title that
# produced it; `0 None` when there are no tracks.
print(mostResults, mostResultsTitle)