"""Download YouTube Audio Library tracks through a Selenium-driven browser.

As there is something that looks like an anti-bot protection on media file
downloads, we use a Selenium-based approach.
"""

import json
import os
import time

import undetected_chromedriver.v2 as uc
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

AUDIO_LIBRARY_URL = 'https://studio.youtube.com/channel/UC/music'

# A result count above this value is treated as "the title filter has not been
# applied yet", so the script keeps polling.
# NOTE(review): presumably the largest count a single title filter can yield
# - confirm.
MAXIMAL_NUMBER_OF_RESULTS = 162

# Index of the first track of `music.json` to process; earlier entries are
# skipped (presumably already downloaded by a previous run - confirm).
FIRST_TRACK_INDEX = 38

options = Options()
# Persistent Chrome profile directory, so that state is kept between runs.
options.add_argument("--user-data-dir=selenium")
browser = uc.Chrome(options=options)
browser.get(AUDIO_LIBRARY_URL)

# For the `Music` tab, the YouTube UI returns 3,000 entries while my
# reverse-engineering approach returns 5,819 entries.
# For the `Sound effects` tab, the YouTube UI returns 400 entries while my
# reverse-engineering approach returns 2,021 entries.
# So I assume that the YouTube UI pagination does not work properly. To
# retrieve all media files, the idea is therefore to filter by `Track title`
# and download all entries, preferably only those that have the title we are
# looking for, as some tracks share the same title.
# As for `Sound effects`, even with `Sound effect`, `Duration`, `Category` and
# `Added`, it remains ambiguous which file we refer to (for instance for
# `Truck Driving in Parking Structure`, as they are all different).

# `music.json` is read relative to the initial working directory, before
# moving to the download directory.
with open('music.json', encoding='utf-8') as json_file:
    tracks = json.load(json_file)
tracks = tracks[FIRST_TRACK_INDEX:]

path = '/home/benjamin/Downloads'
os.chdir(path)

# `rename.txt` is created in the download directory and receives one
# `<title> - <artist>|<viperId>` line per processed track.
with open('rename.txt', 'w', encoding='utf-8') as f:
    # Open a dropdown and pick its third entry.
    # NOTE(review): presumably the rows-per-page selector - confirm.
    browser.find_element(By.XPATH, '//*[@id="trigger"]/ytcp-dropdown-trigger/div/div[2]/span').click()
    browser.find_element(By.CSS_SELECTOR, '#text-item-2 > ytcp-ve > div > div > yt-formatted-string').click()

    for trackIndex, track in enumerate(tracks):
        seconds = track["duration"]["seconds"]
        # Zero-pad the seconds so that 185 is shown as `3:05`, not `3:5`.
        minutes, remainingSeconds = divmod(seconds, 60)
        cleanDuration = f'{minutes}:{remainingSeconds:02}'
        print(f'{trackIndex} / {len(tracks)}: {track["title"]} - {track["artist"]["name"]} - {cleanDuration}')

        # Type the title in the filter input and pick the second entry of the
        # menu that shows up.
        # NOTE(review): presumably the `Track title` filter - confirm.
        browser.find_element(By.ID, 'text-input').send_keys(track['title'])
        browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()

        # Poll the result count, once per second, until it drops to a value
        # that can only come from the filtered table. There is no timeout.
        isFirstAttempt = True
        while True:
            if not isFirstAttempt:
                time.sleep(1)
            isFirstAttempt = False
            pageDescription = browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML')
            # The count is the last whitespace-separated token.
            numberOfResults = int(pageDescription.split()[-1])
            if numberOfResults <= MAXIMAL_NUMBER_OF_RESULTS:
                break
        print(f'Found {numberOfResults} results')

        if numberOfResults > 100:
            # Stop the whole run so that this track can be handled manually.
            print('More than 100 results')
            break

        # NOTE(review): the nesting below was reconstructed from a
        # whitespace-mangled source; `else` is assumed to pair with
        # `numberOfResults > 1` - confirm.
        if numberOfResults > 1:
            print('found multiple')
            occurrences = 0
            for resultsIndex in range(numberOfResults):
                # XPath row indices are 1-based.
                row = f'/html/body/ytcp-app/ytcp-entity-page/div/div/main/div/ytcp-animatable[24]/ytmus-page/ytmus-library-table/div[1]/ytmus-library-row[{resultsIndex + 1}]/div/'
                title = browser.find_element(By.XPATH, row + 'div[2]/div').get_attribute('innerHTML')
                artistCommon = 'div[5]/div'
                # The artist name is either wrapped in a link or stored
                # directly in the cell.
                try:
                    artist = browser.find_element(By.XPATH, row + artistCommon + '/ytcp-hover-anchor/a/span')
                except NoSuchElementException:
                    artist = browser.find_element(By.XPATH, row + artistCommon)
                artist = artist.get_attribute('innerHTML')
                print(title, artist)
                if title == track['title'] and artist == track['artist']['name']:
                    print("it's a match")
                    # `DOWNLOAD` button of the row. Doesn't block.
                    browser.find_element(By.XPATH, row + 'div[8]/div[2]/ytcp-button/div').click()
                    occurrences += 1
            if occurrences > 1:
                # Several rows share the same title and artist: stop the run,
                # as the downloaded files cannot be told apart automatically.
                print('multiple occurrences')
                break
            # browser.get('chrome://downloads/')
            # browser.get(AUDIO_LIBRARY_URL)
        else:
            # `DOWNLOAD` button of the only listed row. Doesn't block.
            browser.find_element(By.CSS_SELECTOR, 'ytcp-button.style-scope:nth-child(1) > div:nth-child(2)').click()

        # Remove the current filter before handling the next track.
        browser.find_element(By.CSS_SELECTOR, '#delete-icon > tp-yt-iron-icon').click()
        f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n")

# The browser is left open on purpose.
# NOTE(review): presumably so that pending downloads can finish - confirm.
# browser.quit()