# 2023-02-04 14:18:26 +01:00
from selenium import webdriver
from selenium . webdriver . common . by import By
from selenium . webdriver . firefox . options import Options
import json
"""
As there seems to be an anti-bot mechanism protecting media file downloads, we use a Selenium-based approach.
"""
# Start Firefox from an existing on-disk profile (presumably so the session
# is already signed in to YouTube Studio -- TODO confirm).
profile_path = '/home/benjamin/.mozilla/firefox/ilfnifi0.default-release'
fp = webdriver.FirefoxProfile(profile_path)

# Attach the profile through `Options` rather than passing it positionally to
# `webdriver.Firefox`: the positional `firefox_profile` argument was
# deprecated in Selenium 4 and later removed.
options = Options()
options.profile = fp
browser = webdriver.Firefox(options=options)

# Load the YouTube Studio music page that the track filter operates on.
browser.get('https://studio.youtube.com/channel/UC/music')
"""
For the `Music` tab, the YouTube UI returns 3,000 entries while my reverse-engineering approach returns 5,819 entries.
For the `Sound effects` tab, the YouTube UI returns 400 entries while my reverse-engineering approach returns 2,021 entries.
So I assume the YouTube UI pagination doesn't work properly. To retrieve all media files, the idea is to filter by `Track title` and download all returned entries, since some tracks share the same title.
We could check the `trackId` or `viperId` to avoid downloading other media files whose title merely contains the provided title.
"""
# Load the tracks to look up. Each entry must provide a `title` key, which is
# the only field read below. The encoding is pinned so that non-ASCII titles
# decode the same way regardless of the platform default.
with open('music.json', encoding='utf-8') as json_file:
    tracks = json.load(json_file)

for track in tracks:
    # Type the track title into the filter text input.
    browser.find_element(By.ID, 'text-input').send_keys(track['title'])
    # Click the second item of the text menu (presumably the `Track title`
    # filter suggestion -- TODO confirm against the live UI).
    browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()
    # The last whitespace-separated token of the page description is parsed
    # as the number of results; `int` raises ValueError if it is not numeric.
    page_description = browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML')
    number_of_results = int(page_description.split()[-1])
    print(number_of_results)
    # `DOWNLOAD`
    # This locator is a CSS selector, so it has to be resolved with
    # By.CSS_SELECTOR; By.XPATH rejects it as an invalid expression.
    browser.find_element(By.CSS_SELECTOR, 'div.overflow-actions:nth-child(12) > ytcp-button:nth-child(1) > div:nth-child(2)').click()
    # Only the first track is processed for now.
    break
#browser.quit()