"""Download YouTube Studio media files by driving Firefox with Selenium.

As there is something that looks like an anti-bot protection on media file
downloads, we use a Selenium-based approach running on an existing Firefox
profile.

For the `Music` tab, the YouTube UI returns 3,000 entries while my
reverse-engineering approach returns 5,819 entries.
For the `Sound effects` tab, the YouTube UI returns 400 entries while my
reverse-engineering approach returns 2,021 entries.

So I assume the YouTube UI pagination doesn't work correctly. To retrieve all
media files, the idea is to filter by `Track title` and download all returned
entries, as some tracks share the same title.
We could verify the `trackId` or `viperId` to avoid downloading other media
files whose title merely contains the provided title.
"""

import json

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options

profile_path = '/home/benjamin/.mozilla/firefox/ilfnifi0.default-release'
fp = webdriver.FirefoxProfile(profile_path)

# NOTE: passing the profile positionally is deprecated. The non-deprecated
# attempt below did not work easily, so it is kept here for reference only:
#     options = Options()
#     options.set_preference('profile', profile_path)
#     browser = webdriver.Firefox(options=options)
browser = webdriver.Firefox(fp)
browser.get('https://studio.youtube.com/channel/UC/music')

# `music.json` is expected to hold a list of objects that each have a `title`
# key (only that key is read here).
with open('music.json') as json_file:
    tracks = json.load(json_file)

for track in tracks:
    # Type the track title into the filter input, then click the second item
    # of the menu that pops up (presumably the `Track title` filter option;
    # confirm against the live UI).
    # An earlier attempt clicked `text-input` and `text-item-2` and typed the
    # title through `ActionChains` instead.
    browser.find_element(By.ID, 'text-input').send_keys(track['title'])
    browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()

    # Previously also tried clicking the filter dialog button:
    #     /html/body/ytcp-filter-dialog/tp-yt-paper-dialog/div[2]/ytcp-button/div

    # The last whitespace-separated token of the page description is parsed as
    # the number of results.
    number_of_results = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1])
    print(number_of_results)

    # `DOWNLOAD`
    # This locator is a CSS selector, so it must be resolved with
    # `By.CSS_SELECTOR`; it is not a valid XPath expression.
    browser.find_element(By.CSS_SELECTOR, 'div.overflow-actions:nth-child(12) > ytcp-button:nth-child(1) > div:nth-child(2)').click()
    # Work in progress: only the first track is processed for now.
    break

# The browser is intentionally left open (`browser.quit()` is not called).