40 lines
2.2 KiB
Python
40 lines
2.2 KiB
Python
from selenium import webdriver
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.firefox.options import Options
|
|
import json
|
|
|
|
"""
|
|
As there seems to be some anti-bot protection against downloading media files, we use a Selenium-based approach.
|
|
"""
|
|
|
|
# Start Firefox on an existing local profile and open the YouTube Studio music page.
# NOTE(review): presumably the profile is reused because it already holds a logged-in
# YouTube Studio session - confirm.
profile_path = '/home/benjamin/.mozilla/firefox/ilfnifi0.default-release'
fp = webdriver.FirefoxProfile(profile_path)

# Attach the profile through `Options` and pass it by keyword: the first positional
# argument of `webdriver.Firefox` is not a profile in recent Selenium 4 releases,
# so `webdriver.Firefox(fp)` only works with older versions.
options = Options()
options.profile = fp

browser = webdriver.Firefox(options=options)
browser.get('https://studio.youtube.com/channel/UC/music')
|
|
|
|
"""
|
|
For `Music` tab, YouTube UI returns 3,000 entries while my reverse-engineering approach returns 5,819 entries.
|
|
For `Sound effects` tab, YouTube UI returns 400 entries while my reverse-engineering approach returns 2021 entries.
|
|
|
|
So I assume the YouTube UI pagination doesn't work properly. To retrieve all media files (for `Music`), the idea is therefore to filter by `Track title` and download the one entry that exactly (not just `contains`) matches `artist/name`, `title` and `duration/nanos` (converted if only `seconds` is given), as some tracks share the same title.
|
|
Only `trackId` and `viperId` differ between entries sharing the same `artist/name`, `title` and `duration/nanos` (cf. the comment above) (example: `Dyalla_Ringside_116`); I verified all such duplicates and they are binary identical.
|
|
So we will have to duplicate the media file under each of the different `trackId`s for files that are *identical* (note that `trackId`s, as well as `viperId`s, are unique).
|
|
Otherwise I could clean the metadata by removing duplicates (but then if we update the database we have to make sure that ids that we have kept are still kept).
|
|
"""
|
|
|
|
# Load the track metadata to process; every entry is read through its `title` key below.
# JSON is UTF-8 by specification, so the encoding is stated explicitly instead of
# depending on the locale default.
with open('music.json', encoding='utf-8') as json_file:
    tracks = json.load(json_file)

for track in tracks:
    # Type the track title into the filter input, then click the second item of the
    # menu located by the absolute XPath below.
    # NOTE(review): presumably that item is the `Track title` filter - confirm against the live UI.
    browser.find_element(By.ID, 'text-input').send_keys(track['title'])
    browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()

    # The last whitespace-separated token of the `.page-description` element is parsed
    # as the number of results.
    number_of_results = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1])
    print(number_of_results)

    # `DOWNLOAD`
    # This locator is a CSS selector (`>` combinators, `:nth-child(...)`), so it has to
    # be resolved with `By.CSS_SELECTOR`; handed to `By.XPATH` it is not a usable XPath
    # expression and the lookup fails.
    browser.find_element(By.CSS_SELECTOR, 'div.overflow-actions:nth-child(12) > ytcp-button:nth-child(1) > div:nth-child(2)').click()

    # Only the first track is processed: the loop is left after one iteration.
    break
|
|
|
|
#browser.quit() |