2023-02-04 19:08:02 +01:00
import undetected_chromedriver . v2 as uc
2023-02-04 14:18:26 +01:00
from selenium . webdriver . common . by import By
2023-02-04 19:08:02 +01:00
from selenium . webdriver . chrome . options import Options
2023-02-04 19:12:54 +01:00
import os , json , time
2023-02-04 14:18:26 +01:00
"""
Downloading media files seems to be protected by an anti-bot mechanism, so we use a Selenium-based approach.
"""
2023-02-04 19:12:54 +01:00
# Page that is loaded at start-up and reloaded after visiting `chrome://downloads/`.
AUDIO_LIBRARY_URL = 'https://studio.youtube.com/channel/UC/music'

# `--user-data-dir` points Chrome at the `selenium` profile directory
# (presumably so that the signed-in session is kept between runs).
options = Options()
options.add_argument("--user-data-dir=selenium")

# Start the undetected-chromedriver Chrome instance and open the library page.
browser = uc.Chrome(options=options)
browser.get(AUDIO_LIBRARY_URL)
2023-02-04 14:18:26 +01:00
"""
For the `Music` tab, the YouTube UI returns 3,000 entries while my reverse-engineering approach returns 5,819 entries.
For the `Sound effects` tab, the YouTube UI returns 400 entries while my reverse-engineering approach returns 2,021 entries.
So I assume the YouTube UI pagination doesn't work properly. To retrieve all media files, the idea is to filter by `Track title` and download all entries, preferably only those that have the title we are looking for, as some tracks share the same title.
As for `Sound effects`, even with `Sound effect`, `Duration`, `Category` and `Added` it remains ambiguous which file we refer to (for instance for `Truck Driving in Parking Structure`, as they are all different).
2023-02-04 14:18:26 +01:00
"""
# Imported here because only this section needs the exception type.
from selenium.common.exceptions import NoSuchElementException

# Track metadata. Each entry is read below for `title`, `artist.name`,
# `duration.seconds` and `viperId`.
with open('music.json', encoding='utf-8') as json_file:
    tracks = json.load(json_file)

# `rename.txt` is written relative to this directory
# (presumably Chrome's download directory - TODO confirm).
path = '/home/benjamin/Downloads'
os.chdir(path)

# The result counter is polled until it drops to this value or below.
# NOTE(review): presumably a larger count means the table still shows the
# unfiltered library - confirm where 162 comes from.
MAXIMAL_NUMBER_OF_RESULTS = 162

# Each line written to `rename.txt` is `<title> - <artist>[ (<n>)]|<viperId>`.
with open('rename.txt', 'w', encoding='utf-8') as f:
    # One-off UI preparation: open a dropdown and pick its `#text-item-2` entry.
    # NOTE(review): which dropdown this is cannot be told from the selectors alone.
    browser.find_element(By.XPATH, '//*[@id="trigger"]/ytcp-dropdown-trigger/div/div[2]/span').click()
    browser.find_element(By.CSS_SELECTOR, '#text-item-2 > ytcp-ve > div > div > yt-formatted-string').click()

    # Skip the first 38 tracks (presumably already handled by a previous run).
    tracks = tracks[38:]
    for trackIndex, track in enumerate(tracks):
        seconds = int(track["duration"]["seconds"])
        # Zero-pad the seconds so that 185 s is shown as `3:05`, not `3:5`.
        cleanDuration = f'{seconds // 60}:{seconds % 60:02d}'
        print(f'{trackIndex} / {len(tracks)}: {track["title"]} - {track["artist"]["name"]} - {cleanDuration}')

        # Type the title into the filter box and pick the second suggestion of
        # the menu that pops up (presumably the `Track title` filter).
        browser.find_element(By.ID, 'text-input').send_keys(track['title'])
        browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()

        # Poll the last word of the page description (the result count) once per
        # second until it is small enough to belong to the filtered table.
        while True:
            pageDescription = browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML')
            numberOfResults = int(pageDescription.split()[-1])
            if numberOfResults <= MAXIMAL_NUMBER_OF_RESULTS:
                break
            time.sleep(1)
        print(f'Found {numberOfResults} results')

        # `DOWNLOAD`
        # Doesn't block.
        # NOTE(review): more than 100 results stops the whole run; presumably
        # because they would not fit on a single page - confirm.
        if numberOfResults > 100:
            print('More than 100 results')
            break

        if numberOfResults > 1:
            print('found multiple')
            occurrences = 0
            for resultsIndex in range(numberOfResults):
                # XPath indices are 1-based, hence the `+ 1`.
                row = f'/html/body/ytcp-app/ytcp-entity-page/div/div/main/div/ytcp-animatable[24]/ytmus-page/ytmus-library-table/div[1]/ytmus-library-row[{resultsIndex + 1}]/div/'
                title = browser.find_element(By.XPATH, row + 'div[2]/div').get_attribute('innerHTML')
                artistCommon = 'div[5]/div'
                # The artist cell either wraps the name in a link or holds it directly.
                try:
                    artist = browser.find_element(By.XPATH, row + artistCommon + '/ytcp-hover-anchor/a/span')
                except NoSuchElementException:
                    artist = browser.find_element(By.XPATH, row + artistCommon)
                artist = artist.get_attribute('innerHTML')
                print(title, artist)
                # Only download the rows that match both title and artist.
                if title == track['title'] and artist == track['artist']['name']:
                    print("it's a match")
                    browser.find_element(By.XPATH, row + 'div[8]/div[2]/ytcp-button/div').click()
                    occurrences += 1

            if occurrences > 1:
                print('multiple occurrences')
                # This isn't a clean solution.
                # Several rows share title and artist, so the id of each file is
                # read back from the download URLs listed on `chrome://downloads/`.
                time.sleep(1)
                browser.get('chrome://downloads/')
                downloadsList = browser.find_element(By.XPATH, '/html/body/downloads-manager').shadow_root.find_element(By.ID, 'downloadsList')
                for occurrence in range(occurrences):
                    # Walk the entries from `frb{occurrences - 1}` down to `frb0`.
                    downloadItem = downloadsList.find_element(By.ID, f'frb{occurrences - occurrence - 1}')
                    url = downloadItem.shadow_root.find_element(By.ID, 'url').get_attribute('href')
                    viperId = url.split('&id=')[1].split('&')[0]
                    print(viperId)
                    # The first file has no suffix, later ones get ` (1)`, ` (2)`, ...
                    occurrenceStr = '' if occurrence == 0 else f' ({occurrence})'
                    f.write(f"{track['title']} - {track['artist']['name']}{occurrenceStr}|{viperId}\n")
                browser.get(AUDIO_LIBRARY_URL)
                # NOTE(review): this stops the whole run after the first track
                # with several matching rows - confirm that this is intended.
                break
            else:
                # NOTE(review): also reached when `occurrences == 0`, i.e. when
                # nothing was downloaded - confirm an entry should be written then.
                f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n")
        else:
            # Single result: click its download button directly.
            browser.find_element(By.CSS_SELECTOR, 'ytcp-button.style-scope:nth-child(1) > div:nth-child(2)').click()
            f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n")

        # Remove the current filter before moving on to the next track.
        browser.find_element(By.CSS_SELECTOR, '#delete-icon > tp-yt-iron-icon').click()
2023-02-04 14:18:26 +01:00
#browser.quit()