Add media_files_extractor.py
				
					
				
			This commit is contained in:
		
							
								
								
									
										50
									
								
								media_files_extractor.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								media_files_extractor.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,50 @@
 | 
			
		||||
from selenium import webdriver
 | 
			
		||||
from selenium.webdriver.common.by import By
 | 
			
		||||
from selenium.webdriver.firefox.options import Options
 | 
			
		||||
#from selenium.webdriver.common.action_chains import ActionChains
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
As there seems to be an anti-bot mechanism protecting the download of media files, we use a Selenium-based approach.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
# Existing Firefox profile to start the browser with.
profile_path = '/home/benjamin/.mozilla/firefox/ilfnifi0.default-release'
fp = webdriver.FirefoxProfile(profile_path)
# Attach the profile through `Options` instead of passing it positionally to
# `webdriver.Firefox`: the positional form is deprecated in Selenium 4 and the
# first positional parameter is `options` in recent releases.
options = Options()
options.profile = fp

browser = webdriver.Firefox(options=options)
# Open the `music` page of YouTube Studio, on which the track loop operates.
browser.get('https://studio.youtube.com/channel/UC/music')
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
For the `Music` tab, the YouTube UI returns 3,000 entries while my reverse-engineering approach returns 5,819 entries.
 | 
			
		||||
For the `Sound effects` tab, the YouTube UI returns 400 entries while my reverse-engineering approach returns 2,021 entries.
 | 
			
		||||
 | 
			
		||||
I therefore assume that the YouTube UI pagination doesn't work correctly. To retrieve all media files, the idea is to filter by `Track title` and download all returned entries, as some tracks share the same title.
 | 
			
		||||
We could verify the `trackId` or `viperId` to avoid downloading other media files containing the provided title.
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
#actions = ActionChains(browser)
 | 
			
		||||
 | 
			
		||||
# Load the list of tracks to process. The encoding is given explicitly so that
# titles are not decoded with a locale-dependent default codec.
with open('music.json', encoding='utf-8') as json_file:
    tracks = json.load(json_file)
 | 
			
		||||
 | 
			
		||||
# For each track: type its title in the filter input, read the number of
# matching entries, then click the download button.
for track in tracks:
    # Earlier attempt based on `ActionChains`, kept for reference:
    #browser.find_element(By.ID, 'text-input').click()
    #browser.find_element(By.ID, 'text-item-2').click()
    #actions.send_keys(track['title'])
    #actions.perform()
    browser.find_element(By.ID, 'text-input').send_keys(track['title'])
    # Click the second item of the text menu (presumably the `Track title` filter - TODO confirm).
    browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()

    #browser.find_element(By.XPATH, '/html/body/ytcp-filter-dialog/tp-yt-paper-dialog/div[2]/ytcp-button/div').click()

    # The last whitespace-separated token of the `.page-description` element is parsed as the result count.
    number_of_results = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1])
    print(number_of_results)

    # `DOWNLOAD`
    # The locator below is a CSS selector (`:nth-child`, `>`), not an XPath
    # expression, so it has to be resolved with `By.CSS_SELECTOR`.
    browser.find_element(By.CSS_SELECTOR, 'div.overflow-actions:nth-child(12) > ytcp-button:nth-child(1) > div:nth-child(2)').click()
    # Only the first track is processed for now.
    break
 | 
			
		||||
 | 
			
		||||
#browser.quit()
 | 
			
		||||
		Reference in New Issue
	
	Block a user