Make the media download work up to 100 results or duplicates
The `MAXIMAL_NUMBER_OF_RESULTS` constant was computed with the following script:

```py
import os, json

path = '/home/benjamin/Desktop/bens_folder/dev/yt/audio_library'

os.chdir(path)

with open('sound_effects.json') as json_file:
    tracks = json.load(json_file)

mostResults = 0
mostResultsTitle = None
for track in tracks:
    title = track['title']
    results = 0
    for otherTrack in tracks:
        if title in otherTrack['title']:
            results += 1
    if results > mostResults:
        mostResults = results
        mostResultsTitle = title

print(mostResults, mostResultsTitle)
```
This commit is contained in:
parent
4f7e9ac336
commit
5540098e96
@ -1,16 +1,18 @@
|
||||
import undetected_chromedriver.v2 as uc
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
import json
|
||||
import os, json, time
|
||||
|
||||
"""
|
||||
As there appears to be an anti-bot mechanism protecting media file downloads, we use a Selenium-based approach.
|
||||
"""
|
||||
|
||||
AUDIO_LIBRARY_URL = 'https://studio.youtube.com/channel/UC/music'
|
||||
|
||||
options = Options()
|
||||
options.add_argument("--user-data-dir=selenium")
|
||||
browser = uc.Chrome(options=options)
|
||||
browser.get('https://studio.youtube.com/channel/UC/music')
|
||||
browser.get(AUDIO_LIBRARY_URL)
|
||||
|
||||
"""
|
||||
For the `Music` tab, the YouTube UI returns 3,000 entries, while my reverse-engineering approach returns 5,819 entries.
|
||||
@ -23,15 +25,63 @@ As for `Sound effects`, even with `Sound effect`, `Duration`, `Category` and `Ad
|
||||
with open('music.json') as json_file:
|
||||
tracks = json.load(json_file)
|
||||
|
||||
for track in tracks:
|
||||
path = '/home/benjamin/Downloads'
|
||||
|
||||
os.chdir(path)
|
||||
|
||||
MAXIMAL_NUMBER_OF_RESULTS = 162
|
||||
|
||||
with open('rename.txt', 'w') as f:
|
||||
browser.find_element(By.XPATH, '//*[@id="trigger"]/ytcp-dropdown-trigger/div/div[2]/span').click()
|
||||
browser.find_element(By.CSS_SELECTOR, '#text-item-2 > ytcp-ve > div > div > yt-formatted-string').click()
|
||||
tracks = tracks[38:]
|
||||
for trackIndex, track in enumerate(tracks):
|
||||
seconds = track["duration"]["seconds"]
|
||||
cleanDuration = f'{seconds // 60}:{seconds % 60}'
|
||||
print(f'{trackIndex} / {len(tracks)}: {track["title"]} - {track["artist"]["name"]} - {cleanDuration}')
|
||||
browser.find_element(By.ID, 'text-input').send_keys(track['title'])
|
||||
browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()
|
||||
|
||||
number_of_results = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1])
|
||||
print(number_of_results)
|
||||
numberOfResults = MAXIMAL_NUMBER_OF_RESULTS + 1
|
||||
while numberOfResults > MAXIMAL_NUMBER_OF_RESULTS:
|
||||
if numberOfResults != MAXIMAL_NUMBER_OF_RESULTS + 1:
|
||||
time.sleep(1)
|
||||
numberOfResults = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1])
|
||||
print(f'Found {numberOfResults} results')
|
||||
|
||||
# `DOWNLOAD`
|
||||
browser.find_element(By.XPATH, 'div.overflow-actions:nth-child(12) > ytcp-button:nth-child(1) > div:nth-child(2)').click()
|
||||
# Doesn't block.
|
||||
if numberOfResults > 100:
|
||||
print('More than 100 results')
|
||||
break
|
||||
|
||||
if numberOfResults > 1:
|
||||
print('found multiple')
|
||||
occurrences = 0
|
||||
for resultsIndex in range(numberOfResults):
|
||||
row = f'/html/body/ytcp-app/ytcp-entity-page/div/div/main/div/ytcp-animatable[24]/ytmus-page/ytmus-library-table/div[1]/ytmus-library-row[{resultsIndex + 1}]/div/'
|
||||
title = browser.find_element(By.XPATH, row + 'div[2]/div').get_attribute('innerHTML')
|
||||
artistCommon = 'div[5]/div'
|
||||
try:
|
||||
artist = browser.find_element(By.XPATH, row + artistCommon + '/ytcp-hover-anchor/a/span')
|
||||
except:
|
||||
artist = browser.find_element(By.XPATH, row + artistCommon)
|
||||
artist = artist.get_attribute('innerHTML')
|
||||
print(title, artist)
|
||||
if title == track['title'] and artist == track['artist']['name']:
|
||||
print("it's a match")
|
||||
browser.find_element(By.XPATH, row + 'div[8]/div[2]/ytcp-button/div').click()
|
||||
occurrences += 1
|
||||
if occurrences > 1:
|
||||
print('multiple occurrences')
|
||||
break
|
||||
#break
|
||||
#browser.get('chrome://downloads/')
|
||||
#browser.get(AUDIO_LIBRARY_URL)
|
||||
else:
|
||||
browser.find_element(By.CSS_SELECTOR, 'ytcp-button.style-scope:nth-child(1) > div:nth-child(2)').click()
|
||||
browser.find_element(By.CSS_SELECTOR, '#delete-icon > tp-yt-iron-icon').click()
|
||||
f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n")
|
||||
#break
|
||||
|
||||
#browser.quit()
|
Loading…
Reference in New Issue
Block a user