Make the media download work up to 100 results or duplicates

The `MAXIMAL_NUMBER_OF_RESULTS` constant was computed with the following script:

```py
import json
import os

# Directory that holds the exported `sound_effects.json` file.
library_dir = '/home/benjamin/Desktop/bens_folder/dev/yt/audio_library'
os.chdir(library_dir)

with open('sound_effects.json') as handle:
    tracks = json.load(handle)

# For every track title, count how many track titles (its own included)
# contain it as a substring, and remember the title with the highest count.
best_count, best_title = 0, None
for candidate in tracks:
    needle = candidate['title']
    hits = sum(1 for other in tracks if needle in other['title'])
    # Strict comparison: on a tie, the first title reaching the maximum wins.
    if hits > best_count:
        best_count, best_title = hits, needle

print(best_count, best_title)
```
This commit is contained in:
Benjamin Loison 2023-02-04 19:12:54 +01:00
parent 4f7e9ac336
commit 5540098e96
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8

View File

@ -1,16 +1,18 @@
import undetected_chromedriver.v2 as uc import undetected_chromedriver.v2 as uc
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
import json import os, json, time
""" """
As there is something looking as an anti-bot for downloading media files, we use a Selenium-based approach. As there is something looking as an anti-bot for downloading media files, we use a Selenium-based approach.
""" """
AUDIO_LIBRARY_URL = 'https://studio.youtube.com/channel/UC/music'
options = Options() options = Options()
options.add_argument("--user-data-dir=selenium") options.add_argument("--user-data-dir=selenium")
browser = uc.Chrome(options=options) browser = uc.Chrome(options=options)
browser.get('https://studio.youtube.com/channel/UC/music') browser.get(AUDIO_LIBRARY_URL)
""" """
For `Music` tab, YouTube UI returns 3,000 entries while my reverse-engineering approach returns 5,819 entries. For `Music` tab, YouTube UI returns 3,000 entries while my reverse-engineering approach returns 5,819 entries.
@ -23,15 +25,63 @@ As for `Sound effects`, even with `Sound effect`, `Duration`, `Category` and `Ad
with open('music.json') as json_file: with open('music.json') as json_file:
tracks = json.load(json_file) tracks = json.load(json_file)
for track in tracks: path = '/home/benjamin/Downloads'
browser.find_element(By.ID, 'text-input').send_keys(track['title'])
browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()
number_of_results = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1]) os.chdir(path)
print(number_of_results)
# `DOWNLOAD` MAXIMAL_NUMBER_OF_RESULTS = 162
browser.find_element(By.XPATH, 'div.overflow-actions:nth-child(12) > ytcp-button:nth-child(1) > div:nth-child(2)').click()
break with open('rename.txt', 'w') as f:
browser.find_element(By.XPATH, '//*[@id="trigger"]/ytcp-dropdown-trigger/div/div[2]/span').click()
browser.find_element(By.CSS_SELECTOR, '#text-item-2 > ytcp-ve > div > div > yt-formatted-string').click()
tracks = tracks[38:]
for trackIndex, track in enumerate(tracks):
seconds = track["duration"]["seconds"]
cleanDuration = f'{seconds // 60}:{seconds % 60}'
print(f'{trackIndex} / {len(tracks)}: {track["title"]} - {track["artist"]["name"]} - {cleanDuration}')
browser.find_element(By.ID, 'text-input').send_keys(track['title'])
browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()
numberOfResults = MAXIMAL_NUMBER_OF_RESULTS + 1
while numberOfResults > MAXIMAL_NUMBER_OF_RESULTS:
if numberOfResults != MAXIMAL_NUMBER_OF_RESULTS + 1:
time.sleep(1)
numberOfResults = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1])
print(f'Found {numberOfResults} results')
# `DOWNLOAD`
# Doesn't block.
if numberOfResults > 100:
print('More than 100 results')
break
if numberOfResults > 1:
print('found multiple')
occurrences = 0
for resultsIndex in range(numberOfResults):
row = f'/html/body/ytcp-app/ytcp-entity-page/div/div/main/div/ytcp-animatable[24]/ytmus-page/ytmus-library-table/div[1]/ytmus-library-row[{resultsIndex + 1}]/div/'
title = browser.find_element(By.XPATH, row + 'div[2]/div').get_attribute('innerHTML')
artistCommon = 'div[5]/div'
try:
artist = browser.find_element(By.XPATH, row + artistCommon + '/ytcp-hover-anchor/a/span')
except:
artist = browser.find_element(By.XPATH, row + artistCommon)
artist = artist.get_attribute('innerHTML')
print(title, artist)
if title == track['title'] and artist == track['artist']['name']:
print("it's a match")
browser.find_element(By.XPATH, row + 'div[8]/div[2]/ytcp-button/div').click()
occurrences += 1
if occurrences > 1:
print('multiple occurrences')
break
#break
#browser.get('chrome://downloads/')
#browser.get(AUDIO_LIBRARY_URL)
else:
browser.find_element(By.CSS_SELECTOR, 'ytcp-button.style-scope:nth-child(1) > div:nth-child(2)').click()
browser.find_element(By.CSS_SELECTOR, '#delete-icon > tp-yt-iron-icon').click()
f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n")
#break
#browser.quit() #browser.quit()