Add goToAudioLibraryAndSelect100RowsPerPage function to keep rows per page setting even after round-tripping chrome://downloads

This commit is contained in:
Benjamin Loison 2023-02-20 14:19:16 +01:00
parent 9d12e9cbe3
commit ead3fef5b3
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8

View File

@ -1,4 +1,8 @@
import undetected_chromedriver.v2 as uc import undetected_chromedriver as uc
"""
pip install undetected-chromedriver==3.2.1
This relies on the Chromium package from the Linux Mint apt repository, which is only at version 109.
"""
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
import os, json, time import os, json, time
@ -7,12 +11,22 @@ import os, json, time
As there seems to be an anti-bot mechanism for downloading media files, we use a Selenium-based approach. As there seems to be an anti-bot mechanism for downloading media files, we use a Selenium-based approach.
""" """
path = '/home/benjamin/Desktop/bens_folder/dev/yt/audio_library'
os.chdir(path)
AUDIO_LIBRARY_URL = 'https://studio.youtube.com/channel/UC/music' AUDIO_LIBRARY_URL = 'https://studio.youtube.com/channel/UC/music'
options = Options() options = Options()
options.add_argument("--user-data-dir=selenium") options.add_argument("--user-data-dir=selenium")
browser = uc.Chrome(options=options) browser = uc.Chrome(options=options, version_main=109)
def goToAudioLibraryAndSelect100RowsPerPage():
browser.get(AUDIO_LIBRARY_URL) browser.get(AUDIO_LIBRARY_URL)
browser.find_element(By.XPATH, '//*[@id="trigger"]/ytcp-dropdown-trigger/div/div[2]/span').click()
browser.find_element(By.CSS_SELECTOR, '#text-item-2 > ytcp-ve > div > div > yt-formatted-string').click()
goToAudioLibraryAndSelect100RowsPerPage()
""" """
For `Music` tab, YouTube UI returns 3,000 entries while my reverse-engineering approach returns 5,819 entries. For `Music` tab, YouTube UI returns 3,000 entries while my reverse-engineering approach returns 5,819 entries.
@ -29,16 +43,24 @@ path = '/home/benjamin/Downloads'
os.chdir(path) os.chdir(path)
MAXIMAL_NUMBER_OF_RESULTS = 162 MAXIMAL_NUMBER_OF_RESULTS = 367
alreadyTreatedMultipleOccurrences = set()
with open('rename.txt', 'w') as f: with open('rename.txt', 'w') as f:
browser.find_element(By.XPATH, '//*[@id="trigger"]/ytcp-dropdown-trigger/div/div[2]/span').click() #tracks = tracks[36:]
browser.find_element(By.CSS_SELECTOR, '#text-item-2 > ytcp-ve > div > div > yt-formatted-string').click()
tracks = tracks[38:]
for trackIndex, track in enumerate(tracks): for trackIndex, track in enumerate(tracks):
seconds = int(track["duration"]["seconds"]) seconds = int(track["duration"]["seconds"])
# Note that the leading `0` for seconds may be missing.
cleanDuration = f'{seconds // 60}:{seconds % 60}' cleanDuration = f'{seconds // 60}:{seconds % 60}'
print(f'{trackIndex} / {len(tracks)}: {track["title"]} - {track["artist"]["name"]} - {cleanDuration}') id = f'{track["title"]} - {track["artist"]["name"]} - {cleanDuration}'
print(f'{trackIndex} / {len(tracks)}: {id}')
if id in alreadyTreatedMultipleOccurrences:
print('Already treated these multiple occurrences')
continue
alreadyTreatedMultipleOccurrences.add(id)
browser.find_element(By.ID, 'text-input').send_keys(track['title']) browser.find_element(By.ID, 'text-input').send_keys(track['title'])
browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click() browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()
@ -49,16 +71,19 @@ with open('rename.txt', 'w') as f:
numberOfResults = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1]) numberOfResults = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1])
print(f'Found {numberOfResults} results') print(f'Found {numberOfResults} results')
# I noticed that after a round-trip to chrome://downloads, the initial rows-per-page setting is reset.
#rowsPerPage = int(browser.find_element(By.CSS_SELECTOR, '#trigger > ytcp-dropdown-trigger > div > div.left-container.style-scope.ytcp-dropdown-trigger > span').get_attribute('innerHTML'))
#print('rowsPerPage:', rowsPerPage)
# `DOWNLOAD` # `DOWNLOAD`
# Doesn't block. # Doesn't block.
if numberOfResults > 100: hasMultipleOccurrences = False
print('More than 100 results')
break
if numberOfResults > 1: if numberOfResults > 1:
print('found multiple') print('found multiple')
occurrences = 0 occurrences = 0
for resultsIndex in range(numberOfResults): upperBound = ((numberOfResults - 1) // 100) + 1
for i in range(upperBound):
for resultsIndex in range(min(100, numberOfResults - i * 100)):
row = f'/html/body/ytcp-app/ytcp-entity-page/div/div/main/div/ytcp-animatable[24]/ytmus-page/ytmus-library-table/div[1]/ytmus-library-row[{resultsIndex + 1}]/div/' row = f'/html/body/ytcp-app/ytcp-entity-page/div/div/main/div/ytcp-animatable[24]/ytmus-page/ytmus-library-table/div[1]/ytmus-library-row[{resultsIndex + 1}]/div/'
title = browser.find_element(By.XPATH, row + 'div[2]/div').get_attribute('innerHTML') title = browser.find_element(By.XPATH, row + 'div[2]/div').get_attribute('innerHTML')
artistCommon = 'div[5]/div' artistCommon = 'div[5]/div'
@ -67,29 +92,37 @@ with open('rename.txt', 'w') as f:
except: except:
artist = browser.find_element(By.XPATH, row + artistCommon) artist = browser.find_element(By.XPATH, row + artistCommon)
artist = artist.get_attribute('innerHTML') artist = artist.get_attribute('innerHTML')
print(title, artist) print(resultsIndex, title, artist)
if title == track['title'] and artist == track['artist']['name']: if title == track['title'] and artist == track['artist']['name']:
print("it's a match") print("it's a match")
browser.find_element(By.XPATH, row + 'div[8]/div[2]/ytcp-button/div').click() browser.find_element(By.XPATH, row + 'div[8]/div[2]/ytcp-button/div').click()
occurrences += 1 occurrences += 1
if upperBound > 1:
browser.find_element(By.CSS_SELECTOR, '#navigate-after > tp-yt-iron-icon').click()
if occurrences > 1: if occurrences > 1:
print('multiple occurrences') print('MULTIPLE OCCURRENCES')
# This isn't a clean solution. # This isn't a clean solution.
time.sleep(1) # Questionable if it's even necessary when plugged with optic fiber and power and not running anything else on the computer.
time.sleep(4)
browser.get('chrome://downloads/') browser.get('chrome://downloads/')
for occurrence in range(occurrences): for occurrence in range(occurrences):
url = browser.find_element(By.XPATH, f'/html/body/downloads-manager').shadow_root.find_element(By.ID, 'downloadsList').find_element(By.ID, f'frb{occurrences - occurrence - 1}').shadow_root.find_element(By.ID, 'url').get_attribute('href') download = browser.find_element(By.XPATH, f'/html/body/downloads-manager').shadow_root.find_element(By.ID, 'downloadsList').find_element(By.ID, f'frb{occurrences - occurrence - 1}').shadow_root
url = download.find_element(By.ID, 'url').get_attribute('href')
downloadFileName = download.find_element(By.ID, 'file-link').get_attribute('innerHTML')
viperId = url.split('&id=')[1].split('&')[0] viperId = url.split('&id=')[1].split('&')[0]
print(viperId) print(viperId)
occurrenceStr = '' if occurrence == 0 else f' ({occurrence})' occurrenceStr = '' if occurrence == 0 else f' ({occurrence})'
f.write(f"{track['title']} - {track['artist']['name']}{occurrenceStr}|{viperId}\n") fileName = f"{track['title']} - {track['artist']['name']}{occurrenceStr}"
browser.get(AUDIO_LIBRARY_URL) if f'{fileName}.mp3' != downloadFileName:
break print("The solution isn't clean enough!'")
else: exit(1)
f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n") f.write(f"{fileName}|{viperId}\n")
hasMultipleOccurrences = True
goToAudioLibraryAndSelect100RowsPerPage()
#break #break
else: else:
browser.find_element(By.CSS_SELECTOR, 'ytcp-button.style-scope:nth-child(1) > div:nth-child(2)').click() browser.find_element(By.CSS_SELECTOR, 'ytcp-button.style-scope:nth-child(1) > div:nth-child(2)').click()
if not hasMultipleOccurrences:
f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n") f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n")
browser.find_element(By.CSS_SELECTOR, '#delete-icon > tp-yt-iron-icon').click() browser.find_element(By.CSS_SELECTOR, '#delete-icon > tp-yt-iron-icon').click()