Add goToAudioLibraryAndSelect100RowsPerPage
function to keep rows per page setting even after round-tripping chrome://downloads
This commit is contained in:
parent
9d12e9cbe3
commit
ead3fef5b3
@ -1,4 +1,8 @@
|
|||||||
import undetected_chromedriver.v2 as uc
|
import undetected_chromedriver as uc
|
||||||
|
"""
|
||||||
|
pip install undetected-chromedriver==3.2.1
|
||||||
|
This relies on the Linux Mint apt package of Chromium, which is only at version 109.
|
||||||
|
"""
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
import os, json, time
|
import os, json, time
|
||||||
@ -7,12 +11,22 @@ import os, json, time
|
|||||||
As there seems to be an anti-bot mechanism guarding media file downloads, we use a Selenium-based approach.
|
As there seems to be an anti-bot mechanism guarding media file downloads, we use a Selenium-based approach.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
path = '/home/benjamin/Desktop/bens_folder/dev/yt/audio_library'
|
||||||
|
|
||||||
|
os.chdir(path)
|
||||||
|
|
||||||
AUDIO_LIBRARY_URL = 'https://studio.youtube.com/channel/UC/music'
|
AUDIO_LIBRARY_URL = 'https://studio.youtube.com/channel/UC/music'
|
||||||
|
|
||||||
options = Options()
|
options = Options()
|
||||||
options.add_argument("--user-data-dir=selenium")
|
options.add_argument("--user-data-dir=selenium")
|
||||||
browser = uc.Chrome(options=options)
|
browser = uc.Chrome(options=options, version_main=109)
|
||||||
browser.get(AUDIO_LIBRARY_URL)
|
|
||||||
|
def goToAudioLibraryAndSelect100RowsPerPage():
    """Load the audio library page and pick an entry in its rows-per-page dropdown.

    Navigates the module-level `browser` to `AUDIO_LIBRARY_URL`, clicks the
    dropdown trigger, then clicks the `#text-item-2` entry of the opened menu.
    Going by the function name, that entry is presumably the "100 rows per
    page" option (to be confirmed against the live page).

    Intended to be called again after any navigation away from the audio
    library, since the rows-per-page setting is reset by such a round trip.
    """
    # Locators as (strategy, selector) pairs, in the order they must be clicked.
    rowsPerPageTrigger = (By.XPATH, '//*[@id="trigger"]/ytcp-dropdown-trigger/div/div[2]/span')
    rowsPerPageEntry = (By.CSS_SELECTOR, '#text-item-2 > ytcp-ve > div > div > yt-formatted-string')

    browser.get(AUDIO_LIBRARY_URL)
    for locator in (rowsPerPageTrigger, rowsPerPageEntry):
        browser.find_element(*locator).click()
|
||||||
|
|
||||||
|
goToAudioLibraryAndSelect100RowsPerPage()
|
||||||
|
|
||||||
"""
|
"""
|
||||||
For the `Music` tab, the YouTube UI returns 3,000 entries while my reverse-engineering approach returns 5,819 entries.
|
For the `Music` tab, the YouTube UI returns 3,000 entries while my reverse-engineering approach returns 5,819 entries.
|
||||||
@ -29,16 +43,24 @@ path = '/home/benjamin/Downloads'
|
|||||||
|
|
||||||
os.chdir(path)
|
os.chdir(path)
|
||||||
|
|
||||||
MAXIMAL_NUMBER_OF_RESULTS = 162
|
MAXIMAL_NUMBER_OF_RESULTS = 367
|
||||||
|
|
||||||
|
alreadyTreatedMultipleOccurrences = set()
|
||||||
|
|
||||||
with open('rename.txt', 'w') as f:
|
with open('rename.txt', 'w') as f:
|
||||||
browser.find_element(By.XPATH, '//*[@id="trigger"]/ytcp-dropdown-trigger/div/div[2]/span').click()
|
#tracks = tracks[36:]
|
||||||
browser.find_element(By.CSS_SELECTOR, '#text-item-2 > ytcp-ve > div > div > yt-formatted-string').click()
|
|
||||||
tracks = tracks[38:]
|
|
||||||
for trackIndex, track in enumerate(tracks):
|
for trackIndex, track in enumerate(tracks):
|
||||||
seconds = int(track["duration"]["seconds"])
|
seconds = int(track["duration"]["seconds"])
|
||||||
|
# Note that the leading `0` for seconds may be missing.
|
||||||
cleanDuration = f'{seconds // 60}:{seconds % 60}'
|
cleanDuration = f'{seconds // 60}:{seconds % 60}'
|
||||||
print(f'{trackIndex} / {len(tracks)}: {track["title"]} - {track["artist"]["name"]} - {cleanDuration}')
|
id = f'{track["title"]} - {track["artist"]["name"]} - {cleanDuration}'
|
||||||
|
print(f'{trackIndex} / {len(tracks)}: {id}')
|
||||||
|
if id in alreadyTreatedMultipleOccurrences:
|
||||||
|
print('Already treated these multiple occurrences')
|
||||||
|
continue
|
||||||
|
|
||||||
|
alreadyTreatedMultipleOccurrences.add(id)
|
||||||
|
|
||||||
browser.find_element(By.ID, 'text-input').send_keys(track['title'])
|
browser.find_element(By.ID, 'text-input').send_keys(track['title'])
|
||||||
browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()
|
browser.find_element(By.XPATH, '/html/body/ytcp-text-menu/tp-yt-paper-dialog/tp-yt-paper-listbox/tp-yt-paper-item[2]/ytcp-ve/div/div/yt-formatted-string/span[1]').click()
|
||||||
|
|
||||||
@ -49,16 +71,19 @@ with open('rename.txt', 'w') as f:
|
|||||||
numberOfResults = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1])
|
numberOfResults = int(browser.find_element(By.CSS_SELECTOR, '.page-description').get_attribute('innerHTML').split()[-1])
|
||||||
print(f'Found {numberOfResults} results')
|
print(f'Found {numberOfResults} results')
|
||||||
|
|
||||||
|
# I noticed that after a round trip to chrome://downloads, the initial rows-per-page setting is reset.
|
||||||
|
#rowsPerPage = int(browser.find_element(By.CSS_SELECTOR, '#trigger > ytcp-dropdown-trigger > div > div.left-container.style-scope.ytcp-dropdown-trigger > span').get_attribute('innerHTML'))
|
||||||
|
#print('rowsPerPage:', rowsPerPage)
|
||||||
|
|
||||||
# `DOWNLOAD`
|
# `DOWNLOAD`
|
||||||
# Doesn't block.
|
# Doesn't block.
|
||||||
if numberOfResults > 100:
|
hasMultipleOccurrences = False
|
||||||
print('More than 100 results')
|
|
||||||
break
|
|
||||||
|
|
||||||
if numberOfResults > 1:
|
if numberOfResults > 1:
|
||||||
print('found multiple')
|
print('found multiple')
|
||||||
occurrences = 0
|
occurrences = 0
|
||||||
for resultsIndex in range(numberOfResults):
|
upperBound = ((numberOfResults - 1) // 100) + 1
|
||||||
|
for i in range(upperBound):
|
||||||
|
for resultsIndex in range(min(100, numberOfResults - i * 100)):
|
||||||
row = f'/html/body/ytcp-app/ytcp-entity-page/div/div/main/div/ytcp-animatable[24]/ytmus-page/ytmus-library-table/div[1]/ytmus-library-row[{resultsIndex + 1}]/div/'
|
row = f'/html/body/ytcp-app/ytcp-entity-page/div/div/main/div/ytcp-animatable[24]/ytmus-page/ytmus-library-table/div[1]/ytmus-library-row[{resultsIndex + 1}]/div/'
|
||||||
title = browser.find_element(By.XPATH, row + 'div[2]/div').get_attribute('innerHTML')
|
title = browser.find_element(By.XPATH, row + 'div[2]/div').get_attribute('innerHTML')
|
||||||
artistCommon = 'div[5]/div'
|
artistCommon = 'div[5]/div'
|
||||||
@ -67,29 +92,37 @@ with open('rename.txt', 'w') as f:
|
|||||||
except:
|
except:
|
||||||
artist = browser.find_element(By.XPATH, row + artistCommon)
|
artist = browser.find_element(By.XPATH, row + artistCommon)
|
||||||
artist = artist.get_attribute('innerHTML')
|
artist = artist.get_attribute('innerHTML')
|
||||||
print(title, artist)
|
print(resultsIndex, title, artist)
|
||||||
if title == track['title'] and artist == track['artist']['name']:
|
if title == track['title'] and artist == track['artist']['name']:
|
||||||
print("it's a match")
|
print("it's a match")
|
||||||
browser.find_element(By.XPATH, row + 'div[8]/div[2]/ytcp-button/div').click()
|
browser.find_element(By.XPATH, row + 'div[8]/div[2]/ytcp-button/div').click()
|
||||||
occurrences += 1
|
occurrences += 1
|
||||||
|
if upperBound > 1:
|
||||||
|
browser.find_element(By.CSS_SELECTOR, '#navigate-after > tp-yt-iron-icon').click()
|
||||||
if occurrences > 1:
|
if occurrences > 1:
|
||||||
print('multiple occurrences')
|
print('MULTIPLE OCCURRENCES')
|
||||||
# This isn't a clean solution.
|
# This isn't a clean solution.
|
||||||
time.sleep(1)
|
# It is questionable whether this is even necessary when connected via optical fiber, plugged into power and not running anything else on the computer.
|
||||||
|
time.sleep(4)
|
||||||
browser.get('chrome://downloads/')
|
browser.get('chrome://downloads/')
|
||||||
for occurrence in range(occurrences):
|
for occurrence in range(occurrences):
|
||||||
url = browser.find_element(By.XPATH, f'/html/body/downloads-manager').shadow_root.find_element(By.ID, 'downloadsList').find_element(By.ID, f'frb{occurrences - occurrence - 1}').shadow_root.find_element(By.ID, 'url').get_attribute('href')
|
download = browser.find_element(By.XPATH, f'/html/body/downloads-manager').shadow_root.find_element(By.ID, 'downloadsList').find_element(By.ID, f'frb{occurrences - occurrence - 1}').shadow_root
|
||||||
|
url = download.find_element(By.ID, 'url').get_attribute('href')
|
||||||
|
downloadFileName = download.find_element(By.ID, 'file-link').get_attribute('innerHTML')
|
||||||
viperId = url.split('&id=')[1].split('&')[0]
|
viperId = url.split('&id=')[1].split('&')[0]
|
||||||
print(viperId)
|
print(viperId)
|
||||||
occurrenceStr = '' if occurrence == 0 else f'({occurrence})'
|
occurrenceStr = '' if occurrence == 0 else f' ({occurrence})'
|
||||||
f.write(f"{track['title']} - {track['artist']['name']}{occurrenceStr}|{viperId}\n")
|
fileName = f"{track['title']} - {track['artist']['name']}{occurrenceStr}"
|
||||||
browser.get(AUDIO_LIBRARY_URL)
|
if f'{fileName}.mp3' != downloadFileName:
|
||||||
break
|
print("The solution isn't clean enough!'")
|
||||||
else:
|
exit(1)
|
||||||
f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n")
|
f.write(f"{fileName}|{viperId}\n")
|
||||||
|
hasMultipleOccurrences = True
|
||||||
|
goToAudioLibraryAndSelect100RowsPerPage()
|
||||||
#break
|
#break
|
||||||
else:
|
else:
|
||||||
browser.find_element(By.CSS_SELECTOR, 'ytcp-button.style-scope:nth-child(1) > div:nth-child(2)').click()
|
browser.find_element(By.CSS_SELECTOR, 'ytcp-button.style-scope:nth-child(1) > div:nth-child(2)').click()
|
||||||
|
if not hasMultipleOccurrences:
|
||||||
f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n")
|
f.write(f"{track['title']} - {track['artist']['name']}|{track['viperId']}\n")
|
||||||
browser.find_element(By.CSS_SELECTOR, '#delete-icon > tp-yt-iron-icon').click()
|
browser.find_element(By.CSS_SELECTOR, '#delete-icon > tp-yt-iron-icon').click()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user