Add Python code associated to paragraph concerning YouTube exact search inconsistency
parent
49ff87ac54
commit
e1e087267a
86
Home.md
86
Home.md
@ -74,6 +74,92 @@ Note that [YouTube UI](https://www.youtube.com/results?search_query=%22kids+have
|
||||
|
||||
From [my experience with YouTube](https://stackoverflow.com/users/7123660/benjamin-loison), which is starting to be significant, we can't rely on the YouTube search feature, as it gives weird results, as shown. However, YouTube returns quite accurate information about a given video id, so [the best approach that I am aware of](https://stackoverflow.com/a/69259093) to return exactly correct and, as far as possible, exhaustive results consists in discovering the maximum number of videos through some crawling approach, as I sketch in the last paragraph of the project proposal.
|
||||
|
||||
<details>
|
||||
<summary>The code associated with this approach is here:</summary>
|
||||
|
||||
```py
|
||||
import requests, json, subprocess
|
||||
|
||||
# Channel whose videos are enumerated by the rest of the script.
channelId = 'UCAuUUnT6oDeKwE6v1NGQxug'
# Replace the two-character 'UC' prefix with 'UU'; presumably this gives the id
# of the channel's uploads playlist (confirm against the YouTube Data API).
uploadsPlaylistId = f'UU{channelId[2:]}'
|
||||
|
||||
def getJson(url):
    """Fetch a path from https://yt.lemnoslife.com/ and return the decoded JSON.

    Args:
        url: Path and query string relative to the API root, for instance
            'noKey/captions?part=snippet&videoId=...'.

    Returns:
        The response body parsed with `json.loads`.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: The server did not answer within 30 seconds.
        json.JSONDecodeError: The response body is not valid JSON.
    """
    url = f'https://yt.lemnoslife.com/{url}'
    # Without an explicit timeout `requests` can wait forever on a stalled
    # connection, which would freeze the crawl loops that call this helper.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors rather than handing an error payload to
    # callers that immediately index into data['items'].
    response.raise_for_status()
    return json.loads(response.text)
|
||||
|
||||
# Collect the id of every entry of the uploads playlist, one page of at most
# 50 entries per request, following `nextPageToken` until no page is left.
videoIds = []
pageToken = ''
hasMorePages = True
while hasMorePages:
    page = getJson(f'noKey/playlistItems?part=snippet&playlistId={uploadsPlaylistId}&maxResults=50&pageToken={pageToken}')
    pageItems = page['items']
    # Progress indicator: number of ids gathered before this page is added.
    print(len(videoIds))
    videoIds.extend(entry['snippet']['resourceId']['videoId'] for entry in pageItems)
    hasMorePages = 'nextPageToken' in page
    if hasMorePages:
        pageToken = page['nextPageToken']

print(len(videoIds))
# 4185

# Walk the ids in the opposite order to the one the API returned them in.
videoIds.reverse()
|
||||
|
||||
def execute(command):
    """Run `command` through the shell and discard its captured output.

    Raises `subprocess.CalledProcessError` when the command exits with a
    non-zero status. Nothing is returned.
    """
    # NOTE: the string is interpreted by the shell; only pass trusted commands.
    subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE)
|
||||
|
||||
# Skip the first 2968 ids (presumably already processed by an earlier run —
# confirm before reusing this offset).
videoIds = videoIds[2968:]

##

# 2968 SMnKboI4fvY

# Report every video that has an English caption track of kind 'standard'.
for videoIndex, videoId in enumerate(videoIds):
    print(videoIndex, videoId)
    captionTracks = getJson(f'noKey/captions?part=snippet&videoId={videoId}')['items']
    # Only videos with at most two caption tracks are inspected.
    if len(captionTracks) > 2:
        continue
    for captionTrack in captionTracks:
        trackSnippet = captionTrack['snippet']
        trackKind, language = trackSnippet['trackKind'], trackSnippet['language']
        if (language, trackKind) == ('en', 'standard'):
            print('Found')
            #execute('notify-send "Found"')
            # One matching track is enough for this video.
            break
|
||||
|
||||
##
|
||||
|
||||
# Find shortest video:

# Search for the exact phrase, then look up each result's duration and keep
# the video with the smallest strictly positive one.
url = 'noKey/search?part=snippet&q="your software Linux is in millions of computers"&maxResults=50'
data = getJson(url)
items = data['items']
setVideoIds = []
# Sentinel larger than any duration expected here, so the first valid video wins.
shortestVideo = 10 ** 9
shortestVideoId = None
for item in items:
    # Search results may be channels or playlists, whose `id` carries
    # `channelId`/`playlistId` instead of `videoId`; skip those instead of
    # crashing with a KeyError.
    videoId = item['id'].get('videoId')
    if videoId is None:
        continue
    print(videoId)
    setVideoIds.append(videoId)
    videoData = getJson(f'videos?part=contentDetails&id={videoId}')
    # `duration` is compared numerically below; presumably a number of
    # seconds — confirm against the endpoint's documentation.
    duration = videoData['items'][0]['contentDetails']['duration']
    # Durations that are not strictly positive are ignored.
    if 0 < duration < shortestVideo:
        shortestVideo = duration
        shortestVideoId = videoId

print(shortestVideoId, shortestVideo)
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Concerning 20,000 videos limit for YouTube Data API v3 PlaylistItems: list endpoint
|
||||
|
||||
Could try both (`-i` was required for ignoring errors such as age-restricted videos):
|
||||
|
Loading…
Reference in New Issue
Block a user