From e1e087267aca7a16cc54212e5efd67623f8c28a9 Mon Sep 17 00:00:00 2001
From: Benjamin_Loison <benjamin_loison@noreply.localhost>
Date: Wed, 25 Jan 2023 00:51:13 +0100
Subject: [PATCH] Add Python code associated to paragraph concerning YouTube
 exact search inconsistency

---
 Home.md | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
diff --git a/Home.md b/Home.md
index ee602e9..1fad285 100644
--- a/Home.md
+++ b/Home.md
@@ -74,6 +74,92 @@ Note that [YouTube UI](https://www.youtube.com/results?search_query=%22kids+have
 
 From [my experience with YouTube](https://stackoverflow.com/users/7123660/benjamin-loison) which starts to be significant, we can't rely on YouTube search feature, as they give weird results as shown. However YouTube gives quite correctly the information concerning a given video id, so [the best approach that I am aware of](https://stackoverflow.com/a/69259093) to returns exactly correct and as far as possible exhaustive results consists in discovering the maximum number of videos through some crawling approach as I sketch in the last paragraph of the project proposal.
 
+<details>
+<summary>The code associated with this approach is here:</summary>
+    
+```py
+import requests, json, subprocess
+
+channelId = 'UCAuUUnT6oDeKwE6v1NGQxug'  # id of the channel to inspect
+uploadsPlaylistId = 'UU' + channelId[2:]  # the channel id with its first two characters replaced by 'UU'; passed as `playlistId` below
+
+def getJson(url):
+    url = f'https://yt.lemnoslife.com/{url}'
+    content = requests.get(url).text
+    data = json.loads(content)
+    return data
+
+videoIds = []
+
+pageToken = ''
+while True:
+    data = getJson(f'noKey/playlistItems?part=snippet&playlistId={uploadsPlaylistId}&maxResults=50&pageToken={pageToken}')
+    items = data['items']
+    print(len(videoIds))
+    for item in items:
+        #print(item)
+        videoId = item['snippet']['resourceId']['videoId']
+        #print(videoId)
+        videoIds += [videoId]
+    if 'nextPageToken' in data:
+        pageToken = data['nextPageToken']
+    else:
+        break
+
+print(len(videoIds))
+# 4185
+
+videoIds = videoIds[::-1]
+
+def execute(command):  # Run `command` through the shell; `check_output` raises CalledProcessError on a non-zero exit status.
+    subprocess.check_output(command, shell = True)  # the captured output is discarded
+
+videoIds = videoIds[2968:]
+
+##
+
+# 2968 SMnKboI4fvY
+
+for videoIndex, videoId in enumerate(videoIds):
+    print(videoIndex, videoId)
+    data = getJson(f'noKey/captions?part=snippet&videoId={videoId}')
+    items = data['items']
+    if len(items) <= 2:
+        for item in items:
+            snippet = item['snippet']
+            trackKind = snippet['trackKind']
+            language = snippet['language']
+            if language == 'en' and trackKind == 'standard':
+                print('Found')
+                #execute('notify-send "Found"')
+                break
+
+##
+
+# Find shortest video:
+
+url = 'noKey/search?part=snippet&q="your software Linux is in millions of computers"&maxResults=50'
+data = getJson(url)
+items = data['items']
+setVideoIds = []
+shortestVideo = 10 ** 9
+shortestVideoId = None
+for item in items:
+    videoId = item['id']['videoId']
+    print(videoId)
+    setVideoIds += [videoId]
+    url = f'videos?part=contentDetails&id={videoId}'
+    data = getJson(url)
+    duration = data['items'][0]['contentDetails']['duration']
+    if shortestVideo > duration and duration > 0:
+        shortestVideo = duration
+        shortestVideoId = videoId
+
+print(shortestVideoId, shortestVideo)
+```
+    
+</details>
+
 ## Concerning 20,000 videos limit for YouTube Data API v3 PlaylistItems: list endpoint
 
 Could try both (`-i` was required for ignoring errors such as age-restricted videos):