# Provenance: search_in_youtube_video_comments.py as added by commit
# 902d2084f138fbdc40717dc9c85d47c8c5b9e6bf (Benjamin Loison,
# Thu Apr 27 00:01:58 2023 +0200),
# "Add `README.md` and `search_in_youtube_video_comments.py`".
# The README.md added by the same commit consists of the single line
# "# Search in YouTube video comments".
"""Search in YouTube video comments.

Walk the comment threads of the video ``VIDEO_ID`` through the YouTube Data
API v3 and print, for every top-level comment or reply whose original text
contains a match of ``PATTERN_REGEX``, a direct link to the comment followed
by its text.
"""

import json
import os
import re

import requests

VIDEO_ID = '5PdEmeopJVQ'
# The key can be supplied through the YOUTUBE_API_KEY environment variable so
# that it does not have to be committed; the literal below is the fallback.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'AIzaSy...')
PATTERN_REGEX = '76115293'

# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30

pattern = re.compile(PATTERN_REGEX)


def getContentFromURL(url):
    """Fetch a YouTube Data API v3 resource and return the decoded JSON.

    ``url`` is the endpoint name followed by its query string (for example
    ``comments?part=snippet&parentId=...``); the API base URL and the ``key``
    parameter are added here.

    Raises ``RuntimeError`` when the server answers with an HTTP error
    status, so that the failure is reported with the server's own
    explanation instead of a later ``KeyError`` on the missing ``items``.
    """
    # NOTE: 'https://yt.lemnoslife.com/noKey/{url}' was kept in the original
    # as a commented-out alternative base URL.
    url = f'https://www.googleapis.com/youtube/v3/{url}&key={API_KEY}'
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    if not response.ok:
        # The message deliberately omits the URL: it contains the API key.
        raise RuntimeError(
            f'YouTube Data API request failed with HTTP '
            f'{response.status_code}: {response.text}'
        )
    return json.loads(response.text)


def treatComment(comment):
    """Print the link and text of ``comment`` if its text matches ``pattern``.

    ``comment`` is a comment resource as returned by the API; its ``id`` and
    ``snippet.textOriginal`` fields are read.
    """
    commentId = comment['id']
    textOriginal = comment['snippet']['textOriginal']
    # search() rather than match(): the pattern may occur anywhere in the
    # comment, not only at its very beginning.
    if pattern.search(textOriginal):
        url = f'https://www.youtube.com/watch?v={VIDEO_ID}&lc={commentId}'
        print(f'{url} {textOriginal}')


def treatReplies(parentId):
    """Fetch every reply to the comment ``parentId`` and treat each one."""
    pageToken = ''
    while True:
        repliesData = getContentFromURL(f'comments?part=snippet&parentId={parentId}&maxResults=100&pageToken={pageToken}')
        for reply in repliesData['items']:
            treatComment(reply)
        if 'nextPageToken' not in repliesData:
            break
        pageToken = repliesData['nextPageToken']


def main():
    """Iterate over all comment threads of ``VIDEO_ID`` and treat them."""
    nextPageToken = ''
    while True:
        data = getContentFromURL(f'commentThreads?part=snippet,replies&videoId={VIDEO_ID}&maxResults=100&pageToken={nextPageToken}')
        for item in data['items']:
            snippet = item['snippet']
            # The top-level comment is checked for every thread, including
            # the threads whose replies are fetched separately below.
            treatComment(snippet['topLevelComment'])
            totalReplyCount = snippet['totalReplyCount']
            if totalReplyCount > 5:
                # Presumably the thread resource embeds at most 5 replies, so
                # longer threads are listed through the `comments` endpoint
                # -- TODO confirm against the API documentation.
                treatReplies(item['id'])
            elif totalReplyCount > 0:
                # Read `replies` defensively in case a thread reports replies
                # but does not embed any.
                for comment in item.get('replies', {}).get('comments', []):
                    treatComment(comment)
        if 'nextPageToken' not in data:
            break
        nextPageToken = data['nextPageToken']


if __name__ == '__main__':
    main()