Fix #16: Provide an algorithm to determine the progress of retrieving comments for huge YouTube channels

2023-01-06 17:51:00 +01:00
parent dfbf38b071
commit f436007836
1 changed files with 33 additions and 0 deletions
--- a/findLatestTreatedCommentsForChannelsBeingTreated.py
+++ b/findLatestTreatedCommentsForChannelsBeingTreated.py
@@ -0,0 +1,33 @@
+#!/usr/bin/python3
+
+import os, requests, json, time, datetime
+
+# Folder containing one sub-folder per channel, relative to the current working directory.
+path = 'channels/'
+
+# Move into this folder, as all the paths used below are relative to it.
+os.chdir(path)
+
+def getTimestampFromDateString(dateString):
+    """Convert a UTC date string to a Unix timestamp.
+
+    :param dateString: UTC date formatted as `%Y-%m-%dT%H:%M:%SZ`, for instance
+        `2023-01-06T16:51:00Z`. Fractional seconds (`2023-01-06T16:51:00.5Z`) are
+        accepted too and are dropped from the result.
+    :return: the number of whole seconds elapsed since the Unix epoch, as an `int`.
+    :raises ValueError: if `dateString` doesn't match any of the above formats.
+    """
+    # NOTE(review): presumably some API dates include fractional seconds, confirm against real responses.
+    dateFormat = "%Y-%m-%dT%H:%M:%S.%fZ" if '.' in dateString else "%Y-%m-%dT%H:%M:%SZ"
+    parsedDate = datetime.datetime.strptime(dateString, dateFormat)
+    # The trailing `Z` means UTC, whereas `time.mktime` interprets its argument in the
+    # machine local timezone, which shifts the result by the local UTC offset.
+    return int(parsedDate.replace(tzinfo=datetime.timezone.utc).timestamp())
+
+# For each channel folder, print the date of the latest treated comment, the channel
+# creation date and how much of the channel lifetime the retrieval has covered so far.
+# Folders are sorted so that the output order is the same from one run to another.
+channelIds = sorted(entry.name for entry in os.scandir('.') if entry.is_dir())
+
+for channelId in channelIds:
+    # Assumes that the requests are stored as `0.json`, `1.json`..., so that the latest
+    # one is named after the number of files minus one.
+    numberOfRequests = sum(1 for entry in os.scandir(channelId) if entry.is_file())
+    if numberOfRequests == 0:
+        # Nothing has been retrieved yet for this channel, so there is no progress to compute.
+        print(f'{channelId} no request found')
+        continue
+    with open(f'{channelId}/{numberOfRequests - 1}.json') as f:
+        # The first line of the file is skipped: the JSON payload starts on the second line.
+        content = "\n".join(f.read().splitlines()[1:])
+    data = json.loads(content)
+    snippet = data['items'][-1]['snippet']
+    # Some items wrap the comment in `topLevelComment`, others expose its `snippet` directly.
+    if 'topLevelComment' in snippet:
+        snippet = snippet['topLevelComment']['snippet']
+    latestTreatedCommentDate = snippet['publishedAt']
+
+    url = f'https://yt.lemnoslife.com/noKey/channels?part=snippet&id={channelId}'
+    data = json.loads(requests.get(url).text)
+    channelCreationDate = data['items'][0]['snippet']['publishedAt']
+
+    # Timing percentage: share of the time elapsed since the channel creation that lies
+    # between now and the latest treated comment. It doesn't take into account that
+    # comments aren't uniformly distributed over time.
+    # NOTE(review): presumably comments are retrieved from the newest to the oldest, confirm.
+    currentTimestamp = int(time.time())
+    timingPercentage = round(100 * (currentTimestamp - getTimestampFromDateString(latestTreatedCommentDate)) / (currentTimestamp - getTimestampFromDateString(channelCreationDate)), 3)
+    print(f'{channelId} {latestTreatedCommentDate} / {channelCreationDate} ({timingPercentage}%)')