YouTube_captions_search_engine/findLatestTreatedCommentsForChannelsBeingTreated.py

#!/usr/bin/python3

import os, requests, json, time, datetime

# Folder, relative to the current working directory, that the script enters below.
path = 'channels/'

# Every relative path used after this point is resolved against `path`.
os.chdir(path)

def getTimestampFromDateString(dateString):
    """Convert a UTC date string to a Unix timestamp.

    `dateString` has to look like `2021-06-15T12:30:45Z`; fractional seconds
    (`2021-06-15T12:30:45.678Z`) are accepted too and are dropped.

    Returns the number of whole seconds since the Unix epoch as an `int`.
    Raises `ValueError` if `dateString` matches neither form.
    """
    # The strict format rejects fractional seconds, so pick the format matching the input.
    dateFormat = "%Y-%m-%dT%H:%M:%S.%fZ" if '.' in dateString else "%Y-%m-%dT%H:%M:%SZ"
    parsedDate = datetime.datetime.strptime(dateString, dateFormat)
    # The trailing `Z` means UTC. `time.mktime` would interpret the parsed time as local time and
    # shift the result by the UTC offset of the machine, hence the explicit time zone.
    utcDate = parsedDate.replace(microsecond=0, tzinfo=datetime.timezone.utc)
    return int(utcDate.timestamp())

def _getLatestTreatedCommentDate(channelId):
    """Return the `publishedAt` date of the last item of the latest request file of `channelId`.

    Request files are the `<number>.json` files of the channel folder; the latest one is the one
    with the highest number. Returns `None` if the folder has no such file or if the latest one
    has no item.
    """
    jsonExtension = '.json'
    requestFileNames = [
        fileName
        for fileName in os.listdir(channelId)
        if fileName.endswith(jsonExtension) and fileName[:-len(jsonExtension)].isdecimal()
    ]
    if not requestFileNames:
        return None
    # Compare the numbers, not the names, otherwise `9.json` would be considered newer than `10.json`.
    latestRequestFileName = max(requestFileNames, key=lambda fileName: int(fileName[:-len(jsonExtension)]))
    # NOTE(review): assumes the request files are UTF-8 encoded - confirm against the program writing them.
    with open(f'{channelId}/{latestRequestFileName}', encoding='utf-8') as f:
        # The first line isn't part of the JSON document, so skip it before parsing the remainder.
        f.readline()
        data = json.load(f)
    items = data.get('items')
    if not items:
        return None
    snippet = items[-1]['snippet']
    # Comment threads wrap the comment in `topLevelComment`, while other items carry the snippet directly.
    if 'topLevelComment' in snippet:
        snippet = snippet['topLevelComment']['snippet']
    return snippet['publishedAt']

def _getChannelCreationDate(channelId):
    """Return the `publishedAt` date of the channel `channelId`, as returned by the `channels` endpoint.

    Raises `requests.RequestException` on a network problem, a timeout or an HTTP error status.
    """
    url = 'https://yt.lemnoslife.com/noKey/channels'
    params = {'part': 'snippet', 'id': channelId}
    # Without a timeout an unresponsive server would block the script forever.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()['items'][0]['snippet']['publishedAt']

# Every folder of the current directory is considered to be a channel named after its identifier.
# Sorting makes the output order reproducible.
for channelId in sorted(entry.name for entry in os.scandir('.') if entry.is_dir()):
    latestTreatedCommentDate = _getLatestTreatedCommentDate(channelId)
    if latestTreatedCommentDate is None:
        print(f'{channelId} no treated comment found')
        continue
    channelCreationDate = _getChannelCreationDate(channelId)
    # The percentage is the share of the channel lifetime, counted backwards from now, that is
    # covered by the latest treated comment. It doesn't take into account that comments aren't
    # uniformly distributed over time. If the last request lists the replies to a comment, the
    # percentage may go a bit backward, as replies are posted after the initial comment.
    currentTimestamp = int(time.time())
    timingPercentage = round(100 * (currentTimestamp - getTimestampFromDateString(latestTreatedCommentDate)) / (currentTimestamp - getTimestampFromDateString(channelCreationDate)), 3)
    print(f'{channelId} {latestTreatedCommentDate} / {channelCreationDate} ({timingPercentage}%)')