#!/usr/bin/python3
"""Report how far back in time the comments retrieval of a channel has progressed.

The script looks into the first channel folder found in `channels/`, reads the
most recent request file stored in its `requests/` sub-folder, and compares the
publication date of the last comment in that file with the creation date of the
channel (fetched over HTTP).

TODO: this algorithm should also take into account the other features that are
used to retrieve channels.
"""

import os
import json
import time
import datetime

path = 'channels/'

# `publishedAt` formats accepted by `getTimestampFromDateString`.
# NOTE(review): the fractional-seconds variant is believed to be returned by the
# API for some resources - confirm against real responses.
DATE_FORMATS = ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ')


def getTimestampFromDateString(dateString):
    """Return the Unix timestamp (whole seconds) of a UTC date string.

    `dateString` looks like `2023-01-31T12:34:56Z`, optionally with fractional
    seconds (which are truncated). The trailing `Z` means UTC, so the date is
    interpreted as UTC and not as the local time of the machine.

    Raises `ValueError` if `dateString` matches none of `DATE_FORMATS`.
    """
    for dateFormat in DATE_FORMATS:
        try:
            parsedDate = datetime.datetime.strptime(dateString, dateFormat)
        except ValueError:
            continue
        # Attach UTC explicitly: a naive datetime would be treated as local time.
        return int(parsedDate.replace(tzinfo=datetime.timezone.utc).timestamp())
    raise ValueError(f'Unsupported date format: {dateString!r}')


def getLatestRequestFilePath(channelId):
    """Return the path of the highest-numbered `<n>.json` file of a channel.

    Files are looked up in `<channelId>/requests`; entries that are not named
    `<decimal number>.json` are ignored. Returns `None` if the folder does not
    exist or contains no such file.
    """
    requestsFolder = f'{channelId}/requests'
    try:
        fileNames = os.listdir(requestsFolder)
    except FileNotFoundError:
        return None
    requestFileNames = [
        fileName
        for fileName in fileNames
        if fileName.endswith('.json') and fileName[:-len('.json')].isdecimal()
    ]
    if not requestFileNames:
        return None
    latestFileName = max(requestFileNames, key=lambda fileName: int(fileName[:-len('.json')]))
    return f'{requestsFolder}/{latestFileName}'


def extractLatestTreatedCommentDate(data):
    """Return the `publishedAt` value of the last item of a parsed request file.

    If the snippet of the last item wraps a `topLevelComment`, the date of that
    comment is used; otherwise the date of the snippet itself is used.
    """
    snippet = data['items'][-1]['snippet']
    if 'topLevelComment' in snippet:
        snippet = snippet['topLevelComment']['snippet']
    return snippet['publishedAt']


def getChannelCreationDate(channelId):
    """Fetch the `publishedAt` value of the channel from the no-key API.

    Raises `requests.RequestException` on network failures, timeouts and HTTP
    error statuses.
    """
    # Imported here so that the pure helpers of this module stay importable
    # without the third-party dependency.
    import requests

    url = f'https://yt.lemnoslife.com/noKey/channels?part=snippet&id={channelId}'
    # Without a timeout, an unresponsive server would block the script forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.json()['items'][0]['snippet']['publishedAt']


def computeTimingPercentage(latestTreatedCommentDate, channelCreationDate, currentTimestamp):
    """Return the share (in percent, 3 decimals) of the channel lifetime covered.

    The result is the time elapsed since the latest treated comment divided by
    the time elapsed since the channel creation, both measured from
    `currentTimestamp`.

    This does not take into account that comments are not uniformly distributed
    over time. Also, when the last request lists the replies to a comment, the
    percentage might go slightly backward, as replies are posted after the
    initial comment.
    """
    elapsedSinceLatestComment = currentTimestamp - getTimestampFromDateString(latestTreatedCommentDate)
    elapsedSinceChannelCreation = currentTimestamp - getTimestampFromDateString(channelCreationDate)
    return round(100 * elapsedSinceLatestComment / elapsedSinceChannelCreation, 3)


def main():
    """Print the retrieval progress of the first channel folder in `path`."""
    os.chdir(path)

    # Only the direct sub-folders are needed, so do not walk the whole tree.
    channelIds = next(os.walk('.'), (None, [], []))[1]
    if not channelIds:
        print('No channel folder found.')
        return
    # Only the first channel folder is inspected.
    channelId = channelIds[0]

    filePath = getLatestRequestFilePath(channelId)
    if filePath is None:
        print(f'{channelId}: no request file found.')
        return

    # JSON files are UTF-8 encoded, whatever the locale of the machine is.
    with open(filePath, encoding='utf-8') as f:
        print(filePath)
        data = json.load(f)
    latestTreatedCommentDate = extractLatestTreatedCommentDate(data)

    channelCreationDate = getChannelCreationDate(channelId)

    currentTimestamp = int(time.time())
    timingPercentage = computeTimingPercentage(latestTreatedCommentDate, channelCreationDate, currentTimestamp)
    print(f'{channelId} {latestTreatedCommentDate} / {channelCreationDate} ({timingPercentage}%)')


if __name__ == '__main__':
    main()