37 lines
1.8 KiB
Python
Executable File
37 lines
1.8 KiB
Python
Executable File
#!/usr/bin/python3
|
|
|
|
# This algorithm should also take into account the other features that we use to retrieve channels.
|
|
|
|
import os, requests, json, time, datetime
|
|
|
|
# Directory, relative to the current working directory, that is walked below to find channels.
path = 'channels/'

# Work from inside that directory so that every later path can stay relative to it.
os.chdir(path)
|
|
|
|
def getTimestampFromDateString(dateString):
    """Convert a UTC date string to a Unix timestamp.

    Args:
        dateString: A date formatted as `%Y-%m-%dT%H:%M:%SZ`, for instance
            `2021-03-04T05:06:07Z`.

    Returns:
        The number of seconds since the Unix epoch, as an `int`.

    Raises:
        ValueError: If `dateString` does not match the expected format.
    """
    parsedDate = datetime.datetime.strptime(dateString, '%Y-%m-%dT%H:%M:%SZ')
    # The trailing `Z` denotes UTC. `time.mktime` would interpret the parsed date as local time and
    # shift the result by the UTC offset of the machine, making it inconsistent with `time.time()`.
    return int(parsedDate.replace(tzinfo=datetime.timezone.utc).timestamp())
|
|
|
|
# Estimate how far the treatment of a channel has gone, by locating the date of its latest treated
# comment between the channel creation date and now.
# Only the first channel directory reported by `os.walk` is treated.
channelsWalker = os.walk('.')
# The first entry describes the current directory itself and the second one the first subdirectory
# that is walked. Consuming the generator lazily avoids walking the whole tree of channels.
next(channelsWalker, None)
firstChannelEntry = next(channelsWalker, None)
if firstChannelEntry is None:
    raise SystemExit('No channel directory found.')

# Directory paths returned by `os.walk('.')` are prefixed with `./`, strip it to get the folder name.
channelId = firstChannelEntry[0][2:]

requestsEntry = next(os.walk(f'{channelId}/requests'), None)
if requestsEntry is None:
    raise SystemExit(f'{channelId}/requests is not a readable directory.')
# NOTE(review): one file of the folder is not counted, presumably because it is not a numbered
# request - confirm against the code writing this folder.
numberOfRequests = len(requestsEntry[2]) - 1
# Request files are numbered from 0, so the latest one is `numberOfRequests - 1`.
if numberOfRequests < 1:
    raise SystemExit(f'No request file found in {channelId}/requests.')
filePath = f'{channelId}/requests/{numberOfRequests - 1}.json'

with open(filePath, encoding='utf-8') as f:
    print(filePath)
    data = json.load(f)

snippet = data['items'][-1]['snippet']
# Some items nest the comment snippet within `topLevelComment`, others expose it directly.
if 'topLevelComment' in snippet:
    snippet = snippet['topLevelComment']['snippet']
latestTreatedCommentDate = snippet['publishedAt']

url = f'https://yt.lemnoslife.com/noKey/channels?part=snippet&id={channelId}'
# Without a timeout, an unresponsive server would make the script hang forever.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()
channelCreationDate = data['items'][0]['snippet']['publishedAt']

# The timing percentage does not take into account that comments are not uniformly distributed in
# time. Note that if the last request lists the replies to a comment, the percentage might go a bit
# backward, as replies are posted after the initial comment.
currentTimestamp = int(time.time())
elapsedSinceLatestTreatedComment = currentTimestamp - getTimestampFromDateString(latestTreatedCommentDate)
elapsedSinceChannelCreation = currentTimestamp - getTimestampFromDateString(channelCreationDate)
timingPercentage = round(100 * elapsedSinceLatestTreatedComment / elapsedSinceChannelCreation, 3)
print(f'{channelId} {latestTreatedCommentDate} / {channelCreationDate} ({timingPercentage}%)')
|