Fix #16: Provide an algorithm to determine the progress of retrieving comments for huge YouTube channels
This commit is contained in:
parent
dfbf38b071
commit
f436007836
33
findLatestTreatedCommentsForChannelsBeingTreated.py
Normal file
33
findLatestTreatedCommentsForChannelsBeingTreated.py
Normal file
@ -0,0 +1,33 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import os, requests, json, time, datetime
|
||||
|
||||
# Folder expected to contain one sub-folder per channel. The script switches
# into it so that every path used afterwards can be relative to it.
path = 'channels/'
os.chdir(path)
|
||||
|
||||
def getTimestampFromDateString(dateString):
    """Convert a UTC date string to a Unix timestamp.

    Args:
        dateString: A date formatted as "%Y-%m-%dT%H:%M:%SZ",
            for instance "2023-01-08T12:34:56Z".

    Returns:
        The number of seconds since the Unix epoch, as an int.

    Raises:
        ValueError: If dateString does not match the expected format.
    """
    parsedDate = datetime.datetime.strptime(dateString, "%Y-%m-%dT%H:%M:%SZ")
    # The trailing "Z" means the date is expressed in UTC. strptime returns a
    # naive datetime, so the UTC timezone is attached explicitly; otherwise the
    # conversion would interpret the date in the local timezone of the machine
    # and shift the result by the local UTC offset.
    return int(parsedDate.replace(tzinfo=datetime.timezone.utc).timestamp())
|
||||
|
||||
# Print, for the first channel folder that already holds request files, how far
# the retrieval of its comments has progressed.
#
# os.walk yields (dirpath, dirnames, filenames) tuples, starting with the
# current directory: the dirnames of that first tuple are the channel folders.
# The default value avoids a crash when the directory cannot be listed.
channelIds = next(os.walk('.'), ('.', [], []))[1]
for channelId in channelIds:
    #print(channelId)
    # Number of files directly inside the channel folder.
    numberOfRequests = len(next(os.walk(channelId), (channelId, [], []))[2])
    if numberOfRequests == 0:
        # Nothing has been retrieved for this channel yet, so there is no
        # latest request file to read: try the next channel folder.
        print(f'{channelId} no request file yet')
        continue
    # Assumes the files are named 0.json, 1.json, ... so that the most recent
    # one is numbered `numberOfRequests - 1` - TODO confirm against the writer.
    with open(f'{channelId}/{numberOfRequests - 1}.json') as f:
        # The first line of the file is skipped, only the remaining lines are
        # parsed as JSON.
        content = "\n".join(f.read().splitlines()[1:])
    data = json.loads(content)
    # The last item of the latest request is taken as the latest treated comment.
    snippet = data['items'][-1]['snippet']
    # Some items nest the comment data under 'topLevelComment' (presumably
    # comment threads, as opposed to plain comments).
    if 'topLevelComment' in snippet:
        snippet = snippet['topLevelComment']['snippet']
    latestTreatedCommentDate = snippet['publishedAt']

    # The file is closed before doing the network request. A timeout and a
    # status check make a stalled or failing API call fail explicitly.
    url = f'https://yt.lemnoslife.com/noKey/channels?part=snippet&id={channelId}'
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    data = response.json()
    channelCreationDate = data['items'][0]['snippet']['publishedAt']
    #print(channelCreationDate)

    # Share of the channel lifetime, counted back from now, reached by the
    # latest treated comment. This assumes comments are uniformly distributed
    # over time, which is not the case, so it is only an estimate.
    currentTimestamp = int(time.time())
    treatedDuration = currentTimestamp - getTimestampFromDateString(latestTreatedCommentDate)
    channelDuration = currentTimestamp - getTimestampFromDateString(channelCreationDate)
    timingPercentage = round(100 * treatedDuration / channelDuration, 3)
    print(f'{channelId} {latestTreatedCommentDate} / {channelCreationDate} ({timingPercentage}%)')
    # Only the first channel with data is reported.
    break
|
Loading…
x
Reference in New Issue
Block a user