From ad3e90fe92cf114f937db4475f553824caf4a875 Mon Sep 17 00:00:00 2001 From: Benjamin Loison Date: Tue, 3 Jan 2023 02:56:07 +0100 Subject: [PATCH] Fix #8: Support comments disabled channels Tested with `UCWIdqSQekeGmUWlSFeCiEnA`: the 36 comments of its only comments-enabled video `3F8dFt8LsXY` were treated correctly. Note that this commit doesn't support comments-disabled channels with 20,000 videos or more. --- main.cpp | 169 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 107 insertions(+), 62 deletions(-) diff --git a/main.cpp b/main.cpp index 602fc2f..bfeef7e 100644 --- a/main.cpp +++ b/main.cpp @@ -12,7 +12,8 @@ vector getFileContent(string filePath); json getJson(string url, string directoryPath); void createDirectory(string path), print(ostringstream* toPrint), - treatComment(json comment, string channelId); + treatComment(json comment, string channelId), + treatChannelOrVideo(bool isChannel, string id, string channelToTreat); string getHttps(string url); size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp); bool doesFileExist(string filePath), @@ -63,67 +64,7 @@ int main() string channelToTreatDirectory = CHANNELS_DIRECTORY + channelToTreat + "/"; createDirectory(channelToTreatDirectory); - string pageToken = ""; - while(true) - { - json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken, channelToTreat); - bool doesRelyingOnCommentThreadsIsEnough = data["error"]["errors"][0]["reason"] != "commentsDisabled"; - if(doesRelyingOnCommentThreadsIsEnough) - { - json items = data["items"]; - for(const auto& item : items) - { - json comment = item["snippet"]["topLevelComment"]; - string commentId = comment["id"]; - treatComment(comment, channelToTreat); - if(item.contains("replies")) - { - json replies = item["replies"]["comments"]; - if(replies.size() >= 5) - { - string pageToken = ""; - while(true) - { - json data = 
getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, channelToTreat); - json items = data["items"]; - for(const auto& item : items) - { - treatComment(item, channelToTreat); - } - if(data.contains("nextPageToken")) - { - pageToken = data["nextPageToken"]; - } - else - { - break; - } - } - } - else - { - for(const auto& reply : replies) - { - treatComment(reply, channelToTreat); - } - } - } - } - if(data.contains("nextPageToken")) - { - pageToken = data["nextPageToken"]; - } - else - { - break; - } - } - else - { - PRINT("Comments disabled channel!") - exit(1); - } - } + treatChannelOrVideo(true, channelToTreat, channelToTreat); PRINT(commentsCount) commentsCount = 0; @@ -135,6 +76,110 @@ int main() return 0; } +void treatChannelOrVideo(bool isChannel, string id, string channelToTreat) +{ + string pageToken = ""; + while(true) + { + ostringstream toString; + toString << "commentThreads?part=snippet,replies&" << (isChannel ? "allThreadsRelatedToChannelId" : "videoId") << "=" << id << "&maxResults=100&pageToken=" << pageToken; + string url = toString.str(); + json data = getJson(url, channelToTreat); + bool doesRelyingOnCommentThreadsIsEnough = (!isChannel) || data["error"]["errors"][0]["reason"] != "commentsDisabled"; + if(doesRelyingOnCommentThreadsIsEnough) + { + json items = data["items"]; + for(const auto& item : items) + { + json comment = item["snippet"]["topLevelComment"]; + string commentId = comment["id"]; + treatComment(comment, channelToTreat); + if(item.contains("replies")) + { + json replies = item["replies"]["comments"]; + if(replies.size() >= 5) + { + string pageToken = ""; + while(true) + { + json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, channelToTreat), + items = data["items"]; + for(const auto& item : items) + { + treatComment(item, channelToTreat); + } + if(data.contains("nextPageToken")) + { + pageToken = data["nextPageToken"]; + } + else + 
{ + break; + } + } + } + else + { + for(const auto& reply : replies) + { + treatComment(reply, channelToTreat); + } + } + } + } + if(data.contains("nextPageToken")) + { + pageToken = data["nextPageToken"]; + } + else + { + break; + } + } + else + { + PRINT("Comments disabled channel, treating differently...") + json data = getJson("channels?part=statistics&id=" + channelToTreat, channelToTreat); + // YouTube Data API v3 Channels: list endpoint returns `videoCount` as a string and not an integer... + unsigned int videoCount = atoi(string(data["items"][0]["statistics"]["videoCount"]).c_str()); + PRINT("The channel has about " << videoCount << " videos.") + // `UC-3A9g4U1PpLaeAuD4jSP_w` has a `videoCount` of 2, while its `uploads` playlist contains 3 videos. So we use a strict inequality here. + if(videoCount < 20000) + { + string playlistToTreat = "UU" + channelToTreat.substr(2), + pageToken = ""; + while(true) + { + // `snippet` and `status` are unneeded `part`s here but may be interesting later, as we log them. + json data = getJson("playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, channelToTreat), + items = data["items"]; + for(const auto& item : items) + { + string videoId = item["contentDetails"]["videoId"]; + // To keep the same amount of logs for each channel, the following `PRINT` is commented out. + //PRINT("Treating video " << videoId) + treatChannelOrVideo(false, videoId, channelToTreat); + } + if(data.contains("nextPageToken")) + { + pageToken = data["nextPageToken"]; + } + else + { + break; + } + } + break; + } + else + { + PRINT("The videos count of the channel exceeds the supported 20,000 limit!") + exit(1); + } + } + } +} + void treatComment(json comment, string channelId) { json snippet = comment["snippet"];