From 7a1eac5e406e59a6d7738580faec37a949f3aecf Mon Sep 17 00:00:00 2001 From: Benjamin Loison Date: Thu, 22 Dec 2022 06:18:22 +0100 Subject: [PATCH] Add progression save and use spaces instead of tabs --- channelsToTreat.txt | 2 +- main.cpp | 242 ++++++++++++++++++++++++++------------------ 2 files changed, 144 insertions(+), 100 deletions(-) diff --git a/channelsToTreat.txt b/channelsToTreat.txt index 2c63c27..d2927c3 100644 --- a/channelsToTreat.txt +++ b/channelsToTreat.txt @@ -1 +1 @@ -UCt5USYpzzMCYhkirVQGHwKQ +UCt5USYpzzMCYhkirVQGHwKQ \ No newline at end of file diff --git a/main.cpp b/main.cpp index 9b54960..d12c675 100644 --- a/main.cpp +++ b/main.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include using namespace std; @@ -8,10 +9,13 @@ using json = nlohmann::json; vector getFileContent(string filePath); json getJson(string url); -void print(ostringstream* toPrint), +void createDirectory(string path), + print(ostringstream* toPrint), treatComment(json comment); string getHttps(string url); size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp); +bool doesFileExist(string filePath), + writeFile(string filePath, string option, string toWrite); #define API_KEY "AIzaSy..." @@ -19,104 +23,144 @@ size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp); #define PRINT(x) toPrint << x; print(&toPrint); ostringstream toPrint; -set channelsToTreat, - channelsAlreadyTreated; +set channelsAlreadyTreated, + channelsToTreat; unsigned int commentsCount = 0; int main() { - vector channelsToTreatVec = getFileContent("channelsToTreat.txt"); - channelsToTreat = set(channelsToTreatVec.begin(), channelsToTreatVec.end()); - - PRINT(channelsToTreat.size() << " channel(s) to treat") - - while(!channelsToTreat.empty()) - { - string channelToTreat = *channelsToTreat.begin(); - PRINT("Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")") - - string pageToken = ""; - while(true) - { - json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken); - bool doesRelyingOnCommentThreadsIsEnough = data["error"]["errors"][0]["reason"] != "commentsDisabled"; - if(doesRelyingOnCommentThreadsIsEnough) - { - json items = data["items"]; - for(const auto& item : items) - { - json comment = item["snippet"]["topLevelComment"]; - string commentId = comment["id"]; - treatComment(comment); - if(item.contains("replies")) - { - json replies = item["replies"]["comments"]; - if(replies.size() >= 5) - { - string pageToken = ""; - while(true) - { - json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken); - json items = data["items"]; - for(const auto& item : items) - { - treatComment(item); - } - if(data.contains("nextPageToken")) - { - pageToken = data["nextPageToken"]; - } - else - { - break; - } - } - } - else - { - for(const auto& reply : replies) - { - treatComment(reply); - } - } - } - } - if(data.contains("nextPageToken")) - { - pageToken = data["nextPageToken"]; - } - else - { - break; - } - } - else - { - PRINT("Comments disabled channel!") - exit(1); - } - } - - PRINT(commentsCount) - commentsCount = 0; - channelsToTreat.erase(channelToTreat); - channelsAlreadyTreated.insert(channelToTreat); - } + string channelsToTreatFilePath = "channelsToTreat.txt"; + vector channelsToTreatVec = getFileContent(channelsToTreatFilePath); + channelsToTreat = set(channelsToTreatVec.begin(), channelsToTreatVec.end()); - return 0; + string channelsDirectory = "channels/"; + createDirectory(channelsDirectory); + + for(const auto& entry : filesystem::directory_iterator(channelsDirectory)) + { + channelsAlreadyTreated.insert(entry.path().filename()); + } + + PRINT(channelsToTreat.size() << " channel(s) to treat") + PRINT(channelsAlreadyTreated.size() << " channel(s) already treated") + + while(!channelsToTreat.empty()) + { + string channelToTreat = *channelsToTreat.begin(); + PRINT("Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")") + + string pageToken = ""; + while(true) + { + json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken); + bool doesRelyingOnCommentThreadsIsEnough = data["error"]["errors"][0]["reason"] != "commentsDisabled"; + if(doesRelyingOnCommentThreadsIsEnough) + { + json items = data["items"]; + for(const auto& item : items) + { + json comment = item["snippet"]["topLevelComment"]; + string commentId = comment["id"]; + treatComment(comment); + if(item.contains("replies")) + { + json replies = item["replies"]["comments"]; + if(replies.size() >= 5) + { + string pageToken = ""; + while(true) + { + json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken); + json items = data["items"]; + for(const auto& item : items) + { + treatComment(item); + } + if(data.contains("nextPageToken")) + { + pageToken = data["nextPageToken"]; + } + else + { + break; + } + } + } + else + { + for(const auto& reply : replies) + { + treatComment(reply); + } + } + } + } + if(data.contains("nextPageToken")) + { + pageToken = data["nextPageToken"]; + } + else + { + break; + } + } + else + { + PRINT("Comments disabled channel!") + exit(1); + } + } + + PRINT(commentsCount) + commentsCount = 0; + + channelsToTreat.erase(channelToTreat); + channelsAlreadyTreated.insert(channelToTreat); + + string channelToTreatDirectory = channelsDirectory + channelToTreat + "/"; + createDirectory(channelToTreatDirectory); + + string toWrite = (doesFileExist(channelsToTreatFilePath) ? "\n" : "") + channelToTreat; + writeFile(channelsToTreatFilePath, "a", toWrite); + } + + return 0; } void treatComment(json comment) { - json snippet = comment["snippet"]; - // The `else` case can happen (cf `95a9421ad0469a09335afeddb2983e31dc00bc36`). - if(snippet.contains("authorChannelId")) - { - string channelId = snippet["authorChannelId"]["value"]; - if(find(channelsAlreadyTreated.begin(), channelsAlreadyTreated.end(), channelId) == channelsAlreadyTreated.end()) - channelsToTreat.insert(channelId); - } - commentsCount++; + json snippet = comment["snippet"]; + // The `else` case can happen (cf `95a9421ad0469a09335afeddb2983e31dc00bc36`). + if(snippet.contains("authorChannelId")) + { + string channelId = snippet["authorChannelId"]["value"]; + if(find(channelsAlreadyTreated.begin(), channelsAlreadyTreated.end(), channelId) == channelsAlreadyTreated.end()) + channelsToTreat.insert(channelId); + } + commentsCount++; +} + +bool writeFile(string filePath, string option, string toWrite) +{ + FILE* file = fopen(filePath.c_str(), option.c_str()); + if(file != NULL) + { + fputs(toWrite.c_str(), file); + fclose(file); + return true; + } + return false; +} + +bool doesFileExist(string filePath) +{ + struct stat buffer; + return stat(filePath.c_str(), &buffer) == 0; +} + +void createDirectory(string path) +{ + mkdir(path.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); } string getDate() @@ -130,26 +174,26 @@ string getDate() vector getFileContent(string filePath) { - vector lines; - ifstream infile(filePath.c_str()); + vector lines; + ifstream infile(filePath.c_str()); string line; while(getline(infile, line)) - lines.push_back(line); + lines.push_back(line); return lines; } json getJson(string url) { - url = "https://www.googleapis.com/youtube/v3/" + url + "&key=" + API_KEY; - string content = getHttps(url); - json data = json::parse(content); - return data; + url = "https://www.googleapis.com/youtube/v3/" + url + "&key=" + API_KEY; + string content = getHttps(url); + json data = json::parse(content); + return data; } void print(ostringstream* toPrint) { - cout << getDate() << ": " << toPrint->str() << endl; - toPrint->str(""); + cout << getDate() << ": " << toPrint->str() << endl; + toPrint->str(""); } string getHttps(string url)