Add progression save and use spaces instead of tabs

2022-12-22 06:18:22 +01:00
parent 934954092a
commit 36f1fb9e83
2 changed files with 144 additions and 100 deletions
@@ -1,6 +1,7 @@
 #include <iostream>
 #include <fstream>
 #include <set>
 #include <sys/stat.h>
 #include <curl/curl.h>
 #include <nlohmann/json.hpp>
 using namespace std;
@@ -8,10 +9,13 @@ using json = nlohmann::json;
 // Forward declarations of the helpers used in this file.
 // Functions sharing a return type are declared together as comma-separated declarators.
 vector<string> getFileContent(string filePath);
 json getJson(string url);
-void print(ostringstream* toPrint),
+void createDirectory(string path),
     print(ostringstream* toPrint),
     treatComment(json comment);
 string getHttps(string url);
 size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp);
 bool doesFileExist(string filePath),
     writeFile(string filePath, string option, string toWrite);
 #define API_KEY "AIzaSy..."
@@ -19,104 +23,144 @@ size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp);
 // PRINT(x) streams `x` into the global `toPrint` buffer, then hands that buffer to print(), which outputs and clears it.
 // The macro expands to two statements, so it must not be used as the body of an unbraced `if`/`for`/`while`.
 #define PRINT(x) toPrint << x; print(&toPrint);
 // Buffer shared by every PRINT call.
 ostringstream toPrint;
 // Channel ids that have already been processed, and channel ids still queued for processing.
-set<string> channelsToTreat,
+set<string> channelsAlreadyTreated,
-            channelsAlreadyTreated;
+            channelsToTreat;
 // Number of comments seen by treatComment(); main() prints and resets it after each channel.
 unsigned int commentsCount = 0;
 // Entry point of the crawler.
 // NOTE(review): this span is a rendered diff: `-` lines are the previous tab-indented code, `+` and space-indented
 // lines are the new code, so both the old and the new loop bodies appear below.
 // New version, as far as the lines below show:
 //  1. loads channel ids from `channelsToTreat.txt` into `channelsToTreat`;
 //  2. creates `channels/` and records the name of every entry found there in `channelsAlreadyTreated`;
 //  3. for each queued channel, pages through `commentThreads` (following `nextPageToken`) and passes every
 //     top-level comment and reply to treatComment(); when a thread carries 5 or more inlined replies, the replies
 //     are fetched page by page through `comments?parentId=...` instead of using the inlined ones;
 //  4. exits with status 1 when the API reports `commentsDisabled`;
 //  5. after a channel, prints and resets `commentsCount`, moves the id to `channelsAlreadyTreated`, creates
 //     `channels/<id>/` and appends the id to `channelsToTreat.txt`.
 // NOTE(review): step 5 appends the finished id to the to-treat file and step 1 does not filter against
 // `channelsAlreadyTreated` - confirm that a restart does not re-treat finished channels.
 // NOTE(review): `filesystem::directory_iterator` needs `<filesystem>`, which is not among the includes visible
 // in this diff - confirm.
 int main()
 {
-	vector<string> channelsToTreatVec = getFileContent("channelsToTreat.txt");
+    string channelsToTreatFilePath = "channelsToTreat.txt";
-	channelsToTreat = set(channelsToTreatVec.begin(), channelsToTreatVec.end());
+    vector<string> channelsToTreatVec = getFileContent(channelsToTreatFilePath);
    channelsToTreat = set(channelsToTreatVec.begin(), channelsToTreatVec.end());
-	PRINT(channelsToTreat.size() << " channel(s) to treat")
+    string channelsDirectory = "channels/";
    createDirectory(channelsDirectory);
-	while(!channelsToTreat.empty())
+    for(const auto& entry : filesystem::directory_iterator(channelsDirectory))
-	{
+    {
-		string channelToTreat = *channelsToTreat.begin();
+        channelsAlreadyTreated.insert(entry.path().filename());
-		PRINT("Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")
+    }
-		string pageToken = "";
+    PRINT(channelsToTreat.size() << " channel(s) to treat")
-		while(true)
+    PRINT(channelsAlreadyTreated.size() << " channel(s) already treated")
 		{
 			json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken);
 			bool doesRelyingOnCommentThreadsIsEnough = data["error"]["errors"][0]["reason"] != "commentsDisabled";
 			if(doesRelyingOnCommentThreadsIsEnough)
 			{
 				json items = data["items"];
 				for(const auto& item : items)
 				{
 					json comment = item["snippet"]["topLevelComment"];
 					string commentId = comment["id"];
 					treatComment(comment);
 					if(item.contains("replies"))
 					{
 						json replies = item["replies"]["comments"];
 						if(replies.size() >= 5)
 						{
 							string pageToken = "";
 							while(true)
 							{
 								json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken);
 								json items = data["items"];
 								for(const auto& item : items)
 								{
 									treatComment(item);
 								}
 								if(data.contains("nextPageToken"))
 								{
 									pageToken = data["nextPageToken"];
 								}
 								else
 								{
 									break;
 								}
 							}
 						}
 						else
 						{
 							for(const auto& reply : replies)
 							{
 								treatComment(reply);
 							}
 						}
 					}
 				}
 				if(data.contains("nextPageToken"))
 				{
 					pageToken = data["nextPageToken"];
 				}
 				else
 				{
 					break;
 				}
 			}
 			else
 			{
 				PRINT("Comments disabled channel!")
 				exit(1);
 			}
 		}
-		PRINT(commentsCount)
+    while(!channelsToTreat.empty())
-		commentsCount = 0;
+    {
-		channelsToTreat.erase(channelToTreat);
+        string channelToTreat = *channelsToTreat.begin();
-		channelsAlreadyTreated.insert(channelToTreat);
+        PRINT("Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")
 	}
-	return 0;
+        string pageToken = "";
        while(true)
        {
            json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken);
            bool doesRelyingOnCommentThreadsIsEnough = data["error"]["errors"][0]["reason"] != "commentsDisabled";
            if(doesRelyingOnCommentThreadsIsEnough)
            {
                json items = data["items"];
                for(const auto& item : items)
                {
                    json comment = item["snippet"]["topLevelComment"];
                    string commentId = comment["id"];
                    treatComment(comment);
                    if(item.contains("replies"))
                    {
                        json replies = item["replies"]["comments"];
                        if(replies.size() >= 5)
                        {
                            string pageToken = "";
                            while(true)
                            {
                                json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken);
                                json items = data["items"];
                                for(const auto& item : items)
                                {
                                    treatComment(item);
                                }
                                if(data.contains("nextPageToken"))
                                {
                                    pageToken = data["nextPageToken"];
                                }
                                else
                                {
                                    break;
                                }
                            }
                        }
                        else
                        {
                            for(const auto& reply : replies)
                            {
                                treatComment(reply);
                            }
                        }
                    }
                }
                if(data.contains("nextPageToken"))
                {
                    pageToken = data["nextPageToken"];
                }
                else
                {
                    break;
                }
            }
            else
            {
                PRINT("Comments disabled channel!")
                exit(1);
            }
        }
        PRINT(commentsCount)
        commentsCount = 0;
        channelsToTreat.erase(channelToTreat);
        channelsAlreadyTreated.insert(channelToTreat);
        string channelToTreatDirectory = channelsDirectory + channelToTreat + "/";
        createDirectory(channelToTreatDirectory);
        string toWrite = (doesFileExist(channelsToTreatFilePath) ? "\n" : "") + channelToTreat;
        writeFile(channelsToTreatFilePath, "a", toWrite);
    }
    return 0;
 }
 // Counts one comment and, when its `snippet` carries an `authorChannelId`, queues that author's channel id in
 // `channelsToTreat` unless the id is already present in `channelsAlreadyTreated`.
 // `commentsCount` is incremented for every comment, with or without an author channel id.
 // NOTE(review): `find` over the set's iterators is a linear scan; `channelsAlreadyTreated.count(channelId)` looks
 // equivalent and logarithmic - confirm before changing.
 void treatComment(json comment)
 {
-	json snippet = comment["snippet"];
+    json snippet = comment["snippet"];
-	// The `else` case can happen (cf `95a9421ad0469a09335afeddb2983e31dc00bc36`).
+    // The `else` case can happen (cf `95a9421ad0469a09335afeddb2983e31dc00bc36`).
-	if(snippet.contains("authorChannelId"))
+    if(snippet.contains("authorChannelId"))
-	{
+    {
-		string channelId = snippet["authorChannelId"]["value"];
+        string channelId = snippet["authorChannelId"]["value"];
-		if(find(channelsAlreadyTreated.begin(), channelsAlreadyTreated.end(), channelId) == channelsAlreadyTreated.end())
+        if(find(channelsAlreadyTreated.begin(), channelsAlreadyTreated.end(), channelId) == channelsAlreadyTreated.end())
-			channelsToTreat.insert(channelId);
+            channelsToTreat.insert(channelId);
-	}
+    }
-	commentsCount++;
+    commentsCount++;
 }
 // Opens `filePath` with the `fopen` mode `option` (for instance "w" or "a") and writes `toWrite` to it.
 // Returns true only when the file was opened, the text was written and the stream was closed without error;
 // returns false otherwise.
 bool writeFile(string filePath, string option, string toWrite)
 {
    FILE* file = fopen(filePath.c_str(), option.c_str());
    if(file == nullptr)
    {
        return false;
    }
    // `fputs` reports EOF on failure (for instance when the stream is not open for writing).
    bool isWritten = fputs(toWrite.c_str(), file) != EOF;
    // Close even after a failed write so the handle is never leaked; buffered data is flushed here, so a failing
    // `fclose` also means the text may not have reached the file.
    bool isClosed = fclose(file) == 0;
    return isWritten && isClosed;
 }
 // Reports whether `stat` succeeds on `filePath`.
 // Anything `stat` can resolve counts as existing, directories included.
 bool doesFileExist(string filePath)
 {
    struct stat fileInfo;
    const int statResult = stat(filePath.c_str(), &fileInfo);
    return statResult == 0;
 }
 // Creates the directory `path` with mode rwxrwxr-x (before the process umask is applied).
 // The result of `mkdir` is deliberately not inspected, so calling this on an existing directory is a silent no-op.
 void createDirectory(string path)
 {
    const mode_t permissions = S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH;
    const char* directoryPath = path.c_str();
    mkdir(directoryPath, permissions);
 }
 string getDate()
@@ -130,26 +174,26 @@ string getDate()
 // Reads the file at `filePath` with `getline` and returns its lines in order.
 // If the file cannot be opened the loop never runs and an empty vector is returned.
 vector<string> getFileContent(string filePath)
 {
-	vector<string> lines;
+    vector<string> lines;
-	ifstream infile(filePath.c_str());
+    ifstream infile(filePath.c_str());
    string line;
    while(getline(infile, line))
-		lines.push_back(line);
+        lines.push_back(line);
    return lines;
 }
 // Fetches a YouTube Data API v3 resource and returns the parsed JSON response.
 // `url` is the path and query relative to `https://www.googleapis.com/youtube/v3/`; `&key=API_KEY` is appended,
 // so `url` must already contain a query string.
 json getJson(string url)
 {
-	url = "https://www.googleapis.com/youtube/v3/" + url + "&key=" + API_KEY;
+    url = "https://www.googleapis.com/youtube/v3/" + url + "&key=" + API_KEY;
-	string content = getHttps(url);
+    string content = getHttps(url);
-	json data = json::parse(content);
+    json data = json::parse(content);
-	return data;
+    return data;
 }
 // Writes the text buffered in `toPrint` to `cout`, prefixed with `getDate()` and ": ", then empties the buffer
 // so the same stream can be reused for the next message.
 void print(ostringstream* toPrint)
 {
-	cout << getDate() << ": " << toPrint->str() << endl;
+    cout << getDate() << ": " << toPrint->str() << endl;
-	toPrint->str("");
+    toPrint->str("");
 }
 string getHttps(string url)