#2: Add data logging
This commit is contained in:
		
							
								
								
									
										76
									
								
								main.cpp
									
									
									
									
									
								
							
							
						
						
									
										76
									
								
								main.cpp
									
									
									
									
									
								
							| @@ -9,10 +9,10 @@ using namespace std; | ||||
| using json = nlohmann::json; | ||||
|  | ||||
| vector<string> getFileContent(string filePath); | ||||
| json getJson(string url); | ||||
| json getJson(string url, string directoryPath); | ||||
| void createDirectory(string path), | ||||
|      print(ostringstream* toPrint), | ||||
|      treatComment(json comment); | ||||
|      treatComment(json comment, string channelId); | ||||
| string getHttps(string url); | ||||
| size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp); | ||||
| bool doesFileExist(string filePath), | ||||
| @@ -27,20 +27,26 @@ ostringstream toPrint; | ||||
|  | ||||
| set<string> channelsAlreadyTreated, | ||||
|     channelsToTreat; | ||||
| unsigned int commentsCount = 0; | ||||
| unsigned int commentsCount = 0, | ||||
|              requestsPerChannel = 0; | ||||
| string CHANNELS_DIRECTORY = "channels/", | ||||
|        CHANNELS_FILE_PATH = "channels.txt"; | ||||
|  | ||||
| int main() | ||||
| { | ||||
|     string channelsToTreatFilePath = "channelsToTreat.txt"; | ||||
|     vector<string> channelsToTreatVec = getFileContent(channelsToTreatFilePath); | ||||
|     channelsToTreat = set(channelsToTreatVec.begin(), channelsToTreatVec.end()); | ||||
|     // The starting set should be written to `CHANNELS_FILE_PATH`. | ||||
|     // To resume this algorithm after a shutdown, delete the folder in `CHANNELS_DIRECTORY` of the channel that was being treated when it stopped, then restart it. | ||||
|     // On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat`, or to `channelsAlreadyTreated` otherwise, before continuing, as if `CHANNELS_FILE_PATH` contained a **treated** starting set. | ||||
|     vector<string> channelsVec = getFileContent(CHANNELS_FILE_PATH); | ||||
|     channelsToTreat = set(channelsVec.begin(), channelsVec.end()); | ||||
|  | ||||
|     string channelsDirectory = "channels/"; | ||||
|     createDirectory(channelsDirectory); | ||||
|     createDirectory(CHANNELS_DIRECTORY); | ||||
|  | ||||
|     for(const auto& entry : filesystem::directory_iterator(channelsDirectory)) | ||||
|     for(const auto& entry : filesystem::directory_iterator(CHANNELS_DIRECTORY)) | ||||
|     { | ||||
|         channelsAlreadyTreated.insert(entry.path().filename()); | ||||
|         string channelId = entry.path().filename(); | ||||
|         channelsToTreat.erase(channelId); | ||||
|         channelsAlreadyTreated.insert(channelId); | ||||
|     } | ||||
|  | ||||
|     PRINT(channelsToTreat.size() << " channel(s) to treat") | ||||
| @@ -49,12 +55,18 @@ int main() | ||||
|     while(!channelsToTreat.empty()) | ||||
|     { | ||||
|         string channelToTreat = *channelsToTreat.begin(); | ||||
|  | ||||
|         PRINT("Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")") | ||||
|  | ||||
|         channelsAlreadyTreated.insert(channelToTreat); | ||||
|  | ||||
|         string channelToTreatDirectory = CHANNELS_DIRECTORY + channelToTreat + "/"; | ||||
|         createDirectory(channelToTreatDirectory); | ||||
|  | ||||
|         string pageToken = ""; | ||||
|         while(true) | ||||
|         { | ||||
|             json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken); | ||||
|             json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken, channelToTreat); | ||||
|             bool doesRelyingOnCommentThreadsIsEnough = data["error"]["errors"][0]["reason"] != "commentsDisabled"; | ||||
|             if(doesRelyingOnCommentThreadsIsEnough) | ||||
|             { | ||||
| @@ -63,7 +75,7 @@ int main() | ||||
|                 { | ||||
|                     json comment = item["snippet"]["topLevelComment"]; | ||||
|                     string commentId = comment["id"]; | ||||
|                     treatComment(comment); | ||||
|                     treatComment(comment, channelToTreat); | ||||
|                     if(item.contains("replies")) | ||||
|                     { | ||||
|                         json replies = item["replies"]["comments"]; | ||||
| @@ -72,11 +84,11 @@ int main() | ||||
|                             string pageToken = ""; | ||||
|                             while(true) | ||||
|                             { | ||||
|                                 json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken); | ||||
|                                 json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, channelToTreat); | ||||
|                                 json items = data["items"]; | ||||
|                                 for(const auto& item : items) | ||||
|                                 { | ||||
|                                     treatComment(item); | ||||
|                                     treatComment(item, channelToTreat); | ||||
|                                 } | ||||
|                                 if(data.contains("nextPageToken")) | ||||
|                                 { | ||||
| @@ -92,7 +104,7 @@ int main() | ||||
|                         { | ||||
|                             for(const auto& reply : replies) | ||||
|                             { | ||||
|                                 treatComment(reply); | ||||
|                                 treatComment(reply, channelToTreat); | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
| @@ -115,29 +127,27 @@ int main() | ||||
|  | ||||
|         PRINT(commentsCount) | ||||
|         commentsCount = 0; | ||||
|         requestsPerChannel = 0; | ||||
|  | ||||
|         channelsToTreat.erase(channelToTreat); | ||||
|         channelsAlreadyTreated.insert(channelToTreat); | ||||
|  | ||||
|         string channelToTreatDirectory = channelsDirectory + channelToTreat + "/"; | ||||
|         createDirectory(channelToTreatDirectory); | ||||
|  | ||||
|         string toWrite = (doesFileExist(channelsToTreatFilePath) ? "\n" : "") + channelToTreat; | ||||
|         writeFile(channelsToTreatFilePath, "a", toWrite); | ||||
|     } | ||||
|  | ||||
|     return 0; | ||||
| } | ||||
|  | ||||
| void treatComment(json comment) | ||||
| void treatComment(json comment, string channelId) | ||||
| { | ||||
|     json snippet = comment["snippet"]; | ||||
|     // The `else` case can happen (cf `95a9421ad0469a09335afeddb2983e31dc00bc36`). | ||||
|     if(snippet.contains("authorChannelId")) | ||||
|     { | ||||
|         string channelId = snippet["authorChannelId"]["value"]; | ||||
|         if(find(channelsAlreadyTreated.begin(), channelsAlreadyTreated.end(), channelId) == channelsAlreadyTreated.end()) | ||||
|         if(find(channelsAlreadyTreated.begin(), channelsAlreadyTreated.end(), channelId) == channelsAlreadyTreated.end() && find(channelsToTreat.begin(), channelsToTreat.end(), channelId) == channelsToTreat.end()) | ||||
|         { | ||||
|             channelsToTreat.insert(channelId); | ||||
|  | ||||
|             writeFile(CHANNELS_FILE_PATH, "a", "\n" + channelId); | ||||
|         } | ||||
|     } | ||||
|     commentsCount++; | ||||
| } | ||||
| @@ -151,6 +161,10 @@ bool writeFile(string filePath, string option, string toWrite) | ||||
|         fclose(file); | ||||
|         return true; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         PRINT("writeFile error: " << strerror(errno)) | ||||
|     } | ||||
|     return false; | ||||
| } | ||||
|  | ||||
| @@ -184,15 +198,21 @@ vector<string> getFileContent(string filePath) | ||||
|     return lines; | ||||
| } | ||||
|  | ||||
| json getJson(string url) | ||||
| json getJson(string url, string directoryPath) | ||||
| { | ||||
| #ifdef USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE | ||||
|     url = "https://yt.lemnoslife.com/noKey/" + url; | ||||
|     string finalUrl = "https://yt.lemnoslife.com/noKey/" + url; | ||||
| #else | ||||
|     url = "https://www.googleapis.com/youtube/v3/" + url + "&key=" + API_KEY; | ||||
|     string finalUrl = "https://www.googleapis.com/youtube/v3/" + url + "&key=" + API_KEY; | ||||
| #endif | ||||
|     string content = getHttps(url); | ||||
|     string content = getHttps(finalUrl); | ||||
|     json data = json::parse(content); | ||||
|  | ||||
|     ostringstream toString; | ||||
|     toString << CHANNELS_DIRECTORY << directoryPath << "/" << requestsPerChannel << ".json"; | ||||
|     requestsPerChannel++; | ||||
|     writeFile(toString.str(), "w", url + "\n" + content); | ||||
|  | ||||
|     return data; | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user