#11: Add a first iteration for the CHANNELS retrieval
				
					
				
			This commit is contained in:
		
							
								
								
									
										2
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Makefile
									
									
									
									
									
								
							| @@ -1,4 +1,4 @@ | ||||
| .PHONY: main | ||||
|  | ||||
| main: | ||||
| 	g++ main.cpp -g -std=c++17 -lcurl -lpthread -o main | ||||
| 	g++ main.cpp -g -std=c++17 -lcurl -lpthread -o youtubeCaptionsSearchEngine | ||||
|   | ||||
| @@ -15,5 +15,10 @@ Have to proceed with a breadth-first search approach as treating all *child* cha | ||||
| ```sh | ||||
| sudo apt install nlohmann-json3-dev | ||||
| make | ||||
| ./main | ||||
| ``` | ||||
|  | ||||
| Unless you provide the argument `--youtube-operational-api-instance-url https://yt.lemnoslife.com`, you have to [host your own instance of the YouTube operational API](https://github.com/Benjamin-Loison/YouTube-operational-API/#install-your-own-instance-of-the-api). | ||||
|  | ||||
| ```sh | ||||
| ./youtubeCaptionsSearchEngine | ||||
| ``` | ||||
|   | ||||
							
								
								
									
										69
									
								
								main.cpp
									
									
									
									
									
								
							
							
						
						
									
										69
									
								
								main.cpp
									
									
									
									
									
								
							| @@ -16,13 +16,14 @@ enum getJsonBehavior { normal, retryOnCommentsDisabled, returnErrorIfPlaylistNot | ||||
|  | ||||
| set<string> setFromVector(vector<string> vec); | ||||
| vector<string> getFileContent(string filePath); | ||||
| json getJson(unsigned short threadId, string url, string directoryPath, getJsonBehavior behavior = normal); | ||||
| json getJson(unsigned short threadId, string url, bool usingYouTubeDataApiV3, string directoryPath, getJsonBehavior behavior = normal); | ||||
| void createDirectory(string path), | ||||
|      print(ostringstream* toPrint), | ||||
|      treatComment(unsigned short threadId, json comment, string channelId), | ||||
|      treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, string channelToTreat), | ||||
|      treatChannels(unsigned short threadId), | ||||
|      deleteDirectory(string path); | ||||
|      deleteDirectory(string path), | ||||
|      addChannelToTreat(unsigned short threadId, string channelId); | ||||
| string getHttps(string url), | ||||
|        exec(string cmd); | ||||
| size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp); | ||||
| @@ -196,7 +197,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str | ||||
|         ostringstream toString; | ||||
|         toString << "commentThreads?part=snippet,replies&" << (isChannel ? "allThreadsRelatedToChannelId" : "videoId") << "=" << id << "&maxResults=100&pageToken=" << pageToken; | ||||
|         string url = toString.str(); | ||||
|         json data = getJson(threadId, url, channelToTreat, pageToken == "" ? normal : retryOnCommentsDisabled); | ||||
|         json data = getJson(threadId, url, true, channelToTreat, pageToken == "" ? normal : retryOnCommentsDisabled); | ||||
|         bool doesRelyingOnCommentThreadsIsEnough = (!isChannel) || data["error"]["errors"][0]["reason"] != "commentsDisabled"; | ||||
|         if(doesRelyingOnCommentThreadsIsEnough) | ||||
|         { | ||||
| @@ -213,7 +214,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str | ||||
|                         string pageToken = ""; | ||||
|                         while(true) | ||||
|                         { | ||||
|                             json data = getJson(threadId, "comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, channelToTreat), | ||||
|                             json data = getJson(threadId, "comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, true, channelToTreat), | ||||
|                                  items = data["items"]; | ||||
|                             for(const auto& item : items) | ||||
|                             { | ||||
| @@ -251,7 +252,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str | ||||
|         else | ||||
|         { | ||||
|             PRINT(threadId, "Comments disabled channel, treating differently...") | ||||
|             json data = getJson(threadId, "channels?part=statistics&id=" + channelToTreat, channelToTreat); | ||||
|             json data = getJson(threadId, "channels?part=statistics&id=" + channelToTreat, true, channelToTreat); | ||||
|             // YouTube Data API v3 Channels: list endpoint returns `videoCount` as a string and not an integer... | ||||
|             unsigned int videoCount = atoi(string(data["items"][0]["statistics"]["videoCount"]).c_str()); | ||||
|             PRINT(threadId, "The channel has about " << videoCount << " videos.") | ||||
| @@ -263,7 +264,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str | ||||
|                 while(true) | ||||
|                 { | ||||
|                     // `snippet` and `status` are unneeded `part`s here but may be interesting later, as we log them. | ||||
|                     json data = getJson(threadId, "playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, channelToTreat, returnErrorIfPlaylistNotFound); | ||||
|                     json data = getJson(threadId, "playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, true, channelToTreat, returnErrorIfPlaylistNotFound); | ||||
|                     if(data.contains("error")) | ||||
|                     { | ||||
|                         PRINT(threadId, "Not listing comments on videos, as `playlistItems` hasn't found the `uploads` playlist!") | ||||
| @@ -300,15 +301,36 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     if(isChannel) | ||||
|     { | ||||
|         string pageToken = ""; | ||||
|         while(true) | ||||
|         { | ||||
|             json data = getJson(threadId, "channels?part=channels&id=" + id + (pageToken == "" ? "" : "&pageToken=" + pageToken), false, id), | ||||
|                  items = data["items"]; | ||||
|             for(const auto& item : items) | ||||
|             { | ||||
|                 for(const auto& channel : item["channels"]["channels"]) | ||||
|                 { | ||||
|                     PRINT(threadId, channel) | ||||
|                     addChannelToTreat(threadId, channel["channelId"]); | ||||
|                 } | ||||
|             } | ||||
|             if(!data["nextPageToken"].is_null()) | ||||
|             { | ||||
|                 pageToken = data["nextPageToken"]; | ||||
|             } | ||||
|             else | ||||
|             { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| void treatComment(unsigned short threadId, json comment, string channelId) | ||||
| // This function verifies that the given channel hasn't already been treated and isn't already queued to be treated. | ||||
| void addChannelToTreat(unsigned short threadId, string channelId) | ||||
| { | ||||
|     json snippet = comment["snippet"]; | ||||
|     // The `else` case can happen (cf `95a9421ad0469a09335afeddb2983e31dc00bc36`). | ||||
|     if(snippet.contains("authorChannelId")) | ||||
|     { | ||||
|         string channelId = snippet["authorChannelId"]["value"]; | ||||
|     channelsAlreadyTreatedAndToTreatMutex.lock(); | ||||
|     if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreatRev.find(channelId) == channelsToTreatRev.end()) | ||||
|     { | ||||
| @@ -325,6 +347,16 @@ void treatComment(unsigned short threadId, json comment, string channelId) | ||||
|         channelsAlreadyTreatedAndToTreatMutex.unlock(); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void treatComment(unsigned short threadId, json comment, string channelId) | ||||
| { | ||||
|     json snippet = comment["snippet"]; | ||||
|     // The `else` case can happen (cf `95a9421ad0469a09335afeddb2983e31dc00bc36`). | ||||
|     if(snippet.contains("authorChannelId")) | ||||
|     { | ||||
|         string channelId = snippet["authorChannelId"]["value"]; | ||||
|         addChannelToTreat(threadId, channelId); | ||||
|     } | ||||
|     commentsCount++; | ||||
|     commentsPerSecondCount++; | ||||
| } | ||||
| @@ -405,10 +437,13 @@ vector<string> getFileContent(string filePath) | ||||
|     return lines; | ||||
| } | ||||
|  | ||||
| json getJson(unsigned short threadId, string url, string directoryPath, getJsonBehavior behavior) | ||||
| json getJson(unsigned short threadId, string url, bool usingYoutubeDataApiv3, string directoryPath, getJsonBehavior behavior) | ||||
| { | ||||
|     string finalUrl = USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE ? "https://yt.lemnoslife.com/noKey/" + url : | ||||
|                       "https://www.googleapis.com/youtube/v3/" + url + "&key=" + apiKey, | ||||
|     string finalUrl = usingYoutubeDataApiv3 ? | ||||
|                       (USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE ? | ||||
|                        "https://yt.lemnoslife.com/noKey/" + url : | ||||
|                        "https://www.googleapis.com/youtube/v3/" + url + "&key=" + apiKey) : | ||||
|                       YOUTUBE_OPERATIONAL_API_INSTANCE_URL + "/" + url, | ||||
|                       content = getHttps(finalUrl); | ||||
|     json data; | ||||
|     try | ||||
| @@ -433,12 +468,12 @@ json getJson(unsigned short threadId, string url, string directoryPath, getJsonB | ||||
|             PRINT(threadId, "No more quota on " << apiKey << " switching to " << keys[0] << ".") | ||||
|             apiKey = keys[0]; | ||||
|             quotaMutex.unlock(); | ||||
|             return getJson(threadId, url, directoryPath); | ||||
|             return getJson(threadId, url, true, directoryPath); | ||||
|         } | ||||
|         PRINT(threadId, "Found error in JSON at URL: " << finalUrl << " for content: " << content << " !") | ||||
|         if(reason != "commentsDisabled" || behavior == retryOnCommentsDisabled) | ||||
|         { | ||||
|             return reason == "playlistNotFound" && behavior == returnErrorIfPlaylistNotFound ? data : getJson(threadId, url, directoryPath); | ||||
|             return reason == "playlistNotFound" && behavior == returnErrorIfPlaylistNotFound ? data : getJson(threadId, url, true, directoryPath); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user