Fix #20: YouTube Data API v3 rarely and suddenly returns a commentsDisabled error, which triggers an unwanted method switch
				
					
				
Also modified the compression command, as I got `sh: 1: zip: Argument list too long` when compressing the 248,868 JSON files of the most-subscribed French channel.
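For context, the reason for the new command is the kernel's limit on the size of an `execve` argument list: with `zip -r ../<id>.zip *` the shell expands `*` into one argument per file, which overflows that limit for a channel with hundreds of thousands of JSON files, whereas `zip -@` reads the file names from standard input. Below is a minimal sketch of the two variants, assuming an `exec()` helper that simply hands the command string to a shell (the repository's actual helper may also capture output) and a hypothetical channel id:

#include <cstdlib>
#include <string>

// Hypothetical stand-in for the project's exec() helper: just run the command in a shell.
void exec(const std::string& command)
{
    std::system(command.c_str());
}

int main()
{
    // Hypothetical values; the real code builds them from CHANNELS_DIRECTORY and the channel id.
    std::string channelToTreat = "UC_some_channel_id",
                channelToTreatDirectory = "channels/" + channelToTreat + "/";

    // Old command: the shell expands `*` into one argument per file, so with 248,868 files
    // the argument list exceeds ARG_MAX and `zip` is never even started.
    //exec("cd " + channelToTreatDirectory + " && zip -r ../" + channelToTreat + ".zip *");

    // New command: `ls` streams the file names through the pipe and `zip -@` reads them from
    // stdin, so the argument list stays tiny no matter how many files the channel contains.
    exec("cd " + channelToTreatDirectory + " && ls | zip ../" + channelToTreat + ".zip -@");

    return 0;
}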
main.cpp (22 changed lines)
@@ -13,7 +13,7 @@ using namespace chrono;
using json = nlohmann::json;

vector<string> getFileContent(string filePath);
json getJson(unsigned short threadId, string url, string directoryPath);
json getJson(unsigned short threadId, string url, string directoryPath, bool retryOnCommentsDisabled = false);
void createDirectory(string path),
     print(ostringstream* toPrint),
     treatComment(unsigned short threadId, json comment, string channelId),
@@ -49,13 +49,15 @@ int main()
    // To resume this algorithm after a shutdown, just restart it after deleting, in `CHANNELS_DIRECTORY`, the folders of the channels that were still being treated.
    // On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat`, or to `channelsAlreadyTreated` otherwise, before continuing, as if `CHANNELS_FILE_PATH` contained a **treated** starting set.
    vector<string> channelsVec = getFileContent(CHANNELS_FILE_PATH);
    // Note that using `set`s makes the search faster but we lose the `channels.txt` lines order.
    channelsToTreat = set(channelsVec.begin(), channelsVec.end());

    createDirectory(CHANNELS_DIRECTORY);

    for(const auto& entry : filesystem::directory_iterator(CHANNELS_DIRECTORY))
    {
        string channelId = entry.path().filename();
        string fileName = entry.path().filename(),
               channelId = fileName.substr(0, fileName.length() - 4);
        channelsToTreat.erase(channelId);
        channelsAlreadyTreated.insert(channelId);
    }
@@ -112,9 +114,14 @@ void treatChannels(unsigned short threadId)

        treatChannelOrVideo(threadId, true, channelToTreat, channelToTreat);

        // Note that compressing the most-subscribed French channel took 4 minutes and 42 seconds.
        PRINT(threadId, "Starting compression...")
        // As I haven't found any well-known library that easily compresses a directory, I have chosen to rely on the `zip` CLI.
        exec("cd " + channelToTreatDirectory + " && zip -r ../" + channelToTreat + ".zip *");
        exec("cd " + channelToTreatDirectory + " && ls | zip ../" + channelToTreat + ".zip -@");

        PRINT(threadId, "Compression finished, started deleting initial directory...")
        deleteDirectory(channelToTreatDirectory);
        PRINT(threadId, "Deleting directory finished.")

        PRINT(threadId, commentsCount << " comments were found for this channel.")
        commentsCount = 0;
@@ -132,7 +139,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
        ostringstream toString;
        toString << "commentThreads?part=snippet,replies&" << (isChannel ? "allThreadsRelatedToChannelId" : "videoId") << "=" << id << "&maxResults=100&pageToken=" << pageToken;
        string url = toString.str();
        json data = getJson(threadId, url, channelToTreat);
        json data = getJson(threadId, url, channelToTreat, pageToken != "");
        bool doesRelyingOnCommentThreadsIsEnough = (!isChannel) || data["error"]["errors"][0]["reason"] != "commentsDisabled";
        if(doesRelyingOnCommentThreadsIsEnough)
        {
@@ -323,7 +330,7 @@ vector<string> getFileContent(string filePath)
    return lines;
}

json getJson(unsigned short threadId, string url, string directoryPath)
json getJson(unsigned short threadId, string url, string directoryPath, bool retryOnCommentsDisabled)
{
#ifdef USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE
    string finalUrl = "https://yt.lemnoslife.com/noKey/" + url;
@@ -345,7 +352,10 @@ json getJson(unsigned short threadId, string url, string directoryPath)
    if(data.contains("error"))
    {
        PRINT(threadId, "Found error in JSON at URL: " << finalUrl << " for content: " << content << " !")
        return getJson(threadId, url, directoryPath);
        if(data["error"]["errors"][0]["reason"] != "commentsDisabled" || retryOnCommentsDisabled)
        {
            return getJson(threadId, url, directoryPath);
        }
    }

    ostringstream toString;
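To summarize how the two main.cpp hunks above interact, here is a minimal, self-contained sketch of the retry decision; `fetch()` and the `Response` struct are simplified stand-ins, not the repository's actual helpers. A commentsDisabled error on the first commentThreads page is genuine and must reach the caller so that it can fall back to the per-video method, while the same error in the middle of pagination is treated as the rare spurious answer mentioned in the commit title and is retried instead:

#include <iostream>
#include <string>

// Simplified stand-in for the HTTP layer; the real getJson() performs the request and
// parses the body with nlohmann::json.
struct Response { bool hasError = false; std::string reason; };
Response fetch(const std::string& url) { return {}; }

Response getJson(const std::string& url, bool retryOnCommentsDisabled = false)
{
    Response data = fetch(url);
    if(data.hasError)
    {
        // Every error is retried, except a commentsDisabled error when the caller has said
        // it could be genuine (first page of pagination, retryOnCommentsDisabled == false).
        if(data.reason != "commentsDisabled" || retryOnCommentsDisabled)
        {
            return getJson(url, retryOnCommentsDisabled);
        }
    }
    return data;
}

int main()
{
    std::string pageToken = "";
    // On the first page (pageToken == "") a commentsDisabled error is believed and triggers
    // the fallback method; on later pages it is considered spurious and retried.
    Response data = getJson("commentThreads?...&pageToken=" + pageToken, pageToken != "");
    if(data.hasError && data.reason == "commentsDisabled")
    {
        std::cout << "Comments are really disabled, switching to the per-video method.\n";
    }
    return 0;
}

The Python hunk below updates the companion cleanup script accordingly: since channels are now stored as archives, both `channelId`/ and `channelId`.zip may need to be removed before a restart.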
@@ -1,6 +1,6 @@
#!/usr/bin/python3

import shutil
import shutil, os

infix = ': Treating channel '
path = 'channels/'
@@ -18,8 +18,16 @@ with open('nohup.out') as f:
    for threadId in threads:
        channelId = threads[threadId]
        print(threadId, channelId)
        # There are three cases:
        # - `channelId`/ exists
        # - `channelId`/ and `channelId`.zip exist
        # - `channelId`.zip exists
        # To manage every case, we need to use two `try`/`except`.
        try:
            shutil.rmtree(path + channelId)
        except:
            pass
        try:
            os.remove(path + channelId + ".zip")
        except:
            pass