Move YouTube API requests logging to requests/ channel sub-folder

This commit is contained in:
Benjamin Loison 2023-02-10 20:17:49 +01:00
parent 3c4664a4b1
commit 8df226e2bc

View File

@ -16,7 +16,7 @@ enum getJsonBehavior { normal, retryOnCommentsDisabled, returnErrorIfPlaylistNot
set<string> setFromVector(vector<string> vec); set<string> setFromVector(vector<string> vec);
vector<string> getFileContent(string filePath); vector<string> getFileContent(string filePath);
json getJson(unsigned short threadId, string url, bool usingYouTubeDataApiV3, string directoryPath, getJsonBehavior behavior = normal); json getJson(unsigned short threadId, string url, bool usingYouTubeDataApiV3, string channelId, getJsonBehavior behavior = normal);
void createDirectory(string path), void createDirectory(string path),
print(ostringstream* toPrint), print(ostringstream* toPrint),
treatComment(unsigned short threadId, json comment, string channelId), treatComment(unsigned short threadId, json comment, string channelId),
@ -59,7 +59,8 @@ string CHANNELS_DIRECTORY = "channels/",
apiKey = "", // Will firstly be filled with `KEYS_FILE_PATH` first line. apiKey = "", // Will firstly be filled with `KEYS_FILE_PATH` first line.
YOUTUBE_OPERATIONAL_API_INSTANCE_URL = "http://localhost/YouTube-operational-API", // Can be "https://yt.lemnoslife.com" for instance. YOUTUBE_OPERATIONAL_API_INSTANCE_URL = "http://localhost/YouTube-operational-API", // Can be "https://yt.lemnoslife.com" for instance.
CAPTIONS_DIRECTORY = "captions/", CAPTIONS_DIRECTORY = "captions/",
DEBUG_DIRECTORY = "debug/"; DEBUG_DIRECTORY = "debug/",
YOUTUBE_API_REQUESTS_DIRECTORY = "requests/";
bool USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE = false; bool USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE = false;
int main(int argc, char *argv[]) int main(int argc, char *argv[])
@ -181,6 +182,7 @@ void treatChannels(unsigned short threadId)
createDirectory(channelToTreatDirectory); createDirectory(channelToTreatDirectory);
createDirectory(DEBUG_DIRECTORY); createDirectory(DEBUG_DIRECTORY);
createDirectory(channelToTreatDirectory + CAPTIONS_DIRECTORY); createDirectory(channelToTreatDirectory + CAPTIONS_DIRECTORY);
createDirectory(channelToTreatDirectory + YOUTUBE_API_REQUESTS_DIRECTORY);
treatChannelOrVideo(threadId, true, channelToTreat, channelToTreat); treatChannelOrVideo(threadId, true, channelToTreat, channelToTreat);
@ -562,45 +564,45 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
break; break;
} }
} }
} // Captions retrieval by relying on `yt-dlp` after having listed all videos ids of the given channel.
// Captions retrieval by relying on `yt-dlp` after having listed all videos ids of the given channel. string playlistToTreat = "UU" + channelToTreat.substr(2);
string playlistToTreat = "UU" + channelToTreat.substr(2); pageToken = "";
pageToken = ""; while(true)
while(true)
{
json data = getJson(threadId, "playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, true, channelToTreat, returnErrorIfPlaylistNotFound);
if(data.contains("error"))
{ {
EXIT_WITH_ERROR("Not listing captions on videos, as `playlistItems` hasn't found the `uploads` playlist!") json data = getJson(threadId, "playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, true, channelToTreat, returnErrorIfPlaylistNotFound);
} if(data.contains("error"))
json items = data["items"]; {
for(const auto& item : items) EXIT_WITH_ERROR("Not listing captions on videos, as `playlistItems` hasn't found the `uploads` playlist!")
{ }
string videoId = item["contentDetails"]["videoId"]; json items = data["items"];
// Could proceed as follows by verifying `!isChannel` but as we don't know how to manage unlisted videos, we don't proceed this way. for(const auto& item : items)
//treatChannelOrVideo(threadId, false, videoId, channelToTreat); {
string videoId = item["contentDetails"]["videoId"];
// Could proceed as follows by verifying `!isChannel` but as we don't know how to manage unlisted videos, we don't proceed this way.
//treatChannelOrVideo(threadId, false, videoId, channelToTreat);
string channelCaptionsToTreatDirectory = CHANNELS_DIRECTORY + channelToTreat + "/" + CAPTIONS_DIRECTORY + videoId + "/"; string channelCaptionsToTreatDirectory = CHANNELS_DIRECTORY + channelToTreat + "/" + CAPTIONS_DIRECTORY + videoId + "/";
createDirectory(channelCaptionsToTreatDirectory); createDirectory(channelCaptionsToTreatDirectory);
// Firstly download all not automatically generated captions. // Firstly download all not automatically generated captions.
// The underscore in `-o` argument is used to not end up with hidden files. // The underscore in `-o` argument is used to not end up with hidden files.
string cmdCommonPrefix = "yt-dlp --skip-download ", string cmdCommonPrefix = "yt-dlp --skip-download ",
cmdCommonPostfix = " '" + videoId + "' -o '" + channelCaptionsToTreatDirectory + "_'"; cmdCommonPostfix = " '" + videoId + "' -o '" + channelCaptionsToTreatDirectory + "_'";
string cmd = cmdCommonPrefix + "--all-subs" + cmdCommonPostfix; string cmd = cmdCommonPrefix + "--all-subs" + cmdCommonPostfix;
exec(threadId, cmd); exec(threadId, cmd);
// Secondly download the automatically generated captions. // Secondly download the automatically generated captions.
cmd = cmdCommonPrefix + "--write-auto-subs --sub-langs '.*orig' --sub-format ttml --convert-subs vtt" + cmdCommonPostfix; cmd = cmdCommonPrefix + "--write-auto-subs --sub-langs '.*orig' --sub-format ttml --convert-subs vtt" + cmdCommonPostfix;
exec(threadId, cmd); exec(threadId, cmd);
} }
if(data.contains("nextPageToken")) if(data.contains("nextPageToken"))
{ {
pageToken = data["nextPageToken"]; pageToken = data["nextPageToken"];
} }
else else
{ {
break; break;
}
} }
} }
} }
@ -734,7 +736,7 @@ vector<string> getFileContent(string filePath)
return lines; return lines;
} }
json getJson(unsigned short threadId, string url, bool usingYoutubeDataApiv3, string directoryPath, getJsonBehavior behavior) json getJson(unsigned short threadId, string url, bool usingYoutubeDataApiv3, string channelId, getJsonBehavior behavior)
{ {
string finalUrl = usingYoutubeDataApiv3 ? string finalUrl = usingYoutubeDataApiv3 ?
(USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE ? (USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE ?
@ -768,17 +770,17 @@ json getJson(unsigned short threadId, string url, bool usingYoutubeDataApiv3, st
PRINT("No more quota on " << apiKey << " switching to " << keys[0] << ".") PRINT("No more quota on " << apiKey << " switching to " << keys[0] << ".")
apiKey = keys[0]; apiKey = keys[0];
quotaMutex.unlock(); quotaMutex.unlock();
return getJson(threadId, url, true, directoryPath); return getJson(threadId, url, true, channelId);
} }
PRINT("Found error in JSON at URL: " << finalUrl << " for content: " << content << " !") PRINT("Found error in JSON at URL: " << finalUrl << " for content: " << content << " !")
if(reason != "commentsDisabled" || behavior == retryOnCommentsDisabled) if(reason != "commentsDisabled" || behavior == retryOnCommentsDisabled)
{ {
return reason == "playlistNotFound" && behavior == returnErrorIfPlaylistNotFound ? data : getJson(threadId, url, true, directoryPath); return reason == "playlistNotFound" && behavior == returnErrorIfPlaylistNotFound ? data : getJson(threadId, url, true, channelId);
} }
} }
ostringstream toString; ostringstream toString;
toString << CHANNELS_DIRECTORY << directoryPath << "/"; toString << CHANNELS_DIRECTORY << channelId << "/" << YOUTUBE_API_REQUESTS_DIRECTORY;
writeFile(threadId, toString.str() + "urls.txt", "a", url + " " + (usingYoutubeDataApiv3 ? "true" : "false") + "\n"); writeFile(threadId, toString.str() + "urls.txt", "a", url + " " + (usingYoutubeDataApiv3 ? "true" : "false") + "\n");
toString << requestsPerChannelThreads[threadId]++ << ".json"; toString << requestsPerChannelThreads[threadId]++ << ".json";
writeFile(threadId, toString.str(), "w", content); writeFile(threadId, toString.str(), "w", content);