Add logging to `exec` and make it crash-free, support the requests and captions folders when compressing, and handle captions cleanly for livestream videos and for videos whose id starts with `-`

This commit is contained in:
Benjamin Loison 2023-02-12 16:24:16 +01:00
parent 8cf5698051
commit 54fe40e588
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8

View File

@ -23,9 +23,9 @@ void createDirectory(string path),
treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, string channelToTreat),
treatChannels(unsigned short threadId),
deleteDirectory(string path),
addChannelToTreat(unsigned short threadId, string channelId);
addChannelToTreat(unsigned short threadId, string channelId),
exec(unsigned short threadId, string cmd, bool debug = true);
string getHttps(string url),
exec(unsigned short threadId, string cmd),
join(vector<string> parts, string delimiter);
size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp);
bool doesFileExist(string filePath),
@ -189,7 +189,9 @@ void treatChannels(unsigned short threadId)
// Note that compressing the French channel with the most subscribers took 4 minutes and 42 seconds.
PRINT("Starting compression...")
// As I haven't found any well-known library that easily compresses a directory, I have chosen to rely on the `zip` CLI.
exec(threadId, "cd " + channelToTreatDirectory + " && ls | zip ../" + channelToTreat + ".zip -@");
// We explicitly disable `debug`ging, as otherwise the zipping operation doesn't work as expected.
// As the zipping process isn't recursive, we can't just rely on `ls`; we have to use `find` instead.
exec(threadId, "cd " + channelToTreatDirectory + " && find | zip ../" + channelToTreat + ".zip -@", false);
PRINT("Compression finished, started deleting initial directory...")
deleteDirectory(channelToTreatDirectory);
@ -586,9 +588,10 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
// First, download all the captions that are not automatically generated.
// The underscore in the `-o` argument is used to avoid ending up with hidden files.
// We have to specify the video id after `--`, otherwise a video id starting with `-` would be treated as a command-line option.
string cmdCommonPrefix = "yt-dlp --skip-download ",
cmdCommonPostfix = " '" + videoId + "' -o '" + channelCaptionsToTreatDirectory + "_'";
string cmd = cmdCommonPrefix + "--all-subs" + cmdCommonPostfix;
cmdCommonPostfix = " -o '" + channelCaptionsToTreatDirectory + "_' -- " + videoId;
string cmd = cmdCommonPrefix + "--sub-lang all,-live_chat" + cmdCommonPostfix;
exec(threadId, cmd);
// Secondly download the automatically generated captions.
@ -655,25 +658,22 @@ string join(vector<string> parts, string delimiter)
return result;
}
string exec(unsigned short threadId, string cmd)
void exec(unsigned short threadId, string cmd, bool debug)
{
if(debug)
{
ostringstream toString;
toString << threadId;
string threadIdStr = toString.str(), debugCommonFilePath = DEBUG_DIRECTORY + threadIdStr;
cmd += " >> " + debugCommonFilePath + ".out";
cmd += " 2>> " + debugCommonFilePath + ".err";
array<char, 128> buffer;
string result;
unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd.c_str(), "r"), pclose);
if (!pipe)
{
throw runtime_error("popen() failed!");
string initialCmd = cmd,
threadIdStr = toString.str(),
debugCommonFilePath = DEBUG_DIRECTORY + threadIdStr,
debugOutFilePath = debugCommonFilePath + ".out",
debugErrFilePath = debugCommonFilePath + ".err";
cmd += " >> " + debugOutFilePath;
cmd += " 2>> " + debugErrFilePath;
cmd += "; echo \"" + initialCmd + "\" | tee -a " + debugOutFilePath + " " + debugErrFilePath;
}
while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr)
{
result += buffer.data();
}
return result;
system(cmd.c_str());
}
bool writeFile(unsigned short threadId, string filePath, string option, string toWrite)