13 Commits

SHA1 Message Date
10e8811817 Introduce {,MAIN_}EXIT_WITH_ERROR macros for exiting with an error 2023-01-22 15:17:14 +01:00
0f15bb0235 #11: Add the discovery of channels having commented on ended livestreams 2023-01-22 15:15:27 +01:00
bdb4e6443a #11: Add current livestreams support to discover channels 2023-01-22 04:00:11 +01:00
d2391e5d54 Instead of looping on items where we expect only one, just use items[0] 2023-01-22 02:19:26 +01:00
993d0b9771 Make PRINT not require specifying threadId 2023-01-22 02:04:03 +01:00
0fcb5a0426 #11: Treat COMMUNITY post comments to discover channels 2023-01-22 01:37:32 +01:00
57200da482 Add in README.md the fact that, as documented in #30, this algorithm is only known to be working fine on Linux 2023-01-21 22:20:45 +01:00
a0880c79bb #11: Update channel CHANNELS tab treatment following YouTube-operational-API/issues/121 closure 2023-01-21 02:24:42 +01:00
10c5c1d605 #11: Add the treatment of channels' tabs, but postpone for now the unlisted videos treatment 2023-01-15 14:56:44 +01:00
51a70f6e54 #7: Make commentsCount and requestsPerChannel compatible with multithreading 2023-01-15 14:31:55 +01:00
aa97c94bf8 #11: Add a first iteration for the CHANNELS retrieval 2023-01-15 02:19:31 +01:00
d1b84335d1 #11: Add --youtube-operational-api-instance-url parameter and use exit(EXIT_{SUCCESS, FAILURE}) instead of exit({0, 1}) 2023-01-15 00:49:32 +01:00
6ce29051c0 Fix #26: Keep efficient search algorithm while keeping order (notably of the starting set) 2023-01-14 15:14:24 +01:00
3 changed files with 377 additions and 64 deletions

Makefile
View File

@@ -1,4 +1,4 @@
.PHONY: main
main:
g++ main.cpp -g -std=c++17 -lcurl -lpthread -o main
g++ main.cpp -g -std=c++17 -lcurl -lpthread -o youtubeCaptionsSearchEngine

README.md
View File

@@ -12,8 +12,15 @@ As would like to proceed channel per channel, the question is **how much time do
We have to proceed with a breadth-first search approach, as treating all *child* channels might take time equivalent to treating the whole original tree (a minimal sketch of this traversal follows this section).
Because of [the current compression mechanism](https://gitea.lemnoslife.com/Benjamin_Loison/YouTube_captions_search_engine/issues/30), Linux is the only known OS able to run this algorithm.
```sh
sudo apt install nlohmann-json3-dev
make
./main
```
Unless you provide the argument `--youtube-operational-api-instance-url https://yt.lemnoslife.com`, you have [to host your own instance of the YouTube operational API](https://github.com/Benjamin-Loison/YouTube-operational-API/#install-your-own-instance-of-the-api).
```sh
./youtubeCaptionsSearchEngine
```
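
For intuition, here is a minimal, self-contained sketch of that breadth-first traversal; `fetchRelatedChannels` is a hypothetical stand-in for the API calls the real program performs (commenters, featured channels...), not a function of this repository.

```cpp
#include <algorithm>
#include <iostream>
#include <set>
#include <string>
#include <vector>

using namespace std;

// Hypothetical stand-in for the API-based discovery; the real program fills
// this from YouTube API responses.
vector<string> fetchRelatedChannels(const string& channelId)
{
    return {};
}

int main()
{
    // A FIFO queue plus a treated set, mirroring `channelsToTreat*` and
    // `channelsAlreadyTreated` in main.cpp.
    vector<string> channelsToTreat = {"UC..."}; // The starting set from `channels.txt`.
    set<string> channelsAlreadyTreated;
    while(!channelsToTreat.empty())
    {
        string channelToTreat = channelsToTreat.front();
        channelsToTreat.erase(channelsToTreat.begin());
        channelsAlreadyTreated.insert(channelToTreat);
        for(const string& channelId : fetchRelatedChannels(channelToTreat))
        {
            // Breadth-first: enqueue only channels never seen before.
            if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end()
               && find(channelsToTreat.begin(), channelsToTreat.end(), channelId) == channelsToTreat.end())
            {
                channelsToTreat.push_back(channelId);
            }
        }
        cout << "Treated " << channelToTreat << endl;
    }
    return 0;
}
```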

main.cpp
View File

@@ -16,37 +16,48 @@ enum getJsonBehavior { normal, retryOnCommentsDisabled, returnErrorIfPlaylistNot
set<string> setFromVector(vector<string> vec);
vector<string> getFileContent(string filePath);
json getJson(unsigned short threadId, string url, string directoryPath, getJsonBehavior behavior = normal);
json getJson(unsigned short threadId, string url, bool usingYouTubeDataApiV3, string directoryPath, getJsonBehavior behavior = normal);
void createDirectory(string path),
print(ostringstream* toPrint),
treatComment(unsigned short threadId, json comment, string channelId),
treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, string channelToTreat),
treatChannels(unsigned short threadId),
deleteDirectory(string path);
deleteDirectory(string path),
addChannelToTreat(unsigned short threadId, string channelId);
string getHttps(string url),
exec(string cmd);
exec(string cmd),
join(vector<string> parts, string delimiter);
size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp);
bool doesFileExist(string filePath),
writeFile(unsigned short threadId, string filePath, string option, string toWrite);
#define PRINT(threadId, x) { ostringstream toPrint; toPrint << threadId << ": " << x; print(&toPrint); }
#define THREAD_PRINT(threadId, x) { ostringstream toPrint; toPrint << threadId << ": " << x; print(&toPrint); }
#define PRINT(x) THREAD_PRINT(threadId, x)
#define DEFAULT_THREAD_ID 0
#define MAIN_PRINT(x) PRINT(DEFAULT_THREAD_ID, x)
#define MAIN_PRINT(x) THREAD_PRINT(DEFAULT_THREAD_ID, x)
#define EXIT_WITH_ERROR(x) { PRINT(x); exit(EXIT_FAILURE); }
#define MAIN_EXIT_WITH_ERROR(x) { MAIN_PRINT(x); exit(EXIT_FAILURE); }
mutex printMutex,
channelsAlreadyTreatedAndToTreatMutex,
quotaMutex;
set<string> channelsAlreadyTreated,
channelsToTreat;
set<string> channelsAlreadyTreated;
// Two `map`s to simulate a bidirectional map.
map<unsigned int, string> channelsToTreat;
map<string, unsigned int> channelsToTreatRev;
vector<string> keys;
unsigned int commentsCount = 0,
commentsPerSecondCount = 0,
requestsPerChannel = 0;
unsigned int commentsPerSecondCount = 0;
map<unsigned short, unsigned int> commentsCountThreads,
requestsPerChannelThreads;
unsigned short THREADS_NUMBER = 1;
// Use `string` variables instead of macros to have `string` properties, even if we could use a meta-macro inlining them as `string`s.
string CHANNELS_DIRECTORY = "channels/",
CHANNELS_FILE_PATH = "channels.txt",
KEYS_FILE_PATH = "keys.txt",
apiKey = ""; // Will firstly be filled with `KEYS_FILE_PATH` first line.
UNLISTED_VIDEOS_FILE_PATH = "unlistedVideos.txt",
apiKey = "", // Will firstly be filled with `KEYS_FILE_PATH` first line.
YOUTUBE_OPERATIONAL_API_INSTANCE_URL = "http://localhost/YouTube-operational-API"; // Can be "https://yt.lemnoslife.com" for instance.
bool USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE = false;
int main(int argc, char *argv[])
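
A small sketch of how the reworked print macros resolve: `PRINT(x)` now expands to `THREAD_PRINT(threadId, x)` and therefore relies on a variable named `threadId` existing in the caller's scope. The `print` body here is a simplified stand-in for the program's helper.

```cpp
#include <cstdlib>
#include <iostream>
#include <mutex>
#include <sstream>

using namespace std;

mutex printMutex;

// Simplified stand-in for the program's `print` helper.
void print(ostringstream* toPrint)
{
    lock_guard<mutex> lock(printMutex); // Serializes output between threads.
    cout << toPrint->str() << endl;
}

#define THREAD_PRINT(threadId, x) { ostringstream toPrint; toPrint << threadId << ": " << x; print(&toPrint); }
// `PRINT(x)` relies on a local variable named `threadId` being in scope.
#define PRINT(x) THREAD_PRINT(threadId, x)
#define DEFAULT_THREAD_ID 0
#define MAIN_PRINT(x) THREAD_PRINT(DEFAULT_THREAD_ID, x)
#define EXIT_WITH_ERROR(x) { PRINT(x); exit(EXIT_FAILURE); }

void worker(unsigned short threadId)
{
    PRINT("hello") // Prints `3: hello` when called with threadId == 3.
}

int main()
{
    worker(3);
    MAIN_PRINT("main thread") // Prints `0: main thread`.
    return 0;
}
```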
@@ -64,22 +75,37 @@ int main(int argc, char *argv[])
}
else if(argvStr == "-h" || argvStr == "--help")
{
MAIN_PRINT("Usage: " << argv[0] << " [--help/-h] [--no-keys] [--threads=N]")
exit(0);
MAIN_PRINT("Usage: " << argv[0] << " [--help/-h] [--no-keys] [--threads=N] [--youtube-operational-api-instance-url URL]")
exit(EXIT_SUCCESS);
}
else if(argvStr == "--youtube-operational-api-instance-url")
{
if(argvIndex < argc - 1)
{
YOUTUBE_OPERATIONAL_API_INSTANCE_URL = string(argv[argvIndex + 1]);
argvIndex++;
}
else
{
MAIN_EXIT_WITH_ERROR("YouTube operational API instance URL missing!")
}
}
else
{
MAIN_PRINT("Unrecognized parameter " << argvStr)
exit(1);
MAIN_EXIT_WITH_ERROR("Unrecognized parameter " << argvStr)
}
}
// The starting set should be written to `CHANNELS_FILE_PATH`.
// To resume this algorithm after a shutdown, just restart it after having deleted the channel folders in `CHANNELS_DIRECTORY` that were still being treated.
// On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat` or `channelsToTreat` otherwise before continuing, as if `CHANNELS_FILE_PATH` was containing a **treated** starting set.
// On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat*`, or to `channelsAlreadyTreated` otherwise, before continuing, as if `CHANNELS_FILE_PATH` contained a **treated** starting set.
vector<string> channelsVec = getFileContent(CHANNELS_FILE_PATH);
// Note that using `set`s makes the search faster but we lose the `channels.txt` lines order.
channelsToTreat = setFromVector(channelsVec);
for(unsigned int channelsVecIndex = 0; channelsVecIndex < channelsVec.size(); channelsVecIndex++)
{
string channel = channelsVec[channelsVecIndex];
channelsToTreat[channelsVecIndex] = channel;
channelsToTreatRev[channel] = channelsVecIndex;
}
keys = getFileContent(KEYS_FILE_PATH);
apiKey = keys[0];
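
The `channelsToTreat`/`channelsToTreatRev` pair works as a hand-rolled bidirectional map: the forward `map` preserves the `channels.txt` insertion order (keys iterate ascending), while the reverse one gives fast membership tests. A minimal sketch of the invariant, with the mutex omitted:

```cpp
#include <cassert>
#include <map>
#include <string>

using namespace std;

// Forward map: insertion index -> channel ID (preserves order).
map<unsigned int, string> channelsToTreat;
// Reverse map: channel ID -> insertion index (O(log n) membership tests).
map<string, unsigned int> channelsToTreatRev;

void insertChannel(const string& channelId)
{
    // Next index: one past the current maximum key (0 when empty).
    unsigned int index = channelsToTreat.empty() ? 0 : channelsToTreat.rbegin()->first + 1;
    channelsToTreat[index] = channelId;
    channelsToTreatRev[channelId] = index;
}

void eraseChannel(const string& channelId)
{
    // Both maps must always be updated together to keep them in sync.
    channelsToTreat.erase(channelsToTreatRev[channelId]);
    channelsToTreatRev.erase(channelId);
}

int main()
{
    insertChannel("UCa");
    insertChannel("UCb");
    eraseChannel("UCa");
    assert(channelsToTreat.begin()->second == "UCb");
    return 0;
}
```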
@@ -90,7 +116,10 @@ int main(int argc, char *argv[])
{
string fileName = entry.path().filename(),
channelId = fileName.substr(0, fileName.length() - 4);
channelsToTreat.erase(channelId);
channelsToTreat.erase(channelsToTreatRev[channelId]);
channelsToTreatRev.erase(channelId);
channelsAlreadyTreated.insert(channelId);
}
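
A stand-alone sketch of this restart scan, assuming a `channels/` directory of `<channelId>.zip` archives and C++17 `<filesystem>` (consistent with the `-std=c++17` build flag):

```cpp
#include <filesystem>
#include <iostream>
#include <set>
#include <string>

using namespace std;

int main()
{
    set<string> channelsAlreadyTreated;
    // Assumes a `channels/` directory containing `<channelId>.zip` archives.
    for(const auto& entry : filesystem::directory_iterator("channels/"))
    {
        string fileName = entry.path().filename(),
               // Strips the 4-character `.zip` extension to recover the channel ID.
               channelId = fileName.substr(0, fileName.length() - 4);
        channelsAlreadyTreated.insert(channelId);
    }
    cout << channelsAlreadyTreated.size() << " channels already treated." << endl;
    return 0;
}
```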
@@ -132,11 +161,16 @@ void treatChannels(unsigned short threadId)
continue;
}
string channelToTreat = *channelsToTreat.begin();
string channelToTreat = channelsToTreat.begin()->second;
PRINT(threadId, "Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")
PRINT("Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")
commentsCountThreads[threadId] = 0;
requestsPerChannelThreads[threadId] = 0;
channelsToTreat.erase(channelsToTreatRev[channelToTreat]);
channelsToTreatRev.erase(channelToTreat);
channelsToTreat.erase(channelToTreat);
channelsAlreadyTreated.insert(channelToTreat);
channelsAlreadyTreatedAndToTreatMutex.unlock();
@@ -147,22 +181,21 @@ void treatChannels(unsigned short threadId)
treatChannelOrVideo(threadId, true, channelToTreat, channelToTreat);
// Note that compressing the most-subscribed French channel took 4 minutes and 42 seconds.
PRINT(threadId, "Starting compression...")
PRINT("Starting compression...")
// As I haven't found any well-known library that easily compresses a directory, I have chosen to rely on the `zip` CLI.
exec("cd " + channelToTreatDirectory + " && ls | zip ../" + channelToTreat + ".zip -@");
PRINT(threadId, "Compression finished, started deleting initial directory...")
PRINT("Compression finished, started deleting initial directory...")
deleteDirectory(channelToTreatDirectory);
PRINT(threadId, "Deleting directory finished.")
PRINT("Deleting directory finished.")
PRINT(threadId, commentsCount << " comments were found for this channel.")
commentsCount = 0;
requestsPerChannel = 0;
PRINT(commentsCountThreads[threadId] << " comments were found for this channel.")
}
channelsAlreadyTreatedAndToTreatMutex.unlock();
}
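
The `exec` helper invoked above for the `zip` CLI isn't part of this diff; here is a plausible POSIX `popen`-based sketch (an assumption, not necessarily the repository's exact implementation — which matters little, since the README only supports Linux anyway):

```cpp
#include <array>
#include <cstdio>
#include <stdexcept>
#include <string>

using namespace std;

// Hypothetical sketch of `exec`: runs `cmd` through the shell and returns its
// standard output.
string exec(string cmd)
{
    array<char, 128> buffer;
    string result;
    FILE* pipe = popen(cmd.c_str(), "r");
    if(!pipe)
    {
        throw runtime_error("popen() failed!");
    }
    while(fgets(buffer.data(), buffer.size(), pipe) != nullptr)
    {
        result += buffer.data();
    }
    pclose(pipe);
    return result;
}
```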
// We have to pay attention not to recursively call this function with another channel, otherwise we break the program's ability to halt at any top-level channel.
void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, string channelToTreat)
{
string pageToken = "";
@@ -171,7 +204,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
ostringstream toString;
toString << "commentThreads?part=snippet,replies&" << (isChannel ? "allThreadsRelatedToChannelId" : "videoId") << "=" << id << "&maxResults=100&pageToken=" << pageToken;
string url = toString.str();
json data = getJson(threadId, url, channelToTreat, pageToken == "" ? normal : retryOnCommentsDisabled);
json data = getJson(threadId, url, true, channelToTreat, pageToken == "" ? normal : retryOnCommentsDisabled);
bool doesRelyingOnCommentThreadsIsEnough = (!isChannel) || data["error"]["errors"][0]["reason"] != "commentsDisabled";
if(doesRelyingOnCommentThreadsIsEnough)
{
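
The new boolean argument passed to `getJson` decides which backend a relative URL is sent to; a condensed sketch of the dispatch implemented in `getJson` further below:

```cpp
#include <iostream>
#include <string>

using namespace std;

// Sketch of the URL dispatch added by the `usingYouTubeDataApiV3` parameter.
string finalUrl(string url, bool usingYouTubeDataApiV3, bool useNoKeyService, string apiKey, string instanceUrl)
{
    return usingYouTubeDataApiV3 ?
        (useNoKeyService ?
            "https://yt.lemnoslife.com/noKey/" + url :
            "https://www.googleapis.com/youtube/v3/" + url + "&key=" + apiKey) :
        instanceUrl + "/" + url;
}

int main()
{
    // YouTube Data API v3 request with a personal key:
    cout << finalUrl("commentThreads?part=snippet&videoId=ID", true, false, "KEY", "http://localhost/YouTube-operational-API") << endl;
    // YouTube operational API request:
    cout << finalUrl("channels?part=community&id=ID", false, false, "KEY", "http://localhost/YouTube-operational-API") << endl;
    return 0;
}
```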
@@ -188,7 +221,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
string pageToken = "";
while(true)
{
json data = getJson(threadId, "comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, channelToTreat),
json data = getJson(threadId, "comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, true, channelToTreat),
items = data["items"];
for(const auto& item : items)
{
@@ -225,11 +258,11 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
}
else
{
PRINT(threadId, "Comments disabled channel, treating differently...")
json data = getJson(threadId, "channels?part=statistics&id=" + channelToTreat, channelToTreat);
PRINT("Comments disabled channel, treating differently...")
json data = getJson(threadId, "channels?part=statistics&id=" + channelToTreat, true, channelToTreat);
// The YouTube Data API v3 Channels: list endpoint returns `videoCount` as a string and not an integer...
unsigned int videoCount = atoi(string(data["items"][0]["statistics"]["videoCount"]).c_str());
PRINT(threadId, "The channel has about " << videoCount << " videos.")
PRINT("The channel has about " << videoCount << " videos.")
// `UC-3A9g4U1PpLaeAuD4jSP_w` has a `videoCount` of 2, while its `uploads` playlist contains 3 videos. So we use a strict inequality here.
if(0 < videoCount && videoCount < 20000)
{
@@ -238,11 +271,10 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
while(true)
{
// `snippet` and `status` are unneeded `part`s here but may be interesting later, as we log them.
json data = getJson(threadId, "playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, channelToTreat, returnErrorIfPlaylistNotFound);
json data = getJson(threadId, "playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, true, channelToTreat, returnErrorIfPlaylistNotFound);
if(data.contains("error"))
{
PRINT(threadId, "Not listing comments on videos, as `playlistItems` hasn't found the `uploads` playlist!")
exit(1);
EXIT_WITH_ERROR("Not listing comments on videos, as `playlistItems` hasn't found the `uploads` playlist!")
}
json items = data["items"];
for(const auto& item : items)
@@ -265,16 +297,284 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
}
else if(videoCount == 0)
{
PRINT(threadId, "Skip listing comments on videos, as they shouldn't be any according to `channels?part=statistics`.")
PRINT("Skip listing comments on videos, as they shouldn't be any according to `channels?part=statistics`.")
break;
}
else //if(videoCount >= 20000)
{
PRINT(threadId, "The videos count of the channel exceeds the supported 20,000 limit!")
exit(1);
EXIT_WITH_ERROR("The videos count of the channel exceeds the supported 20,000 limit!")
}
}
}
if(isChannel)
{
// `CHANNELS`
string pageToken = "";
while(true)
{
json data = getJson(threadId, "channels?part=channels&id=" + id + (pageToken == "" ? "" : "&pageToken=" + pageToken), false, id),
channelSections = data["items"][0]["channelSections"];
for(const auto& channelSection : channelSections)
{
for(const auto& sectionChannel : channelSection["sectionChannels"])
{
string channelId = sectionChannel["channelId"];
addChannelToTreat(threadId, channelId);
}
}
if(channelSections.size() == 1)
{
json channelSection = channelSections[0];
if(!channelSection["nextPageToken"].is_null())
{
pageToken = channelSection["nextPageToken"];
}
else
{
break;
}
}
else
{
break;
}
}
// `COMMUNITY`
pageToken = "";
while(true)
{
json data = getJson(threadId, "channels?part=community&id=" + id + (pageToken == "" ? "" : "&pageToken=" + pageToken), false, id);
data = data["items"][0];
json posts = data["community"];
for(const auto& post : posts)
{
string postId = post["id"];
json data = getJson(threadId, "community?part=snippet&id=" + postId + "&order=time", false, id);
string pageToken = data["items"][0]["snippet"]["comments"]["nextPageToken"];
while(pageToken != "")
{
json data = getJson(threadId, "commentThreads?part=snippet,replies&pageToken=" + pageToken, false, id),
items = data["items"];
for(const auto& item : items)
{
json snippet = item["snippet"]["topLevelComment"]["snippet"];
string channelId = snippet["authorChannelId"]["value"];
addChannelToTreat(threadId, channelId);
string pageToken = snippet["nextPageToken"];
while(pageToken != "")
{
json data = getJson(threadId, "commentThreads?part=snippet,replies&pageToken=" + pageToken, false, id),
items = data["items"];
for(const auto& item : items)
{
string channelId = item["snippet"]["authorChannelId"]["value"];
addChannelToTreat(threadId, channelId);
}
if(data.contains("nextPageToken"))
{
pageToken = data["nextPageToken"];
}
else
{
break;
}
}
}
if(data.contains("nextPageToken"))
{
pageToken = data["nextPageToken"];
}
else
{
break;
}
}
}
if(data.contains("nextPageToken"))
{
pageToken = data["nextPageToken"];
}
if(pageToken == "")
{
break;
}
}
// `PLAYLISTS`
pageToken = "";
while(true)
{
json data = getJson(threadId, "channels?part=playlists&id=" + id + (pageToken == "" ? "" : "&pageToken=" + pageToken), false, id),
playlistSections = data["items"][0]["playlistSections"];
for(const auto& playlistSection : playlistSections)
{
for(const auto& playlist : playlistSection["playlists"])
{
string playlistId = playlist["id"];
//PRINT(threadId, playlistId)
string pageToken = "";
while(true)
{
json data = getJson(threadId, "playlistItems?part=contentDetails,snippet,status&playlistId=" + playlistId + "&maxResults=50&pageToken=" + pageToken, true, id),
items = data["items"];
for(const auto& item : items)
{
json snippet = item["snippet"];
string privacyStatus = item["status"]["privacyStatus"];
// `5-CXVU8si3A` in `PLTYUE9O6WCrjQsnOm56rMMNmFy_A-SjUx` has its privacy status on `privacyStatusUnspecified` and is inaccessible.
// `GMiVi8xkEXA` in `PLTYUE9O6WCrgNpeSiryP8LYVX-7tOJ1f1` has its privacy status on `private`.
// Of course `commentThreads?videoId=` doesn't work for these videos (same result on YouTube UI).
// Under the hypothesis that the discovery algorithm never ends, we can't postpone the treatment of these unlisted videos, because we can find such unlisted videos at any point in time (before or after the given channel's treatment).
// Maybe modifying this hypothesis would make sense; otherwise we have to treat them right away (note that, code architecture aside, there is no recursion problem, as documented on this function).
if(privacyStatus != "public" && privacyStatus != "private" && snippet["title"] != "Deleted video")
{
string videoId = snippet["resourceId"]["videoId"],
channelId = snippet["videoOwnerChannelId"];
PRINT("Found non public video (" << videoId << ") in: " << playlistId)
string channelUnlistedVideosFilePath = CHANNELS_DIRECTORY + UNLISTED_VIDEOS_FILE_PATH;
bool doesChannelUnlistedVideosFileExist = doesFileExist(channelUnlistedVideosFilePath);
writeFile(threadId, channelUnlistedVideosFilePath, !doesChannelUnlistedVideosFileExist ? "w" : "a", (!doesChannelUnlistedVideosFileExist ? "" : "\n") + channelId);
}
if(snippet.contains("videoOwnerChannelId"))
{
// There isn't any `videoOwnerChannelId` to retrieve for `5-CXVU8si3A` for instance.
string channelId = snippet["videoOwnerChannelId"];
if(channelId != id)
{
addChannelToTreat(threadId, channelId);
}
}
}
if(data.contains("nextPageToken"))
{
pageToken = data["nextPageToken"];
}
else
{
break;
}
}
}
}
if(!data["nextPageToken"].is_null())
{
pageToken = data["nextPageToken"];
}
else
{
break;
}
}
// `LIVES`
pageToken = "";
string playlistId = "UU" + id.substr(2);
vector<string> videoIds;
while(true)
{
json data = getJson(threadId, "playlistItems?part=contentDetails,snippet,status&playlistId=" + playlistId + "&maxResults=50&pageToken=" + pageToken, true, id, returnErrorIfPlaylistNotFound),
items = data["items"];
for(const auto& item : items)
{
string videoId = item["snippet"]["resourceId"]["videoId"];
videoIds.push_back(videoId);
}
bool hasNextPageToken = data.contains("nextPageToken");
if(videoIds.size() == 50 || !hasNextPageToken)
{
json data = getJson(threadId, "videos?part=contentDetails,id,liveStreamingDetails,localizations,player,snippet,statistics,status,topicDetails&id=" + join(videoIds, ","), true, id),
items = data["items"];
for(const auto& item : items)
{
if(item.contains("liveStreamingDetails"))
{
string videoId = item["id"];
//PRINT(videoId)
json liveStreamingDetails = item["liveStreamingDetails"];
if(liveStreamingDetails.contains("activeLiveChatId"))
{
string activeLiveChatId = liveStreamingDetails["activeLiveChatId"];
json data = getJson(threadId, "liveChat/messages?part=snippet,authorDetails&liveChatId=" + activeLiveChatId, true, id),
items = data["items"];
for(const auto& item : items)
{
string channelId = item["snippet"]["authorChannelId"];
addChannelToTreat(threadId, channelId);
}
}
else
{
// As the usual pagination mechanism isn't available for these ended livestreams, we proceed in an uncertain way as follows.
set<string> messageIds;
unsigned long long lastMessageTimestampRelativeMsec = 0;
while(true)
{
string time = to_string(lastMessageTimestampRelativeMsec);
json data = getJson(threadId, "liveChats?part=snippet&id=" + videoId + "&time=" + time, false, id),
snippet = data["items"][0]["snippet"];
if(snippet.empty())
{
break;
}
json firstMessage = snippet[0];
string firstMessageId = firstMessage["id"];
// We make sure that we don't skip any message by checking that, if we have already treated some messages, the first message of this batch was among them.
if(!messageIds.empty() && messageIds.find(firstMessageId) == messageIds.end())
{
EXIT_WITH_ERROR("The verification that we don't skip any message failed!")
}
for(const auto& message : snippet)
{
string messageId = message["id"];
if(messageIds.find(messageId) == messageIds.end())
{
messageIds.insert(messageId);
string channelId = message["authorChannelId"];
addChannelToTreat(threadId, channelId);
}
}
json lastMessage = snippet.back();
// If there isn't any new message, then we stop the retrieving.
if(lastMessageTimestampRelativeMsec == lastMessage["videoOffsetTimeMsec"])
{
break;
}
lastMessageTimestampRelativeMsec = lastMessage["videoOffsetTimeMsec"];
}
}
}
}
videoIds.clear();
}
if(hasNextPageToken)
{
pageToken = data["nextPageToken"];
}
else
{
break;
}
}
}
}
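
All the tab treatments above (`CHANNELS`, `COMMUNITY`, `PLAYLISTS`, `LIVES`) share the same pagination idiom; isolated, with a trivial stub standing in for `getJson`, it looks like this:

```cpp
#include <iostream>
#include <string>
#include <nlohmann/json.hpp> // Provided by the `nlohmann-json3-dev` package.

using json = nlohmann::json;
using namespace std;

// Trivial stub standing in for the program's `getJson`: a single empty page.
json getJsonStub(string url)
{
    return json{{"items", json::array()}};
}

// The recurring pattern: request a page, treat its items, then follow
// `nextPageToken` until it is absent.
void treatAllPages(string baseUrl)
{
    string pageToken = "";
    while(true)
    {
        json data = getJsonStub(baseUrl + (pageToken == "" ? "" : "&pageToken=" + pageToken));
        for(const auto& item : data["items"])
        {
            cout << item << endl; // Treat `item`, e.g. extract an author channel ID.
        }
        if(data.contains("nextPageToken"))
        {
            pageToken = data["nextPageToken"];
        }
        else
        {
            break;
        }
    }
}

int main()
{
    treatAllPages("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=UC...&maxResults=100");
    return 0;
}
```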
// This function verifies that the given channel hasn't already been treated before queueing it.
void addChannelToTreat(unsigned short threadId, string channelId)
{
channelsAlreadyTreatedAndToTreatMutex.lock();
if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreatRev.find(channelId) == channelsToTreatRev.end())
{
// `map::end()` mustn't be dereferenced, so the next index is one past the current maximum key (`rbegin`), or 0 when the map is empty.
unsigned int channelsToTreatIndex = channelsToTreat.empty() ? 0 : channelsToTreat.rbegin()->first + 1;
channelsToTreat[channelsToTreatIndex] = channelId;
channelsToTreatRev[channelId] = channelsToTreatIndex;
channelsAlreadyTreatedAndToTreatMutex.unlock();
writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);
}
else
{
channelsAlreadyTreatedAndToTreatMutex.unlock();
}
}
void treatComment(unsigned short threadId, json comment, string channelId)
@@ -284,21 +584,25 @@ void treatComment(unsigned short threadId, json comment, string channelId)
if(snippet.contains("authorChannelId"))
{
string channelId = snippet["authorChannelId"]["value"];
channelsAlreadyTreatedAndToTreatMutex.lock();
if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreat.find(channelId) == channelsToTreat.end())
{
channelsToTreat.insert(channelId);
channelsAlreadyTreatedAndToTreatMutex.unlock();
writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);
}
else
{
channelsAlreadyTreatedAndToTreatMutex.unlock();
}
addChannelToTreat(threadId, channelId);
commentsCount++;
commentsCountThreads[threadId]++;
commentsPerSecondCount++;
}
}
string join(vector<string> parts, string delimiter)
{
string result = "";
unsigned int partsSize = parts.size();
for(unsigned int partsIndex = 0; partsIndex < partsSize; partsIndex++)
{
result += parts[partsIndex];
if(partsIndex < partsSize - 1)
{
result += delimiter;
}
}
return result;
}
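
For instance, `join` builds the comma-separated `id` parameter of the `videos?...&id=` batch request used in the `LIVES` treatment above; a quick usage check:

```cpp
#include <cassert>
#include <string>
#include <vector>

using namespace std;

// Copy of the `join` helper added by this diff.
string join(vector<string> parts, string delimiter)
{
    string result = "";
    unsigned int partsSize = parts.size();
    for(unsigned int partsIndex = 0; partsIndex < partsSize; partsIndex++)
    {
        result += parts[partsIndex];
        if(partsIndex < partsSize - 1)
        {
            result += delimiter;
        }
    }
    return result;
}

int main()
{
    // As used above: "videos?...&id=" + join(videoIds, ",").
    assert(join({"videoA", "videoB", "videoC"}, ",") == "videoA,videoB,videoC");
    assert(join({}, ",") == ""); // The loop never runs on an empty vector.
    return 0;
}
```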
string exec(string cmd)
@@ -328,7 +632,7 @@ bool writeFile(unsigned short threadId, string filePath, string option, string t
}
else
{
PRINT(threadId, "writeFile error: " << strerror(errno))
PRINT("writeFile error: " << strerror(errno))
}
return false;
}
@@ -377,10 +681,13 @@ vector<string> getFileContent(string filePath)
return lines;
}
json getJson(unsigned short threadId, string url, string directoryPath, getJsonBehavior behavior)
json getJson(unsigned short threadId, string url, bool usingYouTubeDataApiV3, string directoryPath, getJsonBehavior behavior)
{
string finalUrl = USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE ? "https://yt.lemnoslife.com/noKey/" + url :
"https://www.googleapis.com/youtube/v3/" + url + "&key=" + apiKey,
string finalUrl = usingYouTubeDataApiV3 ?
(USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE ?
"https://yt.lemnoslife.com/noKey/" + url :
"https://www.googleapis.com/youtube/v3/" + url + "&key=" + apiKey) :
YOUTUBE_OPERATIONAL_API_INSTANCE_URL + "/" + url,
content = getHttps(finalUrl);
json data;
try
@@ -389,8 +696,7 @@ json getJson(unsigned short threadId, string url, string directoryPath, getJsonB
}
catch (json::parse_error& ex)
{
PRINT(threadId, "Parse error for " << finalUrl << ", as got: " << content << " !")
exit(1);
EXIT_WITH_ERROR("Parse error for " << finalUrl << ", as got: " << content << " !")
}
if(data.contains("error"))
@@ -402,21 +708,21 @@ json getJson(unsigned short threadId, string url, string directoryPath, getJsonB
quotaMutex.lock();
keys.erase(keys.begin());
keys.push_back(apiKey);
PRINT(threadId, "No more quota on " << apiKey << " switching to " << keys[0] << ".")
PRINT("No more quota on " << apiKey << " switching to " << keys[0] << ".")
apiKey = keys[0];
quotaMutex.unlock();
return getJson(threadId, url, directoryPath);
return getJson(threadId, url, true, directoryPath);
}
PRINT(threadId, "Found error in JSON at URL: " << finalUrl << " for content: " << content << " !")
PRINT("Found error in JSON at URL: " << finalUrl << " for content: " << content << " !")
if(reason != "commentsDisabled" || behavior == retryOnCommentsDisabled)
{
return reason == "playlistNotFound" && behavior == returnErrorIfPlaylistNotFound ? data : getJson(threadId, url, directoryPath);
return reason == "playlistNotFound" && behavior == returnErrorIfPlaylistNotFound ? data : getJson(threadId, url, true, directoryPath);
}
}
ostringstream toString;
toString << CHANNELS_DIRECTORY << directoryPath << "/" << requestsPerChannel << ".json";
requestsPerChannel++;
toString << CHANNELS_DIRECTORY << directoryPath << "/" << requestsPerChannelThreads[threadId] << ".json";
requestsPerChannelThreads[threadId]++;
writeFile(threadId, toString.str(), "w", url + "\n" + content);
return data;
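
Finally, the quota handling above rotates API keys round-robin on `quotaExceeded`; a minimal reproduction of just that rotation, with placeholder key names:

```cpp
#include <cassert>
#include <string>
#include <vector>

using namespace std;

vector<string> keys = {"KEY_A", "KEY_B", "KEY_C"}; // As read from `keys.txt`.
string apiKey = keys[0];

// The rotation performed when `quotaExceeded` is returned: the exhausted key
// goes to the back and the next one becomes current.
void rotateKey()
{
    keys.erase(keys.begin());
    keys.push_back(apiKey);
    apiKey = keys[0];
}

int main()
{
    rotateKey();
    assert(apiKey == "KEY_B");
    rotateKey();
    assert(apiKey == "KEY_C");
    rotateKey();
    assert(apiKey == "KEY_A"); // Back to the first key after a full cycle.
    return 0;
}
```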