#11: Add a first iteration for the CHANNELS
retrieval
This commit is contained in:
parent
270c48da02
commit
7456685f2b
2
Makefile
2
Makefile
@ -1,4 +1,4 @@
|
||||
.PHONY: main
|
||||
|
||||
main:
|
||||
g++ main.cpp -g -std=c++17 -lcurl -lpthread -o main
|
||||
g++ main.cpp -g -std=c++17 -lcurl -lpthread -o youtubeCaptionsSearchEngine
|
||||
|
@ -15,5 +15,10 @@ Have to proceed with a breadth-first search approach as treating all *child* cha
|
||||
```sh
|
||||
sudo apt install nlohmann-json3-dev
|
||||
make
|
||||
./main
|
||||
```
|
||||
|
||||
Unless you provide the argument `--youtube-operational-api-instance-url https://yt.lemnoslife.com`, you have to [host your own instance of the YouTube operational API](https://github.com/Benjamin-Loison/YouTube-operational-API/#install-your-own-instance-of-the-api).
|
||||
|
||||
```sh
|
||||
./youtubeCaptionsSearchEngine
|
||||
```
|
||||
|
87
main.cpp
87
main.cpp
@ -16,13 +16,14 @@ enum getJsonBehavior { normal, retryOnCommentsDisabled, returnErrorIfPlaylistNot
|
||||
|
||||
set<string> setFromVector(vector<string> vec);
|
||||
vector<string> getFileContent(string filePath);
|
||||
json getJson(unsigned short threadId, string url, string directoryPath, getJsonBehavior behavior = normal);
|
||||
json getJson(unsigned short threadId, string url, bool usingYouTubeDataApiV3, string directoryPath, getJsonBehavior behavior = normal);
|
||||
void createDirectory(string path),
|
||||
print(ostringstream* toPrint),
|
||||
treatComment(unsigned short threadId, json comment, string channelId),
|
||||
treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, string channelToTreat),
|
||||
treatChannels(unsigned short threadId),
|
||||
deleteDirectory(string path);
|
||||
deleteDirectory(string path),
|
||||
addChannelToTreat(unsigned short threadId, string channelId);
|
||||
string getHttps(string url),
|
||||
exec(string cmd);
|
||||
size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp);
|
||||
@ -196,7 +197,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
|
||||
ostringstream toString;
|
||||
toString << "commentThreads?part=snippet,replies&" << (isChannel ? "allThreadsRelatedToChannelId" : "videoId") << "=" << id << "&maxResults=100&pageToken=" << pageToken;
|
||||
string url = toString.str();
|
||||
json data = getJson(threadId, url, channelToTreat, pageToken == "" ? normal : retryOnCommentsDisabled);
|
||||
json data = getJson(threadId, url, true, channelToTreat, pageToken == "" ? normal : retryOnCommentsDisabled);
|
||||
bool doesRelyingOnCommentThreadsIsEnough = (!isChannel) || data["error"]["errors"][0]["reason"] != "commentsDisabled";
|
||||
if(doesRelyingOnCommentThreadsIsEnough)
|
||||
{
|
||||
@ -213,7 +214,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
|
||||
string pageToken = "";
|
||||
while(true)
|
||||
{
|
||||
json data = getJson(threadId, "comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, channelToTreat),
|
||||
json data = getJson(threadId, "comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, true, channelToTreat),
|
||||
items = data["items"];
|
||||
for(const auto& item : items)
|
||||
{
|
||||
@ -251,7 +252,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
|
||||
else
|
||||
{
|
||||
PRINT(threadId, "Comments disabled channel, treating differently...")
|
||||
json data = getJson(threadId, "channels?part=statistics&id=" + channelToTreat, channelToTreat);
|
||||
json data = getJson(threadId, "channels?part=statistics&id=" + channelToTreat, true, channelToTreat);
|
||||
// YouTube Data API v3 Channels: list endpoint returns `videoCount` as a string and not an integer...
|
||||
unsigned int videoCount = atoi(string(data["items"][0]["statistics"]["videoCount"]).c_str());
|
||||
PRINT(threadId, "The channel has about " << videoCount << " videos.")
|
||||
@ -263,7 +264,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
|
||||
while(true)
|
||||
{
|
||||
// `snippet` and `status` are unneeded `part`s here but may be interesting later, as we log them.
|
||||
json data = getJson(threadId, "playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, channelToTreat, returnErrorIfPlaylistNotFound);
|
||||
json data = getJson(threadId, "playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, true, channelToTreat, returnErrorIfPlaylistNotFound);
|
||||
if(data.contains("error"))
|
||||
{
|
||||
PRINT(threadId, "Not listing comments on videos, as `playlistItems` hasn't found the `uploads` playlist!")
|
||||
@ -300,6 +301,51 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
|
||||
}
|
||||
}
|
||||
}
|
||||
if(isChannel)
|
||||
{
|
||||
string pageToken = "";
|
||||
while(true)
|
||||
{
|
||||
json data = getJson(threadId, "channels?part=channels&id=" + id + (pageToken == "" ? "" : "&pageToken=" + pageToken), false, id),
|
||||
items = data["items"];
|
||||
for(const auto& item : items)
|
||||
{
|
||||
for(const auto& channel : item["channels"]["channels"])
|
||||
{
|
||||
PRINT(threadId, channel)
|
||||
addChannelToTreat(threadId, channel["channelId"]);
|
||||
}
|
||||
}
|
||||
if(!data["nextPageToken"].is_null())
|
||||
{
|
||||
pageToken = data["nextPageToken"];
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This function verifies that the given hasn't already been treated.
|
||||
void addChannelToTreat(unsigned short threadId, string channelId)
|
||||
{
|
||||
channelsAlreadyTreatedAndToTreatMutex.lock();
|
||||
if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreatRev.find(channelId) == channelsToTreatRev.end())
|
||||
{
|
||||
unsigned int channelsToTreatIndex = channelsToTreat.end()->first + 1;
|
||||
channelsToTreat[channelsToTreatIndex] = channelId;
|
||||
channelsToTreatRev[channelId] = channelsToTreatIndex;
|
||||
|
||||
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
||||
|
||||
writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);
|
||||
}
|
||||
else
|
||||
{
|
||||
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
void treatComment(unsigned short threadId, json comment, string channelId)
|
||||
@ -309,21 +355,7 @@ void treatComment(unsigned short threadId, json comment, string channelId)
|
||||
if(snippet.contains("authorChannelId"))
|
||||
{
|
||||
string channelId = snippet["authorChannelId"]["value"];
|
||||
channelsAlreadyTreatedAndToTreatMutex.lock();
|
||||
if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreatRev.find(channelId) == channelsToTreatRev.end())
|
||||
{
|
||||
unsigned int channelsToTreatIndex = channelsToTreat.end()->first + 1;
|
||||
channelsToTreat[channelsToTreatIndex] = channelId;
|
||||
channelsToTreatRev[channelId] = channelsToTreatIndex;
|
||||
|
||||
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
||||
|
||||
writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);
|
||||
}
|
||||
else
|
||||
{
|
||||
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
||||
}
|
||||
addChannelToTreat(threadId, channelId);
|
||||
}
|
||||
commentsCount++;
|
||||
commentsPerSecondCount++;
|
||||
@ -405,10 +437,13 @@ vector<string> getFileContent(string filePath)
|
||||
return lines;
|
||||
}
|
||||
|
||||
json getJson(unsigned short threadId, string url, string directoryPath, getJsonBehavior behavior)
|
||||
json getJson(unsigned short threadId, string url, bool usingYoutubeDataApiv3, string directoryPath, getJsonBehavior behavior)
|
||||
{
|
||||
string finalUrl = USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE ? "https://yt.lemnoslife.com/noKey/" + url :
|
||||
"https://www.googleapis.com/youtube/v3/" + url + "&key=" + apiKey,
|
||||
string finalUrl = usingYoutubeDataApiv3 ?
|
||||
(USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE ?
|
||||
"https://yt.lemnoslife.com/noKey/" + url :
|
||||
"https://www.googleapis.com/youtube/v3/" + url + "&key=" + apiKey) :
|
||||
YOUTUBE_OPERATIONAL_API_INSTANCE_URL + "/" + url,
|
||||
content = getHttps(finalUrl);
|
||||
json data;
|
||||
try
|
||||
@ -433,12 +468,12 @@ json getJson(unsigned short threadId, string url, string directoryPath, getJsonB
|
||||
PRINT(threadId, "No more quota on " << apiKey << " switching to " << keys[0] << ".")
|
||||
apiKey = keys[0];
|
||||
quotaMutex.unlock();
|
||||
return getJson(threadId, url, directoryPath);
|
||||
return getJson(threadId, url, true, directoryPath);
|
||||
}
|
||||
PRINT(threadId, "Found error in JSON at URL: " << finalUrl << " for content: " << content << " !")
|
||||
if(reason != "commentsDisabled" || behavior == retryOnCommentsDisabled)
|
||||
{
|
||||
return reason == "playlistNotFound" && behavior == returnErrorIfPlaylistNotFound ? data : getJson(threadId, url, directoryPath);
|
||||
return reason == "playlistNotFound" && behavior == returnErrorIfPlaylistNotFound ? data : getJson(threadId, url, true, directoryPath);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user