#11: Add a first iteration for the CHANNELS
retrieval
This commit is contained in:
parent
270c48da02
commit
7456685f2b
2
Makefile
2
Makefile
@ -1,4 +1,4 @@
|
|||||||
.PHONY: main
|
.PHONY: main
|
||||||
|
|
||||||
main:
|
main:
|
||||||
g++ main.cpp -g -std=c++17 -lcurl -lpthread -o main
|
g++ main.cpp -g -std=c++17 -lcurl -lpthread -o youtubeCaptionsSearchEngine
|
||||||
|
@ -15,5 +15,10 @@ Have to proceed with a breadth-first search approach as treating all *child* cha
|
|||||||
```sh
|
```sh
|
||||||
sudo apt install nlohmann-json3-dev
|
sudo apt install nlohmann-json3-dev
|
||||||
make
|
make
|
||||||
./main
|
```
|
||||||
|
|
||||||
|
Unless you provide the argument `--youtube-operational-api-instance-url https://yt.lemnoslife.com`, you have [to host your own instance of the YouTube operational API](https://github.com/Benjamin-Loison/YouTube-operational-API/#install-your-own-instance-of-the-api).
|
||||||
|
|
||||||
|
```sh
|
||||||
|
./youtubeCaptionsSearchEngine
|
||||||
```
|
```
|
||||||
|
87
main.cpp
87
main.cpp
@ -16,13 +16,14 @@ enum getJsonBehavior { normal, retryOnCommentsDisabled, returnErrorIfPlaylistNot
|
|||||||
|
|
||||||
set<string> setFromVector(vector<string> vec);
|
set<string> setFromVector(vector<string> vec);
|
||||||
vector<string> getFileContent(string filePath);
|
vector<string> getFileContent(string filePath);
|
||||||
json getJson(unsigned short threadId, string url, string directoryPath, getJsonBehavior behavior = normal);
|
json getJson(unsigned short threadId, string url, bool usingYouTubeDataApiV3, string directoryPath, getJsonBehavior behavior = normal);
|
||||||
void createDirectory(string path),
|
void createDirectory(string path),
|
||||||
print(ostringstream* toPrint),
|
print(ostringstream* toPrint),
|
||||||
treatComment(unsigned short threadId, json comment, string channelId),
|
treatComment(unsigned short threadId, json comment, string channelId),
|
||||||
treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, string channelToTreat),
|
treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, string channelToTreat),
|
||||||
treatChannels(unsigned short threadId),
|
treatChannels(unsigned short threadId),
|
||||||
deleteDirectory(string path);
|
deleteDirectory(string path),
|
||||||
|
addChannelToTreat(unsigned short threadId, string channelId);
|
||||||
string getHttps(string url),
|
string getHttps(string url),
|
||||||
exec(string cmd);
|
exec(string cmd);
|
||||||
size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp);
|
size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp);
|
||||||
@ -196,7 +197,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
|
|||||||
ostringstream toString;
|
ostringstream toString;
|
||||||
toString << "commentThreads?part=snippet,replies&" << (isChannel ? "allThreadsRelatedToChannelId" : "videoId") << "=" << id << "&maxResults=100&pageToken=" << pageToken;
|
toString << "commentThreads?part=snippet,replies&" << (isChannel ? "allThreadsRelatedToChannelId" : "videoId") << "=" << id << "&maxResults=100&pageToken=" << pageToken;
|
||||||
string url = toString.str();
|
string url = toString.str();
|
||||||
json data = getJson(threadId, url, channelToTreat, pageToken == "" ? normal : retryOnCommentsDisabled);
|
json data = getJson(threadId, url, true, channelToTreat, pageToken == "" ? normal : retryOnCommentsDisabled);
|
||||||
bool doesRelyingOnCommentThreadsIsEnough = (!isChannel) || data["error"]["errors"][0]["reason"] != "commentsDisabled";
|
bool doesRelyingOnCommentThreadsIsEnough = (!isChannel) || data["error"]["errors"][0]["reason"] != "commentsDisabled";
|
||||||
if(doesRelyingOnCommentThreadsIsEnough)
|
if(doesRelyingOnCommentThreadsIsEnough)
|
||||||
{
|
{
|
||||||
@ -213,7 +214,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
|
|||||||
string pageToken = "";
|
string pageToken = "";
|
||||||
while(true)
|
while(true)
|
||||||
{
|
{
|
||||||
json data = getJson(threadId, "comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, channelToTreat),
|
json data = getJson(threadId, "comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, true, channelToTreat),
|
||||||
items = data["items"];
|
items = data["items"];
|
||||||
for(const auto& item : items)
|
for(const auto& item : items)
|
||||||
{
|
{
|
||||||
@ -251,7 +252,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
PRINT(threadId, "Comments disabled channel, treating differently...")
|
PRINT(threadId, "Comments disabled channel, treating differently...")
|
||||||
json data = getJson(threadId, "channels?part=statistics&id=" + channelToTreat, channelToTreat);
|
json data = getJson(threadId, "channels?part=statistics&id=" + channelToTreat, true, channelToTreat);
|
||||||
// YouTube Data API v3 Videos: list endpoint returns `videoCount` as a string and not an integer...
|
// YouTube Data API v3 Videos: list endpoint returns `videoCount` as a string and not an integer...
|
||||||
unsigned int videoCount = atoi(string(data["items"][0]["statistics"]["videoCount"]).c_str());
|
unsigned int videoCount = atoi(string(data["items"][0]["statistics"]["videoCount"]).c_str());
|
||||||
PRINT(threadId, "The channel has about " << videoCount << " videos.")
|
PRINT(threadId, "The channel has about " << videoCount << " videos.")
|
||||||
@ -263,7 +264,7 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
|
|||||||
while(true)
|
while(true)
|
||||||
{
|
{
|
||||||
// `snippet` and `status` are unneeded `part`s here but may be interesting later, as we log them.
|
// `snippet` and `status` are unneeded `part`s here but may be interesting later, as we log them.
|
||||||
json data = getJson(threadId, "playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, channelToTreat, returnErrorIfPlaylistNotFound);
|
json data = getJson(threadId, "playlistItems?part=snippet,contentDetails,status&playlistId=" + playlistToTreat + "&maxResults=50&pageToken=" + pageToken, true, channelToTreat, returnErrorIfPlaylistNotFound);
|
||||||
if(data.contains("error"))
|
if(data.contains("error"))
|
||||||
{
|
{
|
||||||
PRINT(threadId, "Not listing comments on videos, as `playlistItems` hasn't found the `uploads` playlist!")
|
PRINT(threadId, "Not listing comments on videos, as `playlistItems` hasn't found the `uploads` playlist!")
|
||||||
@ -300,6 +301,51 @@ void treatChannelOrVideo(unsigned short threadId, bool isChannel, string id, str
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if(isChannel)
|
||||||
|
{
|
||||||
|
string pageToken = "";
|
||||||
|
while(true)
|
||||||
|
{
|
||||||
|
json data = getJson(threadId, "channels?part=channels&id=" + id + (pageToken == "" ? "" : "&pageToken=" + pageToken), false, id),
|
||||||
|
items = data["items"];
|
||||||
|
for(const auto& item : items)
|
||||||
|
{
|
||||||
|
for(const auto& channel : item["channels"]["channels"])
|
||||||
|
{
|
||||||
|
PRINT(threadId, channel)
|
||||||
|
addChannelToTreat(threadId, channel["channelId"]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(!data["nextPageToken"].is_null())
|
||||||
|
{
|
||||||
|
pageToken = data["nextPageToken"];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function verifies that the given hasn't already been treated.
|
||||||
|
void addChannelToTreat(unsigned short threadId, string channelId)
|
||||||
|
{
|
||||||
|
channelsAlreadyTreatedAndToTreatMutex.lock();
|
||||||
|
if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreatRev.find(channelId) == channelsToTreatRev.end())
|
||||||
|
{
|
||||||
|
unsigned int channelsToTreatIndex = channelsToTreat.end()->first + 1;
|
||||||
|
channelsToTreat[channelsToTreatIndex] = channelId;
|
||||||
|
channelsToTreatRev[channelId] = channelsToTreatIndex;
|
||||||
|
|
||||||
|
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
||||||
|
|
||||||
|
writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void treatComment(unsigned short threadId, json comment, string channelId)
|
void treatComment(unsigned short threadId, json comment, string channelId)
|
||||||
@ -309,21 +355,7 @@ void treatComment(unsigned short threadId, json comment, string channelId)
|
|||||||
if(snippet.contains("authorChannelId"))
|
if(snippet.contains("authorChannelId"))
|
||||||
{
|
{
|
||||||
string channelId = snippet["authorChannelId"]["value"];
|
string channelId = snippet["authorChannelId"]["value"];
|
||||||
channelsAlreadyTreatedAndToTreatMutex.lock();
|
addChannelToTreat(threadId, channelId);
|
||||||
if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreatRev.find(channelId) == channelsToTreatRev.end())
|
|
||||||
{
|
|
||||||
unsigned int channelsToTreatIndex = channelsToTreat.end()->first + 1;
|
|
||||||
channelsToTreat[channelsToTreatIndex] = channelId;
|
|
||||||
channelsToTreatRev[channelId] = channelsToTreatIndex;
|
|
||||||
|
|
||||||
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
|
||||||
|
|
||||||
writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
commentsCount++;
|
commentsCount++;
|
||||||
commentsPerSecondCount++;
|
commentsPerSecondCount++;
|
||||||
@ -405,10 +437,13 @@ vector<string> getFileContent(string filePath)
|
|||||||
return lines;
|
return lines;
|
||||||
}
|
}
|
||||||
|
|
||||||
json getJson(unsigned short threadId, string url, string directoryPath, getJsonBehavior behavior)
|
json getJson(unsigned short threadId, string url, bool usingYoutubeDataApiv3, string directoryPath, getJsonBehavior behavior)
|
||||||
{
|
{
|
||||||
string finalUrl = USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE ? "https://yt.lemnoslife.com/noKey/" + url :
|
string finalUrl = usingYoutubeDataApiv3 ?
|
||||||
"https://www.googleapis.com/youtube/v3/" + url + "&key=" + apiKey,
|
(USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE ?
|
||||||
|
"https://yt.lemnoslife.com/noKey/" + url :
|
||||||
|
"https://www.googleapis.com/youtube/v3/" + url + "&key=" + apiKey) :
|
||||||
|
YOUTUBE_OPERATIONAL_API_INSTANCE_URL + "/" + url,
|
||||||
content = getHttps(finalUrl);
|
content = getHttps(finalUrl);
|
||||||
json data;
|
json data;
|
||||||
try
|
try
|
||||||
@ -433,12 +468,12 @@ json getJson(unsigned short threadId, string url, string directoryPath, getJsonB
|
|||||||
PRINT(threadId, "No more quota on " << apiKey << " switching to " << keys[0] << ".")
|
PRINT(threadId, "No more quota on " << apiKey << " switching to " << keys[0] << ".")
|
||||||
apiKey = keys[0];
|
apiKey = keys[0];
|
||||||
quotaMutex.unlock();
|
quotaMutex.unlock();
|
||||||
return getJson(threadId, url, directoryPath);
|
return getJson(threadId, url, true, directoryPath);
|
||||||
}
|
}
|
||||||
PRINT(threadId, "Found error in JSON at URL: " << finalUrl << " for content: " << content << " !")
|
PRINT(threadId, "Found error in JSON at URL: " << finalUrl << " for content: " << content << " !")
|
||||||
if(reason != "commentsDisabled" || behavior == retryOnCommentsDisabled)
|
if(reason != "commentsDisabled" || behavior == retryOnCommentsDisabled)
|
||||||
{
|
{
|
||||||
return reason == "playlistNotFound" && behavior == returnErrorIfPlaylistNotFound ? data : getJson(threadId, url, directoryPath);
|
return reason == "playlistNotFound" && behavior == returnErrorIfPlaylistNotFound ? data : getJson(threadId, url, true, directoryPath);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user