Add progression save and use spaces instead of tabs

This commit is contained in:
Benjamin Loison 2022-12-22 06:18:22 +01:00
parent 934954092a
commit 36f1fb9e83
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8
2 changed files with 144 additions and 100 deletions

View File

@ -1 +1 @@
UCt5USYpzzMCYhkirVQGHwKQ UCt5USYpzzMCYhkirVQGHwKQ

242
main.cpp
View File

@ -1,6 +1,7 @@
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <set> #include <set>
#include <sys/stat.h>
#include <curl/curl.h> #include <curl/curl.h>
#include <nlohmann/json.hpp> #include <nlohmann/json.hpp>
using namespace std; using namespace std;
@ -8,10 +9,13 @@ using json = nlohmann::json;
vector<string> getFileContent(string filePath); vector<string> getFileContent(string filePath);
json getJson(string url); json getJson(string url);
void print(ostringstream* toPrint), void createDirectory(string path),
print(ostringstream* toPrint),
treatComment(json comment); treatComment(json comment);
string getHttps(string url); string getHttps(string url);
size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp); size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp);
bool doesFileExist(string filePath),
writeFile(string filePath, string option, string toWrite);
#define API_KEY "AIzaSy..." #define API_KEY "AIzaSy..."
@ -19,104 +23,144 @@ size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp);
// PRINT(x): streams `x` into the shared `toPrint` buffer, then calls print() on that buffer.
// It expands to two statements, so it must not be used as the body of an unbraced if/else/loop.
#define PRINT(x) toPrint << x; print(&toPrint); #define PRINT(x) toPrint << x; print(&toPrint);
// Buffer filled by PRINT; print() empties it after writing it out.
ostringstream toPrint; ostringstream toPrint;
// channelsToTreat: channel ids still to process (seeded in main, extended by treatComment).
// channelsAlreadyTreated: channel ids that must not be enqueued again.
set<string> channelsToTreat, set<string> channelsAlreadyTreated,
channelsAlreadyTreated; channelsToTreat;
// Incremented by treatComment for every comment; main prints and resets it after each channel.
unsigned int commentsCount = 0; unsigned int commentsCount = 0;
// Entry point: crawls YouTube comment threads channel by channel, starting from the channel ids
// listed in channelsToTreat.txt, and enqueues the channel of every comment author that is not
// already known as treated (see treatComment).
// NOTE(review): this text looks like a side-by-side diff rendering — many lines carry an old and a
// new version next to each other, and the crawl loop appears twice (presumably the removed side
// first, then the added side). TODO confirm against the real main.cpp.
int main() int main()
{ {
// Seed the work set with the lines of the seed file (one channel id per line).
vector<string> channelsToTreatVec = getFileContent("channelsToTreat.txt"); string channelsToTreatFilePath = "channelsToTreat.txt";
channelsToTreat = set(channelsToTreatVec.begin(), channelsToTreatVec.end()); vector<string> channelsToTreatVec = getFileContent(channelsToTreatFilePath);
channelsToTreat = set(channelsToTreatVec.begin(), channelsToTreatVec.end());
// First copy of the crawl loop (single-column lines); the second copy further down is commented in detail.
PRINT(channelsToTreat.size() << " channel(s) to treat")
while(!channelsToTreat.empty())
{
string channelToTreat = *channelsToTreat.begin();
PRINT("Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")
string pageToken = "";
while(true)
{
json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken);
bool doesRelyingOnCommentThreadsIsEnough = data["error"]["errors"][0]["reason"] != "commentsDisabled";
if(doesRelyingOnCommentThreadsIsEnough)
{
json items = data["items"];
for(const auto& item : items)
{
json comment = item["snippet"]["topLevelComment"];
string commentId = comment["id"];
treatComment(comment);
if(item.contains("replies"))
{
json replies = item["replies"]["comments"];
if(replies.size() >= 5)
{
string pageToken = "";
while(true)
{
json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken);
json items = data["items"];
for(const auto& item : items)
{
treatComment(item);
}
if(data.contains("nextPageToken"))
{
pageToken = data["nextPageToken"];
}
else
{
break;
}
}
}
else
{
for(const auto& reply : replies)
{
treatComment(reply);
}
}
}
}
if(data.contains("nextPageToken"))
{
pageToken = data["nextPageToken"];
}
else
{
break;
}
}
else
{
PRINT("Comments disabled channel!")
exit(1);
}
}
PRINT(commentsCount)
commentsCount = 0;
channelsToTreat.erase(channelToTreat);
channelsAlreadyTreated.insert(channelToTreat);
}
return 0; string channelsDirectory = "channels/";
// Every entry found directly under channels/ is recorded as an already treated channel id.
// NOTE(review): ids recorded here are not removed from channelsToTreat, so a seeded id that
// already has a directory is still processed by the loop below — confirm whether that is intended.
createDirectory(channelsDirectory);
for(const auto& entry : filesystem::directory_iterator(channelsDirectory))
{
channelsAlreadyTreated.insert(entry.path().filename());
}
PRINT(channelsToTreat.size() << " channel(s) to treat")
PRINT(channelsAlreadyTreated.size() << " channel(s) already treated")
// Process channels until the work set is empty; treatComment may add new ids while a channel is being crawled.
while(!channelsToTreat.empty())
{
string channelToTreat = *channelsToTreat.begin();
PRINT("Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")
// Page through the channel's comment threads; the token is empty for the first request.
string pageToken = "";
while(true)
{
json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken);
// Only the "commentsDisabled" error reason is checked; any other response is handled as a regular page.
bool doesRelyingOnCommentThreadsIsEnough = data["error"]["errors"][0]["reason"] != "commentsDisabled";
if(doesRelyingOnCommentThreadsIsEnough)
{
json items = data["items"];
for(const auto& item : items)
{
json comment = item["snippet"]["topLevelComment"];
string commentId = comment["id"];
treatComment(comment);
if(item.contains("replies"))
{
json replies = item["replies"]["comments"];
// With 5 or more inline replies, the replies are fetched again page by page through the
// comments endpoint and the inline ones are ignored — presumably because the inline list
// may be incomplete; TODO confirm.
if(replies.size() >= 5)
{
// The inner loop declares its own pageToken/data/items/item, shadowing the outer ones.
string pageToken = "";
while(true)
{
json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken);
json items = data["items"];
for(const auto& item : items)
{
treatComment(item);
}
if(data.contains("nextPageToken"))
{
pageToken = data["nextPageToken"];
}
else
{
break;
}
}
}
else
{
for(const auto& reply : replies)
{
treatComment(reply);
}
}
}
}
// Continue with the next page when the response announces one, otherwise this channel is done.
if(data.contains("nextPageToken"))
{
pageToken = data["nextPageToken"];
}
else
{
break;
}
}
else
{
// A channel with disabled comments terminates the whole program with exit status 1.
PRINT("Comments disabled channel!")
exit(1);
}
}
// Print the number of comments seen for this channel, then move its id from the work set to the treated set.
PRINT(commentsCount)
commentsCount = 0;
channelsToTreat.erase(channelToTreat);
channelsAlreadyTreated.insert(channelToTreat);
// Persist progress: create channels/<id>/ and append the id to the seed file, preceded by a
// newline when that file already exists.
string channelToTreatDirectory = channelsDirectory + channelToTreat + "/";
createDirectory(channelToTreatDirectory);
string toWrite = (doesFileExist(channelsToTreatFilePath) ? "\n" : "") + channelToTreat;
writeFile(channelsToTreatFilePath, "a", toWrite);
}
return 0;
} }
// Records one comment: when its snippet has an "authorChannelId", the author's channel id is added
// to channelsToTreat unless it is already in channelsAlreadyTreated. commentsCount is incremented
// in every case.
// NOTE(review): std::find walks the set element by element; channelsAlreadyTreated.find(channelId)
// (or count) performs the same membership test in logarithmic time.
void treatComment(json comment) void treatComment(json comment)
{ {
json snippet = comment["snippet"]; json snippet = comment["snippet"];
// The `else` case can happen (cf `95a9421ad0469a09335afeddb2983e31dc00bc36`). // The `else` case can happen (cf `95a9421ad0469a09335afeddb2983e31dc00bc36`).
if(snippet.contains("authorChannelId")) if(snippet.contains("authorChannelId"))
{ {
string channelId = snippet["authorChannelId"]["value"]; string channelId = snippet["authorChannelId"]["value"];
if(find(channelsAlreadyTreated.begin(), channelsAlreadyTreated.end(), channelId) == channelsAlreadyTreated.end()) if(find(channelsAlreadyTreated.begin(), channelsAlreadyTreated.end(), channelId) == channelsAlreadyTreated.end())
channelsToTreat.insert(channelId); channelsToTreat.insert(channelId);
} }
commentsCount++; commentsCount++;
}
// Writes `toWrite` to the file at `filePath`.
// `option` is handed to fopen as the mode string (for example "w" to truncate or "a" to append).
// Returns true only when the file could be opened, every byte was written and the stream was
// closed without error; returns false in all other cases.
bool writeFile(string filePath, string option, string toWrite)
{
    FILE* file = fopen(filePath.c_str(), option.c_str());
    if(file == nullptr)
    {
        return false;
    }
    // fwrite with an explicit length reports short writes and, unlike fputs, does not stop at an
    // embedded '\0'.
    const bool isFullyWritten = fwrite(toWrite.data(), 1, toWrite.size(), file) == toWrite.size();
    // Buffered data is flushed by fclose, so a write error may only surface here. The stream is
    // closed even after a failed write so the handle is never leaked.
    const bool isClosed = fclose(file) == 0;
    return isFullyWritten && isClosed;
}
// Tells whether something exists at `filePath`: returns true when stat() succeeds on the path
// (regular file, directory or any other kind of entry) and false when it fails.
bool doesFileExist(string filePath)
{
    struct stat fileInfo;
    const int statResult = stat(filePath.c_str(), &fileInfo);
    if(statResult != 0)
    {
        return false;
    }
    return true;
}
// Creates the directory `path` with permissions rwxrwxr-x requested (the process umask still applies).
// The result of mkdir is ignored, so any failure — including the directory already existing —
// goes unreported.
void createDirectory(string path)
{
mkdir(path.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
} }
string getDate() string getDate()
@ -130,26 +174,26 @@ string getDate()
// Returns the lines of the file at `filePath`, in file order, as read by getline (line terminators
// are not part of the returned strings). When the file cannot be opened the read loop never runs
// and the result is empty.
vector<string> getFileContent(string filePath) vector<string> getFileContent(string filePath)
{ {
vector<string> lines; vector<string> lines;
ifstream infile(filePath.c_str()); ifstream infile(filePath.c_str());
string line; string line;
while(getline(infile, line)) while(getline(infile, line))
lines.push_back(line); lines.push_back(line);
return lines; return lines;
} }
// Performs one YouTube Data API v3 request and returns the parsed response.
// `url` is the endpoint plus query string relative to the API base URL; it must already contain a
// query string because the API key is appended with '&'.
// NOTE(review): json::parse presumably throws on a malformed body (nlohmann default) and nothing
// catches it here — confirm how an empty or non-JSON response from getHttps should be handled.
json getJson(string url) json getJson(string url)
{ {
url = "https://www.googleapis.com/youtube/v3/" + url + "&key=" + API_KEY; url = "https://www.googleapis.com/youtube/v3/" + url + "&key=" + API_KEY;
string content = getHttps(url); string content = getHttps(url);
json data = json::parse(content); json data = json::parse(content);
return data; return data;
} }
// Writes the text accumulated in `toPrint` to standard output as "<getDate()>: <text>" followed by
// endl (newline plus flush), then empties the buffer so it can be reused for the next message.
void print(ostringstream* toPrint) void print(ostringstream* toPrint)
{ {
cout << getDate() << ": " << toPrint->str() << endl; cout << getDate() << ": " << toPrint->str() << endl;
toPrint->str(""); toPrint->str("");
} }
string getHttps(string url) string getHttps(string url)