#2: Add data logging
This commit is contained in:
parent
73a9dea023
commit
923c14a77b
76
main.cpp
76
main.cpp
@ -9,10 +9,10 @@ using namespace std;
|
||||
using json = nlohmann::json;
|
||||
|
||||
vector<string> getFileContent(string filePath);
|
||||
json getJson(string url);
|
||||
json getJson(string url, string directoryPath);
|
||||
void createDirectory(string path),
|
||||
print(ostringstream* toPrint),
|
||||
treatComment(json comment);
|
||||
treatComment(json comment, string channelId);
|
||||
string getHttps(string url);
|
||||
size_t writeCallback(void* contents, size_t size, size_t nmemb, void* userp);
|
||||
bool doesFileExist(string filePath),
|
||||
@ -27,20 +27,26 @@ ostringstream toPrint;
|
||||
|
||||
set<string> channelsAlreadyTreated,
|
||||
channelsToTreat;
|
||||
unsigned int commentsCount = 0;
|
||||
unsigned int commentsCount = 0,
|
||||
requestsPerChannel = 0;
|
||||
string CHANNELS_DIRECTORY = "channels/",
|
||||
CHANNELS_FILE_PATH = "channels.txt";
|
||||
|
||||
int main()
|
||||
{
|
||||
string channelsToTreatFilePath = "channelsToTreat.txt";
|
||||
vector<string> channelsToTreatVec = getFileContent(channelsToTreatFilePath);
|
||||
channelsToTreat = set(channelsToTreatVec.begin(), channelsToTreatVec.end());
|
||||
// The starting set should be written to `CHANNELS_FILE_PATH`.
|
||||
// To resume this algorithm after a shutdown, delete the folder in `CHANNELS_DIRECTORY` of the channel that was being treated when the shutdown happened, then restart the program.
|
||||
// On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat`, or to `channelsAlreadyTreated` otherwise, before continuing, as if `CHANNELS_FILE_PATH` contained a **treated** starting set.
|
||||
vector<string> channelsVec = getFileContent(CHANNELS_FILE_PATH);
|
||||
channelsToTreat = set(channelsVec.begin(), channelsVec.end());
|
||||
|
||||
string channelsDirectory = "channels/";
|
||||
createDirectory(channelsDirectory);
|
||||
createDirectory(CHANNELS_DIRECTORY);
|
||||
|
||||
for(const auto& entry : filesystem::directory_iterator(channelsDirectory))
|
||||
for(const auto& entry : filesystem::directory_iterator(CHANNELS_DIRECTORY))
|
||||
{
|
||||
channelsAlreadyTreated.insert(entry.path().filename());
|
||||
string channelId = entry.path().filename();
|
||||
channelsToTreat.erase(channelId);
|
||||
channelsAlreadyTreated.insert(channelId);
|
||||
}
|
||||
|
||||
PRINT(channelsToTreat.size() << " channel(s) to treat")
|
||||
@ -49,12 +55,18 @@ int main()
|
||||
while(!channelsToTreat.empty())
|
||||
{
|
||||
string channelToTreat = *channelsToTreat.begin();
|
||||
|
||||
PRINT("Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")
|
||||
|
||||
channelsAlreadyTreated.insert(channelToTreat);
|
||||
|
||||
string channelToTreatDirectory = CHANNELS_DIRECTORY + channelToTreat + "/";
|
||||
createDirectory(channelToTreatDirectory);
|
||||
|
||||
string pageToken = "";
|
||||
while(true)
|
||||
{
|
||||
json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken);
|
||||
json data = getJson("commentThreads?part=snippet,replies&allThreadsRelatedToChannelId=" + channelToTreat + "&maxResults=100&pageToken=" + pageToken, channelToTreat);
|
||||
bool doesRelyingOnCommentThreadsIsEnough = data["error"]["errors"][0]["reason"] != "commentsDisabled";
|
||||
if(doesRelyingOnCommentThreadsIsEnough)
|
||||
{
|
||||
@ -63,7 +75,7 @@ int main()
|
||||
{
|
||||
json comment = item["snippet"]["topLevelComment"];
|
||||
string commentId = comment["id"];
|
||||
treatComment(comment);
|
||||
treatComment(comment, channelToTreat);
|
||||
if(item.contains("replies"))
|
||||
{
|
||||
json replies = item["replies"]["comments"];
|
||||
@ -72,11 +84,11 @@ int main()
|
||||
string pageToken = "";
|
||||
while(true)
|
||||
{
|
||||
json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken);
|
||||
json data = getJson("comments?part=snippet&parentId=" + commentId + "&maxResults=100&pageToken=" + pageToken, channelToTreat);
|
||||
json items = data["items"];
|
||||
for(const auto& item : items)
|
||||
{
|
||||
treatComment(item);
|
||||
treatComment(item, channelToTreat);
|
||||
}
|
||||
if(data.contains("nextPageToken"))
|
||||
{
|
||||
@ -92,7 +104,7 @@ int main()
|
||||
{
|
||||
for(const auto& reply : replies)
|
||||
{
|
||||
treatComment(reply);
|
||||
treatComment(reply, channelToTreat);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -115,29 +127,27 @@ int main()
|
||||
|
||||
PRINT(commentsCount)
|
||||
commentsCount = 0;
|
||||
requestsPerChannel = 0;
|
||||
|
||||
channelsToTreat.erase(channelToTreat);
|
||||
channelsAlreadyTreated.insert(channelToTreat);
|
||||
|
||||
string channelToTreatDirectory = channelsDirectory + channelToTreat + "/";
|
||||
createDirectory(channelToTreatDirectory);
|
||||
|
||||
string toWrite = (doesFileExist(channelsToTreatFilePath) ? "\n" : "") + channelToTreat;
|
||||
writeFile(channelsToTreatFilePath, "a", toWrite);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void treatComment(json comment)
|
||||
void treatComment(json comment, string channelId)
|
||||
{
|
||||
json snippet = comment["snippet"];
|
||||
// The `else` case can happen (cf `95a9421ad0469a09335afeddb2983e31dc00bc36`).
|
||||
if(snippet.contains("authorChannelId"))
|
||||
{
|
||||
string channelId = snippet["authorChannelId"]["value"];
|
||||
if(find(channelsAlreadyTreated.begin(), channelsAlreadyTreated.end(), channelId) == channelsAlreadyTreated.end())
|
||||
if(find(channelsAlreadyTreated.begin(), channelsAlreadyTreated.end(), channelId) == channelsAlreadyTreated.end() && find(channelsToTreat.begin(), channelsToTreat.end(), channelId) == channelsToTreat.end())
|
||||
{
|
||||
channelsToTreat.insert(channelId);
|
||||
|
||||
writeFile(CHANNELS_FILE_PATH, "a", "\n" + channelId);
|
||||
}
|
||||
}
|
||||
commentsCount++;
|
||||
}
|
||||
@ -151,6 +161,10 @@ bool writeFile(string filePath, string option, string toWrite)
|
||||
fclose(file);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
PRINT("writeFile error: " << strerror(errno))
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -184,15 +198,21 @@ vector<string> getFileContent(string filePath)
|
||||
return lines;
|
||||
}
|
||||
|
||||
json getJson(string url)
|
||||
json getJson(string url, string directoryPath)
|
||||
{
|
||||
#ifdef USE_YT_LEMNOSLIFE_COM_NO_KEY_SERVICE
|
||||
url = "https://yt.lemnoslife.com/noKey/" + url;
|
||||
string finalUrl = "https://yt.lemnoslife.com/noKey/" + url;
|
||||
#else
|
||||
url = "https://www.googleapis.com/youtube/v3/" + url + "&key=" + API_KEY;
|
||||
string finalUrl = "https://www.googleapis.com/youtube/v3/" + url + "&key=" + API_KEY;
|
||||
#endif
|
||||
string content = getHttps(url);
|
||||
string content = getHttps(finalUrl);
|
||||
json data = json::parse(content);
|
||||
|
||||
ostringstream toString;
|
||||
toString << CHANNELS_DIRECTORY << directoryPath << "/" << requestsPerChannel << ".json";
|
||||
requestsPerChannel++;
|
||||
writeFile(toString.str(), "w", url + "\n" + content);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user