From f6c11b54f31634d19317269562d1fb03dd361a2e Mon Sep 17 00:00:00 2001 From: Benjamin Loison Date: Sat, 14 Jan 2023 15:14:24 +0100 Subject: [PATCH] Fix #26: Keep efficient search algorithm while keeping order (notably of the starting set) --- main.cpp | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/main.cpp b/main.cpp index 67dfa97..7fee7fd 100644 --- a/main.cpp +++ b/main.cpp @@ -36,8 +36,10 @@ bool doesFileExist(string filePath), mutex printMutex, channelsAlreadyTreatedAndToTreatMutex, quotaMutex; -set channelsAlreadyTreated, - channelsToTreat; +set channelsAlreadyTreated; +// Two `map`s to simulate a bidirectional map: `channelsToTreat` maps an insertion index to a channel id and `channelsToTreatRev` maps a channel id back to its index. +map channelsToTreat; +map channelsToTreatRev; vector keys; unsigned int commentsCount = 0, commentsPerSecondCount = 0, @@ -76,10 +78,14 @@ int main(int argc, char *argv[]) // The starting set should be written to `CHANNELS_FILE_PATH`. // To resume this algorithm after a shutdown, just restart it after having deleted the last channel folders in `CHANNELS_DIRECTORY` being treated. - // On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat` or `channelsToTreat` otherwise before continuing, as if `CHANNELS_FILE_PATH` was containing a **treated** starting set. + // On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat*` or `channelsAlreadyTreated` otherwise before continuing, as if `CHANNELS_FILE_PATH` was containing a **treated** starting set. vector channelsVec = getFileContent(CHANNELS_FILE_PATH); - // Note that using `set`s makes the search faster but we lose the `channels.txt` lines order. 
- channelsToTreat = setFromVector(channelsVec); + for(unsigned int channelsVecIndex = 0; channelsVecIndex < channelsVec.size(); channelsVecIndex++) + { + string channel = channelsVec[channelsVecIndex]; + channelsToTreat[channelsVecIndex] = channel; + channelsToTreatRev[channel] = channelsVecIndex; + } keys = getFileContent(KEYS_FILE_PATH); apiKey = keys[0]; @@ -90,7 +96,10 @@ int main(int argc, char *argv[]) { string fileName = entry.path().filename(), channelId = fileName.substr(0, fileName.length() - 4); - channelsToTreat.erase(channelId); + + if(channelsToTreatRev.find(channelId) != channelsToTreatRev.end()) channelsToTreat.erase(channelsToTreatRev[channelId]); // `operator[]` on an unknown id would insert index 0 and drop the first channel to treat. + channelsToTreatRev.erase(channelId); + channelsAlreadyTreated.insert(channelId); } @@ -132,11 +141,13 @@ void treatChannels(unsigned short threadId) continue; } - string channelToTreat = *channelsToTreat.begin(); + string channelToTreat = channelsToTreat.begin()->second; PRINT(threadId, "Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")") - channelsToTreat.erase(channelToTreat); + channelsToTreat.erase(channelsToTreat.begin()); // Erase the front entry directly instead of going through a reverse `operator[]` lookup. + channelsToTreatRev.erase(channelToTreat); + channelsAlreadyTreated.insert(channelToTreat); channelsAlreadyTreatedAndToTreatMutex.unlock(); @@ -285,9 +296,12 @@ void treatComment(unsigned short threadId, json comment, string channelId) { string channelId = snippet["authorChannelId"]["value"]; channelsAlreadyTreatedAndToTreatMutex.lock(); - if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreat.find(channelId) == channelsToTreat.end()) + if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreatRev.find(channelId) == channelsToTreatRev.end()) { - channelsToTreat.insert(channelId); + unsigned int channelsToTreatIndex = channelsToTreat.empty() ? 0 : channelsToTreat.rbegin()->first + 1; // `end()` must not be dereferenced; the highest index in use is at `rbegin()`. + channelsToTreat[channelsToTreatIndex] = channelId; + channelsToTreatRev[channelId] = channelsToTreatIndex; +
channelsAlreadyTreatedAndToTreatMutex.unlock(); writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);