Fix #26: Keep efficient search algorithm while keeping order (notably of the starting set)

2023-01-14 15:14:24 +01:00
parent 27cd5c3a64
commit f6c11b54f3
1 changed files with 24 additions and 10 deletions
--- a/main.cpp
+++ b/main.cpp
@@ -36,8 +36,10 @@ bool doesFileExist(string filePath),
 mutex printMutex,
      channelsAlreadyTreatedAndToTreatMutex,
      quotaMutex;
-set<string> channelsAlreadyTreated,
-    channelsToTreat;
+set<string> channelsAlreadyTreated;
+// Two `map`s to simulate a bidirectional map.
+map<unsigned int, string> channelsToTreat;
+map<string, unsigned int> channelsToTreatRev;
 vector<string> keys;
 unsigned int commentsCount = 0,
             commentsPerSecondCount = 0,
@@ -76,10 +78,14 @@ int main(int argc, char *argv[])

    // The starting set should be written to `CHANNELS_FILE_PATH`.
    // To resume this algorithm after a shutdown, just restart it after having deleted the last channel folders in `CHANNELS_DIRECTORY` being treated.
-    // On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat` or `channelsToTreat` otherwise before continuing, as if `CHANNELS_FILE_PATH` was containing a **treated** starting set.
+    // On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat*`, or to `channelsAlreadyTreated` otherwise, before continuing, as if `CHANNELS_FILE_PATH` contained a **treated** starting set.
    vector<string> channelsVec = getFileContent(CHANNELS_FILE_PATH);
-    // Note that using `set`s makes the search faster but we lose the `channels.txt` lines order.
-    channelsToTreat = setFromVector(channelsVec);
+    for(unsigned int channelsVecIndex = 0; channelsVecIndex < channelsVec.size(); channelsVecIndex++)
+    {
+        string channel = channelsVec[channelsVecIndex];
+        channelsToTreat[channelsVecIndex] = channel;
+        channelsToTreatRev[channel] = channelsVecIndex;
+    }

    keys = getFileContent(KEYS_FILE_PATH);
    apiKey = keys[0];
@@ -90,7 +96,10 @@ int main(int argc, char *argv[])
    {
        string fileName = entry.path().filename(),
               channelId = fileName.substr(0, fileName.length() - 4);
-        channelsToTreat.erase(channelId);
+
+        channelsToTreat.erase(channelsToTreatRev[channelId]);
+        channelsToTreatRev.erase(channelId);
+
        channelsAlreadyTreated.insert(channelId);
    }

@@ -132,11 +141,13 @@ void treatChannels(unsigned short threadId)
            continue;
        }

-        string channelToTreat = *channelsToTreat.begin();
+        string channelToTreat = channelsToTreat.begin()->second;

        PRINT(threadId, "Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")

-        channelsToTreat.erase(channelToTreat);
+        channelsToTreat.erase(channelsToTreatRev[channelToTreat]);
+        channelsToTreatRev.erase(channelToTreat);
+
        channelsAlreadyTreated.insert(channelToTreat);

        channelsAlreadyTreatedAndToTreatMutex.unlock();
@@ -285,9 +296,12 @@ void treatComment(unsigned short threadId, json comment, string channelId)
    {
        string channelId = snippet["authorChannelId"]["value"];
        channelsAlreadyTreatedAndToTreatMutex.lock();
-        if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreat.find(channelId) == channelsToTreat.end())
+        if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreatRev.find(channelId) == channelsToTreatRev.end())
        {
-            channelsToTreat.insert(channelId);
+            unsigned int channelsToTreatIndex = channelsToTreat.end()->first + 1;
+            channelsToTreat[channelsToTreatIndex] = channelId;
+            channelsToTreatRev[channelId] = channelsToTreatIndex;
+
            channelsAlreadyTreatedAndToTreatMutex.unlock();

            writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);