Fix #26: Keep efficient search algorithm while keeping order (notably of the starting set)

This commit is contained in:
Benjamin Loison 2023-01-14 15:14:24 +01:00
parent 27cd5c3a64
commit f6c11b54f3
Signed by: Benjamin_Loison
SSH Key Fingerprint: SHA256:BtnEgYTlHdOg1u+RmYcDE0mnfz1rhv5dSbQ2gyxW8B8

View File

@ -36,8 +36,10 @@ bool doesFileExist(string filePath),
mutex printMutex, mutex printMutex,
channelsAlreadyTreatedAndToTreatMutex, channelsAlreadyTreatedAndToTreatMutex,
quotaMutex; quotaMutex;
set<string> channelsAlreadyTreated, set<string> channelsAlreadyTreated;
channelsToTreat; // Two `map`s to simulate a bidirectional map.
map<unsigned int, string> channelsToTreat;
map<string, unsigned int> channelsToTreatRev;
vector<string> keys; vector<string> keys;
unsigned int commentsCount = 0, unsigned int commentsCount = 0,
commentsPerSecondCount = 0, commentsPerSecondCount = 0,
@ -76,10 +78,14 @@ int main(int argc, char *argv[])
// The starting set should be written to `CHANNELS_FILE_PATH`. // The starting set should be written to `CHANNELS_FILE_PATH`.
// To resume this algorithm after a shutdown, just restart it after having deleted the last channel folders in `CHANNELS_DIRECTORY` being treated. // To resume this algorithm after a shutdown, just restart it after having deleted the last channel folders in `CHANNELS_DIRECTORY` being treated.
// On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat` or `channelsAlreadyTreated` otherwise before continuing, as if `CHANNELS_FILE_PATH` was containing a **treated** starting set. // On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat*` or `channelsAlreadyTreated` otherwise before continuing, as if `CHANNELS_FILE_PATH` was containing a **treated** starting set.
vector<string> channelsVec = getFileContent(CHANNELS_FILE_PATH); vector<string> channelsVec = getFileContent(CHANNELS_FILE_PATH);
// Note that using `set`s makes the search faster but we lose the `channels.txt` lines order. for(unsigned int channelsVecIndex = 0; channelsVecIndex < channelsVec.size(); channelsVecIndex++)
channelsToTreat = setFromVector(channelsVec); {
string channel = channelsVec[channelsVecIndex];
channelsToTreat[channelsVecIndex] = channel;
channelsToTreatRev[channel] = channelsVecIndex;
}
keys = getFileContent(KEYS_FILE_PATH); keys = getFileContent(KEYS_FILE_PATH);
apiKey = keys[0]; apiKey = keys[0];
@ -90,7 +96,10 @@ int main(int argc, char *argv[])
{ {
string fileName = entry.path().filename(), string fileName = entry.path().filename(),
channelId = fileName.substr(0, fileName.length() - 4); channelId = fileName.substr(0, fileName.length() - 4);
channelsToTreat.erase(channelId);
channelsToTreat.erase(channelsToTreatRev[channelId]);
channelsToTreatRev.erase(channelId);
channelsAlreadyTreated.insert(channelId); channelsAlreadyTreated.insert(channelId);
} }
@ -132,11 +141,13 @@ void treatChannels(unsigned short threadId)
continue; continue;
} }
string channelToTreat = *channelsToTreat.begin(); string channelToTreat = channelsToTreat.begin()->second;
PRINT(threadId, "Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")") PRINT(threadId, "Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")
channelsToTreat.erase(channelToTreat); channelsToTreat.erase(channelsToTreatRev[channelToTreat]);
channelsToTreatRev.erase(channelToTreat);
channelsAlreadyTreated.insert(channelToTreat); channelsAlreadyTreated.insert(channelToTreat);
channelsAlreadyTreatedAndToTreatMutex.unlock(); channelsAlreadyTreatedAndToTreatMutex.unlock();
@ -285,9 +296,12 @@ void treatComment(unsigned short threadId, json comment, string channelId)
{ {
string channelId = snippet["authorChannelId"]["value"]; string channelId = snippet["authorChannelId"]["value"];
channelsAlreadyTreatedAndToTreatMutex.lock(); channelsAlreadyTreatedAndToTreatMutex.lock();
if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreat.find(channelId) == channelsToTreat.end()) if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreatRev.find(channelId) == channelsToTreatRev.end())
{ {
channelsToTreat.insert(channelId); unsigned int channelsToTreatIndex = channelsToTreat.end()->first + 1;
channelsToTreat[channelsToTreatIndex] = channelId;
channelsToTreatRev[channelId] = channelsToTreatIndex;
channelsAlreadyTreatedAndToTreatMutex.unlock(); channelsAlreadyTreatedAndToTreatMutex.unlock();
writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId); writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);