Fix #26: Keep efficient search algorithm while keeping order (notably of the starting set)
This commit is contained in:
parent
27cd5c3a64
commit
f6c11b54f3
34
main.cpp
34
main.cpp
@ -36,8 +36,10 @@ bool doesFileExist(string filePath),
|
|||||||
mutex printMutex,
|
mutex printMutex,
|
||||||
channelsAlreadyTreatedAndToTreatMutex,
|
channelsAlreadyTreatedAndToTreatMutex,
|
||||||
quotaMutex;
|
quotaMutex;
|
||||||
set<string> channelsAlreadyTreated,
|
set<string> channelsAlreadyTreated;
|
||||||
channelsToTreat;
|
// Two `map`s to simulate a bidirectional map.
|
||||||
|
map<unsigned int, string> channelsToTreat;
|
||||||
|
map<string, unsigned int> channelsToTreatRev;
|
||||||
vector<string> keys;
|
vector<string> keys;
|
||||||
unsigned int commentsCount = 0,
|
unsigned int commentsCount = 0,
|
||||||
commentsPerSecondCount = 0,
|
commentsPerSecondCount = 0,
|
||||||
@ -76,10 +78,14 @@ int main(int argc, char *argv[])
|
|||||||
|
|
||||||
// The starting set should be written to `CHANNELS_FILE_PATH`.
|
// The starting set should be written to `CHANNELS_FILE_PATH`.
|
||||||
// To resume this algorithm after a shutdown, just restart it after having deleted the last channel folders in `CHANNELS_DIRECTORY` being treated.
|
// To resume this algorithm after a shutdown, just restart it after having deleted the last channel folders in `CHANNELS_DIRECTORY` being treated.
|
||||||
// On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat` or `channelsToTreat` otherwise before continuing, as if `CHANNELS_FILE_PATH` was containing a **treated** starting set.
|
// On a restart, `CHANNELS_FILE_PATH` is read and every channel not found in `CHANNELS_DIRECTORY` is added to `channelsToTreat*`, or to `channelsAlreadyTreated` otherwise, before continuing, as if `CHANNELS_FILE_PATH` contained a **treated** starting set.
|
||||||
vector<string> channelsVec = getFileContent(CHANNELS_FILE_PATH);
|
vector<string> channelsVec = getFileContent(CHANNELS_FILE_PATH);
|
||||||
// Note that using `set`s makes the search faster but we lose the `channels.txt` lines order.
|
for(unsigned int channelsVecIndex = 0; channelsVecIndex < channelsVec.size(); channelsVecIndex++)
|
||||||
channelsToTreat = setFromVector(channelsVec);
|
{
|
||||||
|
string channel = channelsVec[channelsVecIndex];
|
||||||
|
channelsToTreat[channelsVecIndex] = channel;
|
||||||
|
channelsToTreatRev[channel] = channelsVecIndex;
|
||||||
|
}
|
||||||
|
|
||||||
keys = getFileContent(KEYS_FILE_PATH);
|
keys = getFileContent(KEYS_FILE_PATH);
|
||||||
apiKey = keys[0];
|
apiKey = keys[0];
|
||||||
@ -90,7 +96,10 @@ int main(int argc, char *argv[])
|
|||||||
{
|
{
|
||||||
string fileName = entry.path().filename(),
|
string fileName = entry.path().filename(),
|
||||||
channelId = fileName.substr(0, fileName.length() - 4);
|
channelId = fileName.substr(0, fileName.length() - 4);
|
||||||
channelsToTreat.erase(channelId);
|
|
||||||
|
channelsToTreat.erase(channelsToTreatRev[channelId]);
|
||||||
|
channelsToTreatRev.erase(channelId);
|
||||||
|
|
||||||
channelsAlreadyTreated.insert(channelId);
|
channelsAlreadyTreated.insert(channelId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -132,11 +141,13 @@ void treatChannels(unsigned short threadId)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
string channelToTreat = *channelsToTreat.begin();
|
string channelToTreat = channelsToTreat.begin()->second;
|
||||||
|
|
||||||
PRINT(threadId, "Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")
|
PRINT(threadId, "Treating channel " << channelToTreat << " (treated: " << channelsAlreadyTreated.size() << ", to treat: " << channelsToTreat.size() << ")")
|
||||||
|
|
||||||
channelsToTreat.erase(channelToTreat);
|
channelsToTreat.erase(channelsToTreatRev[channelToTreat]);
|
||||||
|
channelsToTreatRev.erase(channelToTreat);
|
||||||
|
|
||||||
channelsAlreadyTreated.insert(channelToTreat);
|
channelsAlreadyTreated.insert(channelToTreat);
|
||||||
|
|
||||||
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
||||||
@ -285,9 +296,12 @@ void treatComment(unsigned short threadId, json comment, string channelId)
|
|||||||
{
|
{
|
||||||
string channelId = snippet["authorChannelId"]["value"];
|
string channelId = snippet["authorChannelId"]["value"];
|
||||||
channelsAlreadyTreatedAndToTreatMutex.lock();
|
channelsAlreadyTreatedAndToTreatMutex.lock();
|
||||||
if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreat.find(channelId) == channelsToTreat.end())
|
if(channelsAlreadyTreated.find(channelId) == channelsAlreadyTreated.end() && channelsToTreatRev.find(channelId) == channelsToTreatRev.end())
|
||||||
{
|
{
|
||||||
channelsToTreat.insert(channelId);
|
unsigned int channelsToTreatIndex = channelsToTreat.end()->first + 1;
|
||||||
|
channelsToTreat[channelsToTreatIndex] = channelId;
|
||||||
|
channelsToTreatRev[channelId] = channelsToTreatIndex;
|
||||||
|
|
||||||
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
channelsAlreadyTreatedAndToTreatMutex.unlock();
|
||||||
|
|
||||||
writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);
|
writeFile(threadId, CHANNELS_FILE_PATH, "a", "\n" + channelId);
|
||||||
|
Loading…
Reference in New Issue
Block a user