-
Notifications
You must be signed in to change notification settings - Fork 742
Replace deleted elements at addition #418
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
28 commits
Select commit
Hold shift + click to select a range
19e727d
Replace deleted elements at insertion
dyashuni 812b3a4
Refactoring
dyashuni c4bedcf
Refactoring
0f3214c
Add stress test to check multithreading
b440cbd
Merge branch 'develop' into replace_deleted
dyashuni 34fe7f1
Fix possible multithreading issues
22cdb46
Refactoring
1f12fdb
Add C++ multi thread load test
270b237
Add timeout to jobs in actions
c2fb574
Add locks by label
c750df8
Remove previous element update locks as now we have locks by label
ef7e383
Refactoring
1741f50
Update addPointToVacantPlace
2ba0acc
Update load test
aabd0df
Addressing review comments
c26a45b
Update python 3.6 version in actions to meet Ubuntu 22.04
aaf5b5d
Remove python 3.6 tests as it is not available in Ubuntu 22.04
01bd9d0
Refactoring
50bac85
Fix code and tests
2711a61
Multithread test to check update of deleted elements
a31bd4e
Refactoring
61ad825
Fix compile issues
d36fe80
Fix update of elements
a188fce
Update test params
5b67a38
Update CMakeLists.txt
dyashuni e4e97bf
Add test to check recall with replaced elements
646bfda
Refactoring of test
1be2fea
Update readme and refactoring
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,4 @@ var/ | |
.idea/ | ||
.vscode/ | ||
.vs/ | ||
**.DS_Store |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
#include "../hnswlib/hnswlib.h"

#include <algorithm>
#include <atomic>
#include <chrono>
#include <iostream>
#include <memory>
#include <random>
#include <stdexcept>
#include <thread>
#include <vector>
|
||
|
||
int main() { | ||
std::cout << "Running multithread load test" << std::endl; | ||
int d = 16; | ||
int max_elements = 1000; | ||
|
||
std::mt19937 rng; | ||
rng.seed(47); | ||
std::uniform_real_distribution<> distrib_real; | ||
|
||
hnswlib::L2Space space(d); | ||
hnswlib::HierarchicalNSW<float>* alg_hnsw = new hnswlib::HierarchicalNSW<float>(&space, 2 * max_elements); | ||
|
||
std::cout << "Building index" << std::endl; | ||
int num_threads = 40; | ||
int num_labels = 10; | ||
|
||
int num_iterations = 10; | ||
int start_label = 0; | ||
|
||
// run threads that will add elements to the index | ||
// about 7 threads (the number depends on num_threads and num_labels) | ||
// will add/update element with the same label simultaneously | ||
while (true) { | ||
// add elements by batches | ||
std::uniform_int_distribution<> distrib_int(start_label, start_label + num_labels - 1); | ||
std::vector<std::thread> threads; | ||
for (size_t thread_id = 0; thread_id < num_threads; thread_id++) { | ||
threads.push_back( | ||
std::thread( | ||
[&] { | ||
for (int iter = 0; iter < num_iterations; iter++) { | ||
std::vector<float> data(d); | ||
hnswlib::labeltype label = distrib_int(rng); | ||
for (int i = 0; i < d; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
alg_hnsw->addPoint(data.data(), label); | ||
} | ||
} | ||
) | ||
); | ||
} | ||
for (auto &thread : threads) { | ||
thread.join(); | ||
} | ||
if (alg_hnsw->cur_element_count > max_elements - num_labels) { | ||
break; | ||
} | ||
start_label += num_labels; | ||
} | ||
|
||
// insert remaining elements if needed | ||
for (hnswlib::labeltype label = 0; label < max_elements; label++) { | ||
auto search = alg_hnsw->label_lookup_.find(label); | ||
if (search == alg_hnsw->label_lookup_.end()) { | ||
std::cout << "Adding " << label << std::endl; | ||
std::vector<float> data(d); | ||
for (int i = 0; i < d; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
alg_hnsw->addPoint(data.data(), label); | ||
} | ||
} | ||
|
||
std::cout << "Index is created" << std::endl; | ||
|
||
bool stop_threads = false; | ||
std::vector<std::thread> threads; | ||
|
||
// create threads that will do markDeleted and unmarkDeleted of random elements | ||
// each thread works with specific range of labels | ||
std::cout << "Starting markDeleted and unmarkDeleted threads" << std::endl; | ||
num_threads = 20; | ||
int chunk_size = max_elements / num_threads; | ||
for (size_t thread_id = 0; thread_id < num_threads; thread_id++) { | ||
threads.push_back( | ||
std::thread( | ||
[&, thread_id] { | ||
std::uniform_int_distribution<> distrib_int(0, chunk_size - 1); | ||
int start_id = thread_id * chunk_size; | ||
std::vector<bool> marked_deleted(chunk_size); | ||
while (!stop_threads) { | ||
int id = distrib_int(rng); | ||
hnswlib::labeltype label = start_id + id; | ||
if (marked_deleted[id]) { | ||
alg_hnsw->unmarkDelete(label); | ||
marked_deleted[id] = false; | ||
} else { | ||
alg_hnsw->markDelete(label); | ||
marked_deleted[id] = true; | ||
} | ||
} | ||
} | ||
) | ||
); | ||
} | ||
|
||
// create threads that will add and update random elements | ||
std::cout << "Starting add and update elements threads" << std::endl; | ||
num_threads = 20; | ||
std::uniform_int_distribution<> distrib_int_add(max_elements, 2 * max_elements - 1); | ||
for (size_t thread_id = 0; thread_id < num_threads; thread_id++) { | ||
threads.push_back( | ||
std::thread( | ||
[&] { | ||
std::vector<float> data(d); | ||
while (!stop_threads) { | ||
hnswlib::labeltype label = distrib_int_add(rng); | ||
for (int i = 0; i < d; i++) { | ||
data[i] = distrib_real(rng); | ||
} | ||
alg_hnsw->addPoint(data.data(), label); | ||
std::vector<float> data = alg_hnsw->getDataByLabel<float>(label); | ||
float max_val = *max_element(data.begin(), data.end()); | ||
// never happens but prevents compiler from deleting unused code | ||
if (max_val > 10) { | ||
throw std::runtime_error("Unexpected value in data"); | ||
} | ||
} | ||
} | ||
) | ||
); | ||
} | ||
|
||
std::cout << "Sleep and continue operations with index" << std::endl; | ||
int sleep_ms = 60 * 1000; | ||
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms)); | ||
stop_threads = true; | ||
for (auto &thread : threads) { | ||
thread.join(); | ||
} | ||
|
||
std::cout << "Finish" << std::endl; | ||
return 0; | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.