diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d45b8b33..f2662c15 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,7 +20,9 @@ jobs: - name: Test timeout-minutes: 15 - run: python -m unittest discover -v --start-directory tests/python --pattern "bindings_test*.py" + run: | + python -m unittest discover -v --start-directory examples --pattern "example*.py" + python -m unittest discover -v --start-directory tests/python --pattern "bindings_test*.py" test_cpp: runs-on: ${{matrix.os}} diff --git a/README.md b/README.md index 04d84d66..2b027216 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ For other spaces use the nmslib library https://github.com/nmslib/nmslib. * `knn_query(data, k = 1, num_threads = -1, filter = None)` make a batch query for `k` closest elements for each element of the * `data` (shape:`N*dim`). Returns a numpy array of (shape:`N*k`). * `num_threads` sets the number of cpu threads to use (-1 means use default). - * `filter` filters elements by its labels, returns elements with allowed ids + * `filter` filters elements by their labels, returns elements with allowed ids. Note that search with a filter is slow in Python in multithreaded mode. It is recommended to set `num_threads=1`. * Thread-safe with other `knn_query` calls, but not with `add_items`. * `load_index(path_to_index, max_elements = 0, allow_replace_deleted = False)` loads the index from persistence to the uninitialized index. 
diff --git a/examples/EXAMPLES.md b/examples/EXAMPLES.md index 71f69ff4..a92f3626 100644 --- a/examples/EXAMPLES.md +++ b/examples/EXAMPLES.md @@ -147,7 +147,8 @@ print("Querying only even elements") # Define filter function that allows only even ids filter_function = lambda idx: idx%2 == 0 # Query the elements for themselves and search only for even elements: -labels, distances = hnsw_index.knn_query(data, k=1, filter=filter_function) +# Warning: search with a Python filter is slow in multithreaded mode, therefore we set num_threads=1 +labels, distances = hnsw_index.knn_query(data, k=1, num_threads=1, filter=filter_function) # labels contain only elements with even id ``` diff --git a/examples/example_filter.py b/examples/example_filter.py index 10a059a8..add22a3d 100644 --- a/examples/example_filter.py +++ b/examples/example_filter.py @@ -41,5 +41,6 @@ # Define filter function that allows only even ids filter_function = lambda idx: idx%2 == 0 # Query the elements for themselves and search only for even elements: -labels, distances = hnsw_index.knn_query(data, k=1, filter=filter_function) +# Warning: search with a filter is slow in Python in multithreaded mode, therefore we set num_threads=1 +labels, distances = hnsw_index.knn_query(data, k=1, num_threads=1, filter=filter_function) # labels contain only elements with even id diff --git a/python_bindings/bindings.cpp b/python_bindings/bindings.cpp index 3196a228..5153bb58 100644 --- a/python_bindings/bindings.cpp +++ b/python_bindings/bindings.cpp @@ -623,6 +623,7 @@ class Index { data_numpy_l = new hnswlib::labeltype[rows * k]; data_numpy_d = new dist_t[rows * k]; + // Warning: search with a filter is slow in Python in multithreaded mode. For best performance, set num_threads=1. CustomFilterFunctor idFilter(filter); CustomFilterFunctor* p_idFilter = filter ? 
&idFilter : nullptr; diff --git a/tests/python/bindings_test_filter.py b/tests/python/bindings_test_filter.py index a798e02f..480c8dcd 100644 --- a/tests/python/bindings_test_filter.py +++ b/tests/python/bindings_test_filter.py @@ -47,7 +47,8 @@ def testRandomSelf(self): print("Querying only even elements") # Query the even elements for themselves and measure recall: filter_function = lambda id: id%2 == 0 - labels, distances = hnsw_index.knn_query(data, k=1, filter=filter_function) + # Warning: search with a filter is slow in Python in multithreaded mode, therefore we set num_threads=1 + labels, distances = hnsw_index.knn_query(data, k=1, num_threads=1, filter=filter_function) self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), .5, 3) # Verify that there are only even elements: self.assertTrue(np.max(np.mod(labels, 2)) == 0)