From c78a85165516b4b0db3699716b8f0bc1ef331bfe Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 31 Mar 2026 16:29:00 +0000 Subject: [PATCH 01/19] **Step 1: Rename SVS thread pool internal methods to match shared pool terminology** MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename `setNumThreads`/`getNumThreads` → `setParallelism`/`getParallelism` and `getThreadPoolCapacity` → `getPoolSize` across VectorSimilarity. Public info API fields (`numThreads`, `lastReservedThreads`, `NUM_THREADS`, `LAST_RESERVED_NUM_THREADS`) remain unchanged. No behavioral changes. --- src/VecSim/algorithms/svs/svs.h | 36 +++++++++++++------------- src/VecSim/algorithms/svs/svs_tiered.h | 18 ++++++------- tests/benchmark/bm_utils.h | 4 +-- tests/benchmark/bm_vecsim_svs.h | 2 +- tests/unit/test_svs_fp16.cpp | 6 ++--- tests/unit/test_svs_tiered.cpp | 12 ++++----- 6 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 8b03514f0..9d5ee1c3c 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -41,9 +41,9 @@ struct SVSIndexBase virtual int deleteVectors(const labelType *labels, size_t n) = 0; virtual bool isLabelExists(labelType label) const = 0; virtual size_t indexStorageSize() const = 0; - virtual size_t getNumThreads() const = 0; - virtual void setNumThreads(size_t numThreads) = 0; - virtual size_t getThreadPoolCapacity() const = 0; + virtual size_t getParallelism() const = 0; + virtual void setParallelism(size_t parallelism) = 0; + virtual size_t getPoolSize() const = 0; virtual bool isCompressed() const = 0; size_t getNumMarkedDeleted() const { return num_marked_deleted; } @@ -66,9 +66,9 @@ struct SVSIndexBase }; /** Thread Management Strategy: - * - addVector(): Requires numThreads == 1 - * - addVectors(): Allows any numThreads value, but prohibits n=1 with numThreads>1 - * - Callers are responsible for setting appropriate 
thread counts + * - addVector(): Requires parallelism == 1 + * - addVectors(): Allows any parallelism value, but prohibits n=1 with parallelism>1 + * - Callers are responsible for setting appropriate parallelism **/ template @@ -251,12 +251,12 @@ class SVSIndex : public VecSimIndexAbstract, fl this->impl_ = std::move(svs_handler->impl); } - // Assuming numThreads was updated to reflect the number of available threads before this + // Assuming parallelism was updated to reflect the number of available threads before this // function was called. - // This function assumes that the caller has already set numThreads to the appropriate value + // This function assumes that the caller has already set parallelism to the appropriate value // for the operation. - // Important NOTE: For single vector operations (n=1), numThreads should be 1. - // For bulk operations (n>1), numThreads should reflect the number of available threads. + // Important NOTE: For single vector operations (n=1), parallelism should be 1. + // For bulk operations (n>1), parallelism should reflect the number of available threads. 
int addVectorsImpl(const void *vectors_data, const labelType *labels, size_t n) { if (n == 0) { return 0; @@ -412,8 +412,8 @@ class SVSIndex : public VecSimIndexAbstract, fl .maxCandidatePoolSize = this->buildParams.max_candidate_pool_size, .pruneTo = this->buildParams.prune_to, .useSearchHistory = this->buildParams.use_full_search_history, - .numThreads = this->getThreadPoolCapacity(), - .lastReservedThreads = this->getNumThreads(), + .numThreads = this->getPoolSize(), + .lastReservedThreads = this->getParallelism(), .numberOfMarkedDeletedNodes = this->num_marked_deleted, .searchWindowSize = this->search_window_size, .searchBufferCapacity = this->search_buffer_capacity, @@ -517,16 +517,16 @@ class SVSIndex : public VecSimIndexAbstract, fl int addVector(const void *vector_data, labelType label) override { // Enforce single-threaded execution for single vector operations to ensure optimal - // performance and consistent behavior. Callers must set numThreads=1 before calling this + // performance and consistent behavior. Callers must set parallelism=1 before calling this // method. - assert(getNumThreads() == 1 && "Can't use more than one thread to insert a single vector"); + assert(getParallelism() == 1 && "Can't use more than one thread to insert a single vector"); return addVectorsImpl(vector_data, &label, 1); } int addVectors(const void *vectors_data, const labelType *labels, size_t n) override { // Prevent misuse: single vector operations should use addVector(), not addVectors() with // n=1 This ensures proper thread management and API contract enforcement. - assert(!(n == 1 && getNumThreads() > 1) && + assert(!(n == 1 && getParallelism() > 1) && "Can't use more than one thread to insert a single vector"); return addVectorsImpl(vectors_data, labels, n); } @@ -541,10 +541,10 @@ class SVSIndex : public VecSimIndexAbstract, fl return impl_ ? 
impl_->has_id(label) : false; } - size_t getNumThreads() const override { return threadpool_.size(); } - void setNumThreads(size_t numThreads) override { threadpool_.resize(numThreads); } + size_t getParallelism() const override { return threadpool_.size(); } + void setParallelism(size_t parallelism) override { threadpool_.resize(parallelism); } - size_t getThreadPoolCapacity() const override { return threadpool_.capacity(); } + size_t getPoolSize() const override { return threadpool_.capacity(); } bool isCompressed() const override { return storage_traits_t::is_compressed(); } diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index 2eac66d24..8ca67bf5a 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -587,7 +587,7 @@ class TieredSVSIndex : public VecSimTieredIndex { // No need to run GC on an empty index. return; } - svs_index->setNumThreads(std::min(availableThreads, index->backendIndex->indexSize())); + svs_index->setParallelism(std::min(availableThreads, index->backendIndex->indexSize())); // VecSimIndexAbstract::runGC() is protected static_cast(index->backendIndex)->runGC(); } @@ -601,7 +601,7 @@ class TieredSVSIndex : public VecSimTieredIndex { return; } - auto total_threads = this->GetSVSIndex()->getThreadPoolCapacity(); + auto total_threads = this->GetSVSIndex()->getPoolSize(); auto jobs = SVSMultiThreadJob::createJobs( this->allocator, SVS_BATCH_UPDATE_JOB, updateSVSIndexWrapper, this, total_threads, std::chrono::microseconds(updateJobWaitTime), &uncompletedJobs); @@ -614,7 +614,7 @@ class TieredSVSIndex : public VecSimTieredIndex { return; } - auto total_threads = this->GetSVSIndex()->getThreadPoolCapacity(); + auto total_threads = this->GetSVSIndex()->getPoolSize(); auto jobs = SVSMultiThreadJob::createJobs( this->allocator, SVS_GC_JOB, SVSIndexGCWrapper, this, total_threads, std::chrono::microseconds(updateJobWaitTime), &uncompletedJobs); @@ -683,7 +683,7 @@ 
class TieredSVSIndex : public VecSimTieredIndex { assert(labels_to_move.size() == vectors_to_move.size() / this->frontendIndex->getDim()); if (this->backendIndex->indexSize() == 0) { // If backend index is empty, we need to initialize it first. - svs_index->setNumThreads(std::min(availableThreads, labels_to_move.size())); + svs_index->setParallelism(std::min(availableThreads, labels_to_move.size())); auto impl = svs_index->createImpl(vectors_to_move.data(), labels_to_move.data(), labels_to_move.size()); @@ -696,7 +696,7 @@ class TieredSVSIndex : public VecSimTieredIndex { main_shared_lock.unlock(); std::lock_guard lock(this->mainIndexGuard); // Upgrade to unique lock to add vectors - svs_index->setNumThreads(std::min(availableThreads, labels_to_move.size())); + svs_index->setParallelism(std::min(availableThreads, labels_to_move.size())); svs_index->addVectors(vectors_to_move.data(), labels_to_move.data(), labels_to_move.size()); } @@ -821,9 +821,9 @@ class TieredSVSIndex : public VecSimTieredIndex { std::scoped_lock lock(this->updateJobMutex, this->mainIndexGuard); // Set available thread count to 1 for single vector write-in-place operation. // This maintains the contract that single vector operations use exactly one thread. - // TODO: Replace this setNumThreads(1) call with an assertion once we establish - // a contract that write-in-place mode guarantees numThreads == 1. - svs_index->setNumThreads(1); + // TODO: Replace this setParallelism(1) call with an assertion once we establish + // a contract that write-in-place mode guarantees parallelism == 1. + svs_index->setParallelism(1); return this->backendIndex->addVector(storage_blob.get(), label); } } @@ -1077,7 +1077,7 @@ class TieredSVSIndex : public VecSimTieredIndex { return; } // Force single thread for write-in-place mode. 
- this->GetSVSIndex()->setNumThreads(1); + this->GetSVSIndex()->setParallelism(1); // VecSimIndexAbstract::runGC() is protected static_cast(this->backendIndex)->runGC(); return; diff --git a/tests/benchmark/bm_utils.h b/tests/benchmark/bm_utils.h index 88f3e2e5d..e952794ad 100644 --- a/tests/benchmark/bm_utils.h +++ b/tests/benchmark/bm_utils.h @@ -30,9 +30,9 @@ CreateTieredSVSParams(VecSimParams &svs_params, tieredIndexMock &mock_thread_poo template static void verifyNumThreads(TieredSVSIndex *tiered_index, size_t expected_num_threads, size_t expected_capcity, std::string msg = "") { - ASSERT_EQ(tiered_index->GetSVSIndex()->getThreadPoolCapacity(), expected_capcity) + ASSERT_EQ(tiered_index->GetSVSIndex()->getPoolSize(), expected_capcity) << msg << ": thread pool capacity mismatch"; - size_t num_reserved_threads = tiered_index->GetSVSIndex()->getNumThreads(); + size_t num_reserved_threads = tiered_index->GetSVSIndex()->getParallelism(); if (num_reserved_threads < expected_num_threads) { std::cout << msg << ": WARNING: last reserved threads (" << num_reserved_threads << ") is less than expected (" << expected_num_threads << ")." << std::endl; diff --git a/tests/benchmark/bm_vecsim_svs.h b/tests/benchmark/bm_vecsim_svs.h index fe6c92493..c4b167421 100644 --- a/tests/benchmark/bm_vecsim_svs.h +++ b/tests/benchmark/bm_vecsim_svs.h @@ -115,7 +115,7 @@ class BM_VecSimSVS : public BM_VecSimGeneral { tiered_params.primaryIndexParams->algoParams.svsParams.num_threads; size_t num_threads = params_threadpool_size ? 
params_threadpool_size : mock_thread_pool.thread_pool_size; - tiered_index->GetSVSIndex()->setNumThreads(num_threads); + tiered_index->GetSVSIndex()->setParallelism(num_threads); test_utils::verifyNumThreads(tiered_index, num_threads, num_threads, std::string("CreateTieredSVSIndex")); diff --git a/tests/unit/test_svs_fp16.cpp b/tests/unit/test_svs_fp16.cpp index 59c11c0a1..82937ef32 100644 --- a/tests/unit/test_svs_fp16.cpp +++ b/tests/unit/test_svs_fp16.cpp @@ -2242,8 +2242,8 @@ class FP16SVSTieredIndexTest : public FP16SVSTest { } void verifyNumThreads(TieredSVSIndex *tiered_index, size_t expected_num_threads, size_t expected_capcity) { - ASSERT_EQ(tiered_index->GetSVSIndex()->getNumThreads(), expected_num_threads); - ASSERT_EQ(tiered_index->GetSVSIndex()->getThreadPoolCapacity(), expected_capcity); + ASSERT_EQ(tiered_index->GetSVSIndex()->getParallelism(), expected_num_threads); + ASSERT_EQ(tiered_index->GetSVSIndex()->getPoolSize(), expected_capcity); } TieredSVSIndex *CreateTieredSVSIndex(const TieredIndexParams &tiered_params, @@ -2258,7 +2258,7 @@ class FP16SVSTieredIndexTest : public FP16SVSTest { // Set number of available threads to 1 unless specified otherwise, // so we can insert one vector at a time directly to svs. 
- tiered_index->GetSVSIndex()->setNumThreads(num_available_threads); + tiered_index->GetSVSIndex()->setParallelism(num_available_threads); size_t params_threadpool_size = tiered_params.primaryIndexParams->algoParams.svsParams.num_threads; size_t expected_capacity = diff --git a/tests/unit/test_svs_tiered.cpp b/tests/unit/test_svs_tiered.cpp index 3d9c0bd0d..528df8794 100644 --- a/tests/unit/test_svs_tiered.cpp +++ b/tests/unit/test_svs_tiered.cpp @@ -92,8 +92,8 @@ class SVSTieredIndexTest : public ::testing::Test { void verifyNumThreads(TieredSVSIndex *tiered_index, size_t expected_num_threads, size_t expected_capcity) { - ASSERT_EQ(tiered_index->GetSVSIndex()->getNumThreads(), expected_num_threads); - ASSERT_EQ(tiered_index->GetSVSIndex()->getThreadPoolCapacity(), expected_capcity); + ASSERT_EQ(tiered_index->GetSVSIndex()->getParallelism(), expected_num_threads); + ASSERT_EQ(tiered_index->GetSVSIndex()->getPoolSize(), expected_capcity); } TieredSVSIndex *CreateTieredSVSIndex(const TieredIndexParams &tiered_params, tieredIndexMock &mock_thread_pool, @@ -104,11 +104,11 @@ class SVSTieredIndexTest : public ::testing::Test { // Set the created tiered index in the index external context (it will take ownership over // the index, and we'll need to release the ctx at the end of the test. mock_thread_pool.ctx->index_strong_ref.reset(tiered_index); - // Set numThreads to 1 by default to allow direct calls to SVS addVector() API, + // Set parallelism to 1 by default to allow direct calls to SVS addVector() API, // which requires exactly 1 thread. When using tiered index addVector API, - // the thread count is managed internally according to the operation and threadpool - // capacity, so testing parallelism remains intact. - tiered_index->GetSVSIndex()->setNumThreads(num_available_threads); + // the parallelism is managed internally according to the operation and pool + // size, so testing parallelism remains intact. 
+ tiered_index->GetSVSIndex()->setParallelism(num_available_threads); size_t params_threadpool_size = tiered_params.primaryIndexParams->algoParams.svsParams.num_threads; size_t expected_capacity = From c488bf03c6f1c6fd58f8913e7e736ae1964f4cf6 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Fri, 3 Apr 2026 04:41:22 +0000 Subject: [PATCH 02/19] introduce VecSim_UpdateThreadPoolSize api --- src/VecSim/algorithms/svs/svs.h | 8 ++++++++ src/VecSim/vec_sim.cpp | 11 +++++++++++ src/VecSim/vec_sim.h | 10 ++++++++++ 3 files changed, 29 insertions(+) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 9d5ee1c3c..2786aa363 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -37,6 +37,14 @@ struct SVSIndexBase { SVSIndexBase() : num_marked_deleted{0} {}; virtual ~SVSIndexBase() = default; + + // Singleton accessor for the shared SVS thread pool. + // Returns a reference to the static shared_ptr, which is initially null. + // Created lazily on first VecSim_UpdateThreadPoolSize(n > 0) call. 
+ static std::shared_ptr &getSharedThreadPool() { + static std::shared_ptr shared_pool; + return shared_pool; + } virtual int addVectors(const void *vectors_data, const labelType *labels, size_t n) = 0; virtual int deleteVectors(const labelType *labels, size_t n) = 0; virtual bool isLabelExists(labelType label) const = 0; diff --git a/src/VecSim/vec_sim.cpp b/src/VecSim/vec_sim.cpp index 004d575b7..a4b8201bc 100644 --- a/src/VecSim/vec_sim.cpp +++ b/src/VecSim/vec_sim.cpp @@ -15,6 +15,7 @@ #include "VecSim/vec_sim_index.h" #include "VecSim/vec_sim_adhoc_bf_ctx.h" #include "VecSim/types/bfloat16.h" +#include "VecSim/algorithms/svs/svs.h" #include #include "memory.h" @@ -34,6 +35,16 @@ extern "C" void VecSim_SetLogCallbackFunction(logCallbackFunction callback) { extern "C" void VecSim_SetWriteMode(VecSimWriteMode mode) { VecSimIndex::setWriteMode(mode); } +extern "C" void VecSim_UpdateThreadPoolSize(size_t new_size) { + if (new_size == 0) { + VecSimIndex::setWriteMode(VecSim_WriteInPlace); + } else { + VecSimIndex::setWriteMode(VecSim_WriteAsync); + } + // TODO Step 3: Create/resize the shared SVS thread pool singleton via + // SVSIndexBase::getSharedThreadPool() +} + static VecSimResolveCode _ResolveParams_EFRuntime(VecSimAlgo index_type, VecSimRawParam rparam, VecSimQueryParams *qparams, VecsimQueryType query_type) { diff --git a/src/VecSim/vec_sim.h b/src/VecSim/vec_sim.h index 30f19065d..14e1bf65d 100644 --- a/src/VecSim/vec_sim.h +++ b/src/VecSim/vec_sim.h @@ -312,6 +312,16 @@ void VecSim_SetTestLogContext(const char *test_name, const char *test_type); */ void VecSim_SetWriteMode(VecSimWriteMode mode); +/** + * @brief Update the shared SVS thread pool size and set the write mode accordingly. + * If new_size == 0, sets write mode to in-place (no background threads). + * If new_size > 0, sets write mode to async and resizes the shared thread pool. + * @note Should be called from the main thread only. 
+ * + * @param new_size the new thread pool size (0 = in-place mode, >0 = async mode). + */ +void VecSim_UpdateThreadPoolSize(size_t new_size); + #ifdef __cplusplus } #endif From 344538169019e97191cb266c0d1c690f9b84974e Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 5 Apr 2026 11:03:19 +0000 Subject: [PATCH 03/19] imp new VecSimSVSThreadPoolImpl --- src/VecSim/algorithms/svs/svs_utils.h | 235 ++++++++++++++++++++------ 1 file changed, 183 insertions(+), 52 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h index 2e240358f..f05ceb94b 100644 --- a/src/VecSim/algorithms/svs/svs_utils.h +++ b/src/VecSim/algorithms/svs/svs_utils.h @@ -19,8 +19,10 @@ #include "svs/cpuid.h" #endif +#include #include #include +#include #include #include @@ -341,93 +343,222 @@ struct SVSGraphBuilder { } }; -// Custom thread pool for SVS index +// A slot in the shared SVS thread pool. Wraps an SVS Thread with an occupancy flag +// used by the rental mechanism. Stored via shared_ptr so that a renter's reference +// keeps the slot (and its OS thread) alive even if the pool shrinks mid-rental. +struct ThreadSlot { + svs::threads::Thread thread; + std::atomic occupied{false}; + + ThreadSlot() = default; + + // Non-copyable, non-movable (atomic is not movable) + ThreadSlot(const ThreadSlot &) = delete; + ThreadSlot &operator=(const ThreadSlot &) = delete; + ThreadSlot(ThreadSlot &&) = delete; + ThreadSlot &operator=(ThreadSlot &&) = delete; +}; + +// RAII guard for threads rented from the shared pool. On destruction, marks all +// rented slots as unoccupied (lock-free atomic stores). Holds shared_ptr references +// to keep slots alive even if the pool shrinks while the rental is active. 
+class RentedThreads { +public: + RentedThreads() = default; + + // Move-only + RentedThreads(RentedThreads &&other) noexcept : slots_(std::move(other.slots_)) {} + RentedThreads &operator=(RentedThreads &&other) noexcept { + release(); + slots_ = std::move(other.slots_); + return *this; + } + RentedThreads(const RentedThreads &) = delete; + RentedThreads &operator=(const RentedThreads &) = delete; + + ~RentedThreads() { release(); } + + void add(std::shared_ptr slot) { slots_.push_back(std::move(slot)); } + + size_t count() const { return slots_.size(); } + + svs::threads::Thread &operator[](size_t i) { return slots_[i]->thread; } + +private: + void release() { + for (auto &slot : slots_) { + slot->occupied.store(false, std::memory_order_release); + } + slots_.clear(); + } + + std::vector> slots_; +}; + +// Shared thread pool for SVS indexes with rental model. // Based on svs::threads::NativeThreadPoolBase with changes: -// * Number of threads is fixed on construction time -// * Pool is resizable in bounds of pre-allocated threads +// * Pool is physically resizable (creates/destroys OS threads) +// * Threads are rented for the duration of a parallel_for call +// * Multiple callers can rent disjoint subsets of threads concurrently +// * Shrinking while threads are rented is safe (shared_ptr lifecycle) class VecSimSVSThreadPoolImpl { public: - // Allocate `num_threads - 1` threads since the main thread participates in the work - // as well. - explicit VecSimSVSThreadPoolImpl(size_t num_threads = 1) - : size_{num_threads}, threads_(num_threads - 1) {} + // Create a pool with `num_threads` total parallelism (including the calling thread). + // Spawns `num_threads - 1` worker OS threads. num_threads must be >= 1. + // When no threads are needed (write-in-place mode), the pool should not be created + // at all — the singleton should remain nullptr. 
+ explicit VecSimSVSThreadPoolImpl(size_t num_threads = 1) { + assert(num_threads && "VecSimSVSThreadPoolImpl should not be created with 0 threads"); + slots_.reserve(num_threads - 1); + for (size_t i = 0; i < num_threads - 1; ++i) { + slots_.push_back(std::make_shared()); + } + } + + // Total parallelism: worker slots + 1 (the calling thread always participates). + size_t size() const { + std::lock_guard lock{pool_mutex_}; + return slots_.size() + 1; + } - size_t capacity() const { return threads_.size() + 1; } - size_t size() const { return size_; } + // Alias for size(). Capacity and size are always equal since resize is physical. + // TODO: is it needed? can we remove one of them? + size_t capacity() const { return size(); } - // Support resize - do not modify threads container just limit the size + // Physically resize the pool. Creates new OS threads on grow, shuts down idle threads + // on shrink. new_size is total parallelism including the calling thread (minimum 1). + // Occupied threads (held by renters) survive shrink via shared_ptr — their OS thread + // is joined when the last shared_ptr reference is dropped (in ~RentedThreads). void resize(size_t new_size) { - std::lock_guard lock{use_mutex_}; - size_ = std::clamp(new_size, size_t{1}, threads_.size() + 1); + new_size = std::max(new_size, size_t{1}); + size_t target_workers = new_size - 1; + + std::lock_guard lock{pool_mutex_}; + + if (target_workers > slots_.size()) { + // Grow: spawn new worker threads + slots_.reserve(target_workers); + for (size_t i = slots_.size(); i < target_workers; ++i) { + slots_.push_back(std::make_shared()); + } + } else if (target_workers < slots_.size()) { + // Shrink: remove slots from the back. + // If a slot is occupied, the renter's shared_ptr keeps it alive; the OS thread + // will be joined when ~ThreadSlot runs after ~RentedThreads releases it. + // If a slot is idle, dropping the shared_ptr here triggers immediate shutdown. 
+ slots_.resize(target_workers); + } } - void parallel_for(std::function f, size_t n) { - if (n > size_) { - throw svs::threads::ThreadingException("Number of tasks exceeds the thread pool size"); + // Rent up to `count` worker threads from the pool. Returns an RAII guard that + // automatically releases the threads when destroyed. + // The SVS pool is sized to match the RediSearch thread pool, and RediSearch controls + // scheduling via reserve jobs, so all requested slots should always be available. + // Getting fewer threads than requested indicates a bug in the scheduling logic. + RentedThreads rent(size_t count) { + RentedThreads rented; + if (count == 0) { + return rented; + } + + std::lock_guard lock{pool_mutex_}; + for (auto &slot : slots_) { + if (rented.count() >= count) { + break; + } + bool expected = false; + if (slot->occupied.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) { + rented.add(slot); + } } + + if (rented.count() < count) { + svs::logging::warn("SVS thread pool: rented {} threads out of {} requested " + "(pool has {} slots). This should not happen.", + rented.count(), count, slots_.size()); + assert(false && "Failed to rent the expected number of SVS threads"); + } + return rented; + } + + // Execute `f` in parallel with `n` partitions. The calling thread runs partition 0, + // and up to `n-1` worker threads are rented for partitions 1..n-1. + // Same signature as the SVS ThreadPool concept. + void parallel_for(std::function f, size_t n) { if (n == 0) { return; - } else if (n == 1) { - // Run on the main function. + } + if (n == 1) { + // Single partition: run on the calling thread, no rental needed. try { f(0); } catch (const std::exception &error) { - manage_exception_during_run(error.what()); + // No workers to check — just rethrow with formatted message. 
+ auto msg = fmt::format("Thread 0: {}\n", error.what()); + throw svs::threads::ThreadingException{std::move(msg)}; } return; - } else { - std::lock_guard lock{use_mutex_}; - for (size_t i = 0; i < n - 1; ++i) { - threads_[i].assign({&f, i + 1}); - } - // Run on the main function. - try { - f(0); - } catch (const std::exception &error) { - manage_exception_during_run(error.what()); - } + } - // Wait until all threads are done. - // If any thread fails, then we're throwing. - for (size_t i = 0; i < size_ - 1; ++i) { - auto &thread = threads_[i]; - thread.wait(); - if (!thread.is_okay()) { - manage_exception_during_run(); - } - } + // Rent n-1 worker threads + auto rented = rent(n - 1); + assert(rented.count() == n - 1); + size_t num_workers = n - 1; + + // Assign work to rented workers (partitions 1..n-1) + for (size_t i = 0; i < num_workers; ++i) { + rented[i].assign({&f, i + 1}); + } + + // Run partition 0 on the calling thread + std::string main_thread_error; + try { + f(0); + } catch (const std::exception &error) { + main_thread_error = error.what(); } + + // Wait for all rented workers and collect errors. + // RentedThreads destructor will release the slots after this block. + manage_workers_after_run(main_thread_error, rented, num_workers); } - void manage_exception_during_run(const std::string &thread_0_message = {}) { +private: + // Wait for all rented workers to finish. If any worker (or the main thread) threw, + // restart crashed workers and throw a combined exception. + void manage_workers_after_run(const std::string &main_thread_error, RentedThreads &rented, + size_t rented_count) { auto message = std::string{}; auto inserter = std::back_inserter(message); - if (!thread_0_message.empty()) { - fmt::format_to(inserter, "Thread 0: {}\n", thread_0_message); + bool has_error = !main_thread_error.empty(); + + if (has_error) { + fmt::format_to(inserter, "Thread 0: {}\n", main_thread_error); } - // Manage all other exceptions thrown, restarting crashed threads. 
- for (size_t i = 0; i < size_ - 1; ++i) { - auto &thread = threads_[i]; + for (size_t i = 0; i < rented_count; ++i) { + auto &thread = rented[i]; thread.wait(); if (!thread.is_okay()) { + has_error = true; try { thread.unsafe_get_exception(); } catch (const std::exception &error) { fmt::format_to(inserter, "Thread {}: {}\n", i + 1, error.what()); } - // Restart the thread. - threads_[i].shutdown(); - threads_[i] = svs::threads::Thread{}; + // Restart the crashed thread so the slot is usable again. + thread.shutdown(); + thread = svs::threads::Thread{}; } } - throw svs::threads::ThreadingException{std::move(message)}; + + if (has_error) { + throw svs::threads::ThreadingException{std::move(message)}; + } } -private: - std::mutex use_mutex_; - size_t size_; - std::vector threads_; + mutable std::mutex pool_mutex_; + std::vector> slots_; }; // Copy-movable wrapper for VecSimSVSThreadPoolImpl From 13454cb548218f6c7ac080f47d88b14de2af3b99 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 5 Apr 2026 13:50:53 +0000 Subject: [PATCH 04/19] imp VecSimSVSThreadPool --- src/VecSim/algorithms/svs/svs.h | 15 +++--- src/VecSim/algorithms/svs/svs_utils.h | 49 ++++++++++++++---- tests/unit/test_svs_tiered.cpp | 72 +++++++++++++++++++-------- 3 files changed, 97 insertions(+), 39 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 2786aa363..cd03370bf 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -39,10 +39,10 @@ struct SVSIndexBase virtual ~SVSIndexBase() = default; // Singleton accessor for the shared SVS thread pool. - // Returns a reference to the static shared_ptr, which is initially null. - // Created lazily on first VecSim_UpdateThreadPoolSize(n > 0) call. + // Always valid — initialized with size 1 (write-in-place mode: 0 worker threads, + // only the calling thread participates). Resized on VecSim_UpdateThreadPoolSize() calls. 
static std::shared_ptr &getSharedThreadPool() { - static std::shared_ptr shared_pool; + static auto shared_pool = std::make_shared(1); return shared_pool; } virtual int addVectors(const void *vectors_data, const labelType *labels, size_t n) = 0; @@ -369,8 +369,7 @@ class SVSIndex : public VecSimIndexAbstract, fl svs_details::getOrDefault(params.leanvec_dim, SVS_VAMANA_DEFAULT_LEANVEC_DIM)}, epsilon{svs_details::getOrDefault(params.epsilon, SVS_VAMANA_DEFAULT_EPSILON)}, is_two_level_lvq{isTwoLevelLVQ(params.quantBits)}, - threadpool_{std::max(size_t{SVS_VAMANA_DEFAULT_NUM_THREADS}, params.num_threads)}, - impl_{nullptr} { + threadpool_{SVSIndexBase::getSharedThreadPool()}, impl_{nullptr} { logger_ = makeLogger(); } @@ -549,10 +548,10 @@ class SVSIndex : public VecSimIndexAbstract, fl return impl_ ? impl_->has_id(label) : false; } - size_t getParallelism() const override { return threadpool_.size(); } - void setParallelism(size_t parallelism) override { threadpool_.resize(parallelism); } + size_t getParallelism() const override { return threadpool_.getParallelism(); } + void setParallelism(size_t parallelism) override { threadpool_.setParallelism(parallelism); } - size_t getPoolSize() const override { return threadpool_.capacity(); } + size_t getPoolSize() const override { return threadpool_.poolSize(); } bool isCompressed() const override { return storage_traits_t::is_compressed(); } diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h index f05ceb94b..48ad9d72e 100644 --- a/src/VecSim/algorithms/svs/svs_utils.h +++ b/src/VecSim/algorithms/svs/svs_utils.h @@ -405,8 +405,8 @@ class VecSimSVSThreadPoolImpl { public: // Create a pool with `num_threads` total parallelism (including the calling thread). // Spawns `num_threads - 1` worker OS threads. num_threads must be >= 1. - // When no threads are needed (write-in-place mode), the pool should not be created - // at all — the singleton should remain nullptr. 
+ // In write-in-place mode, the pool is created with num_threads == 1 (0 worker threads, + // only the calling thread participates). explicit VecSimSVSThreadPoolImpl(size_t num_threads = 1) { assert(num_threads && "VecSimSVSThreadPoolImpl should not be created with 0 threads"); slots_.reserve(num_threads - 1); @@ -561,21 +561,50 @@ class VecSimSVSThreadPoolImpl { std::vector> slots_; }; -// Copy-movable wrapper for VecSimSVSThreadPoolImpl +// Per-index wrapper around the shared VecSimSVSThreadPoolImpl singleton. +// Lightweight, copyable (SVS stores a copy via ThreadPoolHandle). Both the original +// and SVS's copy share the same pool_ and parallelism_ via shared_ptr, so state +// changes propagate automatically. +// Satisfies the svs::threads::ThreadPool concept (size() + parallel_for). +// The pool is always valid — in write-in-place mode it has size 1 (0 worker threads). class VecSimSVSThreadPool { private: - std::shared_ptr pool_; + std::shared_ptr pool_; // shared across all indexes + std::shared_ptr> parallelism_; // per-index, shared across copies public: - explicit VecSimSVSThreadPool(size_t num_threads = 1) - : pool_{std::make_shared(num_threads)} {} + // Construct with reference to the shared pool singleton. + // parallelism_ starts at 0 — caller must call setParallelism() before parallel_for(). + explicit VecSimSVSThreadPool(std::shared_ptr pool) + : pool_(std::move(pool)), parallelism_(std::make_shared>(0)) { + assert(pool_ && "Pool must not be null"); + } + + // Set the degree of parallelism for this index's next operation. + // n must be the number of threads actually reserved by the caller (i.e., the + // RediSearch workers that checked in via ReserveThreadJob). This is what allows + // us to assert n <= pool size: reserved workers are occupied RediSearch threads, + // so the pool cannot shrink while they are held, and n cannot exceed the pool size. 
+ void setParallelism(size_t n) { + assert(n >= 1 && "Parallelism must be at least 1 (the calling thread)"); + assert(n <= pool_->size() && "Parallelism exceeds shared pool size"); + parallelism_->store(n); + } + size_t getParallelism() const { return parallelism_->load(); } - size_t capacity() const { return pool_->capacity(); } - size_t size() const { return pool_->size(); } + // Returns per-index parallelism. SVS uses this for task partitioning (ThreadPool concept). + size_t size() const { return parallelism_->load(); } + // Shared pool size — used by scheduling to decide how many reserve jobs to submit. + size_t poolSize() const { return pool_->size(); } + + // Delegates to the shared pool's parallel_for. + // n may be less than parallelism_ when the problem size is smaller than the + // thread count (SVS computes n = min(arg.size(), pool.size())). + // n must not exceed parallelism_ — we only have that many threads reserved. void parallel_for(std::function f, size_t n) { + assert(parallelism_->load() > 0 && "setParallelism must be called before parallel_for"); + assert(n <= parallelism_->load() && "n exceeds reserved thread count (parallelism)"); pool_->parallel_for(std::move(f), n); } - - void resize(size_t new_size) { pool_->resize(new_size); } }; diff --git a/tests/unit/test_svs_tiered.cpp b/tests/unit/test_svs_tiered.cpp index 528df8794..d079d64e5 100644 --- a/tests/unit/test_svs_tiered.cpp +++ b/tests/unit/test_svs_tiered.cpp @@ -3823,46 +3823,76 @@ TYPED_TEST(SVSTieredIndexTestBasic, testDeletedJournalMulti) { } TEST(SVSTieredIndexTest, testThreadPool) { - // Test VecSimSVSThreadPool + // Test VecSimSVSThreadPool with shared pool const size_t num_threads = 4; - auto pool = VecSimSVSThreadPool(num_threads); - ASSERT_EQ(pool.capacity(), num_threads); + auto shared_pool = std::make_shared(num_threads); + auto pool = VecSimSVSThreadPool(shared_pool); + ASSERT_EQ(pool.poolSize(), num_threads); + ASSERT_EQ(pool.size(), 0); // parallelism starts at 0 + 
ASSERT_EQ(pool.getParallelism(), 0); + + // Must call setParallelism before parallel_for + pool.setParallelism(num_threads); ASSERT_EQ(pool.size(), num_threads); + ASSERT_EQ(pool.getParallelism(), num_threads); std::atomic_int counter(0); auto task = [&counter](size_t i) { counter += i + 1; }; - // exeed the number of threads - ASSERT_THROW(pool.parallel_for(task, 10), svs::threads::ThreadingException); + // n > parallelism is a bug — asserts in debug mode +#if !defined(RUNNING_ON_VALGRIND) && !defined(NDEBUG) + ASSERT_DEATH(pool.parallel_for(task, 10), "n exceeds reserved thread count"); +#endif counter = 0; - pool.parallel_for(task, 4); + pool.parallel_for(task, num_threads); ASSERT_EQ(counter, 10); // 1+2+3+4 = 10 - pool.resize(1); - ASSERT_EQ(pool.capacity(), 4); - ASSERT_EQ(pool.size(), 1); - // exeed the new pool size - ASSERT_THROW(pool.parallel_for(task, 4), svs::threads::ThreadingException); + // n < parallelism is valid (SVS may partition into fewer tasks for small problems) + counter = 0; + pool.parallel_for(task, 2); + ASSERT_EQ(counter, 3); // 1+2 = 3 + + // setParallelism changes per-index parallelism, not the shared pool + pool.setParallelism(1); + ASSERT_EQ(pool.poolSize(), num_threads); // shared pool unchanged + ASSERT_EQ(pool.size(), 1); // per-index parallelism + ASSERT_EQ(pool.getParallelism(), 1); counter = 0; pool.parallel_for(task, 1); ASSERT_EQ(counter, 1); // 0+1 = 1 - pool.resize(0); - ASSERT_EQ(pool.capacity(), 4); - ASSERT_EQ(pool.size(), 1); + // setParallelism boundary checks — asserts in debug mode +#if !defined(RUNNING_ON_VALGRIND) && !defined(NDEBUG) + ASSERT_DEATH(pool.setParallelism(0), "Parallelism must be at least 1"); + ASSERT_DEATH(pool.setParallelism(10), "Parallelism exceeds shared pool size"); +#endif - pool.resize(5); - ASSERT_EQ(pool.capacity(), 4); - ASSERT_EQ(pool.size(), 4); + // Test write-in-place mode (pool with size 1) + auto inplace_shared = std::make_shared(1); + auto inplace_pool = 
VecSimSVSThreadPool(inplace_shared); + inplace_pool.setParallelism(1); + ASSERT_EQ(inplace_pool.size(), 1); + ASSERT_EQ(inplace_pool.poolSize(), 1); + counter = 0; + inplace_pool.parallel_for(task, 1); + ASSERT_EQ(counter, 1); + + // parallel_for without setParallelism asserts in debug mode +#if !defined(RUNNING_ON_VALGRIND) && !defined(NDEBUG) + auto unset_pool = VecSimSVSThreadPool(shared_pool); + ASSERT_DEATH(unset_pool.parallel_for(task, 1), + "setParallelism must be called before parallel_for"); +#endif - // Test VecSimSVSThreadPool for exception handling + // Test exception handling auto err_task = [](size_t) { throw std::runtime_error("Test exception"); }; - - ASSERT_NO_THROW(pool.parallel_for(err_task, 0)); // no task - no err + // n=0 is a no-op (no tasks to run, no error) + ASSERT_NO_THROW(pool.parallel_for(err_task, 0)); + pool.setParallelism(num_threads); ASSERT_THROW(pool.parallel_for(err_task, 1), svs::threads::ThreadingException); - ASSERT_THROW(pool.parallel_for(err_task, 4), svs::threads::ThreadingException); + ASSERT_THROW(pool.parallel_for(err_task, num_threads), svs::threads::ThreadingException); } #else // HAVE_SVS From 3c69a481f3e9e58d1c6b160aa99445ea46583653 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 5 Apr 2026 13:58:51 +0000 Subject: [PATCH 05/19] add resize handling --- src/VecSim/vec_sim.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/VecSim/vec_sim.cpp b/src/VecSim/vec_sim.cpp index a4b8201bc..c9237d6fe 100644 --- a/src/VecSim/vec_sim.cpp +++ b/src/VecSim/vec_sim.cpp @@ -41,8 +41,10 @@ extern "C" void VecSim_UpdateThreadPoolSize(size_t new_size) { } else { VecSimIndex::setWriteMode(VecSim_WriteAsync); } - // TODO Step 3: Create/resize the shared SVS thread pool singleton via - // SVSIndexBase::getSharedThreadPool() + // Resize the shared SVS thread pool. resize() clamps to minimum size 1. + // new_size == 0 → pool size 1 (only calling thread, write-in-place). 
+ // new_size > 0 → pool size new_size (new_size - 1 worker threads). + SVSIndexBase::getSharedThreadPool()->resize(std::max(new_size, size_t{1})); } static VecSimResolveCode _ResolveParams_EFRuntime(VecSimAlgo index_type, VecSimRawParam rparam, From 99eae2304dae6f58d633303dbdc7a2049ad70a95 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Mon, 6 Apr 2026 07:38:07 +0000 Subject: [PATCH 06/19] log ctx --- src/VecSim/algorithms/svs/svs.h | 4 +-- src/VecSim/algorithms/svs/svs_utils.h | 35 +++++++++++++++------------ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index cd03370bf..f44135865 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -41,7 +41,7 @@ struct SVSIndexBase // Singleton accessor for the shared SVS thread pool. // Always valid — initialized with size 1 (write-in-place mode: 0 worker threads, // only the calling thread participates). Resized on VecSim_UpdateThreadPoolSize() calls. 
- static std::shared_ptr &getSharedThreadPool() { + static std::shared_ptr getSharedThreadPool() { static auto shared_pool = std::make_shared(1); return shared_pool; } @@ -369,7 +369,7 @@ class SVSIndex : public VecSimIndexAbstract, fl svs_details::getOrDefault(params.leanvec_dim, SVS_VAMANA_DEFAULT_LEANVEC_DIM)}, epsilon{svs_details::getOrDefault(params.epsilon, SVS_VAMANA_DEFAULT_EPSILON)}, is_two_level_lvq{isTwoLevelLVQ(params.quantBits)}, - threadpool_{SVSIndexBase::getSharedThreadPool()}, impl_{nullptr} { + threadpool_{SVSIndexBase::getSharedThreadPool(), this->logCallbackCtx}, impl_{nullptr} { logger_ = makeLogger(); } diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h index 48ad9d72e..c8daee631 100644 --- a/src/VecSim/algorithms/svs/svs_utils.h +++ b/src/VecSim/algorithms/svs/svs_utils.h @@ -9,6 +9,7 @@ #pragma once #include "VecSim/query_results.h" +#include "VecSim/vec_sim_interface.h" #include "VecSim/types/float16.h" #include "svs/core/distance.h" @@ -421,10 +422,6 @@ class VecSimSVSThreadPoolImpl { return slots_.size() + 1; } - // Alias for size(). Capacity and size are always equal since resize is physical. - // TODO: is it needed? can we remove one of them? - size_t capacity() const { return size(); } - // Physically resize the pool. Creates new OS threads on grow, shuts down idle threads // on shrink. new_size is total parallelism including the calling thread (minimum 1). // Occupied threads (held by renters) survive shrink via shared_ptr — their OS thread @@ -455,7 +452,7 @@ class VecSimSVSThreadPoolImpl { // The SVS pool is sized to match the RediSearch thread pool, and RediSearch controls // scheduling via reserve jobs, so all requested slots should always be available. // Getting fewer threads than requested indicates a bug in the scheduling logic. 
- RentedThreads rent(size_t count) { + RentedThreads rent(size_t count, void *log_ctx = nullptr) { RentedThreads rented; if (count == 0) { return rented; @@ -473,9 +470,13 @@ class VecSimSVSThreadPoolImpl { } if (rented.count() < count) { - svs::logging::warn("SVS thread pool: rented {} threads out of {} requested " - "(pool has {} slots). This should not happen.", - rented.count(), count, slots_.size()); + auto msg = fmt::format("SVS thread pool: rented {} threads out of {} requested " + "(pool has {} slots). This should not happen.", + rented.count(), count, slots_.size()); + if (VecSimIndexInterface::logCallback) { + assert(log_ctx && "Log context must be provided when logging is available"); + VecSimIndexInterface::logCallback(log_ctx, "warning", msg.c_str()); + } assert(false && "Failed to rent the expected number of SVS threads"); } return rented; @@ -484,7 +485,7 @@ class VecSimSVSThreadPoolImpl { // Execute `f` in parallel with `n` partitions. The calling thread runs partition 0, // and up to `n-1` worker threads are rented for partitions 1..n-1. // Same signature as the SVS ThreadPool concept. - void parallel_for(std::function f, size_t n) { + void parallel_for(std::function f, size_t n, void *log_ctx = nullptr) { if (n == 0) { return; } @@ -501,9 +502,8 @@ class VecSimSVSThreadPoolImpl { } // Rent n-1 worker threads - auto rented = rent(n - 1); - assert(rented.count() == n - 1); - size_t num_workers = n - 1; + auto rented = rent(n - 1, log_ctx); + size_t num_workers = rented.count(); // Assign work to rented workers (partitions 1..n-1) for (size_t i = 0; i < num_workers; ++i) { @@ -571,12 +571,15 @@ class VecSimSVSThreadPool { private: std::shared_ptr pool_; // shared across all indexes std::shared_ptr> parallelism_; // per-index, shared across copies + void *log_ctx_ = nullptr; // per-index log context public: // Construct with reference to the shared pool singleton. // parallelism_ starts at 0 — caller must call setParallelism() before parallel_for(). 
- explicit VecSimSVSThreadPool(std::shared_ptr pool) - : pool_(std::move(pool)), parallelism_(std::make_shared>(0)) { + explicit VecSimSVSThreadPool(std::shared_ptr pool, + void *log_ctx = nullptr) + : pool_(std::move(pool)), parallelism_(std::make_shared>(0)), + log_ctx_(log_ctx) { assert(pool_ && "Pool must not be null"); } @@ -598,13 +601,13 @@ class VecSimSVSThreadPool { // Shared pool size — used by scheduling to decide how many reserve jobs to submit. size_t poolSize() const { return pool_->size(); } - // Delegates to the shared pool's parallel_for. + // Delegates to the shared pool's parallel_for, passing the per-index log context. // n may be less than parallelism_ when the problem size is smaller than the // thread count (SVS computes n = min(arg.size(), pool.size())). // n must not exceed parallelism_ — we only have that many threads reserved. void parallel_for(std::function f, size_t n) { assert(parallelism_->load() > 0 && "setParallelism must be called before parallel_for"); assert(n <= parallelism_->load() && "n exceeds reserved thread count (parallelism)"); - pool_->parallel_for(std::move(f), n); + pool_->parallel_for(std::move(f), n, log_ctx_); } }; From 0a34edfb9626e6aeccc13b94ba1f4e811fb4d98b Mon Sep 17 00:00:00 2001 From: meiravgri Date: Mon, 6 Apr 2026 10:29:31 +0000 Subject: [PATCH 07/19] default paralleism to 1 --- src/VecSim/algorithms/svs/svs_tiered.h | 8 ++++---- src/VecSim/algorithms/svs/svs_utils.h | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index 8ca67bf5a..85165e7ff 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -819,10 +819,10 @@ class TieredSVSIndex : public VecSimTieredIndex { // prevent update job from running in parallel and lock any access to the backend // index std::scoped_lock lock(this->updateJobMutex, this->mainIndexGuard); - // Set available thread count to 1 for 
single vector write-in-place operation. - // This maintains the contract that single vector operations use exactly one thread. - // TODO: Replace this setParallelism(1) call with an assertion once we establish - // a contract that write-in-place mode guarantees parallelism == 1. + // Defensive: ensure single-threaded operation for write-in-place mode. + // parallelism_ defaults to 1, so this is a no-op in the normal case. + assert(svs_index->getParallelism() == 1 && + "Parallelism should be 1 for write-in-place mode, but it is not"); svs_index->setParallelism(1); return this->backendIndex->addVector(storage_blob.get(), label); } diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h index c8daee631..f1f35e8b6 100644 --- a/src/VecSim/algorithms/svs/svs_utils.h +++ b/src/VecSim/algorithms/svs/svs_utils.h @@ -575,10 +575,12 @@ class VecSimSVSThreadPool { public: // Construct with reference to the shared pool singleton. - // parallelism_ starts at 0 — caller must call setParallelism() before parallel_for(). + // parallelism_ starts at 1 (the calling thread always participates), matching the + // pool's minimum size. Safe for immediate use in write-in-place mode without an + // explicit setParallelism() call. explicit VecSimSVSThreadPool(std::shared_ptr pool, void *log_ctx = nullptr) - : pool_(std::move(pool)), parallelism_(std::make_shared>(0)), + : pool_(std::move(pool)), parallelism_(std::make_shared>(1)), log_ctx_(log_ctx) { assert(pool_ && "Pool must not be null"); } @@ -606,7 +608,6 @@ class VecSimSVSThreadPool { // thread count (SVS computes n = min(arg.size(), pool.size())). // n must not exceed parallelism_ — we only have that many threads reserved. 
void parallel_for(std::function f, size_t n) { - assert(parallelism_->load() > 0 && "setParallelism must be called before parallel_for"); assert(n <= parallelism_->load() && "n exceeds reserved thread count (parallelism)"); pool_->parallel_for(std::move(f), n, log_ctx_); } From d7108220293c6d0de2739788428670814322571b Mon Sep 17 00:00:00 2001 From: meiravgri Date: Mon, 6 Apr 2026 12:38:11 +0000 Subject: [PATCH 08/19] fix VecSim_UpdateThreadPoolSize to include current thread --- src/VecSim/vec_sim.cpp | 7 ++++--- src/VecSim/vec_sim.h | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/VecSim/vec_sim.cpp b/src/VecSim/vec_sim.cpp index c9237d6fe..1484d7071 100644 --- a/src/VecSim/vec_sim.cpp +++ b/src/VecSim/vec_sim.cpp @@ -41,10 +41,11 @@ extern "C" void VecSim_UpdateThreadPoolSize(size_t new_size) { } else { VecSimIndex::setWriteMode(VecSim_WriteAsync); } - // Resize the shared SVS thread pool. resize() clamps to minimum size 1. + // Resize the shared SVS thread pool. new_size is the number of RediSearch worker + // threads. The SVS pool's total parallelism includes the calling thread (+1). // new_size == 0 → pool size 1 (only calling thread, write-in-place). - // new_size > 0 → pool size new_size (new_size - 1 worker threads). - SVSIndexBase::getSharedThreadPool()->resize(std::max(new_size, size_t{1})); + // new_size > 0 → pool size new_size + 1 (new_size workers + calling thread). + SVSIndexBase::getSharedThreadPool()->resize(new_size + 1); } static VecSimResolveCode _ResolveParams_EFRuntime(VecSimAlgo index_type, VecSimRawParam rparam, diff --git a/src/VecSim/vec_sim.h b/src/VecSim/vec_sim.h index 14e1bf65d..e9ffd792c 100644 --- a/src/VecSim/vec_sim.h +++ b/src/VecSim/vec_sim.h @@ -318,7 +318,8 @@ void VecSim_SetWriteMode(VecSimWriteMode mode); * If new_size > 0, sets write mode to async and resizes the shared thread pool. * @note Should be called from the main thread only. 
* - * @param new_size the new thread pool size (0 = in-place mode, >0 = async mode). + * @param new_size the number of worker threads (0 = in-place mode, >0 = async mode). + * The total SVS pool parallelism will be new_size + 1 (workers + calling thread). */ void VecSim_UpdateThreadPoolSize(size_t new_size); From 1381cd14a5113dfaeacb9f00e7bf08a1a600848d Mon Sep 17 00:00:00 2001 From: meiravgri Date: Mon, 6 Apr 2026 14:01:33 +0000 Subject: [PATCH 09/19] revert resize --- src/VecSim/algorithms/svs/svs_tiered.h | 5 +- src/VecSim/vec_sim.cpp | 7 +- src/VecSim/vec_sim.h | 3 +- tests/unit/test_svs.cpp | 4 +- tests/unit/test_svs_fp16.cpp | 24 ++-- tests/unit/test_svs_tiered.cpp | 162 +++++++++++-------------- 6 files changed, 88 insertions(+), 117 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs_tiered.h b/src/VecSim/algorithms/svs/svs_tiered.h index 85165e7ff..6d34779bb 100644 --- a/src/VecSim/algorithms/svs/svs_tiered.h +++ b/src/VecSim/algorithms/svs/svs_tiered.h @@ -677,7 +677,8 @@ class TieredSVSIndex : public VecSimTieredIndex { } // release frontend index executeTracingCallback("UpdateJob::before_add_to_svs"); - { // lock backend index for writing and add vectors there + if (!labels_to_move.empty()) { + // lock backend index for writing and add vectors there std::shared_lock main_shared_lock(this->mainIndexGuard); auto svs_index = GetSVSIndex(); assert(labels_to_move.size() == vectors_to_move.size() / this->frontendIndex->getDim()); @@ -821,8 +822,6 @@ class TieredSVSIndex : public VecSimTieredIndex { std::scoped_lock lock(this->updateJobMutex, this->mainIndexGuard); // Defensive: ensure single-threaded operation for write-in-place mode. // parallelism_ defaults to 1, so this is a no-op in the normal case. 
- assert(svs_index->getParallelism() == 1 && - "Parallelism should be 1 for write-in-place mode, but it is not"); svs_index->setParallelism(1); return this->backendIndex->addVector(storage_blob.get(), label); } diff --git a/src/VecSim/vec_sim.cpp b/src/VecSim/vec_sim.cpp index 1484d7071..c9237d6fe 100644 --- a/src/VecSim/vec_sim.cpp +++ b/src/VecSim/vec_sim.cpp @@ -41,11 +41,10 @@ extern "C" void VecSim_UpdateThreadPoolSize(size_t new_size) { } else { VecSimIndex::setWriteMode(VecSim_WriteAsync); } - // Resize the shared SVS thread pool. new_size is the number of RediSearch worker - // threads. The SVS pool's total parallelism includes the calling thread (+1). + // Resize the shared SVS thread pool. resize() clamps to minimum size 1. // new_size == 0 → pool size 1 (only calling thread, write-in-place). - // new_size > 0 → pool size new_size + 1 (new_size workers + calling thread). - SVSIndexBase::getSharedThreadPool()->resize(new_size + 1); + // new_size > 0 → pool size new_size (new_size - 1 worker threads). + SVSIndexBase::getSharedThreadPool()->resize(std::max(new_size, size_t{1})); } static VecSimResolveCode _ResolveParams_EFRuntime(VecSimAlgo index_type, VecSimRawParam rparam, diff --git a/src/VecSim/vec_sim.h b/src/VecSim/vec_sim.h index e9ffd792c..14e1bf65d 100644 --- a/src/VecSim/vec_sim.h +++ b/src/VecSim/vec_sim.h @@ -318,8 +318,7 @@ void VecSim_SetWriteMode(VecSimWriteMode mode); * If new_size > 0, sets write mode to async and resizes the shared thread pool. * @note Should be called from the main thread only. * - * @param new_size the number of worker threads (0 = in-place mode, >0 = async mode). - * The total SVS pool parallelism will be new_size + 1 (workers + calling thread). + * @param new_size the new thread pool size (0 = in-place mode, >0 = async mode). 
*/ void VecSim_UpdateThreadPoolSize(size_t new_size); diff --git a/tests/unit/test_svs.cpp b/tests/unit/test_svs.cpp index 8dd35dd8e..f70681425 100644 --- a/tests/unit/test_svs.cpp +++ b/tests/unit/test_svs.cpp @@ -1676,8 +1676,8 @@ TYPED_TEST(SVSTest, test_svs_parameter_combinations_and_defaults) { .constructionWindowSize = 100, .maxCandidatePoolSize = 500, .pruneTo = 55, - .useSearchHistory = false, // VecSimOption_DISABLE - .numThreads = 4, + .useSearchHistory = false, // VecSimOption_DISABLE + .numThreads = SVS_VAMANA_DEFAULT_NUM_THREADS, // Deprecated, expect default to be used .numberOfMarkedDeletedNodes = 0, .searchWindowSize = 20, .searchBufferCapacity = 40, diff --git a/tests/unit/test_svs_fp16.cpp b/tests/unit/test_svs_fp16.cpp index 82937ef32..2f57b1c16 100644 --- a/tests/unit/test_svs_fp16.cpp +++ b/tests/unit/test_svs_fp16.cpp @@ -2228,9 +2228,6 @@ class FP16SVSTieredIndexTest : public FP16SVSTest { size_t update_threshold = SVS_VAMANA_DEFAULT_UPDATE_THRESHOLD) { // trainingThreshold = training_threshold; // updateThreshold = update_threshold; - if (svs_params.algoParams.svsParams.num_threads == 0) { - svs_params.algoParams.svsParams.num_threads = mock_thread_pool.thread_pool_size; - } return TieredIndexParams{ .jobQueue = &mock_thread_pool.jobQ, .jobQueueCtx = mock_thread_pool.ctx, @@ -2249,6 +2246,10 @@ class FP16SVSTieredIndexTest : public FP16SVSTest { TieredSVSIndex *CreateTieredSVSIndex(const TieredIndexParams &tiered_params, tieredIndexMock &mock_thread_pool, size_t num_available_threads = 1) { + // Resize the shared SVS thread pool singleton to match the mock thread pool size, + // simulating what RediSearch does via VecSim_UpdateThreadPoolSize during module init. 
+ SVSIndexBase::getSharedThreadPool()->resize(mock_thread_pool.thread_pool_size); + auto *tiered_index = reinterpret_cast *>(TieredFactory::NewIndex(&tiered_params)); @@ -2256,14 +2257,7 @@ class FP16SVSTieredIndexTest : public FP16SVSTest { // the index, and we'll need to release the ctx at the end of the test. mock_thread_pool.ctx->index_strong_ref.reset(tiered_index); - // Set number of available threads to 1 unless specified otherwise, - // so we can insert one vector at a time directly to svs. - tiered_index->GetSVSIndex()->setParallelism(num_available_threads); - size_t params_threadpool_size = - tiered_params.primaryIndexParams->algoParams.svsParams.num_threads; - size_t expected_capacity = - params_threadpool_size ? params_threadpool_size : mock_thread_pool.thread_pool_size; - verifyNumThreads(tiered_index, num_available_threads, expected_capacity); + verifyNumThreads(tiered_index, num_available_threads, mock_thread_pool.thread_pool_size); return tiered_index; } @@ -2812,10 +2806,10 @@ TYPED_TEST(FP16SVSTieredIndexTest, KNNSearch) { TYPED_TEST(FP16SVSTieredIndexTest, deleteVector) { // Create TieredSVS index instance with a mock queue. size_t dim = 4; - SVSParams params = {.dim = dim, .metric = VecSimMetric_L2, .num_threads = 1}; + SVSParams params = {.dim = dim, .metric = VecSimMetric_L2}; this->SetTypeParams(params); VecSimParams svs_params = CreateParams(params); - auto mock_thread_pool = tieredIndexMock(); + auto mock_thread_pool = tieredIndexMock(1); auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, 1, 1); ASSERT_NO_FATAL_FAILURE(this->verify_index(tiered_index)); @@ -2876,10 +2870,10 @@ TYPED_TEST(FP16SVSTieredIndexTest, deleteVector) { TYPED_TEST(FP16SVSTieredIndexTest, deleteVectorMulti) { // Create TieredSVS index instance with a mock queue. 
size_t dim = 4; - SVSParams params = {.dim = dim, .metric = VecSimMetric_L2, .multi = true, .num_threads = 1}; + SVSParams params = {.dim = dim, .metric = VecSimMetric_L2, .multi = true}; this->SetTypeParams(params); VecSimParams svs_params = CreateParams(params); - auto mock_thread_pool = tieredIndexMock(); + auto mock_thread_pool = tieredIndexMock(1); auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, 1, 1); ASSERT_NO_FATAL_FAILURE(this->verify_index(tiered_index)); diff --git a/tests/unit/test_svs_tiered.cpp b/tests/unit/test_svs_tiered.cpp index d079d64e5..c3d67a5f4 100644 --- a/tests/unit/test_svs_tiered.cpp +++ b/tests/unit/test_svs_tiered.cpp @@ -76,9 +76,6 @@ class SVSTieredIndexTest : public ::testing::Test { trainingThreshold = training_threshold; updateThreshold = update_threshold; svs_params.algoParams.svsParams.quantBits = index_type_t::get_quant_bits(); - if (svs_params.algoParams.svsParams.num_threads == 0) { - svs_params.algoParams.svsParams.num_threads = mock_thread_pool.thread_pool_size; - } return TieredIndexParams{ .jobQueue = &mock_thread_pool.jobQ, .jobQueueCtx = mock_thread_pool.ctx, @@ -90,14 +87,16 @@ class SVSTieredIndexTest : public ::testing::Test { .updateJobWaitTime = update_job_wait_time}}}; } - void verifyNumThreads(TieredSVSIndex *tiered_index, size_t expected_num_threads, - size_t expected_capcity) { - ASSERT_EQ(tiered_index->GetSVSIndex()->getParallelism(), expected_num_threads); + void verifyNumThreads(TieredSVSIndex *tiered_index, size_t expected_capcity) { + ASSERT_EQ(tiered_index->GetSVSIndex()->getParallelism(), 1); ASSERT_EQ(tiered_index->GetSVSIndex()->getPoolSize(), expected_capcity); } TieredSVSIndex *CreateTieredSVSIndex(const TieredIndexParams &tiered_params, - tieredIndexMock &mock_thread_pool, - size_t num_available_threads = 1) { + tieredIndexMock &mock_thread_pool) { + // Resize the shared SVS thread pool singleton to match the mock thread pool size, + // simulating what RediSearch does 
via VecSim_UpdateThreadPoolSize during module init. + VecSim_UpdateThreadPoolSize(mock_thread_pool.thread_pool_size); + auto *tiered_index = reinterpret_cast *>(TieredFactory::NewIndex(&tiered_params)); @@ -108,12 +107,8 @@ class SVSTieredIndexTest : public ::testing::Test { // which requires exactly 1 thread. When using tiered index addVector API, // the parallelism is managed internally according to the operation and pool // size, so testing parallelism remains intact. - tiered_index->GetSVSIndex()->setParallelism(num_available_threads); - size_t params_threadpool_size = - tiered_params.primaryIndexParams->algoParams.svsParams.num_threads; - size_t expected_capacity = - params_threadpool_size ? params_threadpool_size : mock_thread_pool.thread_pool_size; - verifyNumThreads(tiered_index, num_available_threads, expected_capacity); + // tiered_index->GetSVSIndex()->setParallelism(num_available_threads); + verifyNumThreads(tiered_index, mock_thread_pool.thread_pool_size); return tiered_index; } @@ -122,18 +117,17 @@ class SVSTieredIndexTest : public ::testing::Test { CreateTieredSVSIndex(VecSimParams &svs_params, tieredIndexMock &mock_thread_pool, size_t training_threshold = TestsDefaultTrainingThreshold, size_t update_threshold = TestsDefaultUpdateThreshold, - size_t update_job_wait_time = SVS_DEFAULT_UPDATE_JOB_WAIT_TIME, - size_t num_available_threads = 1) { + size_t update_job_wait_time = SVS_DEFAULT_UPDATE_JOB_WAIT_TIME) { svs_params.algoParams.svsParams.quantBits = index_type_t::get_quant_bits(); TieredIndexParams tiered_params = CreateTieredSVSParams(svs_params, mock_thread_pool, training_threshold, update_threshold, update_job_wait_time); - return CreateTieredSVSIndex(tiered_params, mock_thread_pool, num_available_threads); + return CreateTieredSVSIndex(tiered_params, mock_thread_pool); } void SetUp() override { // Restore the write mode to default. 
- VecSim_SetWriteMode(VecSim_WriteAsync); + VecSim_UpdateThreadPoolSize(0); // Limit VecSim log level to avoid printing too much information VecSimIndexInterface::setLogCallbackFunction(svsTestLogCallBackNoDebug); } @@ -199,11 +193,10 @@ TYPED_TEST(SVSTieredIndexTest, ThreadsReservation) { } std::chrono::milliseconds timeout{1000}; // long enough to reserve all threads - SVSParams params = { - .type = TypeParam::get_index_type(), .dim = 4, .metric = VecSimMetric_L2, .num_threads = 1}; + SVSParams params = {.type = TypeParam::get_index_type(), .dim = 4, .metric = VecSimMetric_L2}; VecSimParams svs_params = CreateParams(params); - auto mock_thread_pool = tieredIndexMock(); - mock_thread_pool.thread_pool_size = num_threads; + auto mock_thread_pool = tieredIndexMock(num_threads); + ASSERT_EQ(mock_thread_pool.thread_pool_size, num_threads); // Create TieredSVS index instance with a mock queue. auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool); @@ -281,24 +274,21 @@ TYPED_TEST(SVSTieredIndexTest, TestDebugInfoThreadCount) { constexpr size_t update_threshold = 10; constexpr size_t update_job_wait_time = 10000; constexpr size_t dim = 4; - SVSParams params = {.type = TypeParam::get_index_type(), - .dim = dim, - .metric = VecSimMetric_L2, - .num_threads = num_threads}; + SVSParams params = {.type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_L2}; VecSimParams svs_params = CreateParams(params); - auto mock_thread_pool = tieredIndexMock(); - mock_thread_pool.thread_pool_size = num_threads; + auto mock_thread_pool = tieredIndexMock(num_threads); + ASSERT_EQ(mock_thread_pool.thread_pool_size, num_threads); // Create TieredSVS index instance with a mock queue. 
- auto *tiered_index = - this->CreateTieredSVSIndex(svs_params, mock_thread_pool, training_threshold, - update_threshold, update_job_wait_time, num_threads); + auto *tiered_index = this->CreateTieredSVSIndex( + svs_params, mock_thread_pool, training_threshold, update_threshold, update_job_wait_time); ASSERT_INDEX(tiered_index); - // Verify initial state: both fields should equal configured thread count + // Verify initial state: numThreads reflects the shared pool size, + // lastReservedThreads starts at 1 (parallelism_ is initialized to 1). VecSimIndexDebugInfo backendIndexInfo = tiered_index->GetBackendIndex()->debugInfo(); ASSERT_EQ(backendIndexInfo.svsInfo.numThreads, num_threads); - ASSERT_EQ(backendIndexInfo.svsInfo.lastReservedThreads, num_threads); + ASSERT_EQ(backendIndexInfo.svsInfo.lastReservedThreads, 1); // Get the allocator from the tiered index. auto allocator = tiered_index->getAllocator(); @@ -332,7 +322,9 @@ TYPED_TEST(SVSTieredIndexTest, TestDebugInfoThreadCount) { while (tiered_index->GetBackendIndex()->indexSize() != training_threshold) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } - // Verify: numThreads unchanged, lastReservedThreads reflects actual availability + // Verify: numThreads (pool size) unchanged, lastReservedThreads (parallelism) reflects + // reduced availability — one thread was occupied, so at most num_threads - 1 could reserve. + // Use LE because OS timing may cause fewer threads to check in before the reservation timeout. 
backendIndexInfo = tiered_index->GetBackendIndex()->debugInfo(); ASSERT_EQ(backendIndexInfo.svsInfo.numThreads, num_threads); ASSERT_LE(backendIndexInfo.svsInfo.lastReservedThreads, num_threads - 1); @@ -346,7 +338,9 @@ TYPED_TEST(SVSTieredIndexTest, TestDebugInfoThreadCount) { } mock_thread_pool.thread_pool_join(); - // Verify: numThreads unchanged, lastReservedThreads reflects we used all configured threads + // Verify: numThreads (pool size) unchanged, lastReservedThreads (parallelism) reflects + // that all configured threads were available for this operation. + // Use LE because OS timing may cause fewer threads to check in before the reservation timeout. backendIndexInfo = tiered_index->GetBackendIndex()->debugInfo(); ASSERT_EQ(backendIndexInfo.svsInfo.numThreads, num_threads); ASSERT_LE(backendIndexInfo.svsInfo.lastReservedThreads, num_threads); @@ -354,8 +348,8 @@ TYPED_TEST(SVSTieredIndexTest, TestDebugInfoThreadCount) { TYPED_TEST(SVSTieredIndexTest, TestDebugInfoThreadCountWriteInPlace) { // Test that write-in-place mode correctly reports thread usage in debug info. - // Even when the index is configured with multiple threads, write-in-place operations - // should use only 1 thread and lastReservedThreads should reflect this. + // Even when the shared pool has multiple threads, write-in-place operations + // should use only 1 thread and lastReservedThreads (parallelism) should reflect this. // Set thread_pool_size to 4 or actual number of available CPUs const auto num_threads = std::min(4U, getAvailableCPUs()); @@ -364,31 +358,28 @@ TYPED_TEST(SVSTieredIndexTest, TestDebugInfoThreadCountWriteInPlace) { GTEST_SKIP() << "No threads available"; } - // Test svs when mode is write in place, but the index is configured with multiple threads. + // Test svs when mode is write in place, but the shared pool has multiple threads. 
constexpr size_t training_threshold = 10; constexpr size_t update_threshold = 10; constexpr size_t update_job_wait_time = 10000; constexpr size_t dim = 4; - SVSParams params = {.type = TypeParam::get_index_type(), - .dim = dim, - .metric = VecSimMetric_L2, - .num_threads = num_threads}; + SVSParams params = {.type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_L2}; VecSimParams svs_params = CreateParams(params); - auto mock_thread_pool = tieredIndexMock(); - mock_thread_pool.thread_pool_size = num_threads; + auto mock_thread_pool = tieredIndexMock(num_threads); + ASSERT_EQ(mock_thread_pool.thread_pool_size, num_threads); // Create TieredSVS index instance with a mock queue. - auto *tiered_index = - this->CreateTieredSVSIndex(svs_params, mock_thread_pool, training_threshold, - update_threshold, update_job_wait_time, num_threads); + auto *tiered_index = this->CreateTieredSVSIndex( + svs_params, mock_thread_pool, training_threshold, update_threshold, update_job_wait_time); ASSERT_INDEX(tiered_index); - // Set to mode to write in place even though the index is configured with multiple threads. + // Set mode to write in place even though the shared pool has multiple threads. VecSim_SetWriteMode(VecSim_WriteInPlace); - // Verify initial state: both fields should equal configured thread count + // Verify initial state: numThreads reflects the shared pool size, + // lastReservedThreads starts at 1 (parallelism_ is initialized to 1). VecSimIndexDebugInfo backendIndexInfo = tiered_index->GetBackendIndex()->debugInfo(); ASSERT_EQ(backendIndexInfo.svsInfo.numThreads, num_threads); - ASSERT_EQ(backendIndexInfo.svsInfo.lastReservedThreads, num_threads); + ASSERT_EQ(backendIndexInfo.svsInfo.lastReservedThreads, 1); // Get the allocator from the tiered index. 
auto allocator = tiered_index->getAllocator(); @@ -403,8 +394,8 @@ TYPED_TEST(SVSTieredIndexTest, TestDebugInfoThreadCountWriteInPlace) { while (tiered_index->GetBackendIndex()->indexSize() != training_threshold) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } - // Verify: numThreads unchanged, lastReservedThreads reflects we only used one thread (main - // thread) + // Verify: numThreads (pool size) unchanged, lastReservedThreads (parallelism) reflects + // we only used one thread (write-in-place calls updateSVSIndexWrapper with availableThreads=1). backendIndexInfo = tiered_index->GetBackendIndex()->debugInfo(); ASSERT_EQ(backendIndexInfo.svsInfo.numThreads, num_threads); ASSERT_EQ(backendIndexInfo.svsInfo.lastReservedThreads, 1); @@ -413,7 +404,7 @@ TYPED_TEST(SVSTieredIndexTest, TestDebugInfoThreadCountWriteInPlace) { size_t num_vectors = 10; for (size_t i = 0; i < num_vectors; ++i) { GenerateAndAddVector(tiered_index, dim, training_threshold + i); - // Verify: numThreads unchanged, lastReservedThreads reflects we used only one thread + // Verify: numThreads (pool size) unchanged, lastReservedThreads (parallelism) = 1 backendIndexInfo = tiered_index->GetBackendIndex()->debugInfo(); ASSERT_EQ(backendIndexInfo.svsInfo.numThreads, num_threads); ASSERT_EQ(backendIndexInfo.svsInfo.lastReservedThreads, 1); @@ -1035,10 +1026,9 @@ TYPED_TEST(SVSTieredIndexTest, deleteVector) { SVSParams params = {.type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_L2, - .multi = TypeParam::isMulti(), - .num_threads = 1}; + .multi = TypeParam::isMulti()}; VecSimParams svs_params = CreateParams(params); - auto mock_thread_pool = tieredIndexMock(); + auto mock_thread_pool = tieredIndexMock(1); auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, 1, 1); ASSERT_INDEX(tiered_index); @@ -1101,12 +1091,9 @@ TYPED_TEST(SVSTieredIndexTestBasic, markedDeleted) { size_t dim = 4; constexpr size_t n = 10; constexpr size_t transfer_trigger 
= n; - SVSParams params = {.type = TypeParam::get_index_type(), - .dim = dim, - .metric = VecSimMetric_L2, - .num_threads = 1}; + SVSParams params = {.type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_L2}; VecSimParams svs_params = CreateParams(params); - auto mock_thread_pool = tieredIndexMock(); + auto mock_thread_pool = tieredIndexMock(1); auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, transfer_trigger, transfer_trigger); @@ -1180,13 +1167,10 @@ TYPED_TEST(SVSTieredIndexTestBasic, markedDeleted) { TYPED_TEST(SVSTieredIndexTestBasic, deleteVectorMulti) { // Create TieredSVS index instance with a mock queue. size_t dim = 4; - SVSParams params = {.type = TypeParam::get_index_type(), - .dim = dim, - .metric = VecSimMetric_L2, - .multi = true, - .num_threads = 1}; + SVSParams params = { + .type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_L2, .multi = true}; VecSimParams svs_params = CreateParams(params); - auto mock_thread_pool = tieredIndexMock(); + auto mock_thread_pool = tieredIndexMock(1); auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, 1, 1); auto allocator = tiered_index->getAllocator(); @@ -2604,13 +2588,10 @@ TYPED_TEST(SVSTieredIndexTestBasic, overwriteVectorBasic) { // Create TieredSVS index instance with a mock queue. 
size_t dim = 4; size_t n = 1000; - SVSParams params = {.type = TypeParam::get_index_type(), - .dim = dim, - .metric = VecSimMetric_L2, - .multi = false, - .num_threads = 1}; + SVSParams params = { + .type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_L2, .multi = false}; VecSimParams svs_params = CreateParams(params); - auto mock_thread_pool = tieredIndexMock(); + auto mock_thread_pool = tieredIndexMock(1); auto *tiered_index = this->CreateTieredSVSIndex(svs_params, mock_thread_pool, 1, 1); ASSERT_INDEX(tiered_index); @@ -3828,23 +3809,23 @@ TEST(SVSTieredIndexTest, testThreadPool) { auto shared_pool = std::make_shared(num_threads); auto pool = VecSimSVSThreadPool(shared_pool); ASSERT_EQ(pool.poolSize(), num_threads); - ASSERT_EQ(pool.size(), 0); // parallelism starts at 0 - ASSERT_EQ(pool.getParallelism(), 0); - - // Must call setParallelism before parallel_for - pool.setParallelism(num_threads); - ASSERT_EQ(pool.size(), num_threads); - ASSERT_EQ(pool.getParallelism(), num_threads); + ASSERT_EQ(pool.size(), 1); // parallelism starts at 1 (calling thread) + ASSERT_EQ(pool.getParallelism(), 1); std::atomic_int counter(0); auto task = [&counter](size_t i) { counter += i + 1; }; + // Can use parallel_for immediately with parallelism 1 + pool.parallel_for(task, 1); + ASSERT_EQ(counter, 1); // 0+1 = 1 + // n > parallelism is a bug — asserts in debug mode #if !defined(RUNNING_ON_VALGRIND) && !defined(NDEBUG) ASSERT_DEATH(pool.parallel_for(task, 10), "n exceeds reserved thread count"); #endif counter = 0; + pool.setParallelism(4); pool.parallel_for(task, num_threads); ASSERT_EQ(counter, 10); // 1+2+3+4 = 10 @@ -3854,14 +3835,14 @@ TEST(SVSTieredIndexTest, testThreadPool) { ASSERT_EQ(counter, 3); // 1+2 = 3 // setParallelism changes per-index parallelism, not the shared pool - pool.setParallelism(1); + pool.setParallelism(2); ASSERT_EQ(pool.poolSize(), num_threads); // shared pool unchanged - ASSERT_EQ(pool.size(), 1); // per-index parallelism - 
ASSERT_EQ(pool.getParallelism(), 1); + ASSERT_EQ(pool.size(), 2); // per-index parallelism + ASSERT_EQ(pool.getParallelism(), 2); counter = 0; - pool.parallel_for(task, 1); - ASSERT_EQ(counter, 1); // 0+1 = 1 + pool.parallel_for(task, 2); + ASSERT_EQ(counter, 3); // 1+2 = 3 // setParallelism boundary checks — asserts in debug mode #if !defined(RUNNING_ON_VALGRIND) && !defined(NDEBUG) @@ -3879,12 +3860,11 @@ TEST(SVSTieredIndexTest, testThreadPool) { inplace_pool.parallel_for(task, 1); ASSERT_EQ(counter, 1); - // parallel_for without setParallelism asserts in debug mode -#if !defined(RUNNING_ON_VALGRIND) && !defined(NDEBUG) - auto unset_pool = VecSimSVSThreadPool(shared_pool); - ASSERT_DEATH(unset_pool.parallel_for(task, 1), - "setParallelism must be called before parallel_for"); -#endif + // parallel_for works immediately with default parallelism 1 + auto default_pool = VecSimSVSThreadPool(shared_pool); + counter = 0; + default_pool.parallel_for(task, 1); + ASSERT_EQ(counter, 1); // 0+1 = 1 // Test exception handling auto err_task = [](size_t) { throw std::runtime_error("Test exception"); }; From 153a7b322ab1abeaed3e3cf601e486869e905477 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 7 Apr 2026 04:33:37 +0000 Subject: [PATCH 10/19] better parallelism docs --- src/VecSim/algorithms/svs/svs_utils.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h index f1f35e8b6..4fe4492e6 100644 --- a/src/VecSim/algorithms/svs/svs_utils.h +++ b/src/VecSim/algorithms/svs/svs_utils.h @@ -569,9 +569,17 @@ class VecSimSVSThreadPoolImpl { // The pool is always valid — in write-in-place mode it has size 1 (0 worker threads).
class VecSimSVSThreadPool { private: - std::shared_ptr pool_; // shared across all indexes - std::shared_ptr> parallelism_; // per-index, shared across copies - void *log_ctx_ = nullptr; // per-index log context + std::shared_ptr pool_; // shared across all indexes + // Per-index parallelism, shared across copies (SVS stores a copy of VecSimSVSThreadPool). + // SVS reads this value via size() during parallel_for to decide how many threads to use + // for task partitioning. Because SVS reads size() internally — not under our control — + // the caller must ensure that parallelism_ is stable for the entire duration of any SVS + // operation (search, build, consolidate, add, etc.). In practice this means: + // setParallelism(n) and the subsequent SVS call must be protected by the same lock, + // and no other code path may call setParallelism() on the same index concurrently. + // Currently, mainIndexGuard (exclusive) or updateJobMutex fulfills this role. + std::shared_ptr> parallelism_; + void *log_ctx_ = nullptr; // per-index log context public: // Construct with reference to the shared pool singleton. @@ -590,6 +598,12 @@ class VecSimSVSThreadPool { // RediSearch workers that checked in via ReserveThreadJob). This is what allows // us to assert n <= pool size: reserved workers are occupied RediSearch threads, // so the pool cannot shrink while they are held, and n cannot exceed the pool size. + // + // IMPORTANT: The caller must hold a lock that prevents any concurrent SVS operation + // on this index from reading size() between setParallelism() and the operation that + // depends on it. SVS internally calls pool.size() (which returns parallelism_) during + // parallel_for — if another thread calls setParallelism() concurrently, the operation + // may see an inconsistent value. 
void setParallelism(size_t n) { assert(n >= 1 && "Parallelism must be at least 1 (the calling thread)"); assert(n <= pool_->size() && "Parallelism exceeds shared pool size"); From ac8028e3e4aca356379a57590a3c1960e9f3445c Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 7 Apr 2026 08:34:59 +0000 Subject: [PATCH 11/19] rent private thpool tests fix fp16 tests --- src/VecSim/algorithms/svs/svs_utils.h | 70 ++-- tests/unit/CMakeLists.txt | 2 +- tests/unit/test_svs_fp16.cpp | 17 +- tests/unit/test_svs_threadpool.cpp | 468 ++++++++++++++++++++++++++ tests/unit/test_svs_tiered.cpp | 5 - 5 files changed, 511 insertions(+), 51 deletions(-) create mode 100644 tests/unit/test_svs_threadpool.cpp diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h index 4fe4492e6..084bb7718 100644 --- a/src/VecSim/algorithms/svs/svs_utils.h +++ b/src/VecSim/algorithms/svs/svs_utils.h @@ -447,41 +447,6 @@ class VecSimSVSThreadPoolImpl { } } - // Rent up to `count` worker threads from the pool. Returns an RAII guard that - // automatically releases the threads when destroyed. - // The SVS pool is sized to match the RediSearch thread pool, and RediSearch controls - // scheduling via reserve jobs, so all requested slots should always be available. - // Getting fewer threads than requested indicates a bug in the scheduling logic. - RentedThreads rent(size_t count, void *log_ctx = nullptr) { - RentedThreads rented; - if (count == 0) { - return rented; - } - - std::lock_guard lock{pool_mutex_}; - for (auto &slot : slots_) { - if (rented.count() >= count) { - break; - } - bool expected = false; - if (slot->occupied.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) { - rented.add(slot); - } - } - - if (rented.count() < count) { - auto msg = fmt::format("SVS thread pool: rented {} threads out of {} requested " - "(pool has {} slots). 
This should not happen.", - rented.count(), count, slots_.size()); - if (VecSimIndexInterface::logCallback) { - assert(log_ctx && "Log context must be provided when logging is available"); - VecSimIndexInterface::logCallback(log_ctx, "warning", msg.c_str()); - } - assert(false && "Failed to rent the expected number of SVS threads"); - } - return rented; - } - // Execute `f` in parallel with `n` partitions. The calling thread runs partition 0, // and up to `n-1` worker threads are rented for partitions 1..n-1. // Same signature as the SVS ThreadPool concept. @@ -524,6 +489,41 @@ class VecSimSVSThreadPoolImpl { } private: + // Rent up to `count` worker threads from the pool. Returns an RAII guard that + // automatically releases the threads when destroyed. + // The SVS pool is sized to match the RediSearch thread pool, and RediSearch controls + // scheduling via reserve jobs, so all requested slots should always be available. + // Getting fewer threads than requested indicates a bug in the scheduling logic. + RentedThreads rent(size_t count, void *log_ctx = nullptr) { + RentedThreads rented; + if (count == 0) { + return rented; + } + + std::lock_guard lock{pool_mutex_}; + for (auto &slot : slots_) { + if (rented.count() >= count) { + break; + } + bool expected = false; + if (slot->occupied.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) { + rented.add(slot); + } + } + + if (rented.count() < count) { + auto msg = fmt::format("SVS thread pool: rented {} threads out of {} requested " + "(pool has {} slots). This should not happen.", + rented.count(), count, slots_.size()); + if (VecSimIndexInterface::logCallback) { + assert(log_ctx && "Log context must be provided when logging is available"); + VecSimIndexInterface::logCallback(log_ctx, "warning", msg.c_str()); + } + assert(false && "Failed to rent the expected number of SVS threads"); + } + return rented; + } + // Wait for all rented workers to finish. 
If any worker (or the main thread) threw, // restart crashed workers and throw a combined exception. void manage_workers_after_run(const std::string &main_thread_error, RentedThreads &rented, diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 9d80a0e38..c3e1cc987 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -47,7 +47,7 @@ add_executable(test_fp16 ../utils/test_main_with_timeout.cpp ../utils/mock_threa add_executable(test_int8 ../utils/test_main_with_timeout.cpp ../utils/mock_thread_pool.cpp test_int8.cpp unit_test_utils.cpp) add_executable(test_uint8 ../utils/test_main_with_timeout.cpp ../utils/mock_thread_pool.cpp test_uint8.cpp unit_test_utils.cpp) add_executable(test_index_test_utils ../utils/test_main_with_timeout.cpp ../utils/mock_thread_pool.cpp test_index_test_utils.cpp unit_test_utils.cpp) -add_executable(test_svs ../utils/test_main_with_timeout.cpp ../utils/mock_thread_pool.cpp test_svs.cpp test_svs_tiered.cpp test_svs_multi.cpp test_svs_fp16.cpp unit_test_utils.cpp) +add_executable(test_svs ../utils/test_main_with_timeout.cpp ../utils/mock_thread_pool.cpp test_svs.cpp test_svs_tiered.cpp test_svs_multi.cpp test_svs_fp16.cpp test_svs_threadpool.cpp unit_test_utils.cpp) target_link_libraries(test_hnsw PUBLIC gtest VectorSimilarity) target_link_libraries(test_hnsw_parallel PUBLIC gtest VectorSimilarity) diff --git a/tests/unit/test_svs_fp16.cpp b/tests/unit/test_svs_fp16.cpp index 2f57b1c16..fcf2c7dd7 100644 --- a/tests/unit/test_svs_fp16.cpp +++ b/tests/unit/test_svs_fp16.cpp @@ -2237,18 +2237,16 @@ class FP16SVSTieredIndexTest : public FP16SVSTest { TieredSVSParams{.trainingTriggerThreshold = training_threshold, .updateTriggerThreshold = update_threshold}}}; } - void verifyNumThreads(TieredSVSIndex *tiered_index, size_t expected_num_threads, - size_t expected_capcity) { - ASSERT_EQ(tiered_index->GetSVSIndex()->getParallelism(), expected_num_threads); + void verifyNumThreads(TieredSVSIndex *tiered_index, 
size_t expected_capcity) { + ASSERT_EQ(tiered_index->GetSVSIndex()->getParallelism(), 1); ASSERT_EQ(tiered_index->GetSVSIndex()->getPoolSize(), expected_capcity); } TieredSVSIndex *CreateTieredSVSIndex(const TieredIndexParams &tiered_params, - tieredIndexMock &mock_thread_pool, - size_t num_available_threads = 1) { + tieredIndexMock &mock_thread_pool) { // Resize the shared SVS thread pool singleton to match the mock thread pool size, // simulating what RediSearch does via VecSim_UpdateThreadPoolSize during module init. - SVSIndexBase::getSharedThreadPool()->resize(mock_thread_pool.thread_pool_size); + VecSim_UpdateThreadPoolSize(mock_thread_pool.thread_pool_size); auto *tiered_index = reinterpret_cast *>(TieredFactory::NewIndex(&tiered_params)); @@ -2257,19 +2255,18 @@ class FP16SVSTieredIndexTest : public FP16SVSTest { // the index, and we'll need to release the ctx at the end of the test. mock_thread_pool.ctx->index_strong_ref.reset(tiered_index); - verifyNumThreads(tiered_index, num_available_threads, mock_thread_pool.thread_pool_size); + verifyNumThreads(tiered_index, mock_thread_pool.thread_pool_size); return tiered_index; } TieredSVSIndex * CreateTieredSVSIndex(VecSimParams &svs_params, tieredIndexMock &mock_thread_pool, size_t training_threshold = SVS_VAMANA_DEFAULT_TRAINING_THRESHOLD, - size_t update_threshold = SVS_VAMANA_DEFAULT_UPDATE_THRESHOLD, - size_t num_available_threads = 1) { + size_t update_threshold = SVS_VAMANA_DEFAULT_UPDATE_THRESHOLD) { svs_params.algoParams.svsParams.quantBits = index_type_t::get_quant_bits(); TieredIndexParams tiered_params = CreateTieredSVSParams( svs_params, mock_thread_pool, training_threshold, update_threshold); - return CreateTieredSVSIndex(tiered_params, mock_thread_pool, num_available_threads); + return CreateTieredSVSIndex(tiered_params, mock_thread_pool); } void SetUp() override { diff --git a/tests/unit/test_svs_threadpool.cpp b/tests/unit/test_svs_threadpool.cpp new file mode 100644 index 000000000..82178592b 
--- /dev/null +++ b/tests/unit/test_svs_threadpool.cpp @@ -0,0 +1,468 @@ +/* + * Copyright (c) 2006-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the + * GNU Affero General Public License v3 (AGPLv3). + */ + +#include "gtest/gtest.h" + +#if HAVE_SVS +#include +#include +#include +#include + +#include + +#include "VecSim/algorithms/svs/svs.h" +#include "VecSim/algorithms/svs/svs_utils.h" +#include "VecSim/vec_sim.h" +#include "VecSim/vec_sim_interface.h" + +// std::latch::wait() blocks indefinitely. Use this to fail with a diagnostic +// message instead of hanging if a bug causes a deadlock. +static bool wait_with_timeout(std::latch &l, std::chrono::seconds timeout) { + auto deadline = std::chrono::steady_clock::now() + timeout; + while (!l.try_wait()) { + if (std::chrono::steady_clock::now() >= deadline) { + return false; + } + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + return true; +} + +static constexpr auto kTestTimeout = std::chrono::seconds(5); + +class SVSThreadPoolTest : public ::testing::Test { +protected: + void SetUp() override { + // Save and clear global log callback so that pool operations + // don't assert on nullptr log_ctx (we don't have an index context). 
+ saved_callback_ = VecSimIndexInterface::logCallback; + VecSimIndexInterface::logCallback = nullptr; + } + void TearDown() override { VecSimIndexInterface::logCallback = saved_callback_; } + +private: + logCallbackFunction saved_callback_ = nullptr; +}; + +// --------------------------------------------------------------------------- +// Test 1: VecSimSVSThreadPoolImpl::resize — grow and shrink +// --------------------------------------------------------------------------- +TEST_F(SVSThreadPoolTest, ResizeGrowAndShrink) { + auto pool = std::make_shared(1); + ASSERT_EQ(pool->size(), 1); + + // Grow 1 → 4 + pool->resize(4); + ASSERT_EQ(pool->size(), 4); + + // Shrink 4 → 2 + pool->resize(2); + ASSERT_EQ(pool->size(), 2); + + // No-op 2 → 2 + pool->resize(2); + ASSERT_EQ(pool->size(), 2); + + // Shrink to minimum 2 → 1 + pool->resize(1); + ASSERT_EQ(pool->size(), 1); + + // resize(0) clamps to 1 + pool->resize(0); + ASSERT_EQ(pool->size(), 1); + + // Grow from 1 → 8 and verify parallel_for works at new size + pool->resize(8); + ASSERT_EQ(pool->size(), 8); + std::atomic_int counter{0}; + pool->parallel_for([&counter](size_t) { counter++; }, 8); + ASSERT_EQ(counter, 8); +} + +// --------------------------------------------------------------------------- +// Test 2: Shrink while threads are rented +// --------------------------------------------------------------------------- +TEST_F(SVSThreadPoolTest, ShrinkWhileRented) { + // Pool size 5: 4 worker slots [s0, s1, s2, s3]. + auto pool = std::make_shared(5); + ASSERT_EQ(pool->size(), 5); + + // Wrapper A uses parallelism 3 → rents 2 workers (s0, s1). + VecSimSVSThreadPool wrapperA(pool); + wrapperA.setParallelism(3); + + std::latch hold(1); // blocks rented workers + std::latch workers_ready(2); // signaled when both workers start + std::atomic_int resultA{0}; + + // Spawned thread: run parallel_for that blocks until we release the latch. 
+ // parallel_for runs tid=0 on the calling thread (no slot rented) and + // tid=1,2 on rented worker threads (s0, s1). Only the worker threads + // (tid > 0) count down workers_ready, so main can wait until both workers + // are actually running and holding their slots before proceeding to shrink. + std::thread t([&] { + wrapperA.parallel_for( + [&](size_t tid) { + if (tid > 0) { + workers_ready.count_down(); + } + hold.wait(); + resultA++; + }, + 3); + }); + + // Wait until both workers are running (blocked on latch). + ASSERT_TRUE(wait_with_timeout(workers_ready, kTestTimeout)) + << "Timed out waiting for wrapper A's 2 workers to start. " + "resultA=" + << resultA << ", pool_size=" << pool->size(); + + // Shrink pool from 5 → 4 (slots become [s0, s1, s2]). s3 is free and + // gets destroyed. s0, s1 are occupied by wrapperA but remain in the vector. + // s2 is free and available for rental. + pool->resize(4); + ASSERT_EQ(pool->size(), 4); + + // While wrapperA's threads are still alive (blocked on latch), run + // parallel_for on the shrunk pool with a second wrapper using a free slot. + VecSimSVSThreadPool wrapperB(pool); + // Parallelism 2 = 1 rented worker + calling thread. The pool has 3 slots + // [s0, s1, s2] after shrink; s0 and s1 are occupied by wrapperA, so the + // single rented worker will get s2 (the only free slot). + wrapperB.setParallelism(2); + std::atomic_int resultB{0}; + wrapperB.parallel_for([&](size_t) { resultB++; }, 2); + ASSERT_EQ(resultB, 2); + + // Release the latch — wrapperA's parallel_for completes. + hold.count_down(); + t.join(); + ASSERT_EQ(resultA, 3); +} + +// --------------------------------------------------------------------------- +// Test 3: Grow while threads are rented +// --------------------------------------------------------------------------- +TEST_F(SVSThreadPoolTest, GrowWhileRented) { + // Pool size 3: 2 worker slots [s0, s1]. 
+ auto pool = std::make_shared(3); + ASSERT_EQ(pool->size(), 3); + + // Wrapper A uses parallelism 3 → rents 2 workers (s0, s1). + VecSimSVSThreadPool wrapperA(pool); + wrapperA.setParallelism(3); + + std::latch hold(1); // blocks rented workers + std::latch workers_ready(2); // signaled when both workers start + std::atomic_int resultA{0}; + + // Spawned thread: parallel_for blocks all 3 partitions (tid=0 on calling + // thread, tid=1,2 on rented workers). Only workers (tid > 0) count down + // workers_ready so main knows the slots are occupied before growing. + std::thread t([&] { + wrapperA.parallel_for( + [&](size_t tid) { + if (tid > 0) { + workers_ready.count_down(); + } + hold.wait(); + resultA++; + }, + 3); + }); + + ASSERT_TRUE(wait_with_timeout(workers_ready, kTestTimeout)) + << "Timed out waiting for wrapper A's 2 workers to start. " + "resultA=" + << resultA << ", pool_size=" << pool->size(); + + // Grow pool from 3 → 5 while s0, s1 are occupied. New slots [s2, s3] are + // appended. The in-flight parallel_for is unaffected — it holds independent + // shared_ptr refs to s0, s1. + pool->resize(5); + ASSERT_EQ(pool->size(), 5); + + // Wrapper B uses parallelism 3 → rents 2 workers. s0, s1 are occupied by + // wrapperA, so it gets the 2 newly created slots s2, s3 — after the grow, + // s2 and s3 are the only free slots, exactly the 2 workers it needs. + VecSimSVSThreadPool wrapperB(pool); + wrapperB.setParallelism(3); + std::atomic_int resultB{0}; + wrapperB.parallel_for([&](size_t) { resultB++; }, 3); + ASSERT_EQ(resultB, 3); + + // Release wrapperA's blocked threads. + hold.count_down(); + t.join(); + ASSERT_EQ(resultA, 3); + + // After all threads are free, verify the full pool is usable at new size.
+ wrapperA.setParallelism(5); + std::atomic_int resultFull{0}; + wrapperA.parallel_for([&](size_t) { resultFull++; }, 5); + ASSERT_EQ(resultFull, 5); +} + +// --------------------------------------------------------------------------- +// Test 4: Parallelism propagation across copies +// The VecSimSVSThreadPool wrapper is copied internally by SVS (via +// ThreadPoolHandle). Both the original and the copy must share the same +// parallelism_ (shared_ptr>), so that setParallelism() on +// the original is visible to SVS's copy when it calls size(). +// --------------------------------------------------------------------------- +TEST_F(SVSThreadPoolTest, ParallelismPropagationAcrossCopies) { + auto pool = std::make_shared(8); + + VecSimSVSThreadPool original(pool); + original.setParallelism(2); + ASSERT_EQ(original.size(), 2); + + // Copy the wrapper — simulates what SVS does internally. + VecSimSVSThreadPool copy = original; + ASSERT_EQ(copy.size(), 2); + + // Change parallelism on the original. The copy must see it. + original.setParallelism(6); + ASSERT_EQ(original.size(), 6); + ASSERT_EQ(copy.size(), 6); + + // Change via the copy. The original must see it too. + copy.setParallelism(3); + ASSERT_EQ(copy.size(), 3); + ASSERT_EQ(original.size(), 3); + + // Both share the same pool. + ASSERT_EQ(original.poolSize(), 8); + ASSERT_EQ(copy.poolSize(), 8); +} + +// --------------------------------------------------------------------------- +// Test 5: Two indexes sharing one pool with independent parallelism +// Two wrappers from the same shared pool have independent parallelism values. +// Changing one does not affect the other. Both can run parallel_for +// sequentially using disjoint thread budgets. 
+// --------------------------------------------------------------------------- +TEST_F(SVSThreadPoolTest, TwoIndexesIndependentParallelism) { + auto pool = std::make_shared(8); + + VecSimSVSThreadPool wrapperA(pool); + VecSimSVSThreadPool wrapperB(pool); + + wrapperA.setParallelism(2); + wrapperB.setParallelism(5); + + // Each wrapper reports its own parallelism via size(). + ASSERT_EQ(wrapperA.size(), 2); + ASSERT_EQ(wrapperB.size(), 5); + + // Both report the same shared pool size. + ASSERT_EQ(wrapperA.poolSize(), 8); + ASSERT_EQ(wrapperB.poolSize(), 8); + + // Changing A's parallelism does not affect B. + wrapperA.setParallelism(4); + ASSERT_EQ(wrapperA.size(), 4); + ASSERT_EQ(wrapperB.size(), 5); + + // Both can run parallel_for sequentially — all threads are free between calls. + std::atomic_int resultA{0}; + wrapperA.parallel_for([&](size_t) { resultA++; }, 4); + ASSERT_EQ(resultA, 4); + + std::atomic_int resultB{0}; + wrapperB.parallel_for([&](size_t) { resultB++; }, 5); + ASSERT_EQ(resultB, 5); + + // Verify both wrappers can use the full pool capacity sequentially. + wrapperA.setParallelism(8); + std::atomic_int resultFullA{0}; + wrapperA.parallel_for([&](size_t) { resultFullA++; }, 8); + ASSERT_EQ(resultFullA, 8); + + wrapperB.setParallelism(8); + std::atomic_int resultFullB{0}; + wrapperB.parallel_for([&](size_t) { resultFullB++; }, 8); + ASSERT_EQ(resultFullB, 8); +} + +// --------------------------------------------------------------------------- +// Test 6: VecSim_UpdateThreadPoolSize mode transitions +// The C API sets write mode and resizes the shared singleton pool. +// --------------------------------------------------------------------------- +TEST_F(SVSThreadPoolTest, UpdateThreadPoolSizeModeTransitions) { + auto pool = SVSIndexBase::getSharedThreadPool(); + + // 0 → 4: switch to async mode, pool resizes to 4. 
+ VecSim_UpdateThreadPoolSize(4); + ASSERT_EQ(VecSimIndex::asyncWriteMode, VecSim_WriteAsync); + ASSERT_EQ(pool->size(), 4); + + // 4 → 8: stay in async mode (N→M, both > 0), pool resizes to 8. + VecSim_UpdateThreadPoolSize(8); + ASSERT_EQ(VecSimIndex::asyncWriteMode, VecSim_WriteAsync); + ASSERT_EQ(pool->size(), 8); + + // 8 → 0: switch to in-place mode, pool resizes to 1. + VecSim_UpdateThreadPoolSize(0); + ASSERT_EQ(VecSimIndex::asyncWriteMode, VecSim_WriteInPlace); + ASSERT_EQ(pool->size(), 1); + + // 0 → 0: idempotent, stays in-place, no crash. + VecSim_UpdateThreadPoolSize(0); + ASSERT_EQ(VecSimIndex::asyncWriteMode, VecSim_WriteInPlace); + ASSERT_EQ(pool->size(), 1); + + // 0 → 2: back to async mode. + VecSim_UpdateThreadPoolSize(2); + ASSERT_EQ(VecSimIndex::asyncWriteMode, VecSim_WriteAsync); + ASSERT_EQ(pool->size(), 2); + + // Restore to in-place mode so we don't leak state to other tests. + VecSim_UpdateThreadPoolSize(0); +} + +// --------------------------------------------------------------------------- +// Test 7: Concurrent rental from two indexes +// Two wrappers share a pool. Both call parallel_for concurrently from +// different threads with parallelism values that sum to the pool size. +// No thread should be double-rented — the mutex serializes rental scans. +// --------------------------------------------------------------------------- +TEST_F(SVSThreadPoolTest, ConcurrentRentalFromTwoIndexes) { + // Pool size 8: wrappers A (4) and B (4) sum to exactly 8. + auto pool = std::make_shared(8); + + VecSimSVSThreadPool wrapperA(pool); + wrapperA.setParallelism(4); + VecSimSVSThreadPool wrapperB(pool); + wrapperB.setParallelism(4); + + std::atomic_int resultA{0}; + std::atomic_int resultB{0}; + + // Use a latch as a barrier: all 8 tasks (4 from A + 4 from B) must + // arrive before any can proceed. If a thread were double-rented, only + // 7 tasks could run simultaneously and the latch would never complete. 
+ std::latch barrier(8); + std::mutex mtx; + std::set idsA, idsB; + + std::thread tA([&] { + wrapperA.parallel_for( + [&](size_t) { + { + std::lock_guard lock(mtx); + idsA.insert(std::this_thread::get_id()); + } + barrier.count_down(); + barrier.wait(); + resultA++; + }, + 4); + }); + std::thread tB([&] { + wrapperB.parallel_for( + [&](size_t) { + { + std::lock_guard lock(mtx); + idsB.insert(std::this_thread::get_id()); + } + barrier.count_down(); + barrier.wait(); + resultB++; + }, + 4); + }); + + // Wait for both threads to complete. If the barrier deadlocks (e.g., due + // to double-renting), these joins would hang — use a timed check. + auto deadline = std::chrono::steady_clock::now() + kTestTimeout; + tA.join(); + tB.join(); + ASSERT_LT(std::chrono::steady_clock::now(), deadline) + << "Timed out waiting for concurrent parallel_for calls. " + "resultA=" + << resultA << ", resultB=" << resultB << ", idsA.size=" << idsA.size() + << ", idsB.size=" << idsB.size() << ", pool_size=" << pool->size(); + + ASSERT_EQ(resultA, 4); + ASSERT_EQ(resultB, 4); + + // Each wrapper used 4 distinct OS threads, and no thread was shared. + ASSERT_EQ(idsA.size(), 4); + ASSERT_EQ(idsB.size(), 4); + for (auto &id : idsA) { + ASSERT_EQ(idsB.count(id), 0) << "Thread was double-rented across wrappers"; + } +} + +// --------------------------------------------------------------------------- +// Test 8: All threads occupied — graceful degradation +// When all pool threads are rented by wrapper A, wrapper B's parallel_for +// cannot rent any workers. In debug builds, rent() asserts. In release +// builds, parallel_for uses rented.count() (0 workers) and runs only +// partition 0 on the calling thread. +// +// NOTE: This should never happen in production. RediSearch's reserve job +// mechanism guarantees that the number of concurrent renters never exceeds +// the pool size. This test exercises the defensive fallback path. 
+// --------------------------------------------------------------------------- +TEST_F(SVSThreadPoolTest, AllThreadsOccupied) { + // Pool size 4 (3 worker slots). Wrapper A rents all 3. + auto pool = std::make_shared(4); + + VecSimSVSThreadPool wrapperA(pool); + wrapperA.setParallelism(4); + + std::latch hold(1); + std::latch workers_ready(3); + std::atomic_int resultA{0}; + + // Spawned thread: wrapper A grabs all 3 worker slots and blocks. + std::thread t([&] { + wrapperA.parallel_for( + [&](size_t tid) { + if (tid > 0) { + workers_ready.count_down(); + } + hold.wait(); + resultA++; + }, + 4); + }); + + ASSERT_TRUE(wait_with_timeout(workers_ready, kTestTimeout)) + << "Timed out waiting for wrapper A's 3 workers to start. " + "resultA=" + << resultA << ", pool_size=" << pool->size(); + + // All 3 worker slots are occupied. Wrapper B tries to rent 1 worker. + VecSimSVSThreadPool wrapperB(pool); + wrapperB.setParallelism(2); + +#ifdef NDEBUG + // Release: graceful degradation — rent() returns 0 workers, parallel_for + // runs only partition 0 on the calling thread. Work is silently dropped. + std::atomic_int resultB{0}; + wrapperB.parallel_for([&](size_t) { resultB++; }, 2); + ASSERT_EQ(resultB, 1); // only partition 0 ran +#else + // Debug: rent() asserts because it can't fulfill the request. + ASSERT_DEATH(wrapperB.parallel_for([&](size_t) {}, 2), + "Failed to rent the expected number of SVS threads"); +#endif + + // Clean up: release wrapper A's blocked threads. + hold.count_down(); + t.join(); + ASSERT_EQ(resultA, 4); +} + +#endif // HAVE_SVS diff --git a/tests/unit/test_svs_tiered.cpp b/tests/unit/test_svs_tiered.cpp index c3d67a5f4..fd3f0ddb9 100644 --- a/tests/unit/test_svs_tiered.cpp +++ b/tests/unit/test_svs_tiered.cpp @@ -103,11 +103,6 @@ class SVSTieredIndexTest : public ::testing::Test { // Set the created tiered index in the index external context (it will take ownership over // the index, and we'll need to release the ctx at the end of the test. 
         mock_thread_pool.ctx->index_strong_ref.reset(tiered_index);
-        // Set parallelism to 1 by default to allow direct calls to SVS addVector() API,
-        // which requires exactly 1 thread. When using tiered index addVector API,
-        // the parallelism is managed internally according to the operation and pool
-        // size, so testing parallelism remains intact.
-        // tiered_index->GetSVSIndex()->setParallelism(num_available_threads);
         verifyNumThreads(tiered_index, mock_thread_pool.thread_pool_size);
 
         return tiered_index;

From 2163e56dbf89abcc6e70bb3c7492865d0b4bd2ff Mon Sep 17 00:00:00 2001
From: meiravgri
Date: Tue, 7 Apr 2026 08:56:02 +0000
Subject: [PATCH 12/19] deprecated num_threads

---
 src/VecSim/algorithms/svs/svs.h |  5 +++
 src/VecSim/vec_sim_common.h     |  4 ++-
 tests/unit/test_svs.cpp         | 64 +++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h
index f44135865..56b353fb6 100644
--- a/src/VecSim/algorithms/svs/svs.h
+++ b/src/VecSim/algorithms/svs/svs.h
@@ -371,6 +371,11 @@ class SVSIndex : public VecSimIndexAbstract, fl
         is_two_level_lvq{isTwoLevelLVQ(params.quantBits)},
         threadpool_{SVSIndexBase::getSharedThreadPool(), this->logCallbackCtx}, impl_{nullptr} {
         logger_ = makeLogger();
+        if (params.num_threads != 0) {
+            this->log(VecSimCommonStrings::LOG_WARNING_STRING,
+                      "SVSParams.num_threads is deprecated and ignored. "
+                      "Thread pool size is controlled globally via VecSim_UpdateThreadPoolSize().");
+        }
     }
 
     ~SVSIndex() = default;
diff --git a/src/VecSim/vec_sim_common.h b/src/VecSim/vec_sim_common.h
index 82aceaa1c..ecf6c33b9 100644
--- a/src/VecSim/vec_sim_common.h
+++ b/src/VecSim/vec_sim_common.h
@@ -194,7 +194,9 @@ typedef struct {
     size_t prune_to;  // Amount that candidates will be pruned.
     VecSimOptionMode use_search_history; // Either the contents of the search buffer can be used or
                                          // the entire search history.
- size_t num_threads; // Maximum number of threads in threadpool. + size_t num_threads; // DEPRECATED: ignored. Thread pool size is controlled + // globally via VecSim_UpdateThreadPoolSize(). Setting + // this field has no effect and will emit a warning log. size_t search_window_size; // Search window size to use during search. size_t search_buffer_capacity; // Search buffer capacity to use during search. size_t leanvec_dim; // Leanvec dimension to use when LeanVec is enabled. diff --git a/tests/unit/test_svs.cpp b/tests/unit/test_svs.cpp index f70681425..aaa912c2a 100644 --- a/tests/unit/test_svs.cpp +++ b/tests/unit/test_svs.cpp @@ -3299,6 +3299,70 @@ TEST(SVSTest, compute_distance) { } #endif // defined(__linux__) && defined(__x86_64__) +// --------------------------------------------------------------------------- +// SVSParams::num_threads is deprecated and ignored — pool size comes from +// the shared singleton. Setting it should log a warning but not affect the pool. +// --------------------------------------------------------------------------- +TEST(SVSTest, NumThreadsParamIgnored) { + // Resize the shared singleton pool to a known size. + auto pool = SVSIndexBase::getSharedThreadPool(); + pool->resize(2); + ASSERT_EQ(pool->size(), 2); + + // Capture warning logs emitted by VecSim. + std::string captured_log; + VecSimIndexInterface::logCallback = [](void *ctx, const char *level, const char *msg) { + auto *out = static_cast(ctx); + *out += std::string(level) + ": " + msg + "\n"; + }; + + // Create an SVS index with an explicit (deprecated) num_threads value. 
+ SVSParams svs_params = { + .type = VecSimType_FLOAT32, + .dim = 4, + .metric = VecSimMetric_L2, + .num_threads = 16, // should be ignored + }; + VecSimParams params{.algo = VecSimAlgo_SVS, + .algoParams = {.svsParams = svs_params}, + .logCtx = static_cast(&captured_log)}; + VecSimIndex *index = VecSimIndex_New(¶ms); + ASSERT_NE(index, nullptr); + + // The shared pool size must remain at 2 — num_threads was ignored. + ASSERT_EQ(pool->size(), 2); + + // The index reports the shared pool size, not the deprecated param value. + VecSimIndexDebugInfo info = VecSimIndex_DebugInfo(index); + ASSERT_EQ(info.svsInfo.numThreads, 2); + + // A deprecation warning should have been logged. + EXPECT_NE(captured_log.find("deprecated"), std::string::npos) + << "Expected deprecation warning in log, got: " << captured_log; + + VecSimIndex_Free(index); + + // Verify: creating an index without setting num_threads produces no warning. + captured_log.clear(); + SVSParams svs_params_default = { + .type = VecSimType_FLOAT32, .dim = 4, .metric = VecSimMetric_L2, + // num_threads left as 0 (default / unset) + }; + VecSimParams params_default{.algo = VecSimAlgo_SVS, + .algoParams = {.svsParams = svs_params_default}, + .logCtx = static_cast(&captured_log)}; + VecSimIndex *index2 = VecSimIndex_New(¶ms_default); + ASSERT_NE(index2, nullptr); + EXPECT_EQ(captured_log.find("deprecated"), std::string::npos) + << "No deprecation warning expected when num_threads is not set, got: " << captured_log; + + VecSimIndex_Free(index2); + + // Restore pool to default size and clear log callback. 
+ pool->resize(1); + VecSimIndexInterface::logCallback = nullptr; +} + #else // HAVE_SVS TEST(SVSTest, svs_not_supported) { From ee9751eff642712941276cc372ddcfd0553592e5 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 7 Apr 2026 12:53:41 +0000 Subject: [PATCH 13/19] assert pool size align bm --- tests/benchmark/bm_vecsim_svs.h | 54 ++++++++++++------------------ tests/unit/test_svs_threadpool.cpp | 8 +++++ 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/tests/benchmark/bm_vecsim_svs.h b/tests/benchmark/bm_vecsim_svs.h index c4b167421..20d1880f7 100644 --- a/tests/benchmark/bm_vecsim_svs.h +++ b/tests/benchmark/bm_vecsim_svs.h @@ -93,7 +93,6 @@ class BM_VecSimSVS : public BM_VecSimGeneral { .quantBits = this->quantBits, .graph_max_degree = BM_VecSimGeneral::M, .construction_window_size = BM_VecSimGeneral::EF_C, - .num_threads = mock_thread_pool.thread_pool_size, }; VecSimParams params{.algo = VecSimAlgo_SVS, .algoParams = {.svsParams = svs_params}}; @@ -107,30 +106,21 @@ class BM_VecSimSVS : public BM_VecSimGeneral { // Set the created tiered index in the index external context (it will take ownership over // the index, and we'll need to release the ctx at the end of the test. mock_thread_pool.ctx->index_strong_ref.reset(tiered_index); - // Set numThreads to 1 by default to allow direct calls to SVS addVector() API, - // which requires exactly 1 thread. When using tiered index addVector API, - // the thread count is managed internally according to the operation and threadpool - // capacity, so testing parallelism remains intact. - size_t params_threadpool_size = - tiered_params.primaryIndexParams->algoParams.svsParams.num_threads; - size_t num_threads = - params_threadpool_size ? 
params_threadpool_size : mock_thread_pool.thread_pool_size; - tiered_index->GetSVSIndex()->setParallelism(num_threads); - test_utils::verifyNumThreads(tiered_index, num_threads, num_threads, + VecSim_UpdateThreadPoolSize(mock_thread_pool.thread_pool_size); + test_utils::verifyNumThreads(tiered_index, 1, + std::max(mock_thread_pool.thread_pool_size, size_t{1}), std::string("CreateTieredSVSIndex")); return tiered_index; } - VecSimIndexAbstract *CreateSVSIndexFromFile(size_t update_threshold, - size_t num_threads = 1) { + VecSimIndexAbstract *CreateSVSIndexFromFile(size_t update_threshold) { SVSParams svs_params = {.type = this->data_type, .dim = BM_VecSimGeneral::dim, .metric = VecSimMetric_Cosine, .quantBits = this->quantBits, .graph_max_degree = BM_VecSimGeneral::M, - .construction_window_size = BM_VecSimGeneral::EF_C, - .num_threads = num_threads}; + .construction_window_size = BM_VecSimGeneral::EF_C}; VecSimParams params{.algo = VecSimAlgo_SVS, .algoParams = {.svsParams = svs_params}}; // Load svs index - construct path based on quantBits @@ -153,13 +143,11 @@ class BM_VecSimSVS : public BM_VecSimGeneral { .metric = VecSimMetric_Cosine, .quantBits = this->quantBits, .graph_max_degree = BM_VecSimGeneral::M, - .construction_window_size = BM_VecSimGeneral::EF_C, - .num_threads = mock_thread_pool.thread_pool_size}; + .construction_window_size = BM_VecSimGeneral::EF_C}; VecSimParams params{.algo = VecSimAlgo_SVS, .algoParams = {.svsParams = svs_params}}; // Load svs index - auto *svs_index = - CreateSVSIndexFromFile(update_threshold, mock_thread_pool.thread_pool_size); + auto *svs_index = CreateSVSIndexFromFile(update_threshold); TieredIndexParams tiered_params = test_utils::CreateTieredSVSParams( params, mock_thread_pool, update_threshold, update_threshold); @@ -171,7 +159,10 @@ class BM_VecSimSVS : public BM_VecSimGeneral { // the index, and we'll need to release the ctx at the end of the test. 
mock_thread_pool.ctx->index_strong_ref.reset(tiered_index); size_t num_threads = mock_thread_pool.thread_pool_size; - test_utils::verifyNumThreads(tiered_index, num_threads, num_threads, + VecSim_UpdateThreadPoolSize(num_threads); + tiered_index->GetSVSIndex()->setParallelism(std::max(num_threads, size_t{1})); + test_utils::verifyNumThreads(tiered_index, std::max(num_threads, size_t{1}), + std::max(num_threads, size_t{1}), std::string("CreateTieredSVSIndexFromFile")); return tiered_index; @@ -237,6 +228,13 @@ void BM_VecSimSVS::runTrainBMIteration(benchmark::State &st, this->quantBits = static_cast(st.range(0)); auto *tiered_index = CreateTieredSVSIndex(mock_thread_pool, training_threshold); + // Verify write mode + if (mock_thread_pool.thread_pool_size) { + ASSERT_EQ(VecSimIndexInterface::asyncWriteMode, VecSim_WriteAsync); + } else { + ASSERT_EQ(VecSimIndexInterface::asyncWriteMode, VecSim_WriteInPlace); + } + auto verify_index_size = [&](size_t expected_tiered_index_size, size_t expected_frontend_size, size_t expected_backend_size, std::string msg = "") { VecSimIndexDebugInfo info = VecSimIndex_DebugInfo(tiered_index); @@ -292,30 +290,22 @@ void BM_VecSimSVS::runTrainBMIteration(benchmark::State &st, template void BM_VecSimSVS::Train(benchmark::State &st) { - // set write mode to inplace - auto original_mode = VecSimIndexInterface::asyncWriteMode; - VecSim_SetWriteMode(VecSim_WriteInPlace); - auto training_threshold = st.range(1); // Ensure we have enough vectors to train. ASSERT_GE(N_QUERIES, training_threshold); for (auto _ : st) { st.PauseTiming(); - // In each iteration create a new index - auto mock_thread_pool = tieredIndexMock(1); + // In each iteration create a new index with 0 threads (in-place mode). + // VecSim_UpdateThreadPoolSize(0) sets write mode to VecSim_WriteInPlace. 
+ auto mock_thread_pool = tieredIndexMock(0); runTrainBMIteration(st, mock_thread_pool, training_threshold); } - // Restore original write mode ASSERT_EQ(VecSimIndexInterface::asyncWriteMode, VecSim_WriteInPlace); - VecSim_SetWriteMode(original_mode); } template void BM_VecSimSVS::TrainAsync(benchmark::State &st) { - // ensure mode is async - ASSERT_EQ(VecSimIndexInterface::asyncWriteMode, VecSim_WriteAsync); - auto training_threshold = st.range(1); int unsigned num_threads = st.range(2); @@ -339,7 +329,7 @@ template void BM_VecSimSVS::AddLabel(benchmark::State &st) { size_t label = 0; - auto index = CreateSVSIndexFromFile(1, 1); + auto index = CreateSVSIndexFromFile(1); size_t memory_delta = index->getAllocationSize(); for (auto _ : st) { VecSimIndex_AddVector(index, test_vectors[label].data(), label + N_VECTORS); diff --git a/tests/unit/test_svs_threadpool.cpp b/tests/unit/test_svs_threadpool.cpp index 82178592b..3a85926cd 100644 --- a/tests/unit/test_svs_threadpool.cpp +++ b/tests/unit/test_svs_threadpool.cpp @@ -146,6 +146,11 @@ TEST_F(SVSThreadPoolTest, ShrinkWhileRented) { hold.count_down(); t.join(); ASSERT_EQ(resultA, 3); + + // After the renter's RentedThreads guard is destroyed, the old slots' + // shared_ptrs drop to refcount 0 and the threads are destroyed. + // Pool size remains at the shrunk value. + ASSERT_EQ(pool->size(), 4); } // --------------------------------------------------------------------------- @@ -204,6 +209,9 @@ TEST_F(SVSThreadPoolTest, GrowWhileRented) { t.join(); ASSERT_EQ(resultA, 3); + // Pool size remains at the grown value after renter releases. + ASSERT_EQ(pool->size(), 5); + // After all threads are free, verify the full pool is usable at new size. 
wrapperA.setParallelism(5); std::atomic_int resultFull{0}; From 8098145cb43fecf1be62dd316410f7656c7f6878 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 7 Apr 2026 12:59:39 +0000 Subject: [PATCH 14/19] minor fixes --- tests/benchmark/bm_vecsim_svs.h | 4 +--- tests/unit/test_svs_tiered.cpp | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/benchmark/bm_vecsim_svs.h b/tests/benchmark/bm_vecsim_svs.h index 20d1880f7..5acb882c0 100644 --- a/tests/benchmark/bm_vecsim_svs.h +++ b/tests/benchmark/bm_vecsim_svs.h @@ -160,9 +160,7 @@ class BM_VecSimSVS : public BM_VecSimGeneral { mock_thread_pool.ctx->index_strong_ref.reset(tiered_index); size_t num_threads = mock_thread_pool.thread_pool_size; VecSim_UpdateThreadPoolSize(num_threads); - tiered_index->GetSVSIndex()->setParallelism(std::max(num_threads, size_t{1})); - test_utils::verifyNumThreads(tiered_index, std::max(num_threads, size_t{1}), - std::max(num_threads, size_t{1}), + test_utils::verifyNumThreads(tiered_index, 1, std::max(num_threads, size_t{1}), std::string("CreateTieredSVSIndexFromFile")); return tiered_index; diff --git a/tests/unit/test_svs_tiered.cpp b/tests/unit/test_svs_tiered.cpp index fd3f0ddb9..54a00f429 100644 --- a/tests/unit/test_svs_tiered.cpp +++ b/tests/unit/test_svs_tiered.cpp @@ -3837,7 +3837,7 @@ TEST(SVSTieredIndexTest, testThreadPool) { counter = 0; pool.parallel_for(task, 2); - ASSERT_EQ(counter, 3); // 0+1+2 = 1 + ASSERT_EQ(counter, 3); // 1+2 = 3 // setParallelism boundary checks — asserts in debug mode #if !defined(RUNNING_ON_VALGRIND) && !defined(NDEBUG) From b90ac4da0f14c1523c7931c95cd11ca4912570a7 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 9 Apr 2026 11:33:23 +0000 Subject: [PATCH 15/19] address review --- src/VecSim/algorithms/svs/svs.h | 11 ++---- src/VecSim/algorithms/svs/svs_utils.h | 36 +++++++++++-------- src/VecSim/vec_sim.cpp | 4 +-- tests/unit/test_svs.cpp | 9 +++-- tests/unit/test_svs_threadpool.cpp | 50 +++++++++++++++------------ 
tests/unit/test_svs_tiered.cpp | 14 +++++--- 6 files changed, 67 insertions(+), 57 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs.h b/src/VecSim/algorithms/svs/svs.h index 56b353fb6..ffe40d2c1 100644 --- a/src/VecSim/algorithms/svs/svs.h +++ b/src/VecSim/algorithms/svs/svs.h @@ -38,13 +38,6 @@ struct SVSIndexBase SVSIndexBase() : num_marked_deleted{0} {}; virtual ~SVSIndexBase() = default; - // Singleton accessor for the shared SVS thread pool. - // Always valid — initialized with size 1 (write-in-place mode: 0 worker threads, - // only the calling thread participates). Resized on VecSim_UpdateThreadPoolSize() calls. - static std::shared_ptr getSharedThreadPool() { - static auto shared_pool = std::make_shared(1); - return shared_pool; - } virtual int addVectors(const void *vectors_data, const labelType *labels, size_t n) = 0; virtual int deleteVectors(const labelType *labels, size_t n) = 0; virtual bool isLabelExists(labelType label) const = 0; @@ -368,8 +361,8 @@ class SVSIndex : public VecSimIndexAbstract, fl leanvec_dim{ svs_details::getOrDefault(params.leanvec_dim, SVS_VAMANA_DEFAULT_LEANVEC_DIM)}, epsilon{svs_details::getOrDefault(params.epsilon, SVS_VAMANA_DEFAULT_EPSILON)}, - is_two_level_lvq{isTwoLevelLVQ(params.quantBits)}, - threadpool_{SVSIndexBase::getSharedThreadPool(), this->logCallbackCtx}, impl_{nullptr} { + is_two_level_lvq{isTwoLevelLVQ(params.quantBits)}, threadpool_{this->logCallbackCtx}, + impl_{nullptr} { logger_ = makeLogger(); if (params.num_threads != 0) { this->log(VecSimCommonStrings::LOG_WARNING_STRING, diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h index 084bb7718..741d46ef3 100644 --- a/src/VecSim/algorithms/svs/svs_utils.h +++ b/src/VecSim/algorithms/svs/svs_utils.h @@ -403,11 +403,11 @@ class RentedThreads { // * Multiple callers can rent disjoint subsets of threads concurrently // * Shrinking while threads are rented is safe (shared_ptr lifecycle) class VecSimSVSThreadPoolImpl { 
-public: // Create a pool with `num_threads` total parallelism (including the calling thread). // Spawns `num_threads - 1` worker OS threads. num_threads must be >= 1. // In write-in-place mode, the pool is created with num_threads == 1 (0 worker threads, // only the calling thread participates). + // Private — use instance() to access the shared singleton. explicit VecSimSVSThreadPoolImpl(size_t num_threads = 1) { assert(num_threads && "VecSimSVSThreadPoolImpl should not be created with 0 threads"); slots_.reserve(num_threads - 1); @@ -416,6 +416,16 @@ class VecSimSVSThreadPoolImpl { } } +public: + // Singleton accessor for the shared SVS thread pool. + // Always valid — initialized with size 1 (write-in-place mode: 0 worker threads, + // only the calling thread participates). Resized on VecSim_UpdateThreadPoolSize() calls. + static std::shared_ptr instance() { + static auto shared_pool = + std::shared_ptr(new VecSimSVSThreadPoolImpl(1)); + return shared_pool; + } + // Total parallelism: worker slots + 1 (the calling thread always participates). size_t size() const { std::lock_guard lock{pool_mutex_}; @@ -468,10 +478,9 @@ class VecSimSVSThreadPoolImpl { // Rent n-1 worker threads auto rented = rent(n - 1, log_ctx); - size_t num_workers = rented.count(); // Assign work to rented workers (partitions 1..n-1) - for (size_t i = 0; i < num_workers; ++i) { + for (size_t i = 0; i < rented.count(); ++i) { rented[i].assign({&f, i + 1}); } @@ -485,7 +494,7 @@ class VecSimSVSThreadPoolImpl { // Wait for all rented workers and collect errors. // RentedThreads destructor will release the slots after this block. - manage_workers_after_run(main_thread_error, rented, num_workers); + manage_workers_after_run(main_thread_error, rented); } private: @@ -526,8 +535,7 @@ class VecSimSVSThreadPoolImpl { // Wait for all rented workers to finish. If any worker (or the main thread) threw, // restart crashed workers and throw a combined exception. 
- void manage_workers_after_run(const std::string &main_thread_error, RentedThreads &rented, - size_t rented_count) { + void manage_workers_after_run(const std::string &main_thread_error, RentedThreads &rented) { auto message = std::string{}; auto inserter = std::back_inserter(message); bool has_error = !main_thread_error.empty(); @@ -536,7 +544,7 @@ class VecSimSVSThreadPoolImpl { fmt::format_to(inserter, "Thread 0: {}\n", main_thread_error); } - for (size_t i = 0; i < rented_count; ++i) { + for (size_t i = 0; i < rented.count(); ++i) { auto &thread = rented[i]; thread.wait(); if (!thread.is_okay()) { @@ -582,16 +590,16 @@ class VecSimSVSThreadPool { void *log_ctx_ = nullptr; // per-index log context public: - // Construct with reference to the shared pool singleton. + // Construct using the shared pool singleton. // parallelism_ starts at 1 (the calling thread always participates), matching the // pool's minimum size. Safe for immediate use in write-in-place mode without an // explicit setParallelism() call. - explicit VecSimSVSThreadPool(std::shared_ptr pool, - void *log_ctx = nullptr) - : pool_(std::move(pool)), parallelism_(std::make_shared>(1)), - log_ctx_(log_ctx) { - assert(pool_ && "Pool must not be null"); - } + explicit VecSimSVSThreadPool(void *log_ctx = nullptr) + : pool_(VecSimSVSThreadPoolImpl::instance()), + parallelism_(std::make_shared>(1)), log_ctx_(log_ctx) {} + + // Resize the shared pool singleton. Delegates to VecSimSVSThreadPoolImpl::instance(). + static void resize(size_t new_size) { VecSimSVSThreadPoolImpl::instance()->resize(new_size); } // Set the degree of parallelism for this index's next operation. 
// n must be the number of threads actually reserved by the caller (i.e., the diff --git a/src/VecSim/vec_sim.cpp b/src/VecSim/vec_sim.cpp index c9237d6fe..0d3abcaa7 100644 --- a/src/VecSim/vec_sim.cpp +++ b/src/VecSim/vec_sim.cpp @@ -15,7 +15,7 @@ #include "VecSim/vec_sim_index.h" #include "VecSim/vec_sim_adhoc_bf_ctx.h" #include "VecSim/types/bfloat16.h" -#include "VecSim/algorithms/svs/svs.h" +#include "VecSim/algorithms/svs/svs_utils.h" #include #include "memory.h" @@ -44,7 +44,7 @@ extern "C" void VecSim_UpdateThreadPoolSize(size_t new_size) { // Resize the shared SVS thread pool. resize() clamps to minimum size 1. // new_size == 0 → pool size 1 (only calling thread, write-in-place). // new_size > 0 → pool size new_size (new_size - 1 worker threads). - SVSIndexBase::getSharedThreadPool()->resize(std::max(new_size, size_t{1})); + VecSimSVSThreadPool::resize(std::max(new_size, size_t{1})); } static VecSimResolveCode _ResolveParams_EFRuntime(VecSimAlgo index_type, VecSimRawParam rparam, diff --git a/tests/unit/test_svs.cpp b/tests/unit/test_svs.cpp index aaa912c2a..1d47dc927 100644 --- a/tests/unit/test_svs.cpp +++ b/tests/unit/test_svs.cpp @@ -3305,9 +3305,8 @@ TEST(SVSTest, compute_distance) { // --------------------------------------------------------------------------- TEST(SVSTest, NumThreadsParamIgnored) { // Resize the shared singleton pool to a known size. - auto pool = SVSIndexBase::getSharedThreadPool(); - pool->resize(2); - ASSERT_EQ(pool->size(), 2); + VecSimSVSThreadPool::resize(2); + ASSERT_EQ(VecSimSVSThreadPoolImpl::instance()->size(), 2); // Capture warning logs emitted by VecSim. std::string captured_log; @@ -3330,7 +3329,7 @@ TEST(SVSTest, NumThreadsParamIgnored) { ASSERT_NE(index, nullptr); // The shared pool size must remain at 2 — num_threads was ignored. - ASSERT_EQ(pool->size(), 2); + ASSERT_EQ(VecSimSVSThreadPoolImpl::instance()->size(), 2); // The index reports the shared pool size, not the deprecated param value. 
VecSimIndexDebugInfo info = VecSimIndex_DebugInfo(index); @@ -3359,7 +3358,7 @@ TEST(SVSTest, NumThreadsParamIgnored) { VecSimIndex_Free(index2); // Restore pool to default size and clear log callback. - pool->resize(1); + VecSimSVSThreadPool::resize(1); VecSimIndexInterface::logCallback = nullptr; } diff --git a/tests/unit/test_svs_threadpool.cpp b/tests/unit/test_svs_threadpool.cpp index 3a85926cd..25d1c8229 100644 --- a/tests/unit/test_svs_threadpool.cpp +++ b/tests/unit/test_svs_threadpool.cpp @@ -45,7 +45,11 @@ class SVSThreadPoolTest : public ::testing::Test { saved_callback_ = VecSimIndexInterface::logCallback; VecSimIndexInterface::logCallback = nullptr; } - void TearDown() override { VecSimIndexInterface::logCallback = saved_callback_; } + void TearDown() override { + // Reset the shared singleton pool to size 1 so tests don't leak state. + VecSimSVSThreadPool::resize(1); + VecSimIndexInterface::logCallback = saved_callback_; + } private: logCallbackFunction saved_callback_ = nullptr; @@ -55,7 +59,7 @@ class SVSThreadPoolTest : public ::testing::Test { // Test 1: VecSimSVSThreadPoolImpl::resize — grow and shrink // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, ResizeGrowAndShrink) { - auto pool = std::make_shared(1); + auto pool = VecSimSVSThreadPoolImpl::instance(); ASSERT_EQ(pool->size(), 1); // Grow 1 → 4 @@ -91,11 +95,12 @@ TEST_F(SVSThreadPoolTest, ResizeGrowAndShrink) { // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, ShrinkWhileRented) { // Pool size 5: 4 worker slots [s0, s1, s2, s3]. - auto pool = std::make_shared(5); + auto pool = VecSimSVSThreadPoolImpl::instance(); + pool->resize(5); ASSERT_EQ(pool->size(), 5); // Wrapper A uses parallelism 3 → rents 2 workers (s0, s1). 
- VecSimSVSThreadPool wrapperA(pool); + VecSimSVSThreadPool wrapperA; wrapperA.setParallelism(3); std::latch hold(1); // blocks rented workers @@ -133,7 +138,7 @@ TEST_F(SVSThreadPoolTest, ShrinkWhileRented) { // While wrapperA's threads are still alive (blocked on latch), run // parallel_for on the shrunk pool with a second wrapper using a free slot. - VecSimSVSThreadPool wrapperB(pool); + VecSimSVSThreadPool wrapperB; // Parallelism 2 = 1 rented worker + calling thread. The pool has 3 slots // [s0, s1, s2] after shrink; s0 and s1 are occupied by wrapperA, so the // single rented worker will get s2 (the only free slot). @@ -158,11 +163,12 @@ TEST_F(SVSThreadPoolTest, ShrinkWhileRented) { // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, GrowWhileRented) { // Pool size 3: 2 worker slots [s0, s1]. - auto pool = std::make_shared(3); + auto pool = VecSimSVSThreadPoolImpl::instance(); + pool->resize(3); ASSERT_EQ(pool->size(), 3); // Wrapper A uses parallelism 3 → rents 2 workers (s0, s1). - VecSimSVSThreadPool wrapperA(pool); + VecSimSVSThreadPool wrapperA; wrapperA.setParallelism(3); std::latch hold(1); // blocks rented workers @@ -198,7 +204,7 @@ TEST_F(SVSThreadPoolTest, GrowWhileRented) { // Wrapper B uses parallelism 3 → rents 2 workers. s0, s1 are occupied by // wrapperA, so it gets the 2 newly created slots s2, s3... but we only // need 2 of the 3 free slots (s2, s3 are free, only need 2). - VecSimSVSThreadPool wrapperB(pool); + VecSimSVSThreadPool wrapperB; wrapperB.setParallelism(3); std::atomic_int resultB{0}; wrapperB.parallel_for([&](size_t) { resultB++; }, 3); @@ -227,9 +233,9 @@ TEST_F(SVSThreadPoolTest, GrowWhileRented) { // the original is visible to SVS's copy when it calls size(). 
// --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, ParallelismPropagationAcrossCopies) { - auto pool = std::make_shared(8); + VecSimSVSThreadPool::resize(8); - VecSimSVSThreadPool original(pool); + VecSimSVSThreadPool original; original.setParallelism(2); ASSERT_EQ(original.size(), 2); @@ -259,10 +265,10 @@ TEST_F(SVSThreadPoolTest, ParallelismPropagationAcrossCopies) { // sequentially using disjoint thread budgets. // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, TwoIndexesIndependentParallelism) { - auto pool = std::make_shared(8); + VecSimSVSThreadPool::resize(8); - VecSimSVSThreadPool wrapperA(pool); - VecSimSVSThreadPool wrapperB(pool); + VecSimSVSThreadPool wrapperA; + VecSimSVSThreadPool wrapperB; wrapperA.setParallelism(2); wrapperB.setParallelism(5); @@ -306,7 +312,7 @@ TEST_F(SVSThreadPoolTest, TwoIndexesIndependentParallelism) { // The C API sets write mode and resizes the shared singleton pool. // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, UpdateThreadPoolSizeModeTransitions) { - auto pool = SVSIndexBase::getSharedThreadPool(); + auto pool = VecSimSVSThreadPoolImpl::instance(); // 0 → 4: switch to async mode, pool resizes to 4. VecSim_UpdateThreadPoolSize(4); @@ -345,11 +351,11 @@ TEST_F(SVSThreadPoolTest, UpdateThreadPoolSizeModeTransitions) { // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, ConcurrentRentalFromTwoIndexes) { // Pool size 8: wrappers A (4) and B (4) sum to exactly 8. 
- auto pool = std::make_shared(8); + VecSimSVSThreadPool::resize(8); - VecSimSVSThreadPool wrapperA(pool); + VecSimSVSThreadPool wrapperA; wrapperA.setParallelism(4); - VecSimSVSThreadPool wrapperB(pool); + VecSimSVSThreadPool wrapperB; wrapperB.setParallelism(4); std::atomic_int resultA{0}; @@ -398,7 +404,7 @@ TEST_F(SVSThreadPoolTest, ConcurrentRentalFromTwoIndexes) { << "Timed out waiting for concurrent parallel_for calls. " "resultA=" << resultA << ", resultB=" << resultB << ", idsA.size=" << idsA.size() - << ", idsB.size=" << idsB.size() << ", pool_size=" << pool->size(); + << ", idsB.size=" << idsB.size() << ", pool_size=" << wrapperA.poolSize(); ASSERT_EQ(resultA, 4); ASSERT_EQ(resultB, 4); @@ -424,9 +430,9 @@ TEST_F(SVSThreadPoolTest, ConcurrentRentalFromTwoIndexes) { // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, AllThreadsOccupied) { // Pool size 4 (3 worker slots). Wrapper A rents all 3. - auto pool = std::make_shared(4); + VecSimSVSThreadPool::resize(4); - VecSimSVSThreadPool wrapperA(pool); + VecSimSVSThreadPool wrapperA; wrapperA.setParallelism(4); std::latch hold(1); @@ -449,10 +455,10 @@ TEST_F(SVSThreadPoolTest, AllThreadsOccupied) { ASSERT_TRUE(wait_with_timeout(workers_ready, kTestTimeout)) << "Timed out waiting for wrapper A's 3 workers to start. " "resultA=" - << resultA << ", pool_size=" << pool->size(); + << resultA << ", pool_size=" << wrapperA.poolSize(); // All 3 worker slots are occupied. Wrapper B tries to rent 1 worker. 
- VecSimSVSThreadPool wrapperB(pool); + VecSimSVSThreadPool wrapperB; wrapperB.setParallelism(2); #ifdef NDEBUG diff --git a/tests/unit/test_svs_tiered.cpp b/tests/unit/test_svs_tiered.cpp index 54a00f429..247c62537 100644 --- a/tests/unit/test_svs_tiered.cpp +++ b/tests/unit/test_svs_tiered.cpp @@ -3801,8 +3801,8 @@ TYPED_TEST(SVSTieredIndexTestBasic, testDeletedJournalMulti) { TEST(SVSTieredIndexTest, testThreadPool) { // Test VecSimSVSThreadPool with shared pool const size_t num_threads = 4; - auto shared_pool = std::make_shared(num_threads); - auto pool = VecSimSVSThreadPool(shared_pool); + VecSimSVSThreadPool::resize(num_threads); + VecSimSVSThreadPool pool; ASSERT_EQ(pool.poolSize(), num_threads); ASSERT_EQ(pool.size(), 1); // parallelism starts at 1 (calling thread) ASSERT_EQ(pool.getParallelism(), 1); @@ -3846,8 +3846,8 @@ TEST(SVSTieredIndexTest, testThreadPool) { #endif // Test write-in-place mode (pool with size 1) - auto inplace_shared = std::make_shared(1); - auto inplace_pool = VecSimSVSThreadPool(inplace_shared); + VecSimSVSThreadPool::resize(1); + VecSimSVSThreadPool inplace_pool; inplace_pool.setParallelism(1); ASSERT_EQ(inplace_pool.size(), 1); ASSERT_EQ(inplace_pool.poolSize(), 1); @@ -3856,7 +3856,8 @@ TEST(SVSTieredIndexTest, testThreadPool) { ASSERT_EQ(counter, 1); // parallel_for works immediately with default parallelism 1 - auto default_pool = VecSimSVSThreadPool(shared_pool); + VecSimSVSThreadPool::resize(num_threads); + VecSimSVSThreadPool default_pool; counter = 0; default_pool.parallel_for(task, 1); ASSERT_EQ(counter, 1); // 0+1 = 1 @@ -3868,6 +3869,9 @@ TEST(SVSTieredIndexTest, testThreadPool) { pool.setParallelism(num_threads); ASSERT_THROW(pool.parallel_for(err_task, 1), svs::threads::ThreadingException); ASSERT_THROW(pool.parallel_for(err_task, num_threads), svs::threads::ThreadingException); + + // Restore pool to default size so we don't leak state to other tests. 
+    VecSimSVSThreadPool::resize(1);
 }

 #else // HAVE_SVS

From 3d1fb81adb1f2b300ff66462d5f4fadef3d81631 Mon Sep 17 00:00:00 2001
From: meiravgri
Date: Mon, 13 Apr 2026 12:58:03 +0000
Subject: [PATCH 16/19] use no-op deleter

---
 src/VecSim/algorithms/svs/svs_utils.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h
index 741d46ef3..b900a44a8 100644
--- a/src/VecSim/algorithms/svs/svs_utils.h
+++ b/src/VecSim/algorithms/svs/svs_utils.h
@@ -421,8 +421,8 @@ class VecSimSVSThreadPoolImpl {
     // Always valid — initialized with size 1 (write-in-place mode: 0 worker threads,
     // only the calling thread participates). Resized on VecSim_UpdateThreadPoolSize() calls.
     static std::shared_ptr<VecSimSVSThreadPoolImpl> instance() {
-        static auto shared_pool =
-            std::shared_ptr<VecSimSVSThreadPoolImpl>(new VecSimSVSThreadPoolImpl(1));
+        static auto shared_pool = std::shared_ptr<VecSimSVSThreadPoolImpl>(
+            new VecSimSVSThreadPoolImpl(1), [](VecSimSVSThreadPoolImpl *) { /* leak at exit */ });
         return shared_pool;
     }

From cf76aa2f1cdfab6d3cf55291cd430bcc1e48ae14 Mon Sep 17 00:00:00 2001
From: meiravgri
Date: Mon, 13 Apr 2026 13:43:52 +0000
Subject: [PATCH 17/19] poolSize static

---
 src/VecSim/algorithms/svs/svs_utils.h |  2 +-
 tests/unit/test_svs.cpp               |  4 +-
 tests/unit/test_svs_threadpool.cpp    | 69 +++++++++++++--------------
 3 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h
index b900a44a8..c9a6f6d0b 100644
--- a/src/VecSim/algorithms/svs/svs_utils.h
+++ b/src/VecSim/algorithms/svs/svs_utils.h
@@ -623,7 +623,7 @@ class VecSimSVSThreadPool {
     size_t size() const { return parallelism_->load(); }

     // Shared pool size — used by scheduling to decide how many reserve jobs to submit.
- size_t poolSize() const { return pool_->size(); } + static size_t poolSize() { return VecSimSVSThreadPoolImpl::instance()->size(); } // Delegates to the shared pool's parallel_for, passing the per-index log context. // n may be less than parallelism_ when the problem size is smaller than the diff --git a/tests/unit/test_svs.cpp b/tests/unit/test_svs.cpp index 1d47dc927..ab47e4a45 100644 --- a/tests/unit/test_svs.cpp +++ b/tests/unit/test_svs.cpp @@ -3306,7 +3306,7 @@ TEST(SVSTest, compute_distance) { TEST(SVSTest, NumThreadsParamIgnored) { // Resize the shared singleton pool to a known size. VecSimSVSThreadPool::resize(2); - ASSERT_EQ(VecSimSVSThreadPoolImpl::instance()->size(), 2); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 2); // Capture warning logs emitted by VecSim. std::string captured_log; @@ -3329,7 +3329,7 @@ TEST(SVSTest, NumThreadsParamIgnored) { ASSERT_NE(index, nullptr); // The shared pool size must remain at 2 — num_threads was ignored. - ASSERT_EQ(VecSimSVSThreadPoolImpl::instance()->size(), 2); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 2); // The index reports the shared pool size, not the deprecated param value. 
VecSimIndexDebugInfo info = VecSimIndex_DebugInfo(index); diff --git a/tests/unit/test_svs_threadpool.cpp b/tests/unit/test_svs_threadpool.cpp index 25d1c8229..dfa225476 100644 --- a/tests/unit/test_svs_threadpool.cpp +++ b/tests/unit/test_svs_threadpool.cpp @@ -56,37 +56,36 @@ class SVSThreadPoolTest : public ::testing::Test { }; // --------------------------------------------------------------------------- -// Test 1: VecSimSVSThreadPoolImpl::resize — grow and shrink +// Test 1: VecSimSVSThreadPool::resize — grow and shrink // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, ResizeGrowAndShrink) { - auto pool = VecSimSVSThreadPoolImpl::instance(); - ASSERT_EQ(pool->size(), 1); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 1); // Grow 1 → 4 - pool->resize(4); - ASSERT_EQ(pool->size(), 4); + VecSimSVSThreadPool::resize(4); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 4); // Shrink 4 → 2 - pool->resize(2); - ASSERT_EQ(pool->size(), 2); + VecSimSVSThreadPool::resize(2); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 2); // No-op 2 → 2 - pool->resize(2); - ASSERT_EQ(pool->size(), 2); + VecSimSVSThreadPool::resize(2); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 2); // Shrink to minimum 2 → 1 - pool->resize(1); - ASSERT_EQ(pool->size(), 1); + VecSimSVSThreadPool::resize(1); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 1); // resize(0) clamps to 1 - pool->resize(0); - ASSERT_EQ(pool->size(), 1); + VecSimSVSThreadPool::resize(0); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 1); // Grow from 1 → 8 and verify parallel_for works at new size - pool->resize(8); - ASSERT_EQ(pool->size(), 8); + VecSimSVSThreadPool::resize(8); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 8); std::atomic_int counter{0}; - pool->parallel_for([&counter](size_t) { counter++; }, 8); + VecSimSVSThreadPoolImpl::instance()->parallel_for([&counter](size_t) { counter++; }, 8); ASSERT_EQ(counter, 8); } @@ -95,9 +94,8 @@ TEST_F(SVSThreadPoolTest, 
ResizeGrowAndShrink) { // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, ShrinkWhileRented) { // Pool size 5: 4 worker slots [s0, s1, s2, s3]. - auto pool = VecSimSVSThreadPoolImpl::instance(); - pool->resize(5); - ASSERT_EQ(pool->size(), 5); + VecSimSVSThreadPool::resize(5); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 5); // Wrapper A uses parallelism 3 → rents 2 workers (s0, s1). VecSimSVSThreadPool wrapperA; @@ -128,13 +126,13 @@ TEST_F(SVSThreadPoolTest, ShrinkWhileRented) { ASSERT_TRUE(wait_with_timeout(workers_ready, kTestTimeout)) << "Timed out waiting for wrapper A's 2 workers to start. " "resultA=" - << resultA << ", pool_size=" << pool->size(); + << resultA << ", pool_size=" << VecSimSVSThreadPool::poolSize(); // Shrink pool from 5 → 4 (slots become [s0, s1, s2]). s3 is free and // gets destroyed. s0, s1 are occupied by wrapperA but remain in the vector. // s2 is free and available for rental. - pool->resize(4); - ASSERT_EQ(pool->size(), 4); + VecSimSVSThreadPool::resize(4); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 4); // While wrapperA's threads are still alive (blocked on latch), run // parallel_for on the shrunk pool with a second wrapper using a free slot. @@ -155,7 +153,7 @@ TEST_F(SVSThreadPoolTest, ShrinkWhileRented) { // After the renter's RentedThreads guard is destroyed, the old slots' // shared_ptrs drop to refcount 0 and the threads are destroyed. // Pool size remains at the shrunk value. - ASSERT_EQ(pool->size(), 4); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 4); } // --------------------------------------------------------------------------- @@ -163,9 +161,8 @@ TEST_F(SVSThreadPoolTest, ShrinkWhileRented) { // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, GrowWhileRented) { // Pool size 3: 2 worker slots [s0, s1]. 
- auto pool = VecSimSVSThreadPoolImpl::instance(); - pool->resize(3); - ASSERT_EQ(pool->size(), 3); + VecSimSVSThreadPool::resize(3); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 3); // Wrapper A uses parallelism 3 → rents 2 workers (s0, s1). VecSimSVSThreadPool wrapperA; @@ -193,13 +190,13 @@ TEST_F(SVSThreadPoolTest, GrowWhileRented) { ASSERT_TRUE(wait_with_timeout(workers_ready, kTestTimeout)) << "Timed out waiting for wrapper A's 2 workers to start. " "resultA=" - << resultA << ", pool_size=" << pool->size(); + << resultA << ", pool_size=" << VecSimSVSThreadPool::poolSize(); // Grow pool from 3 → 5 while s0, s1 are occupied. New slots [s2, s3] are // appended. The in-flight parallel_for is unaffected — it holds independent // shared_ptr refs to s0, s1. - pool->resize(5); - ASSERT_EQ(pool->size(), 5); + VecSimSVSThreadPool::resize(5); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 5); // Wrapper B uses parallelism 3 → rents 2 workers. s0, s1 are occupied by // wrapperA, so it gets the 2 newly created slots s2, s3... but we only @@ -216,7 +213,7 @@ TEST_F(SVSThreadPoolTest, GrowWhileRented) { ASSERT_EQ(resultA, 3); // Pool size remains at the grown value after renter releases. - ASSERT_EQ(pool->size(), 5); + ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 5); // After all threads are free, verify the full pool is usable at new size. wrapperA.setParallelism(5); @@ -312,32 +309,30 @@ TEST_F(SVSThreadPoolTest, TwoIndexesIndependentParallelism) { // The C API sets write mode and resizes the shared singleton pool. // --------------------------------------------------------------------------- TEST_F(SVSThreadPoolTest, UpdateThreadPoolSizeModeTransitions) { - auto pool = VecSimSVSThreadPoolImpl::instance(); - // 0 → 4: switch to async mode, pool resizes to 4. 
     VecSim_UpdateThreadPoolSize(4);
     ASSERT_EQ(VecSimIndex::asyncWriteMode, VecSim_WriteAsync);
-    ASSERT_EQ(pool->size(), 4);
+    ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 4);

     // 4 → 8: stay in async mode (N→M, both > 0), pool resizes to 8.
     VecSim_UpdateThreadPoolSize(8);
     ASSERT_EQ(VecSimIndex::asyncWriteMode, VecSim_WriteAsync);
-    ASSERT_EQ(pool->size(), 8);
+    ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 8);

     // 8 → 0: switch to in-place mode, pool resizes to 1.
     VecSim_UpdateThreadPoolSize(0);
     ASSERT_EQ(VecSimIndex::asyncWriteMode, VecSim_WriteInPlace);
-    ASSERT_EQ(pool->size(), 1);
+    ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 1);

     // 0 → 0: idempotent, stays in-place, no crash.
     VecSim_UpdateThreadPoolSize(0);
     ASSERT_EQ(VecSimIndex::asyncWriteMode, VecSim_WriteInPlace);
-    ASSERT_EQ(pool->size(), 1);
+    ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 1);

     // 0 → 2: back to async mode.
     VecSim_UpdateThreadPoolSize(2);
     ASSERT_EQ(VecSimIndex::asyncWriteMode, VecSim_WriteAsync);
-    ASSERT_EQ(pool->size(), 2);
+    ASSERT_EQ(VecSimSVSThreadPool::poolSize(), 2);

     // Restore to in-place mode so we don't leak state to other tests.
     VecSim_UpdateThreadPoolSize(0);

From ca4b396166f3fc0e46efa728ba3dab8c0fdda832 Mon Sep 17 00:00:00 2001
From: meiravgri
Date: Mon, 13 Apr 2026 16:18:49 +0000
Subject: [PATCH 18/19] move RentedThreads to be a nested class of
 VecSimSVSThreadPoolImpl

---
 src/VecSim/algorithms/svs/svs_utils.h | 64 ++++++++++++---------------
 1 file changed, 29 insertions(+), 35 deletions(-)

diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h
index c9a6f6d0b..db42da86c 100644
--- a/src/VecSim/algorithms/svs/svs_utils.h
+++ b/src/VecSim/algorithms/svs/svs_utils.h
@@ -360,49 +360,43 @@ struct ThreadSlot {
     ThreadSlot &operator=(ThreadSlot &&) = delete;
 };

-// RAII guard for threads rented from the shared pool. On destruction, marks all
-// rented slots as unoccupied (lock-free atomic stores).
Holds shared_ptr references -// to keep slots alive even if the pool shrinks while the rental is active. -class RentedThreads { -public: - RentedThreads() = default; - - // Move-only - RentedThreads(RentedThreads &&other) noexcept : slots_(std::move(other.slots_)) {} - RentedThreads &operator=(RentedThreads &&other) noexcept { - release(); - slots_ = std::move(other.slots_); - return *this; - } - RentedThreads(const RentedThreads &) = delete; - RentedThreads &operator=(const RentedThreads &) = delete; +// Shared thread pool for SVS indexes with rental model. +// Based on svs::threads::NativeThreadPoolBase with changes: +// * Pool is physically resizable (creates/destroys OS threads) +// * Threads are rented for the duration of a parallel_for call +// * Multiple callers can rent disjoint subsets of threads concurrently +// * Shrinking while threads are rented is safe (shared_ptr lifecycle) +class VecSimSVSThreadPoolImpl { + // RAII guard for threads rented from the shared pool. On destruction, marks all + // rented slots as unoccupied (lock-free atomic stores). Holds shared_ptr references + // to keep slots alive even if the pool shrinks while the rental is active. 
+ class RentedThreads { + public: + RentedThreads() = default; - ~RentedThreads() { release(); } + // Move-only + RentedThreads(RentedThreads &&other) noexcept : slots_(std::move(other.slots_)) {} + RentedThreads(const RentedThreads &) = delete; - void add(std::shared_ptr slot) { slots_.push_back(std::move(slot)); } + ~RentedThreads() { release(); } - size_t count() const { return slots_.size(); } + void add(std::shared_ptr slot) { slots_.push_back(std::move(slot)); } - svs::threads::Thread &operator[](size_t i) { return slots_[i]->thread; } + size_t count() const { return slots_.size(); } -private: - void release() { - for (auto &slot : slots_) { - slot->occupied.store(false, std::memory_order_release); + svs::threads::Thread &operator[](size_t i) { return slots_[i]->thread; } + + private: + void release() { + for (auto &slot : slots_) { + slot->occupied.store(false, std::memory_order_release); + } + slots_.clear(); } - slots_.clear(); - } - std::vector> slots_; -}; + std::vector> slots_; + }; -// Shared thread pool for SVS indexes with rental model. -// Based on svs::threads::NativeThreadPoolBase with changes: -// * Pool is physically resizable (creates/destroys OS threads) -// * Threads are rented for the duration of a parallel_for call -// * Multiple callers can rent disjoint subsets of threads concurrently -// * Shrinking while threads are rented is safe (shared_ptr lifecycle) -class VecSimSVSThreadPoolImpl { // Create a pool with `num_threads` total parallelism (including the calling thread). // Spawns `num_threads - 1` worker OS threads. num_threads must be >= 1. 
// In write-in-place mode, the pool is created with num_threads == 1 (0 worker threads, From 8210172a95658886af7c59bed070dde0a666c82a Mon Sep 17 00:00:00 2001 From: meiravgri Date: Mon, 13 Apr 2026 16:45:21 +0000 Subject: [PATCH 19/19] review comment --- src/VecSim/algorithms/svs/svs_utils.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/VecSim/algorithms/svs/svs_utils.h b/src/VecSim/algorithms/svs/svs_utils.h index db42da86c..d51e814b5 100644 --- a/src/VecSim/algorithms/svs/svs_utils.h +++ b/src/VecSim/algorithms/svs/svs_utils.h @@ -384,7 +384,10 @@ class VecSimSVSThreadPoolImpl { size_t count() const { return slots_.size(); } - svs::threads::Thread &operator[](size_t i) { return slots_[i]->thread; } + svs::threads::Thread &operator[](size_t i) { + assert(i < slots_.size()); + return slots_[i]->thread; + } private: void release() { @@ -504,13 +507,14 @@ class VecSimSVSThreadPoolImpl { } std::lock_guard lock{pool_mutex_}; + size_t rented_count = 0; for (auto &slot : slots_) { - if (rented.count() >= count) { - break; - } bool expected = false; if (slot->occupied.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) { rented.add(slot); + if (++rented_count >= count) { + break; + } } }