
Commit 7845e1b

Andy Ross authored and carlescufi committed
lib/mempool: Fix spurious -ENOMEM due to aggressive latency control
The mempool operations need to be atomic, but because of latency concerns (the allocator is intended for use in an ISR) the locking was designed to be as minimal as possible. And it... mostly got it right. All the list handling was correctly synchronized. The merging of four child blocks into a parent block was atomic. The splitting of a block into four children was atomic.

BUT: there was a moment between the allocation of a large block and the re-addition of its three unused children where the lock was being released. This meant that another context (e.g. an ISR that just fired, interrupting the existing call to k_mem_pool_alloc()) would see some memory "missing" that wasn't actually allocated. And if this happens to have been the top level block, it's entirely possible that the whole heap looks empty, even though the other allocator might have been doing only the smallest allocation!

Fix that by making the "remove a block then add back the three children we don't use" into an atomic step. We can still relax the lock between levels as we split the subblocks further.

(Finally, note that this trick allows a somewhat cleaner API as we can do our "retry due to race" step internally by walking back up the block size list instead of forcing our caller to do it via that weird -EAGAIN return value.)

Fixes #11022

Signed-off-by: Andy Ross <[email protected]>
1 parent 2e21a95 commit 7845e1b
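To make the race described in the commit message concrete, here is a minimal, self-contained C sketch of the before/after locking pattern. It is not the Zephyr code: toy_pool, lock_pool() and unlock_pool() are hypothetical stand-ins for the real free lists and pool_irq_lock()/pool_irq_unlock(), and the pool is reduced to a single parent block with four children.

#include <stdbool.h>

/* Hypothetical, drastically simplified pool: one top-level block that can
 * be split into four children.  lock_pool()/unlock_pool() stand in for
 * pool_irq_lock()/pool_irq_unlock().
 */
struct toy_pool {
	bool parent_free;	/* level 0: the single big block        */
	int children_free;	/* level 1: how many quarters are free  */
	bool locked;		/* pretend IRQ lock                     */
};

static void lock_pool(struct toy_pool *p)   { p->locked = true;  }
static void unlock_pool(struct toy_pool *p) { p->locked = false; }

/* Pre-fix pattern: the lock is dropped between taking the parent block and
 * giving back its three unused children.  Inside that window neither the
 * parent nor the children are on any free list, so an ISR allocating
 * concurrently sees an apparently empty heap and fails with -ENOMEM even
 * though only one quarter of the pool will actually end up in use.
 */
static int alloc_quarter_racy(struct toy_pool *p)
{
	lock_pool(p);
	if (!p->parent_free) {
		unlock_pool(p);
		return -1;		/* genuinely out of memory */
	}
	p->parent_free = false;
	unlock_pool(p);			/* <-- race window: heap looks empty */

	lock_pool(p);
	p->children_free = 3;		/* re-add 3 of the 4 children */
	unlock_pool(p);
	return 0;
}

/* Post-fix pattern: removing the parent and re-adding its three unused
 * children happen under a single lock hold, so another context always sees
 * either the parent or the children as available.  (The real code still
 * relaxes the lock between successive split levels to keep ISR latency low.)
 */
static int alloc_quarter_atomic(struct toy_pool *p)
{
	lock_pool(p);
	if (!p->parent_free) {
		unlock_pool(p);
		return -1;
	}
	p->parent_free = false;
	p->children_free = 3;
	unlock_pool(p);
	return 0;
}

int main(void)
{
	struct toy_pool p = { .parent_free = true };

	(void)alloc_quarter_racy(&p);	/* window visible only with concurrency */
	p = (struct toy_pool){ .parent_free = true };
	return alloc_quarter_atomic(&p);
}

The fixed variant mirrors the commit: the parent is taken off its level and the three spare children are published in one locked step, so a concurrent allocator can never observe the intermediate "everything missing" state.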

1 file changed: +32 -28 lines


lib/mempool/mempool.c

Lines changed: 32 additions & 28 deletions
@@ -144,13 +144,11 @@ static inline void pool_irq_unlock(struct sys_mem_pool_base *p, int key)
 static void *block_alloc(struct sys_mem_pool_base *p, int l, size_t lsz)
 {
 	sys_dnode_t *block;
-	int key = pool_irq_lock(p);
 
 	block = sys_dlist_get(&p->levels[l].free_list);
 	if (block != NULL) {
 		clear_free_bit(p, l, block_num(p, block, lsz));
 	}
-	pool_irq_unlock(p, key);
 
 	return block;
 }
@@ -198,9 +196,7 @@ static void *block_free(struct sys_mem_pool_base *p, int level,
 static void *block_break(struct sys_mem_pool_base *p, void *block, int l,
 			 size_t *lsizes)
 {
-	int i, bn, key;
-
-	key = pool_irq_lock(p);
+	int i, bn;
 
 	bn = block_num(p, block, lsizes[l]);
 
@@ -215,17 +211,15 @@ static void *block_break(struct sys_mem_pool_base *p, void *block, int l,
 		}
 	}
 
-	pool_irq_unlock(p, key);
-
 	return block;
 }
 
 int _sys_mem_pool_block_alloc(struct sys_mem_pool_base *p, size_t size,
 			      u32_t *level_p, u32_t *block_p, void **data_p)
 {
-	int i, from_l;
-	int alloc_l = -1, free_l = -1;
-	void *data;
+	int i, from_l, alloc_l = -1, free_l = -1;
+	unsigned int key;
+	void *data = NULL;
 	size_t lsizes[p->n_levels];
 
 	/* Walk down through levels, finding the one from which we
@@ -255,26 +249,36 @@ int _sys_mem_pool_block_alloc(struct sys_mem_pool_base *p, size_t size,
 		return -ENOMEM;
 	}
 
-	/* Iteratively break the smallest enclosing block... */
-	data = block_alloc(p, free_l, lsizes[free_l]);
-
-	if (data == NULL) {
-		/* This can happen if we race with another allocator.
-		 * It's OK, just back out and the timeout code will
-		 * retry. Note mild overloading: -EAGAIN isn't for
-		 * propagation to the caller, it's to tell the loop in
-		 * k_mem_pool_alloc() to try again synchronously. But
-		 * it means exactly what it says.
-		 *
-		 * This doesn't happen for user mode memory pools as this
-		 * entire function runs with a semaphore held.
-		 */
-		return -EAGAIN;
-	}
+	/* Now walk back down the levels (i.e. toward bigger sizes)
+	 * looking for an available block.  Start at the smallest
+	 * enclosing block found above (note that because that loop
+	 * was done without synchronization, it may no longer be
+	 * available!) as a useful optimization.  Note that the
+	 * removal of the block from the list and the re-addition of
+	 * its the three unused children needs to be performed
+	 * atomically, otherwise we open up a situation where we can
+	 * "steal" the top level block of the whole heap, causing a
+	 * spurious -ENOMEM.
+	 */
+	key = pool_irq_lock(p);
+	for (i = free_l; i >= 0; i--) {
+		data = block_alloc(p, i, lsizes[i]);
 
-	for (from_l = free_l; from_l < alloc_l; from_l++) {
-		data = block_break(p, data, from_l, lsizes);
+		/* Found one.  Iteratively break it down to the size
+		 * we need.  Note that we relax the lock to allow a
+		 * pending interrupt to fire so we don't hurt latency
+		 * by locking the full loop.
+		 */
+		if (data != NULL) {
+			for (from_l = i; from_l < alloc_l; from_l++) {
+				data = block_break(p, data, from_l, lsizes);
+				pool_irq_unlock(p, key);
+				key = pool_irq_lock(p);
+			}
+			break;
+		}
 	}
+	pool_irq_unlock(p, key);
 
 	*level_p = alloc_l;
 	*block_p = block_num(p, data, lsizes[alloc_l]);
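As a usage note on the API point made in the commit message, here is a hedged sketch of a caller-side retry loop after this change. The _sys_mem_pool_block_alloc() signature is taken from the diff above; pool_alloc_with_retry() and wait_for_free_block() are hypothetical illustrations, not Zephyr APIs, and this is not the actual k_mem_pool_alloc() implementation (it assumes the Zephyr headers that declare the pool types are available).

/* Hypothetical caller-side retry loop (NOT the real k_mem_pool_alloc()).
 * After this commit, _sys_mem_pool_block_alloc() resolves the "raced with
 * another allocator" case internally, so -ENOMEM is the only failure a
 * waiting caller needs to retry on; -EAGAIN no longer escapes the allocator.
 */
int pool_alloc_with_retry(struct sys_mem_pool_base *p, size_t size,
			  u32_t *level, u32_t *block, void **data,
			  int max_tries)
{
	for (int i = 0; i < max_tries; i++) {
		int ret = _sys_mem_pool_block_alloc(p, size, level, block, data);

		if (ret != -ENOMEM) {
			return ret;	/* success, or a non-retryable error */
		}
		wait_for_free_block(p);	/* hypothetical: block until memory is freed */
	}
	return -ENOMEM;
}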

0 commit comments
