@@ -394,8 +394,8 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
394394{
395395 struct lpm_trie * trie = container_of (map , struct lpm_trie , map );
396396 struct bpf_lpm_trie_key * key = _key ;
397- struct lpm_trie_node __rcu * * trim ;
398- struct lpm_trie_node * node ;
397+ struct lpm_trie_node __rcu * * trim , * * trim2 ;
398+ struct lpm_trie_node * node , * parent ;
399399 unsigned long irq_flags ;
400400 unsigned int next_bit ;
401401 size_t matchlen = 0 ;
@@ -407,31 +407,26 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
407407 raw_spin_lock_irqsave (& trie -> lock , irq_flags );
408408
409409 /* Walk the tree looking for an exact key/length match and keeping
410- * track of where we could begin trimming the tree. The trim-point
411- * is the sub-tree along the walk consisting of only single-child
412- * intermediate nodes and ending at a leaf node that we want to
413- * remove .
410+ * track of the path we traverse. We will need to know the node
411+ * we wish to delete, and the slot that points to the node we want
412+ * to delete. We may also need to know the nodes parent and the
413+ * slot that contains it .
414414 */
415415 trim = & trie -> root ;
416- node = rcu_dereference_protected (
417- trie -> root , lockdep_is_held (& trie -> lock ));
418- while (node ) {
416+ trim2 = trim ;
417+ parent = NULL ;
418+ while ((node = rcu_dereference_protected (
419+ * trim , lockdep_is_held (& trie -> lock )))) {
419420 matchlen = longest_prefix_match (trie , node , key );
420421
421422 if (node -> prefixlen != matchlen ||
422423 node -> prefixlen == key -> prefixlen )
423424 break ;
424425
426+ parent = node ;
427+ trim2 = trim ;
425428 next_bit = extract_bit (key -> data , node -> prefixlen );
426- /* If we hit a node that has more than one child or is a valid
427- * prefix itself, do not remove it. Reset the root of the trim
428- * path to its descendant on our path.
429- */
430- if (!(node -> flags & LPM_TREE_NODE_FLAG_IM ) ||
431- (node -> child [0 ] && node -> child [1 ]))
432- trim = & node -> child [next_bit ];
433- node = rcu_dereference_protected (
434- node -> child [next_bit ], lockdep_is_held (& trie -> lock ));
429+ trim = & node -> child [next_bit ];
435430 }
436431
437432 if (!node || node -> prefixlen != key -> prefixlen ||
@@ -442,27 +437,47 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
442437
443438 trie -> n_entries -- ;
444439
445- /* If the node we are removing is not a leaf node , simply mark it
440+ /* If the node we are removing has two children , simply mark it
446441 * as intermediate and we are done.
447442 */
448- if (rcu_access_pointer (node -> child [0 ]) ||
443+ if (rcu_access_pointer (node -> child [0 ]) &&
449444 rcu_access_pointer (node -> child [1 ])) {
450445 node -> flags |= LPM_TREE_NODE_FLAG_IM ;
451446 goto out ;
452447 }
453448
454- /* trim should now point to the slot holding the start of a path from
455- * zero or more intermediate nodes to our leaf node for deletion.
449+ /* If the parent of the node we are about to delete is an intermediate
450+ * node, and the deleted node doesn't have any children, we can delete
451+ * the intermediate parent as well and promote its other child
452+ * up the tree. Doing this maintains the invariant that all
453+ * intermediate nodes have exactly 2 children and that there are no
454+ * unnecessary intermediate nodes in the tree.
456455 */
457- while ((node = rcu_dereference_protected (
458- * trim , lockdep_is_held (& trie -> lock )))) {
459- RCU_INIT_POINTER (* trim , NULL );
460- trim = rcu_access_pointer (node -> child [0 ]) ?
461- & node -> child [0 ] :
462- & node -> child [1 ];
456+ if (parent && (parent -> flags & LPM_TREE_NODE_FLAG_IM ) &&
457+ !node -> child [0 ] && !node -> child [1 ]) {
458+ if (node == rcu_access_pointer (parent -> child [0 ]))
459+ rcu_assign_pointer (
460+ * trim2 , rcu_access_pointer (parent -> child [1 ]));
461+ else
462+ rcu_assign_pointer (
463+ * trim2 , rcu_access_pointer (parent -> child [0 ]));
464+ kfree_rcu (parent , rcu );
463465 kfree_rcu (node , rcu );
466+ goto out ;
464467 }
465468
469+ /* The node we are removing has either zero or one child. If there
470+ * is a child, move it into the removed node's slot then delete
471+ * the node. Otherwise just clear the slot and delete the node.
472+ */
473+ if (node -> child [0 ])
474+ rcu_assign_pointer (* trim , rcu_access_pointer (node -> child [0 ]));
475+ else if (node -> child [1 ])
476+ rcu_assign_pointer (* trim , rcu_access_pointer (node -> child [1 ]));
477+ else
478+ RCU_INIT_POINTER (* trim , NULL );
479+ kfree_rcu (node , rcu );
480+
466481out :
467482 raw_spin_unlock_irqrestore (& trie -> lock , irq_flags );
468483
0 commit comments