"""
A Radix Tree is a data structure that represents a space-optimized
trie (prefix tree) in which each node that is the only child is merged
with its parent [https://en.wikipedia.org/wiki/Radix_tree]
"""


class RadixNode:
    """A node of a radix tree; a node built with the default prefix is a root.

    Every method that takes a ``word`` interprets it relative to the node it
    is called on: the node's own ``prefix`` is assumed to be consumed already
    and only the outgoing edges in ``nodes`` are searched.  An empty ``word``
    therefore always designates the node itself.
    """

    def __init__(self, prefix: str = "", is_leaf: bool = False) -> None:
        # Mapping from the first character of a child's prefix to that child
        self.nodes: dict[str, RadixNode] = {}

        # A node is a leaf if the tree contains the word that ends on it
        self.is_leaf = is_leaf

        self.prefix = prefix

    def match(self, word: str) -> tuple[str, str, str]:
        """Compute the common substring of the prefix of the node and a word

        Args:
            word (str): word to compare

        Returns:
            (str, str, str): common substring, remaining prefix, remaining word

        >>> RadixNode("myprefix").match("mystring")
        ('my', 'prefix', 'string')
        >>> RadixNode("abc").match("abc")
        ('abc', '', '')
        """
        x = 0
        for q, w in zip(self.prefix, word):
            if q != w:
                break

            x += 1

        return self.prefix[:x], self.prefix[x:], word[x:]

    def insert_many(self, words: list[str]) -> None:
        """Insert many words in the tree

        Args:
            words (list[str]): list of words

        >>> RadixNode("myprefix").insert_many(["mystring", "hello"])
        """
        for word in words:
            self.insert(word)

    def insert(self, word: str) -> None:
        """Insert a word into the tree

        Inserting a word that is already stored is a no-op.

        Args:
            word (str): word to insert, relative to this node

        >>> RadixNode("myprefix").insert("mystring")
        >>> root = RadixNode()
        >>> root.insert("abab")
        >>> root.insert("ab")
        >>> root.insert("ab")
        >>> root.find("ab"), root.find("abab"), root.find("a")
        (True, True, False)
        """
        # Case 1: Nothing is left to consume, the word ends on this node
        # Solution: We set the current node as leaf
        if not word:
            self.is_leaf = True
            return

        first_char = word[0]
        child = self.nodes.get(first_char)

        # Case 2: The node has no edge starting with the first character
        # Solution: We create an edge from the current node to a new one
        # containing the word
        if child is None:
            self.nodes[first_char] = RadixNode(prefix=word, is_leaf=True)
            return

        matching_string, remaining_prefix, remaining_word = child.match(word)

        # Case 3: The word diverges from (or ends inside) the child's prefix
        # Solution: Create a node in between both nodes that holds the
        # common part and keep the old child below it with the rest of
        # its prefix
        if remaining_prefix:
            child.prefix = remaining_prefix
            split_node = RadixNode(matching_string, False)
            split_node.nodes[remaining_prefix[0]] = child
            self.nodes[first_char] = split_node
            child = split_node

        # Case 4: The child's prefix is fully matched
        # Solution: We insert the remaining word on the next node (an empty
        # remainder simply marks that node as leaf)
        child.insert(remaining_word)

    def find(self, word: str) -> bool:
        """Returns if the word is on the tree

        Args:
            word (str): word to check, relative to this node

        Returns:
            bool: True if the word appears on the tree

        >>> RadixNode("myprefix").find("mystring")
        False
        >>> RadixNode().find("")
        False
        """
        # An empty word ends on this very node
        if not word:
            return self.is_leaf

        child = self.nodes.get(word[0])
        if child is None:
            return False

        _, remaining_prefix, remaining_word = child.match(word)
        # If there is remaining prefix, the word can't be on the tree
        if remaining_prefix:
            return False

        return child.find(remaining_word)

    def delete(self, word: str) -> bool:
        """Deletes a word from the tree if it exists

        Restructuring (dropping an empty node, merging a node with its only
        child) is always applied to a child from its parent, so the node the
        user holds as root never changes its prefix.

        Args:
            word (str): word to be deleted, relative to this node

        Returns:
            bool: True if the word was found and deleted. False if word is not found

        >>> RadixNode("myprefix").delete("mystring")
        False
        >>> root = RadixNode()
        >>> root.insert_many(["a", "b"])
        >>> root.delete("a"), root.find("a"), root.find("b")
        (True, False, True)
        """
        # An empty word ends on this very node: just unmark it
        if not word:
            if not self.is_leaf:
                return False
            self.is_leaf = False
            return True

        child = self.nodes.get(word[0])
        if child is None:
            return False

        _, remaining_prefix, remaining_word = child.match(word)
        # If there is remaining prefix, the word can't be on the tree
        if remaining_prefix:
            return False

        if not child.delete(remaining_word):
            return False

        # Restore the radix-tree invariants for the child we descended into
        if not child.is_leaf:
            if not child.nodes:
                # No word ends here and nothing hangs below: drop the node
                del self.nodes[word[0]]
            elif len(child.nodes) == 1:
                # A pure pass-through node is merged with its only child
                child._merge_with_only_child()

        return True

    def _merge_with_only_child(self) -> None:
        """Absorb the single child of this node into this node."""
        (only_child,) = self.nodes.values()
        self.is_leaf = only_child.is_leaf
        self.prefix += only_child.prefix
        self.nodes = only_child.nodes

    def print_tree(self, height: int = 0) -> None:
        """Print the tree

        Args:
            height (int, optional): Height of the printed node
        """
        if self.prefix != "":
            print("-" * height, self.prefix, "  (leaf)" if self.is_leaf else "")

        for value in self.nodes.values():
            value.print_tree(height + 1)


def test_trie() -> bool:
    words = "banana bananas bandana band apple all beast".split()
    root = RadixNode()
    root.insert_many(words)

    assert all(root.find(word) for word in words)
    assert not root.find("bandanas")
    assert not root.find("apps")
    root.delete("all")
    assert not root.find("all")
    root.delete("banana")
    assert not root.find("banana")
    assert root.find("bananas")

    # Regression: re-inserting a word or inserting a word that ends on an
    # existing internal node must not raise
    root.insert("bananas")
    root.insert("ban")
    assert root.find("ban")

    # Regression: a remainder equal to the split prefix must not be lost
    other = RadixNode()
    other.insert_many(["abc", "abab"])
    assert other.find("abab")
    assert not other.find("ab")

    # Regression: the root must never be merged with its last child
    small = RadixNode()
    small.insert_many(["a", "b"])
    assert small.delete("a")
    assert small.find("b")

    return True


def pytests() -> None:
    assert test_trie()


def main() -> None:
    """
    >>> pytests()
    """
    root = RadixNode()
    words = "banana bananas bandanas bandana band apple all beast".split()
    root.insert_many(words)

    print("Words:", words)
    print("Tree:")
    root.print_tree()


if __name__ == "__main__":
    main()