@@ -53,7 +53,7 @@ def find_choices(
5353 synonyms .append ( SortedValue (value = synonym , index = index ) )
5454
5555 # Find synonyms in utterance and map back to their choices_list
56- # WRITE FindValues()!!
56+ return Find . _find_values ( utterance , synonyms , options )
5757
5858 @staticmethod
5959 def _find_values (
@@ -86,10 +86,63 @@ def _find_values(
8686 searched_tokens = tokenizer (entry .value .strip (), opt .locale )
8787
8888 while start_pos < len (tokens ):
89- # match =
90- # write match_value
91- pass
92-
89+ match : Union [ModelResult , None ] = Find ._match_value (
90+ tokens ,
91+ max_distance ,
92+ opt ,
93+ entry .index ,
94+ entry .value ,
95+ searched_tokens ,
96+ start_pos
97+ )
98+
99+ if match != None :
100+ start_pos = match .end + 1
101+ matches .append (match )
102+ else :
103+ break
104+
105+ # Sort matches by score descending
106+ sorted_matches = sorted (
107+ matches ,
108+ key = lambda model_result : model_result .resolution .score ,
109+ reverse = True
110+ )
111+
112+ # Filter out duplicate matching indexes and overlapping characters
113+ # - The start & end positions are token positions and need to be translated to
114+ # character positions before returning. The "text" field must also be
115+ # populated from the utterance.
116+ results : List [ModelResult ] = []
117+ found_indexes = set ()
118+ used_tokens = set ()
119+
120+ for match in sorted_matches :
121+ # Apply filters.
122+ add = match .resolution .index not in found_indexes
123+
124+ for i in range (match .start , match .end + 1 ):
125+ if i in used_tokens :
126+ add = False
127+ break
128+
129+ # Add to results
130+ if add :
131+ # Update filter info
132+ found_indexes .add (match .resolution .index )
133+
134+ for i in range (match .start , match .end + 1 ):
135+ used_tokens .add (i )
136+
137+ # Translate start & end and populate text field
138+ match .start = tokens [match .start ].start
139+ match .end = tokens [match .end ].end
140+ match .text = utterance [match .start : match .end + 1 ]
141+ results .append (match )
142+
143+ # Return the results sorted by position in the utterance
144+ return sorted (results , key = lambda model_result : model_result .start )
145+
93146 @staticmethod
94147 def _match_value (
95148 source_tokens : List [Token ],
@@ -99,7 +152,7 @@ def _match_value(
99152 value : str ,
100153 searched_tokens : List [Token ],
101154 start_pos : int
102- ) -> ModelResult :
155+ ) -> Union [ ModelResult , None ] :
103156 # Match value to utterance and calculate total deviation.
104157 # - The tokens are matched in order so "second last" will match in
105158 # "the second from last one" but not in "the last from the second one".
@@ -154,7 +207,7 @@ def _match_value(
154207
155208 # Format result
156209 result = ModelResult (
157- text = 'FILLER - FIND ACTUAL TEXT TO PLACE ' ,
210+ text = '' ,
158211 start = start ,
159212 end = end ,
160213 type_name = "value" ,
0 commit comments