@@ -161,34 +161,22 @@ def __tregex_compile(self, pattern):
161161 def __compile_cpython_sre (self ):
162162 if not self .__compiled_sre_pattern :
163163 import _cpython_sre
164- self .__compiled_sre_pattern = _cpython_sre .compile (self ._emit ( self . pattern ) , self .flags , self .code , self .num_groups , self .groupindex , self .indexgroup )
164+ self .__compiled_sre_pattern = _cpython_sre .compile (self .pattern , self .flags , self .code , self .num_groups , self .groupindex , self .indexgroup )
165165 return self .__compiled_sre_pattern
166166
167167
168- def _decode_string (self , string , flags = 0 ):
168+ def _decode_pattern (self , string , flags = 0 ):
169169 if isinstance (string , str ):
170+ # TODO: fix this in the regex engine
171+ pattern = string .replace (r'\"' , '"' ).replace (r"\'" , "'" )
172+
173+ # TODO: that's not nearly complete but should be sufficient for now
174+ from sre_compile import SRE_FLAG_VERBOSE
175+ if flags & SRE_FLAG_VERBOSE :
176+ pattern = tregex_preprocess_for_verbose (pattern )
177+ return tregex_preprocess_default (pattern )
178+ else :
170179 return string
171- elif isinstance (string , bytes ):
172- return string .decode ()
173- elif isinstance (string , bytearray ):
174- return string .decode ()
175- elif isinstance (string , memoryview ):
176- # return bytes(string).decode()
177- raise TypeError ("'memoryview' is currently unsupported as search pattern" )
178- raise TypeError ("invalid search pattern {!r}" .format (string ))
179-
180-
181- def _decode_pattern (self , string , flags = 0 ):
182- pattern = self ._decode_string (string , flags )
183-
184- # TODO: fix this in the regex engine
185- pattern = pattern .replace (r'\"' , '"' ).replace (r"\'" , "'" )
186-
187- # TODO: that's not nearly complete but should be sufficient for now
188- from sre_compile import SRE_FLAG_VERBOSE
189- if flags & SRE_FLAG_VERBOSE :
190- pattern = tregex_preprocess_for_verbose (pattern )
191- return tregex_preprocess_default (pattern )
192180
193181
194182 def __repr__ (self ):
@@ -210,7 +198,6 @@ def __repr__(self):
210198
211199 def _search (self , pattern , string , pos , endpos ):
212200 pattern = self .__tregex_compile (pattern )
213- string = self ._decode_string (string )
214201 if endpos == - 1 or endpos >= len (string ):
215202 result = tregex_call_safe (pattern .exec , string , pos )
216203 else :
@@ -227,29 +214,33 @@ def search(self, string, pos=0, endpos=None):
227214 return self .__compile_cpython_sre ().search (string , pos , default (endpos , maxsize ()))
228215
229216 def match (self , string , pos = 0 , endpos = None ):
230- try :
231- if not self .pattern .startswith ("^" ):
232- return self ._search ("^" + self .pattern , string , pos , default (endpos , - 1 ))
233- else :
234- return self ._search (self .pattern , string , pos , default (endpos , - 1 ))
235- except RuntimeError :
236- return self .__compile_cpython_sre ().match (string , pos , default (endpos , maxsize ()))
217+ pattern = self .pattern
218+ if isinstance (pattern , str ):
219+ try :
220+ if not pattern .startswith ("^" ):
221+ return self ._search ("^" + pattern , string , pos , default (endpos , - 1 ))
222+ else :
223+ return self ._search (pattern , string , pos , default (endpos , - 1 ))
224+ except RuntimeError :
225+ pass
226+ return self .__compile_cpython_sre ().match (string , pos , default (endpos , maxsize ()))
237227
238228 def fullmatch (self , string , pos = 0 , endpos = None ):
239- try :
240- pattern = self .pattern
241- if not pattern .startswith ("^" ):
242- pattern = "^" + pattern
243- if not pattern .endswith ("$" ):
244- pattern = pattern + "$"
245- return self ._search (pattern , string , pos , default (endpos , - 1 ))
246- except RuntimeError :
247- return self .__compile_cpython_sre ().fullmatch (string , pos , default (endpos , maxsize ()))
229+ pattern = self .pattern
230+ if isinstance (pattern , str ):
231+ try :
232+ if not pattern .startswith ("^" ):
233+ pattern = "^" + pattern
234+ if not pattern .endswith ("$" ):
235+ pattern = pattern + "$"
236+ return self ._search (pattern , string , pos , default (endpos , - 1 ))
237+ except RuntimeError :
238+ pass
239+ return self .__compile_cpython_sre ().fullmatch (string , pos , default (endpos , maxsize ()))
248240
249241 def findall (self , string , pos = 0 , endpos = - 1 ):
250242 try :
251243 pattern = self .__tregex_compile (self .pattern )
252- string = self ._decode_string (string )
253244 if endpos > len (string ):
254245 endpos = len (string )
255246 elif endpos < 0 :
@@ -281,9 +272,9 @@ def group(match_result, group_nr, string):
281272 return string [group_start :group_end ]
282273
283274 n = len (repl )
284- result = self . _emit ( "" )
275+ result = ""
285276 start = 0
286- backslash = self . _emit ( '\\ ' )
277+ backslash = '\\ '
287278 pos = repl .find (backslash , start )
288279 while pos != - 1 and start < n :
289280 if pos + 1 < n :
@@ -292,15 +283,15 @@ def group(match_result, group_nr, string):
292283 group_str = group (match_result , group_nr , string )
293284 if group_str is None :
294285 raise ValueError ("invalid group reference %s at position %s" % (group_nr , pos ))
295- result += repl [start :pos ] + self . _emit ( group_str )
286+ result += repl [start :pos ] + group_str
296287 start = pos + 2
297288 elif repl [pos + 1 ] == 'g' :
298289 group_ref , group_ref_end , digits_only = self .__extract_groupname (repl , pos + 2 )
299290 if group_ref :
300291 group_str = group (match_result , int (group_ref ) if digits_only else pattern .groups [group_ref ], string )
301292 if group_str is None :
302293 raise ValueError ("invalid group reference %s at position %s" % (group_ref , pos ))
303- result += repl [start :pos ] + self . _emit ( group_str )
294+ result += repl [start :pos ] + group_str
304295 start = group_ref_end + 1
305296 elif repl [pos + 1 ] == backslash :
306297 result += repl [start :pos ] + backslash
@@ -331,40 +322,33 @@ def sub(self, repl, string, count=0):
331322 n = 0
332323 try :
333324 pattern = self .__tregex_compile (self .pattern )
334- decoded_string = self ._decode_string (string )
335325 result = []
336326 pos = 0
337327 is_string_rep = isinstance (repl , str ) or isinstance (repl , bytes ) or isinstance (repl , bytearray )
338328 if is_string_rep :
339329 repl = _process_escape_sequences (repl )
340330 progress = True
341- while (count == 0 or n < count ) and pos <= len (decoded_string ) and progress :
342- match_result = tregex_call_safe (pattern .exec , decoded_string , pos )
331+ while (count == 0 or n < count ) and pos <= len (string ) and progress :
332+ match_result = tregex_call_safe (pattern .exec , string , pos )
343333 if not match_result .isMatch :
344334 break
345335 n += 1
346336 start = match_result .start [0 ]
347337 end = match_result .end [0 ]
348- result .append (self . _emit ( decoded_string [pos :start ]) )
338+ result .append (string [pos :start ])
349339 if is_string_rep :
350- result .append (self .__replace_groups (repl , decoded_string , match_result , pattern ))
340+ result .append (self .__replace_groups (repl , string , match_result , pattern ))
351341 else :
352342 _srematch = SRE_Match (self , pos , - 1 , match_result )
353343 _repl = repl (_srematch )
354344 result .append (_repl )
355345 pos = end
356346 progress = (start != end )
357- result .append (self . _emit ( decoded_string [pos :]) )
358- return self . _emit ( "" ) .join (result )
347+ result .append (string [pos :])
348+ return "" .join (result )
359349 except BaseException :
360350 return self .__compile_cpython_sre ().sub (repl , string , count )
361351
362- def _emit (self , str_like_obj ):
363- assert isinstance (str_like_obj , str ) or isinstance (str_like_obj , bytes )
364- if self .__was_bytes != isinstance (str_like_obj , bytes ):
365- return str_like_obj .encode ()
366- return str_like_obj
367-
368352
369353compile = SRE_Pattern
370354
0 commit comments