@@ -320,22 +320,31 @@ PHP_MINFO_FUNCTION(tokenizer)
320
320
}
321
321
/* }}} */
322
322
323
- static inline zend_string * make_str (unsigned char * text , size_t leng ) {
323
+ static zend_string * make_str (unsigned char * text , size_t leng , HashTable * interned_strings ) {
324
324
if (leng == 1 ) {
325
325
return ZSTR_CHAR (text [0 ]);
326
+ } else if (interned_strings ) {
327
+ zend_string * interned_str = zend_hash_str_find_ptr (interned_strings , (char * ) text , leng );
328
+ if (interned_str ) {
329
+ return zend_string_copy (interned_str );
330
+ }
331
+ interned_str = zend_string_init ((char * ) text , leng , 0 );
332
+ zend_hash_add_new_ptr (interned_strings , interned_str , interned_str );
333
+ return interned_str ;
326
334
} else {
327
335
return zend_string_init ((char * ) text , leng , 0 );
328
336
}
329
337
}
330
338
331
- static void add_token (zval * return_value , int token_type ,
332
- unsigned char * text , size_t leng , int lineno , zend_class_entry * token_class ) {
339
+ static void add_token (
340
+ zval * return_value , int token_type , unsigned char * text , size_t leng , int lineno ,
341
+ zend_class_entry * token_class , HashTable * interned_strings ) {
333
342
zval token ;
334
343
if (token_class ) {
335
344
zend_object * obj = zend_objects_new (token_class );
336
345
ZVAL_OBJ (& token , obj );
337
346
ZVAL_LONG (OBJ_PROP_NUM (obj , 0 ), token_type );
338
- ZVAL_STR (OBJ_PROP_NUM (obj , 1 ), make_str (text , leng ));
347
+ ZVAL_STR (OBJ_PROP_NUM (obj , 1 ), make_str (text , leng , interned_strings ));
339
348
ZVAL_LONG (OBJ_PROP_NUM (obj , 2 ), lineno );
340
349
ZVAL_LONG (OBJ_PROP_NUM (obj , 3 ), text - LANG_SCNG (yy_start ));
341
350
@@ -352,10 +361,10 @@ static void add_token(zval *return_value, int token_type,
352
361
} else if (token_type >= 256 ) {
353
362
array_init (& token );
354
363
add_next_index_long (& token , token_type );
355
- add_next_index_str (& token , make_str (text , leng ));
364
+ add_next_index_str (& token , make_str (text , leng , interned_strings ));
356
365
add_next_index_long (& token , lineno );
357
366
} else {
358
- ZVAL_STR (& token , make_str (text , leng ));
367
+ ZVAL_STR (& token , make_str (text , leng , interned_strings ));
359
368
}
360
369
zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & token );
361
370
}
@@ -368,6 +377,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
368
377
int token_type ;
369
378
int token_line = 1 ;
370
379
int need_tokens = -1 ; /* for __halt_compiler lexing. -1 = disabled */
380
+ HashTable interned_strings ;
371
381
372
382
ZVAL_STR_COPY (& source_zval , source );
373
383
zend_save_lexical_state (& original_lex_state );
@@ -378,10 +388,13 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
378
388
}
379
389
380
390
LANG_SCNG (yy_state ) = yycINITIAL ;
391
+ zend_hash_init (& interned_strings , 0 , NULL , NULL , 0 );
381
392
array_init (return_value );
382
393
383
394
while ((token_type = lex_scan (& token , NULL ))) {
384
- add_token (return_value , token_type , zendtext , zendleng , token_line , token_class );
395
+ add_token (
396
+ return_value , token_type , zendtext , zendleng , token_line ,
397
+ token_class , & interned_strings );
385
398
386
399
if (Z_TYPE (token ) != IS_UNDEF ) {
387
400
zval_ptr_dtor_nogc (& token );
@@ -396,8 +409,9 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
396
409
) {
397
410
/* fetch the rest into a T_INLINE_HTML */
398
411
if (zendcursor != zendlimit ) {
399
- add_token (return_value , T_INLINE_HTML ,
400
- zendcursor , zendlimit - zendcursor , token_line , token_class );
412
+ add_token (
413
+ return_value , T_INLINE_HTML , zendcursor , zendlimit - zendcursor ,
414
+ token_line , token_class , & interned_strings );
401
415
}
402
416
break ;
403
417
}
@@ -415,6 +429,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
415
429
416
430
zval_ptr_dtor_str (& source_zval );
417
431
zend_restore_lexical_state (& original_lex_state );
432
+ zend_hash_destroy (& interned_strings );
418
433
419
434
return 1 ;
420
435
}
@@ -440,7 +455,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
440
455
token = T_OPEN_TAG_WITH_ECHO ;
441
456
}
442
457
add_token (ctx -> tokens , token ,
443
- LANG_SCNG (yy_text ), LANG_SCNG (yy_leng ), line , ctx -> token_class );
458
+ LANG_SCNG (yy_text ), LANG_SCNG (yy_leng ), line , ctx -> token_class , NULL );
444
459
break ;
445
460
case ON_FEEDBACK :
446
461
tokens_ht = Z_ARRVAL_P (ctx -> tokens );
@@ -455,7 +470,8 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
455
470
case ON_STOP :
456
471
if (LANG_SCNG (yy_cursor ) != LANG_SCNG (yy_limit )) {
457
472
add_token (ctx -> tokens , T_INLINE_HTML , LANG_SCNG (yy_cursor ),
458
- LANG_SCNG (yy_limit ) - LANG_SCNG (yy_cursor ), CG (zend_lineno ), ctx -> token_class );
473
+ LANG_SCNG (yy_limit ) - LANG_SCNG (yy_cursor ), CG (zend_lineno ),
474
+ ctx -> token_class , NULL );
459
475
}
460
476
break ;
461
477
}
0 commit comments