Skip to content

Commit 10590f6

Browse files
committed
Locally intern strings in token_get_all()
1 parent 24e5967 commit 10590f6

File tree

1 file changed

+27
-11
lines changed

1 file changed

+27
-11
lines changed

ext/tokenizer/tokenizer.c

Lines changed: 27 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -320,22 +320,31 @@ PHP_MINFO_FUNCTION(tokenizer)
320320
}
321321
/* }}} */
322322

323-
static inline zend_string *make_str(unsigned char *text, size_t leng) {
323+
static zend_string *make_str(unsigned char *text, size_t leng, HashTable *interned_strings) {
324324
if (leng == 1) {
325325
return ZSTR_CHAR(text[0]);
326+
} else if (interned_strings) {
327+
zend_string *interned_str = zend_hash_str_find_ptr(interned_strings, (char *) text, leng);
328+
if (interned_str) {
329+
return zend_string_copy(interned_str);
330+
}
331+
interned_str = zend_string_init((char *) text, leng, 0);
332+
zend_hash_add_new_ptr(interned_strings, interned_str, interned_str);
333+
return interned_str;
326334
} else {
327335
return zend_string_init((char *) text, leng, 0);
328336
}
329337
}
330338

331-
static void add_token(zval *return_value, int token_type,
332-
unsigned char *text, size_t leng, int lineno, zend_class_entry *token_class) {
339+
static void add_token(
340+
zval *return_value, int token_type, unsigned char *text, size_t leng, int lineno,
341+
zend_class_entry *token_class, HashTable *interned_strings) {
333342
zval token;
334343
if (token_class) {
335344
zend_object *obj = zend_objects_new(token_class);
336345
ZVAL_OBJ(&token, obj);
337346
ZVAL_LONG(OBJ_PROP_NUM(obj, 0), token_type);
338-
ZVAL_STR(OBJ_PROP_NUM(obj, 1), make_str(text, leng));
347+
ZVAL_STR(OBJ_PROP_NUM(obj, 1), make_str(text, leng, interned_strings));
339348
ZVAL_LONG(OBJ_PROP_NUM(obj, 2), lineno);
340349
ZVAL_LONG(OBJ_PROP_NUM(obj, 3), text - LANG_SCNG(yy_start));
341350

@@ -352,10 +361,10 @@ static void add_token(zval *return_value, int token_type,
352361
} else if (token_type >= 256) {
353362
array_init(&token);
354363
add_next_index_long(&token, token_type);
355-
add_next_index_str(&token, make_str(text, leng));
364+
add_next_index_str(&token, make_str(text, leng, interned_strings));
356365
add_next_index_long(&token, lineno);
357366
} else {
358-
ZVAL_STR(&token, make_str(text, leng));
367+
ZVAL_STR(&token, make_str(text, leng, interned_strings));
359368
}
360369
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &token);
361370
}
@@ -368,6 +377,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
368377
int token_type;
369378
int token_line = 1;
370379
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
380+
HashTable interned_strings;
371381

372382
ZVAL_STR_COPY(&source_zval, source);
373383
zend_save_lexical_state(&original_lex_state);
@@ -378,10 +388,13 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
378388
}
379389

380390
LANG_SCNG(yy_state) = yycINITIAL;
391+
zend_hash_init(&interned_strings, 0, NULL, NULL, 0);
381392
array_init(return_value);
382393

383394
while ((token_type = lex_scan(&token, NULL))) {
384-
add_token(return_value, token_type, zendtext, zendleng, token_line, token_class);
395+
add_token(
396+
return_value, token_type, zendtext, zendleng, token_line,
397+
token_class, &interned_strings);
385398

386399
if (Z_TYPE(token) != IS_UNDEF) {
387400
zval_ptr_dtor_nogc(&token);
@@ -396,8 +409,9 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
396409
) {
397410
/* fetch the rest into a T_INLINE_HTML */
398411
if (zendcursor != zendlimit) {
399-
add_token(return_value, T_INLINE_HTML,
400-
zendcursor, zendlimit - zendcursor, token_line, token_class);
412+
add_token(
413+
return_value, T_INLINE_HTML, zendcursor, zendlimit - zendcursor,
414+
token_line, token_class, &interned_strings);
401415
}
402416
break;
403417
}
@@ -415,6 +429,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
415429

416430
zval_ptr_dtor_str(&source_zval);
417431
zend_restore_lexical_state(&original_lex_state);
432+
zend_hash_destroy(&interned_strings);
418433

419434
return 1;
420435
}
@@ -440,7 +455,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
440455
token = T_OPEN_TAG_WITH_ECHO;
441456
}
442457
add_token(ctx->tokens, token,
443-
LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->token_class);
458+
LANG_SCNG(yy_text), LANG_SCNG(yy_leng), line, ctx->token_class, NULL);
444459
break;
445460
case ON_FEEDBACK:
446461
tokens_ht = Z_ARRVAL_P(ctx->tokens);
@@ -455,7 +470,8 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
455470
case ON_STOP:
456471
if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) {
457472
add_token(ctx->tokens, T_INLINE_HTML, LANG_SCNG(yy_cursor),
458-
LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor), CG(zend_lineno), ctx->token_class);
473+
LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor), CG(zend_lineno),
474+
ctx->token_class, NULL);
459475
}
460476
break;
461477
}

0 commit comments

Comments
 (0)