From a5fa16cae5fd6b681105be7d58fad79ffe2b5987 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 2 Feb 2023 21:30:54 +0100 Subject: [PATCH 1/2] Introduce convenience macros for copying flags that hold when concatenating two strings This abstracts away, and cleans up, the flag handling for properties of strings that hold when concatenating two strings if they both hold that property. (These macros also work with simply copies of strings because a copy of a string can be considered a concatenation with the empty string.) This gets rid of some branches and some repetitive code, and leaves room for adding more flags like these in the future. --- Zend/zend_operators.c | 6 +- Zend/zend_string.c | 8 +- Zend/zend_string.h | 21 +++++ Zend/zend_vm_def.h | 16 +--- Zend/zend_vm_execute.h | 138 ++++++----------------------- ext/opcache/jit/zend_jit_helpers.c | 18 +--- 6 files changed, 57 insertions(+), 150 deletions(-) diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 5a4c30168adef..1eaa11eccf793 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -1955,11 +1955,7 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2)); if (UNEXPECTED(op1_len > ZSTR_MAX_LEN - op2_len)) { zend_throw_error(NULL, "String size overflow"); diff --git a/Zend/zend_string.c b/Zend/zend_string.c index 2d6a30d37cbaa..d65101e778c14 100644 --- a/Zend/zend_string.c +++ b/Zend/zend_string.c @@ -191,11 +191,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_str return zend_interned_string_ht_lookup(str, &interned_strings_permanent); } -static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) { - uint32_t flags = 0; - if (ZSTR_IS_VALID_UTF8(str)) { - flags = IS_STR_VALID_UTF8; - } +static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) +{ + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(str); zend_ulong h = ZSTR_H(str); zend_string_delref(str); str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), persistent); diff --git a/Zend/zend_string.h b/Zend/zend_string.h index d7bca020c3e19..3eca7343a008b 100644 --- a/Zend/zend_string.h +++ b/Zend/zend_string.h @@ -82,6 +82,27 @@ END_EXTERN_C() #define ZSTR_IS_INTERNED(s) (GC_FLAGS(s) & IS_STR_INTERNED) #define ZSTR_IS_VALID_UTF8(s) (GC_FLAGS(s) & IS_STR_VALID_UTF8) +/* These are properties, encoded as flags, that will hold on the resulting string + * after concatenating two strings that have these property. + * Example: concatenating two UTF-8 strings yields another UTF-8 string. */ +#define ZSTR_COPYABLE_CONCAT_PROPERTIES (IS_STR_VALID_UTF8) + +#define ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(s) (GC_FLAGS(s) & ZSTR_COPYABLE_CONCAT_PROPERTIES) +/* This macro returns the copyable concat properties which hold on both strings. */ +#define ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(s1, s2) (GC_FLAGS(s1) & GC_FLAGS(s2) & ZSTR_COPYABLE_CONCAT_PROPERTIES) + +#define ZSTR_COPY_CONCAT_PROPERTIES(out, in) do { \ + zend_string *_out = (out); \ + uint32_t properties = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES((in)); \ + GC_ADD_FLAGS(_out, properties); \ +} while (0) + +#define ZSTR_COPY_CONCAT_PROPERTIES_BOTH(out, in1, in2) do { \ + zend_string *_out = (out); \ + uint32_t properties = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH((in1), (in2)); \ + GC_ADD_FLAGS(_out, properties); \ +} while (0) + #define ZSTR_EMPTY_ALLOC() zend_empty_string #define ZSTR_CHAR(c) zend_one_char_string[c] #define ZSTR_KNOWN(idx) zend_known_strings[idx] diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index ca062b9512ac8..91864e7a7273b 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -384,11 +384,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -3147,11 +3143,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) { @@ -3248,9 +3240,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (OP1_TYPE != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index d9dd9b5890241..ee249b51f308a 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -6624,11 +6624,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -6725,9 +6721,7 @@ static ZEND_VM_COLD ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_ memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -8702,11 +8696,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_TMPVAR_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -9134,11 +9124,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -9235,9 +9221,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_TMPVAR_ memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -11086,11 +11070,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CONST_CV_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11518,11 +11498,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CONST != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -11619,9 +11595,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CONST_CV_HAND memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CONST != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -15135,11 +15109,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CONST_HANDL zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15881,11 +15851,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -15982,9 +15948,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CONST_ memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -16590,11 +16554,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_TMPVAR_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -17336,11 +17296,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -17437,9 +17393,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_TMPVAR memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -18296,11 +18250,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_TMPVAR_CV_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18680,11 +18630,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -18781,9 +18727,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_TMPVAR_CV_HAN memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if ((IS_TMP_VAR|IS_VAR) != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -40356,11 +40300,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CONST_HANDLER(Z zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -42859,11 +42799,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CONST == IS_CONST || IS_CONST == IS_CV) { @@ -42960,9 +42896,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CONST_HAND memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -44186,11 +44120,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_TMPVAR_HANDLER( zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -46618,11 +46548,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if ((IS_TMP_VAR|IS_VAR) == IS_CONST || (IS_TMP_VAR|IS_VAR) == IS_CV) { @@ -46719,9 +46645,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_TMPVAR_HAN memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); @@ -49498,11 +49422,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CONCAT_SPEC_CV_CV_HANDLER(ZEND zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -52029,11 +51949,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER zend_string *op1_str = Z_STR_P(op1); zend_string *op2_str = Z_STR_P(op2); zend_string *str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str); if (IS_CV != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) { if (IS_CV == IS_CONST || IS_CV == IS_CV) { @@ -52130,9 +52046,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FAST_CONCAT_SPEC_CV_CV_HANDLER memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str)); memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1); - if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) { - GC_ADD_FLAGS(str, IS_STR_VALID_UTF8); - } + ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str); ZVAL_NEW_STR(EX_VAR(opline->result.var), str); if (IS_CV != IS_CONST) { zend_string_release_ex(op1_str, 0); diff --git a/ext/opcache/jit/zend_jit_helpers.c b/ext/opcache/jit/zend_jit_helpers.c index 261b456a4fc31..9c4b80e28513e 100644 --- a/ext/opcache/jit/zend_jit_helpers.c +++ b/ext/opcache/jit/zend_jit_helpers.c @@ -1633,11 +1633,7 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2)); if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1673,11 +1669,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_helper(zval *result, zval *op1, z size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2)); if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); @@ -1701,11 +1693,7 @@ static void ZEND_FASTCALL zend_jit_fast_concat_tmp_helper(zval *result, zval *op size_t op2_len = Z_STRLEN_P(op2); size_t result_len = op1_len + op2_len; zend_string *result_str; - uint32_t flags = 0; - - if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) { - flags = IS_STR_VALID_UTF8; - } + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2)); if (UNEXPECTED(op1_len > SIZE_MAX - op2_len)) { zend_throw_error(NULL, "String size overflow"); From d17f41c4fe5ef62f545a014d867e6486b10eacd5 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 3 Feb 2023 20:48:04 +0100 Subject: [PATCH 2/2] Copy UTF-8 flag for str_repeat --- ext/standard/string.c | 1 + ext/zend_test/tests/strings_marked_as_utf8.phpt | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/ext/standard/string.c b/ext/standard/string.c index 622ed3a9d4369..881e172c6c260 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -5059,6 +5059,7 @@ PHP_FUNCTION(str_repeat) /* Initialize the result string */ result = zend_string_safe_alloc(ZSTR_LEN(input_str), mult, 0, 0); result_len = ZSTR_LEN(input_str) * mult; + ZSTR_COPY_CONCAT_PROPERTIES(result, input_str); /* Heavy optimization for situations where input string is 1 byte long */ if (ZSTR_LEN(input_str) == 1) { diff --git a/ext/zend_test/tests/strings_marked_as_utf8.phpt b/ext/zend_test/tests/strings_marked_as_utf8.phpt index 5b6dfb6a0763d..1519459f1dad1 100644 --- a/ext/zend_test/tests/strings_marked_as_utf8.phpt +++ b/ext/zend_test/tests/strings_marked_as_utf8.phpt @@ -107,6 +107,14 @@ $s = $o . $o; var_dump($s); var_dump(zend_test_is_string_marked_as_valid_utf8($s)); +echo "str_repeat:\n"; +$string = "a"; +$string_concat = str_repeat($string, 100); +var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat)); +$string = "\xff"; +$string_concat = str_repeat($string, 100); +var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat)); + ?> --EXPECT-- Empty strings: @@ -148,3 +156,6 @@ bool(true) Concatenation of objects: string(2) "zz" bool(true) +str_repeat: +bool(true) +bool(false)