From fafc86c9d6ad3d37f4ae8b220f3a1dea4226091f Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Thu, 2 Apr 2020 17:05:43 +0200 Subject: [PATCH 1/7] Promote some warnings in MBString Regex --- ext/mbstring/php_mbregex.c | 94 +++++++++------- ext/mbstring/tests/bug43994.phpt | 104 +++++++----------- ext/mbstring/tests/bug69151.phpt | 33 ++++-- ext/mbstring/tests/bug72164.phpt | 14 ++- ext/mbstring/tests/bug72399.phpt | 15 ++- ext/mbstring/tests/bug73532.phpt | 8 +- ext/mbstring/tests/bug77367.phpt | 8 +- ext/mbstring/tests/bug77418.phpt | 9 +- ext/mbstring/tests/empty_pattern.phpt | 21 +++- ext/mbstring/tests/mb_ereg1.phpt | 17 ++- ext/mbstring/tests/mb_ereg_search_setpos.phpt | 40 ++++--- 11 files changed, 200 insertions(+), 163 deletions(-) diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index b5bd79fbe4399..6f1f6f325dab5 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -26,6 +26,7 @@ #include "php_mbregex.h" #include "mbstring.h" #include "libmbfl/filters/mbfilter_utf8.h" +#include #include "php_onig_compat.h" /* must come prior to the oniguruma header */ #include @@ -591,8 +592,8 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionT /* }}} */ /* {{{ _php_mb_regex_init_options */ -static void -_php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval) +static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option, + OnigSyntaxType **syntax, uint32_t option_arg_num) { size_t n; char c; @@ -651,14 +652,16 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option *syntax = ONIG_SYNTAX_POSIX_EXTENDED; break; case 'e': - if (eval != NULL) *eval = 1; - break; + zend_argument_value_error(option_arg_num, "option 'e' is not supported"); + return false; default: + // TODO Unsupported ValueError break; } } if (option != NULL) *option|=optm; } + return true; } /* }}} */ @@ -900,6 +903,11 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) RETURN_THROWS(); } + if (arg_pattern_len == 0) { + zend_argument_value_error(1, "must not be empty"); + RETURN_THROWS(); + } + if (array != NULL) { array = zend_try_array_init(array); if (!array) { @@ -912,7 +920,8 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) string_len, php_mb_regex_get_mbctype_encoding() )) { - RETURN_FALSE; + zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype()); + RETURN_THROWS(); } options = MBREX(regex_default_options); @@ -920,12 +929,6 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) options |= ONIG_OPTION_IGNORECASE; } - if (arg_pattern_len == 0) { - php_error_docref(NULL, E_WARNING, "Empty pattern"); - RETVAL_FALSE; - goto out; - } - re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(regex_default_syntax)); if (re == NULL) { RETVAL_FALSE; @@ -1007,7 +1010,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp smart_str out_buf = {0}; smart_str eval_buf = {0}; smart_str *pbuf; - int err, eval, n; + int err, n; OnigUChar *pos; OnigUChar *string_lim; char *description = NULL; @@ -1015,7 +1018,6 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp const mbfl_encoding *enc = php_mb_regex_get_mbctype_encoding(); ZEND_ASSERT(enc != NULL); - eval = 0; { char *option_str = NULL; size_t option_str_len = 0; @@ -1039,28 +1041,25 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp } if (!php_mb_check_encoding(string, string_len, enc)) { - RETURN_NULL(); + zend_argument_value_error(3, "must be a valid string in '%s'", php_mb_regex_get_mbctype()); + RETURN_THROWS(); } if (option_str != NULL) { - _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval); + /* Initialize option and in case of failure it means there is a value error */ + if(!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, 4)) { + RETURN_THROWS(); + } } else { options |= MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); } } - if (eval) { - if (is_callable) { - php_error_docref(NULL, E_WARNING, "Option 'e' cannot be used with replacement callback"); - } else { - php_error_docref(NULL, E_WARNING, "The 'e' option is no longer supported, use mb_ereg_replace_callback instead"); - } - RETURN_FALSE; - } /* create regex pattern buffer */ re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, syntax); if (re == NULL) { + // Should this be considered an error instead? RETURN_FALSE; } @@ -1122,7 +1121,9 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp zval_ptr_dtor(&retval); } else { if (!EG(exception)) { - php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function"); + zend_throw_error(NULL, "Unable to call custom replacement function"); + zval_ptr_dtor(&subpats); + RETURN_THROWS(); } } zval_ptr_dtor(&subpats); @@ -1154,6 +1155,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp } smart_str_free(&eval_buf); + // Need to investigate if failure in Oniguruma and if should throw. if (err <= -2) { smart_str_free(&out_buf); RETVAL_FALSE; @@ -1210,11 +1212,13 @@ PHP_FUNCTION(mb_split) } if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) { - RETURN_FALSE; + zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype()); + RETURN_THROWS(); } /* create regex pattern buffer */ if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(regex_default_syntax))) == NULL) { + // TODO throw as invalid regex? RETURN_FALSE; } @@ -1251,6 +1255,7 @@ PHP_FUNCTION(mb_split) onig_region_free(regs, 1); /* see if we encountered an error */ + // ToDo investigate if this can actually/should happen ... if (err <= -2) { OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str(err_str, err); @@ -1295,7 +1300,9 @@ PHP_FUNCTION(mb_ereg_match) } if (option_str != NULL) { - _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL); + if(!_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, 3)) { + RETURN_THROWS(); + } } else { option |= MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); @@ -1303,10 +1310,12 @@ PHP_FUNCTION(mb_ereg_match) } if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) { - RETURN_FALSE; + zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype()); + RETURN_THROWS(); } if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { + // TODO throw as invalid regex? RETURN_FALSE; } @@ -1362,6 +1371,7 @@ static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mod if (arg_pattern) { /* create regex pattern buffer */ if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { + // TODO throw as invalid regex? RETURN_FALSE; } } @@ -1375,13 +1385,13 @@ static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mod } if (MBREX(search_re) == NULL) { - php_error_docref(NULL, E_WARNING, "No regex given"); - RETURN_FALSE; + zend_throw_error(NULL, "No pattern was provided"); + RETURN_THROWS(); } if (str == NULL) { - php_error_docref(NULL, E_WARNING, "No string given"); - RETURN_FALSE; + zend_throw_error(NULL, "No string was provided"); + RETURN_THROWS(); } MBREX(search_regs) = onig_region_new(); @@ -1480,8 +1490,8 @@ PHP_FUNCTION(mb_ereg_search_init) } if (arg_pattern && arg_pattern_len == 0) { - php_error_docref(NULL, E_WARNING, "Empty pattern"); - RETURN_FALSE; + zend_argument_value_error(2, "must not be empty"); + RETURN_THROWS(); } if (arg_options) { @@ -1495,6 +1505,7 @@ PHP_FUNCTION(mb_ereg_search_init) if (arg_pattern) { /* create regex pattern buffer */ if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { + // TODO throw as invalid regex? RETURN_FALSE; } } @@ -1510,9 +1521,13 @@ PHP_FUNCTION(mb_ereg_search_init) RETVAL_TRUE; } else { MBREX(search_pos) = ZSTR_LEN(arg_str); - RETVAL_FALSE; + zend_argument_value_error(1, "must be a valid string in '%s'", php_mb_regex_get_mbctype()); + RETURN_THROWS(); } + MBREX(search_pos) = 0; + RETVAL_TRUE; + if (MBREX(search_regs) != NULL) { onig_region_free(MBREX(search_regs), 1); MBREX(search_regs) = NULL; @@ -1557,6 +1572,7 @@ PHP_FUNCTION(mb_ereg_search_getregs) onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args); } } else { + // TODO This seems to be some logical error, promote to Error RETVAL_FALSE; } } @@ -1588,12 +1604,12 @@ PHP_FUNCTION(mb_ereg_search_setpos) } if (position < 0 || (!Z_ISUNDEF(MBREX(search_str)) && Z_TYPE(MBREX(search_str)) == IS_STRING && (size_t)position > Z_STRLEN(MBREX(search_str)))) { - php_error_docref(NULL, E_WARNING, "Position is out of range"); - MBREX(search_pos) = 0; - RETURN_FALSE; + zend_argument_value_error(1, "is out of range"); + RETURN_THROWS(); } MBREX(search_pos) = position; + // TODO Return void RETURN_TRUE; } /* }}} */ @@ -1628,7 +1644,9 @@ PHP_FUNCTION(mb_regex_set_options) if (string != NULL) { opt = 0; syntax = NULL; - _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL); + if(!_php_mb_regex_init_options(string, string_len, &opt, &syntax, 1)) { + RETURN_THROWS(); + } _php_mb_regex_set_options(opt, syntax, &prev_opt, &prev_syntax); opt = prev_opt; syntax = prev_syntax; diff --git a/ext/mbstring/tests/bug43994.phpt b/ext/mbstring/tests/bug43994.phpt index 96c862e6970cf..d41bf90a9fc83 100644 --- a/ext/mbstring/tests/bug43994.phpt +++ b/ext/mbstring/tests/bug43994.phpt @@ -25,106 +25,76 @@ foreach($inputs as $input) { } echo "\n-- Iteration $iterator --\n"; echo "Without \$regs arg:\n"; - var_dump( mb_ereg($input, 'hello, world') ); + try { + var_dump( mb_ereg($input, 'hello, world') ); + } catch (\ValueError $e) { + echo $e->getMessage() . \PHP_EOL; + } + echo "With \$regs arg:\n"; - var_dump(mb_ereg($input, 'hello, world', $mb_regs)); + try { + var_dump(mb_ereg($input, 'hello, world', $mb_regs)); + } catch (\ValueError $e) { + echo $e->getMessage() . \PHP_EOL; + } + var_dump($mb_regs); $iterator++; }; ?> ---EXPECTF-- +--EXPECT-- -- Iteration 1 -- Without $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) +mb_ereg(): Argument #1 ($pattern) must not be empty With $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) -array(0) { -} +mb_ereg(): Argument #1 ($pattern) must not be empty +NULL -- Iteration 2 -- Without $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) +mb_ereg(): Argument #1 ($pattern) must not be empty With $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) -array(0) { -} +mb_ereg(): Argument #1 ($pattern) must not be empty +NULL -- Iteration 3 -- Without $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) +mb_ereg(): Argument #1 ($pattern) must not be empty With $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) -array(0) { -} +mb_ereg(): Argument #1 ($pattern) must not be empty +NULL -- Iteration 4 -- Without $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) +mb_ereg(): Argument #1 ($pattern) must not be empty With $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) -array(0) { -} +mb_ereg(): Argument #1 ($pattern) must not be empty +NULL -- Iteration 5 -- Without $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) +mb_ereg(): Argument #1 ($pattern) must not be empty With $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) -array(0) { -} +mb_ereg(): Argument #1 ($pattern) must not be empty +NULL -- Iteration 6 -- Without $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) +mb_ereg(): Argument #1 ($pattern) must not be empty With $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) -array(0) { -} +mb_ereg(): Argument #1 ($pattern) must not be empty +NULL -- Iteration 7 -- Without $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) +mb_ereg(): Argument #1 ($pattern) must not be empty With $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) -array(0) { -} +mb_ereg(): Argument #1 ($pattern) must not be empty +NULL -- Iteration 8 -- Without $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) +mb_ereg(): Argument #1 ($pattern) must not be empty With $regs arg: - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) -array(0) { -} +mb_ereg(): Argument #1 ($pattern) must not be empty +NULL diff --git a/ext/mbstring/tests/bug69151.phpt b/ext/mbstring/tests/bug69151.phpt index a839e8aa74f5a..d3448cde6b604 100644 --- a/ext/mbstring/tests/bug69151.phpt +++ b/ext/mbstring/tests/bug69151.phpt @@ -8,17 +8,30 @@ if (!function_exists('mb_ereg')) die('skip mbregex support not available'); --FILE-- getMessage() . \PHP_EOL; +} +var_dump([] === $matches); + +try { + var_dump(NULL === mb_ereg_replace('.', "\\0", $str)); +} catch (\ValueError $e) { + echo $e->getMessage() . \PHP_EOL; +} + +try { + var_dump(false === mb_ereg_search_init("\x80", '.')); +} catch (\ValueError $e) { + echo $e->getMessage() . \PHP_EOL; +} +var_dump(false === mb_ereg_search()); ?> --EXPECT-- +mb_eregi(): Argument #2 ($string) must be a valid string in 'UTF-8' bool(true) -bool(true) -bool(true) -bool(true) +mb_ereg_replace(): Argument #3 ($string) must be a valid string in 'UTF-8' +mb_ereg_search_init(): Argument #1 ($string) must be a valid string in 'UTF-8' bool(true) diff --git a/ext/mbstring/tests/bug72164.phpt b/ext/mbstring/tests/bug72164.phpt index f90fe89938d2f..1e3f3c37e3a27 100644 --- a/ext/mbstring/tests/bug72164.phpt +++ b/ext/mbstring/tests/bug72164.phpt @@ -10,9 +10,13 @@ if (!function_exists('mb_ereg')) die('skip mbregex support not available'); $var0 = "e"; $var2 = ""; $var3 = NULL; -$var8 = mb_ereg_replace($var2,$var3,$var3,$var0); -var_dump($var8); +try { + $var8 = mb_ereg_replace($var2,$var3,$var3,$var0); + var_dump($var8); +} catch (\ValueError $e) { + echo $e->getMessage() . \PHP_EOL; +} + ?> ---EXPECTF-- -Warning: mb_ereg_replace(): The 'e' option is no longer supported, use mb_ereg_replace_callback instead in %s on line %d -bool(false) +--EXPECT-- +mb_ereg_replace(): Argument #4 ($option) option 'e' is not supported diff --git a/ext/mbstring/tests/bug72399.phpt b/ext/mbstring/tests/bug72399.phpt index b50ca7369801b..f5bd4396d30aa 100644 --- a/ext/mbstring/tests/bug72399.phpt +++ b/ext/mbstring/tests/bug72399.phpt @@ -8,8 +8,17 @@ if (!function_exists('mb_ereg')) die('skip mbregex support not available'); --FILE-- getMessage() . \PHP_EOL; +} ?> ---EXPECTF-- -Warning: mb_ereg_search_pos(): No regex given in %sbug72399.php on line %d +--EXPECT-- +bool(true) +string(0) "" +No pattern was provided diff --git a/ext/mbstring/tests/bug73532.phpt b/ext/mbstring/tests/bug73532.phpt index 7195fc0a689e7..87493d7d15770 100644 --- a/ext/mbstring/tests/bug73532.phpt +++ b/ext/mbstring/tests/bug73532.phpt @@ -7,7 +7,11 @@ if (!function_exists('mb_ereg')) die('skip mbregex support not available'); ?> --FILE-- getMessage() . \PHP_EOL; +} ?> --EXPECT-- -bool(false) +mb_eregi(): Argument #2 ($string) must be a valid string in 'UTF-8' diff --git a/ext/mbstring/tests/bug77367.phpt b/ext/mbstring/tests/bug77367.phpt index 4925852375e16..d38f9debad33c 100644 --- a/ext/mbstring/tests/bug77367.phpt +++ b/ext/mbstring/tests/bug77367.phpt @@ -8,7 +8,11 @@ if (!function_exists('mb_split')) die('skip mb_split() not available'); --FILE-- getMessage() . \PHP_EOL; +} ?> --EXPECT-- -bool(false) +mb_split(): Argument #2 ($string) must be a valid string in 'UTF-8' diff --git a/ext/mbstring/tests/bug77418.phpt b/ext/mbstring/tests/bug77418.phpt index 4e3130bdd1bc9..5c6c692a02402 100644 --- a/ext/mbstring/tests/bug77418.phpt +++ b/ext/mbstring/tests/bug77418.phpt @@ -8,7 +8,12 @@ if (!function_exists('mb_split')) die('skip mb_split() not available'); --FILE-- getMessage() . \PHP_EOL; +} ?> --EXPECT-- -bool(false) +mb_split(): Argument #2 ($string) must be a valid string in 'UCS-4' diff --git a/ext/mbstring/tests/empty_pattern.phpt b/ext/mbstring/tests/empty_pattern.phpt index 019ccba02c51d..c5834a6cad4eb 100644 --- a/ext/mbstring/tests/empty_pattern.phpt +++ b/ext/mbstring/tests/empty_pattern.phpt @@ -8,12 +8,21 @@ if (!function_exists('mb_ereg')) die('skip mbregex support not available'); --FILE-- getMessage() . \PHP_EOL; +} + mb_split("",""); -mb_ereg_search_regs(); -?> ---EXPECTF-- -Warning: mb_ereg_search_init(): Empty pattern in %s on line %d +try { + mb_ereg_search_regs(); +} catch (\Error $e) { + echo $e->getMessage() . \PHP_EOL; +} -Warning: mb_ereg_search_regs(): No regex given in %s on line %d +?> +--EXPECT-- +mb_ereg_search_init(): Argument #2 ($pattern) must not be empty +No pattern was provided diff --git a/ext/mbstring/tests/mb_ereg1.phpt b/ext/mbstring/tests/mb_ereg1.phpt index 5fa830fa63bfa..653df93ce0638 100644 --- a/ext/mbstring/tests/mb_ereg1.phpt +++ b/ext/mbstring/tests/mb_ereg1.phpt @@ -16,13 +16,13 @@ $a = array( foreach ($a as $args) { try { var_dump(mb_ereg($args[0], $args[1], $args[2])); - } catch (TypeError $e) { - echo $e->getMessage(), "\n"; + } catch (\TypeError|\ValueError $e) { + echo get_class($e) . ': ' . $e->getMessage() . \PHP_EOL; } var_dump($args); } ?> ---EXPECTF-- +--EXPECT-- bool(false) array(3) { [0]=> @@ -33,19 +33,16 @@ array(3) { array(0) { } } - -Warning: mb_ereg(): Empty pattern in %s on line %d -bool(false) +ValueError: mb_ereg(): Argument #1 ($pattern) must not be empty array(3) { [0]=> string(0) "" [1]=> string(0) "" [2]=> - array(0) { - } + string(0) "" } -mb_ereg(): Argument #1 ($pattern) must be of type string, array given +TypeError: mb_ereg(): Argument #1 ($pattern) must be of type string, array given array(3) { [0]=> array(0) { @@ -55,7 +52,7 @@ array(3) { [2]=> string(0) "" } -mb_ereg(): Argument #2 ($string) must be of type string, array given +TypeError: mb_ereg(): Argument #2 ($string) must be of type string, array given array(3) { [0]=> int(1) diff --git a/ext/mbstring/tests/mb_ereg_search_setpos.phpt b/ext/mbstring/tests/mb_ereg_search_setpos.phpt index 0a90d005dd8f7..3b73025489fc2 100644 --- a/ext/mbstring/tests/mb_ereg_search_setpos.phpt +++ b/ext/mbstring/tests/mb_ereg_search_setpos.phpt @@ -11,22 +11,32 @@ mb_regex_encoding('iso-8859-1'); $test_str = 'Iñtërnâtiônàlizætiøn'; // Length = 20 var_dump(mb_ereg_search_setpos(50)); // OK -var_dump(mb_ereg_search_setpos(-1)); // Error +try { + var_dump(mb_ereg_search_setpos(-1)); // Error +} catch (\ValueError $e) { + echo $e->getMessage() . \PHP_EOL; +} mb_ereg_search_init($test_str); $positions = array( 5, 20, 21, 25, 0, -5, -20, -30); foreach($positions as $pos) { echo("\n* Position: $pos :\n"); - var_dump(mb_ereg_search_setpos($pos)); - var_dump(mb_ereg_search_getpos()); + try { + var_dump(mb_ereg_search_setpos($pos)); + } catch (\ValueError $e) { + echo $e->getMessage() . \PHP_EOL; + } + try { + var_dump(mb_ereg_search_getpos()); + } catch (\ValueError $e) { + echo $e->getMessage() . \PHP_EOL; + } } ?> ---EXPECTF-- +--EXPECT-- bool(true) - -Warning: mb_ereg_search_setpos(): Position is out of range in %s on line %d -bool(false) +mb_ereg_search_setpos(): Argument #1 ($position) is out of range * Position: 5 : bool(true) @@ -37,16 +47,12 @@ bool(true) int(20) * Position: 21 : - -Warning: mb_ereg_search_setpos(): Position is out of range in %s on line %d -bool(false) -int(0) +mb_ereg_search_setpos(): Argument #1 ($position) is out of range +int(20) * Position: 25 : - -Warning: mb_ereg_search_setpos(): Position is out of range in %s on line %d -bool(false) -int(0) +mb_ereg_search_setpos(): Argument #1 ($position) is out of range +int(20) * Position: 0 : bool(true) @@ -61,7 +67,5 @@ bool(true) int(0) * Position: -30 : - -Warning: mb_ereg_search_setpos(): Position is out of range in %s on line %d -bool(false) +mb_ereg_search_setpos(): Argument #1 ($position) is out of range int(0) From 323cde712b440b4e7a6bf5b0b1df35b957581b56 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Thu, 6 Aug 2020 16:07:35 +0200 Subject: [PATCH 2/7] Reviews --- ext/mbstring/php_mbregex.c | 14 ++++++-------- ext/mbstring/tests/bug69151.phpt | 10 +++------- ext/mbstring/tests/bug72164.phpt | 2 +- ext/mbstring/tests/bug73532.phpt | 2 +- ext/mbstring/tests/bug76999.phpt | 8 ++++++-- 5 files changed, 17 insertions(+), 19 deletions(-) diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 6f1f6f325dab5..7607ccd9ede3d 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -652,10 +652,10 @@ static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOption *syntax = ONIG_SYNTAX_POSIX_EXTENDED; break; case 'e': - zend_argument_value_error(option_arg_num, "option 'e' is not supported"); + zend_value_error("Option \"e\" is not supported"); return false; default: - // TODO Unsupported ValueError + zend_value_error("Option \"%c\" is not supported", c); break; } } @@ -920,7 +920,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) string_len, php_mb_regex_get_mbctype_encoding() )) { - zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype()); + zend_argument_value_error(2, "must be a valid string in \"%s\" encoding", php_mb_regex_get_mbctype()); RETURN_THROWS(); } @@ -1041,8 +1041,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp } if (!php_mb_check_encoding(string, string_len, enc)) { - zend_argument_value_error(3, "must be a valid string in '%s'", php_mb_regex_get_mbctype()); - RETURN_THROWS(); + RETURN_NULL(); } if (option_str != NULL) { @@ -1059,7 +1058,6 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp /* create regex pattern buffer */ re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, syntax); if (re == NULL) { - // Should this be considered an error instead? RETURN_FALSE; } @@ -1357,7 +1355,7 @@ static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mod } if (arg_options) { - _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL); + _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, 2); } else { option |= MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); @@ -1496,7 +1494,7 @@ PHP_FUNCTION(mb_ereg_search_init) if (arg_options) { option = 0; - _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL); + _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, 3); } else { option = MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); diff --git a/ext/mbstring/tests/bug69151.phpt b/ext/mbstring/tests/bug69151.phpt index d3448cde6b604..eb3039b7994ea 100644 --- a/ext/mbstring/tests/bug69151.phpt +++ b/ext/mbstring/tests/bug69151.phpt @@ -16,11 +16,7 @@ try { } var_dump([] === $matches); -try { - var_dump(NULL === mb_ereg_replace('.', "\\0", $str)); -} catch (\ValueError $e) { - echo $e->getMessage() . \PHP_EOL; -} +var_dump(NULL === mb_ereg_replace('.', "\\0", $str)); try { var_dump(false === mb_ereg_search_init("\x80", '.')); @@ -30,8 +26,8 @@ try { var_dump(false === mb_ereg_search()); ?> --EXPECT-- -mb_eregi(): Argument #2 ($string) must be a valid string in 'UTF-8' +mb_eregi(): Argument #2 ($string) must be a valid string in "UTF-8" encoding +bool(true) bool(true) -mb_ereg_replace(): Argument #3 ($string) must be a valid string in 'UTF-8' mb_ereg_search_init(): Argument #1 ($string) must be a valid string in 'UTF-8' bool(true) diff --git a/ext/mbstring/tests/bug72164.phpt b/ext/mbstring/tests/bug72164.phpt index 1e3f3c37e3a27..eff18982d5ea5 100644 --- a/ext/mbstring/tests/bug72164.phpt +++ b/ext/mbstring/tests/bug72164.phpt @@ -19,4 +19,4 @@ try { ?> --EXPECT-- -mb_ereg_replace(): Argument #4 ($option) option 'e' is not supported +Option "e" is not supported diff --git a/ext/mbstring/tests/bug73532.phpt b/ext/mbstring/tests/bug73532.phpt index 87493d7d15770..9e4efbbbcaf78 100644 --- a/ext/mbstring/tests/bug73532.phpt +++ b/ext/mbstring/tests/bug73532.phpt @@ -14,4 +14,4 @@ try { } ?> --EXPECT-- -mb_eregi(): Argument #2 ($string) must be a valid string in 'UTF-8' +mb_eregi(): Argument #2 ($string) must be a valid string in "UTF-8" encoding diff --git a/ext/mbstring/tests/bug76999.phpt b/ext/mbstring/tests/bug76999.phpt index 5ba9f0e8e38b0..6457c60090ee8 100644 --- a/ext/mbstring/tests/bug76999.phpt +++ b/ext/mbstring/tests/bug76999.phpt @@ -11,12 +11,16 @@ mb_regex_set_options("pr"); var_dump(mb_regex_set_options("m")); var_dump(mb_regex_set_options("mdi")); var_dump(mb_regex_set_options("m")); -var_dump(mb_regex_set_options("a")); +try { + var_dump(mb_regex_set_options("a")); +} catch (\ValueError $e) { + echo $e->getMessage() . \PHP_EOL; +} var_dump(mb_regex_set_options()); ?> --EXPECT-- string(2) "pr" string(2) "mr" string(3) "imd" -string(2) "mr" +Option "a" is not supported string(1) "r" From 547be3f3f999847f1e1fc478e9ea977ccd8b5cfe Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Tue, 8 Sep 2020 15:46:29 +0200 Subject: [PATCH 3/7] Reviews --- ext/mbstring/php_mbregex.c | 18 +++++------------- ext/mbstring/tests/bug69151.phpt | 16 ++++------------ ext/mbstring/tests/bug73532.phpt | 8 ++------ ext/mbstring/tests/bug76999.phpt | 2 +- ext/mbstring/tests/bug77367.phpt | 8 ++------ ext/mbstring/tests/bug77418.phpt | 8 ++------ 6 files changed, 16 insertions(+), 44 deletions(-) diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 7607ccd9ede3d..ff852e777ec1f 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -26,7 +26,6 @@ #include "php_mbregex.h" #include "mbstring.h" #include "libmbfl/filters/mbfilter_utf8.h" -#include #include "php_onig_compat.h" /* must come prior to the oniguruma header */ #include @@ -656,7 +655,7 @@ static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOption return false; default: zend_value_error("Option \"%c\" is not supported", c); - break; + return false; } } if (option != NULL) *option|=optm; @@ -920,8 +919,7 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) string_len, php_mb_regex_get_mbctype_encoding() )) { - zend_argument_value_error(2, "must be a valid string in \"%s\" encoding", php_mb_regex_get_mbctype()); - RETURN_THROWS(); + RETURN_FALSE; } options = MBREX(regex_default_options); @@ -1046,7 +1044,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp if (option_str != NULL) { /* Initialize option and in case of failure it means there is a value error */ - if(!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, 4)) { + if (!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, 4)) { RETURN_THROWS(); } } else { @@ -1153,7 +1151,6 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp } smart_str_free(&eval_buf); - // Need to investigate if failure in Oniguruma and if should throw. if (err <= -2) { smart_str_free(&out_buf); RETVAL_FALSE; @@ -1210,8 +1207,7 @@ PHP_FUNCTION(mb_split) } if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) { - zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype()); - RETURN_THROWS(); + RETURN_FALSE; } /* create regex pattern buffer */ @@ -1519,13 +1515,9 @@ PHP_FUNCTION(mb_ereg_search_init) RETVAL_TRUE; } else { MBREX(search_pos) = ZSTR_LEN(arg_str); - zend_argument_value_error(1, "must be a valid string in '%s'", php_mb_regex_get_mbctype()); - RETURN_THROWS(); + RETURN_FALSE; } - MBREX(search_pos) = 0; - RETVAL_TRUE; - if (MBREX(search_regs) != NULL) { onig_region_free(MBREX(search_regs), 1); MBREX(search_regs) = NULL; diff --git a/ext/mbstring/tests/bug69151.phpt b/ext/mbstring/tests/bug69151.phpt index eb3039b7994ea..a91525c3067ff 100644 --- a/ext/mbstring/tests/bug69151.phpt +++ b/ext/mbstring/tests/bug69151.phpt @@ -9,25 +9,17 @@ if (!function_exists('mb_ereg')) die('skip mbregex support not available'); getMessage() . \PHP_EOL; -} +var_dump(false === mb_eregi('.', $str, $matches)); var_dump([] === $matches); var_dump(NULL === mb_ereg_replace('.', "\\0", $str)); -try { - var_dump(false === mb_ereg_search_init("\x80", '.')); -} catch (\ValueError $e) { - echo $e->getMessage() . \PHP_EOL; -} +var_dump(false === mb_ereg_search_init("\x80", '.')); var_dump(false === mb_ereg_search()); ?> --EXPECT-- -mb_eregi(): Argument #2 ($string) must be a valid string in "UTF-8" encoding bool(true) bool(true) -mb_ereg_search_init(): Argument #1 ($string) must be a valid string in 'UTF-8' +bool(true) +bool(true) bool(true) diff --git a/ext/mbstring/tests/bug73532.phpt b/ext/mbstring/tests/bug73532.phpt index 9e4efbbbcaf78..7195fc0a689e7 100644 --- a/ext/mbstring/tests/bug73532.phpt +++ b/ext/mbstring/tests/bug73532.phpt @@ -7,11 +7,7 @@ if (!function_exists('mb_ereg')) die('skip mbregex support not available'); ?> --FILE-- getMessage() . \PHP_EOL; -} +var_dump(mb_eregi("a", "\xf5")); ?> --EXPECT-- -mb_eregi(): Argument #2 ($string) must be a valid string in "UTF-8" encoding +bool(false) diff --git a/ext/mbstring/tests/bug76999.phpt b/ext/mbstring/tests/bug76999.phpt index 6457c60090ee8..b631a2041d796 100644 --- a/ext/mbstring/tests/bug76999.phpt +++ b/ext/mbstring/tests/bug76999.phpt @@ -23,4 +23,4 @@ string(2) "pr" string(2) "mr" string(3) "imd" Option "a" is not supported -string(1) "r" +string(2) "mr" diff --git a/ext/mbstring/tests/bug77367.phpt b/ext/mbstring/tests/bug77367.phpt index d38f9debad33c..4925852375e16 100644 --- a/ext/mbstring/tests/bug77367.phpt +++ b/ext/mbstring/tests/bug77367.phpt @@ -8,11 +8,7 @@ if (!function_exists('mb_split')) die('skip mb_split() not available'); --FILE-- getMessage() . \PHP_EOL; -} +var_dump(mb_split("\\w", "\xfc")); ?> --EXPECT-- -mb_split(): Argument #2 ($string) must be a valid string in 'UTF-8' +bool(false) diff --git a/ext/mbstring/tests/bug77418.phpt b/ext/mbstring/tests/bug77418.phpt index 5c6c692a02402..1f00371057db2 100644 --- a/ext/mbstring/tests/bug77418.phpt +++ b/ext/mbstring/tests/bug77418.phpt @@ -9,11 +9,7 @@ if (!function_exists('mb_split')) die('skip mb_split() not available'); getMessage() . \PHP_EOL; -} +var_dump(mb_split("\x00\x00\x00\x5c\x00\x00\x00B","000000000000000000000000000000")); ?> --EXPECT-- -mb_split(): Argument #2 ($string) must be a valid string in 'UCS-4' +bool(false) From c9e387946a7ddbbfbfdfac2b60e7352ca2d4c86e Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Tue, 8 Sep 2020 17:09:03 +0200 Subject: [PATCH 4/7] Drop Todo about invalid regex --- ext/mbstring/php_mbregex.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index ff852e777ec1f..deb2ed713a3ee 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -1212,7 +1212,6 @@ PHP_FUNCTION(mb_split) /* create regex pattern buffer */ if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(regex_default_syntax))) == NULL) { - // TODO throw as invalid regex? RETURN_FALSE; } @@ -1309,7 +1308,6 @@ PHP_FUNCTION(mb_ereg_match) } if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { - // TODO throw as invalid regex? RETURN_FALSE; } @@ -1365,7 +1363,6 @@ static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mod if (arg_pattern) { /* create regex pattern buffer */ if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { - // TODO throw as invalid regex? RETURN_FALSE; } } @@ -1499,7 +1496,6 @@ PHP_FUNCTION(mb_ereg_search_init) if (arg_pattern) { /* create regex pattern buffer */ if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { - // TODO throw as invalid regex? RETURN_FALSE; } } From 31a471d12b2d6857098c9909061df117e5e08257 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Tue, 8 Sep 2020 17:09:39 +0200 Subject: [PATCH 5/7] Use RETVAL_FALSE again instead of RETURN_FALSE --- ext/mbstring/php_mbregex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index deb2ed713a3ee..d8f34ac1377a3 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -1511,7 +1511,7 @@ PHP_FUNCTION(mb_ereg_search_init) RETVAL_TRUE; } else { MBREX(search_pos) = ZSTR_LEN(arg_str); - RETURN_FALSE; + RETVAL_FALSE; } if (MBREX(search_regs) != NULL) { From c0cb68990d431687e08453ef6a5dda5123438e7d Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Tue, 8 Sep 2020 17:24:15 +0200 Subject: [PATCH 6/7] Return False on invalid encoded string --- ext/mbstring/php_mbregex.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index d8f34ac1377a3..933e1110d2871 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -1303,8 +1303,7 @@ PHP_FUNCTION(mb_ereg_match) } if (!php_mb_check_encoding(string, string_len, php_mb_regex_get_mbctype_encoding())) { - zend_argument_value_error(2, "must be a valid string in '%s'", php_mb_regex_get_mbctype()); - RETURN_THROWS(); + RETURN_FALSE; } if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, syntax)) == NULL) { From fdac9bd5ccb3d8460883e39cb78613e13848a8cf Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Tue, 8 Sep 2020 17:26:27 +0200 Subject: [PATCH 7/7] Drop option_arg_num which now unused --- ext/mbstring/php_mbregex.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index 933e1110d2871..2b489b5515131 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -592,7 +592,7 @@ static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionT /* {{{ _php_mb_regex_init_options */ static bool _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option, - OnigSyntaxType **syntax, uint32_t option_arg_num) + OnigSyntaxType **syntax) { size_t n; char c; @@ -1044,7 +1044,7 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp if (option_str != NULL) { /* Initialize option and in case of failure it means there is a value error */ - if (!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, 4)) { + if (!_php_mb_regex_init_options(option_str, option_str_len, &options, &syntax)) { RETURN_THROWS(); } } else { @@ -1293,7 +1293,7 @@ PHP_FUNCTION(mb_ereg_match) } if (option_str != NULL) { - if(!_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, 3)) { + if(!_php_mb_regex_init_options(option_str, option_str_len, &option, &syntax)) { RETURN_THROWS(); } } else { @@ -1348,7 +1348,7 @@ static void _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mod } if (arg_options) { - _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, 2); + _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax); } else { option |= MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); @@ -1486,7 +1486,7 @@ PHP_FUNCTION(mb_ereg_search_init) if (arg_options) { option = 0; - _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, 3); + _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax); } else { option = MBREX(regex_default_options); syntax = MBREX(regex_default_syntax); @@ -1629,7 +1629,7 @@ PHP_FUNCTION(mb_regex_set_options) if (string != NULL) { opt = 0; syntax = NULL; - if(!_php_mb_regex_init_options(string, string_len, &opt, &syntax, 1)) { + if(!_php_mb_regex_init_options(string, string_len, &opt, &syntax)) { RETURN_THROWS(); } _php_mb_regex_set_options(opt, syntax, &prev_opt, &prev_syntax);