2929#include "ext/standard/php_string.h"
3030
3131#include "intl_error.h"
32- #include "intl_convert.h"
32+ #include "intl_convert.h"
3333/* }}} */
3434
/* UIDNA_INFO_INITIALIZER is used as the feature probe for the UTS #46 API
 * (introduced in ICU 4.6): when it is defined, the newer API is available. */
#if defined(UIDNA_INFO_INITIALIZER)
# define HAVE_46_API 1
#endif

/* Internal identifiers for the IDNA implementation selected by the caller. */
enum {
	INTL_IDN_VARIANT_2003  = 0,
	INTL_IDN_VARIANT_UTS46 = 1
};
43+
/* {{{ idn_register_constants
 * Register the IDNA option, variant and error constants
 */
3847void idn_register_constants ( INIT_FUNC_ARGS )
3948{
49+ /* OPTIONS */
50+
4051 /* Option to prohibit processing of unassigned codepoints in the input and
4152 do not check if the input conforms to STD-3 ASCII rules. */
4253 REGISTER_LONG_CONSTANT ("IDNA_DEFAULT" , UIDNA_DEFAULT , CONST_CS | CONST_PERSISTENT );
@@ -46,6 +57,50 @@ void idn_register_constants( INIT_FUNC_ARGS )
4657
4758 /* Option to check if input conforms to STD-3 ASCII rules */
4859 REGISTER_LONG_CONSTANT ("IDNA_USE_STD3_RULES" , UIDNA_USE_STD3_RULES , CONST_CS | CONST_PERSISTENT );
60+
61+ #ifdef HAVE_46_API
62+
63+ /* Option to check for whether the input conforms to the BiDi rules.
64+ * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
65+ REGISTER_LONG_CONSTANT ("IDNA_CHECK_BIDI" , UIDNA_CHECK_BIDI , CONST_CS | CONST_PERSISTENT );
66+
67+ /* Option to check for whether the input conforms to the CONTEXTJ rules.
68+ * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
69+ REGISTER_LONG_CONSTANT ("IDNA_CHECK_CONTEXTJ" , UIDNA_CHECK_CONTEXTJ , CONST_CS | CONST_PERSISTENT );
70+
71+ /* Option for nontransitional processing in ToASCII().
72+ * By default, ToASCII() uses transitional processing.
73+ * Ignored by the IDNA2003 implementation. */
74+ REGISTER_LONG_CONSTANT ("IDNA_NONTRANSITIONAL_TO_ASCII" , UIDNA_NONTRANSITIONAL_TO_ASCII , CONST_CS | CONST_PERSISTENT );
75+
76+ /* Option for nontransitional processing in ToUnicode().
77+ * By default, ToUnicode() uses transitional processing.
78+ * Ignored by the IDNA2003 implementation. */
79+ REGISTER_LONG_CONSTANT ("IDNA_NONTRANSITIONAL_TO_UNICODE" , UIDNA_NONTRANSITIONAL_TO_UNICODE , CONST_CS | CONST_PERSISTENT );
80+ #endif
81+
82+ /* VARIANTS */
83+ REGISTER_LONG_CONSTANT ("INTL_IDNA_VARIANT_2003" , INTL_IDN_VARIANT_2003 , CONST_CS | CONST_PERSISTENT );
84+ #ifdef HAVE_46_API
85+ REGISTER_LONG_CONSTANT ("INTL_IDNA_VARIANT_UTS46" , INTL_IDN_VARIANT_UTS46 , CONST_CS | CONST_PERSISTENT );
86+ #endif
87+
88+ #ifdef HAVE_46_API
89+ /* PINFO ERROR CODES */
90+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_EMPTY_LABEL" , UIDNA_ERROR_EMPTY_LABEL , CONST_CS | CONST_PERSISTENT );
91+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_LABEL_TOO_LONG" , UIDNA_ERROR_LABEL_TOO_LONG , CONST_CS | CONST_PERSISTENT );
92+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_DOMAIN_NAME_TOO_LONG" , UIDNA_ERROR_DOMAIN_NAME_TOO_LONG , CONST_CS | CONST_PERSISTENT );
93+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_LEADING_HYPHEN" , UIDNA_ERROR_LEADING_HYPHEN , CONST_CS | CONST_PERSISTENT );
94+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_TRAILING_HYPHEN" , UIDNA_ERROR_TRAILING_HYPHEN , CONST_CS | CONST_PERSISTENT );
95+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_HYPHEN_3_4" , UIDNA_ERROR_HYPHEN_3_4 , CONST_CS | CONST_PERSISTENT );
96+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_LEADING_COMBINING_MARK" , UIDNA_ERROR_LEADING_COMBINING_MARK , CONST_CS | CONST_PERSISTENT );
97+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_DISALLOWED" , UIDNA_ERROR_DISALLOWED , CONST_CS | CONST_PERSISTENT );
98+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_PUNYCODE" , UIDNA_ERROR_PUNYCODE , CONST_CS | CONST_PERSISTENT );
99+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_LABEL_HAS_DOT" , UIDNA_ERROR_LABEL_HAS_DOT , CONST_CS | CONST_PERSISTENT );
100+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_INVALID_ACE_LABEL" , UIDNA_ERROR_INVALID_ACE_LABEL , CONST_CS | CONST_PERSISTENT );
101+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_BIDI" , UIDNA_ERROR_BIDI , CONST_CS | CONST_PERSISTENT );
102+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_CONTEXTJ" , UIDNA_ERROR_CONTEXTJ , CONST_CS | CONST_PERSISTENT );
103+ #endif
49104}
50105/* }}} */
51106
@@ -54,11 +109,100 @@ enum {
54109 INTL_IDN_TO_UTF8
55110};
56111
57- static void php_intl_idn_to (INTERNAL_FUNCTION_PARAMETERS , int mode )
112+ /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
113+ static int php_intl_idn_check_status (UErrorCode err , const char * msg , int mode TSRMLS_DC )
114+ {
115+ intl_error_set_code (NULL , err TSRMLS_CC );
116+ if (U_FAILURE (err )) {
117+ char * buff ;
118+ spprintf (& buff , 0 , "%s: %s" ,
119+ mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8" ,
120+ msg );
121+ intl_error_set_custom_msg (NULL , buff , 1 TSRMLS_CC );
122+ efree (buff );
123+ return FAILURE ;
124+ }
125+
126+ return SUCCESS ;
127+ }
128+
129+ static inline void php_intl_bad_args (const char * msg , int mode TSRMLS_DC )
130+ {
131+ php_intl_idn_check_status (U_ILLEGAL_ARGUMENT_ERROR , msg , mode TSRMLS_CC );
132+ }
133+
#ifdef HAVE_46_API
/* Converts `domain` with ICU's UTS #46 API.
 *
 * domain, domain_len - UTF-8 input name
 * option             - flags handed unchanged to uidna_openUTS46()
 * mode               - INTL_IDN_TO_ASCII converts to ASCII; any other value
 *                      converts to Unicode (UTF-8)
 * idna_info          - optional array zval; when non-NULL it receives the
 *                      keys "result", "isTransitionalDifferent" and "errors"
 *
 * return_value is set to the converted string when ICU reports no IDNA
 * errors, and to FALSE otherwise (including when opening the UIDNA instance
 * or the conversion itself fails).
 */
static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
		const char *domain, int domain_len, uint32_t option, int mode, zval *idna_info)
{
	UIDNAInfo   info         = UIDNA_INFO_INITIALIZER;
	UErrorCode  status       = U_ZERO_ERROR;
	int32_t     buffer_capac = 255; /* no domain name may exceed this */
	char       *buffer       = emalloc(buffer_capac);
	int         buffer_owned = 0;   /* becomes 1 once a zval has taken over `buffer` */
	UIDNA      *uts46;
	int32_t     len;

	uts46 = uidna_openUTS46(option, &status);
	if (php_intl_idn_check_status(status, "failed to open UIDNA instance",
			mode TSRMLS_CC) == FAILURE) {
		/* no UIDNA instance to close on this path */
		efree(buffer);
		RETURN_FALSE;
	}

	len = (mode == INTL_IDN_TO_ASCII)
		? uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len,
				buffer, buffer_capac, &info, &status)
		: uidna_nameToUnicodeUTF8(uts46, domain, (int32_t)domain_len,
				buffer, buffer_capac, &info, &status);

	if (php_intl_idn_check_status(status, "failed to convert name",
			mode TSRMLS_CC) == FAILURE) {
		RETVAL_FALSE;
		goto cleanup;
	}

	/* Guards the terminator write below; E_ERROR is presumably fatal, so
	 * execution is not expected to continue past this report. */
	if (len >= buffer_capac) {
		php_error_docref(NULL TSRMLS_CC, E_ERROR, "ICU returned an unexpected length");
	}
	buffer[len] = '\0';

	if (info.errors != 0) {
		RETVAL_FALSE;
	} else {
		/* last argument 0: the zval takes `buffer` itself rather than a copy */
		RETVAL_STRINGL(buffer, len, 0);
		buffer_owned = 1;
	}

	if (idna_info != NULL) {
		zval *result;

		if (buffer_owned) {
			/* the string already lives in return_value; share that zval */
			zval_addref_p(return_value);
			result = return_value;
		} else {
			/* the conversion had errors: still expose the (partial) output */
			ALLOC_INIT_ZVAL(result);
			ZVAL_STRINGL(result, buffer, len, 0);
			buffer_owned = 1;
		}

		add_assoc_zval_ex(idna_info, "result", sizeof("result"), result);
		add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
				sizeof("isTransitionalDifferent"), info.isTransitionalDifferent);
		add_assoc_long_ex(idna_info, "errors", sizeof("errors"), (long)info.errors);
	}

cleanup:
	if (!buffer_owned) {
		efree(buffer);
	}
	uidna_close(uts46);
}
#endif
202+
203+ static void php_intl_idn_to (INTERNAL_FUNCTION_PARAMETERS ,
204+ const char * domain , int domain_len , uint32_t option , int mode )
58205{
59- unsigned char * domain ;
60- int domain_len ;
61- long option = 0 ;
62206 UChar * ustring = NULL ;
63207 int ustring_len = 0 ;
64208 UErrorCode status ;
@@ -67,18 +211,9 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
67211 UChar converted [MAXPATHLEN ];
68212 int32_t converted_ret_len ;
69213
70- if (zend_parse_parameters (ZEND_NUM_ARGS () TSRMLS_CC , "s|l" , (char * * )& domain , & domain_len , & option ) == FAILURE ) {
71- return ;
72- }
73-
74- if (domain_len < 1 ) {
75- intl_error_set ( NULL , U_ILLEGAL_ARGUMENT_ERROR , "idn_to_ascii: empty domain name" , 0 TSRMLS_CC );
76- RETURN_FALSE ;
77- }
78-
79214 /* convert the string to UTF-16. */
80215 status = U_ZERO_ERROR ;
81- intl_convert_utf8_to_utf16 (& ustring , & ustring_len , ( char * ) domain , domain_len , & status );
216+ intl_convert_utf8_to_utf16 (& ustring , & ustring_len , domain , domain_len , & status );
82217
83218 if (U_FAILURE (status )) {
84219 intl_error_set_code (NULL , status TSRMLS_CC );
@@ -123,11 +258,75 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
123258 RETURN_STRINGL (((char * )converted_utf8 ), converted_utf8_len , 0 );
124259}
125260
261+ static void php_intl_idn_handoff (INTERNAL_FUNCTION_PARAMETERS , int mode )
262+ {
263+ char * domain ;
264+ int domain_len ;
265+ long option = 0 ,
266+ variant = INTL_IDN_VARIANT_2003 ;
267+ zval * idna_info = NULL ;
268+
269+ intl_error_reset (NULL TSRMLS_CC );
270+
271+ if (zend_parse_parameters (ZEND_NUM_ARGS () TSRMLS_CC , "s|llz" ,
272+ & domain , & domain_len , & option , & variant , & idna_info ) == FAILURE ) {
273+ php_intl_bad_args ("bad arguments" , mode TSRMLS_CC );
274+ RETURN_NULL (); /* don't set FALSE because that's not the way it was before... */
275+ }
276+
277+ #ifdef HAVE_46_API
278+ if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46 ) {
279+ php_intl_bad_args ("invalid variant, must be one of {"
280+ "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}" , mode TSRMLS_CC );
281+ RETURN_FALSE ;
282+ }
283+ #else
284+ if (variant != INTL_IDN_VARIANT_2003 ) {
285+ php_intl_bad_args ("invalid variant, PHP was compiled against "
286+ "an old version of ICU and only supports INTL_IDN_VARIANT_2003" ,
287+ mode TSRMLS_CC );
288+ RETURN_FALSE ;
289+ }
290+ #endif
291+
292+ if (domain_len < 1 ) {
293+ php_intl_bad_args ("empty domain name" , mode TSRMLS_CC );
294+ RETURN_FALSE ;
295+ }
296+ if (domain_len > INT32_MAX - 1 ) {
297+ php_intl_bad_args ("domain name too large" , mode TSRMLS_CC );
298+ RETURN_FALSE ;
299+ }
300+ /* don't check options; it wasn't checked before */
301+
302+ if (idna_info != NULL ) {
303+ if (variant == INTL_IDN_VARIANT_2003 ) {
304+ php_error_docref0 (NULL TSRMLS_CC , E_NOTICE ,
305+ "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
306+ "takes 3 - extra argument ignored" );
307+ } else {
308+ zval_dtor (idna_info );
309+ array_init (idna_info );
310+ }
311+ }
312+
313+ if (variant == INTL_IDN_VARIANT_2003 ) {
314+ php_intl_idn_to (INTERNAL_FUNCTION_PARAM_PASSTHRU ,
315+ domain , domain_len , (uint32_t )option , mode );
316+ }
317+ #ifdef HAVE_46_API
318+ else {
319+ php_intl_idn_to_46 (INTERNAL_FUNCTION_PARAM_PASSTHRU , domain , domain_len ,
320+ (uint32_t )option , mode , idna_info );
321+ }
322+ #endif
323+ }
324+
126325/* {{{ proto int idn_to_ascii(string domain[, int options])
127326 Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
128327PHP_FUNCTION (idn_to_ascii )
129328{
130- php_intl_idn_to (INTERNAL_FUNCTION_PARAM_PASSTHRU , INTL_IDN_TO_ASCII );
329+ php_intl_idn_handoff (INTERNAL_FUNCTION_PARAM_PASSTHRU , INTL_IDN_TO_ASCII );
131330}
132331/* }}} */
133332
@@ -136,7 +335,7 @@ PHP_FUNCTION(idn_to_ascii)
136335 Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
137336PHP_FUNCTION (idn_to_utf8 )
138337{
139- php_intl_idn_to (INTERNAL_FUNCTION_PARAM_PASSTHRU , INTL_IDN_TO_UTF8 );
338+ php_intl_idn_handoff (INTERNAL_FUNCTION_PARAM_PASSTHRU , INTL_IDN_TO_UTF8 );
140339}
141340/* }}} */
142341
0 commit comments