@@ -262,6 +262,73 @@ strio_initialize(int argc, VALUE *argv, VALUE self)
262262 return strio_init (argc , argv , ptr , self );
263263}
264264
265+ static int
266+ detect_bom (VALUE str , int * bomlen )
267+ {
268+ const char * p ;
269+ long len ;
270+
271+ RSTRING_GETMEM (str , p , len );
272+ if (len < 1 ) return 0 ;
273+ switch ((unsigned char )p [0 ]) {
274+ case 0xEF :
275+ if (len < 2 ) break ;
276+ if ((unsigned char )p [1 ] == 0xBB && len > 2 ) {
277+ if ((unsigned char )p [2 ] == 0xBF ) {
278+ * bomlen = 3 ;
279+ return rb_utf8_encindex ();
280+ }
281+ }
282+ break ;
283+
284+ case 0xFE :
285+ if (len < 2 ) break ;
286+ if ((unsigned char )p [1 ] == 0xFF ) {
287+ * bomlen = 2 ;
288+ return rb_enc_find_index ("UTF-16BE" );
289+ }
290+ break ;
291+
292+ case 0xFF :
293+ if (len < 2 ) break ;
294+ if ((unsigned char )p [1 ] == 0xFE ) {
295+ if (len >= 4 && (unsigned char )p [2 ] == 0 && (unsigned char )p [3 ] == 0 ) {
296+ * bomlen = 4 ;
297+ return rb_enc_find_index ("UTF-32LE" );
298+ }
299+ * bomlen = 2 ;
300+ return rb_enc_find_index ("UTF-16LE" );
301+ }
302+ break ;
303+
304+ case 0 :
305+ if (len < 4 ) break ;
306+ if ((unsigned char )p [1 ] == 0 && (unsigned char )p [2 ] == 0xFE & (unsigned char )p [3 ] == 0xFF ) {
307+ * bomlen = 4 ;
308+ return rb_enc_find_index ("UTF-32BE" );
309+ }
310+ break ;
311+ }
312+ return 0 ;
313+ }
314+
315+ static rb_encoding *
316+ set_encoding_by_bom (struct StringIO * ptr )
317+ {
318+ int bomlen , idx = detect_bom (ptr -> string , & bomlen );
319+ rb_encoding * extenc = NULL ;
320+
321+ if (idx ) {
322+ extenc = rb_enc_from_index (idx );
323+ ptr -> pos = bomlen ;
324+ if (ptr -> flags & FMODE_WRITABLE ) {
325+ rb_enc_associate_index (ptr -> string , idx );
326+ }
327+ }
328+ ptr -> enc = extenc ;
329+ return extenc ;
330+ }
331+
265332static VALUE
266333strio_init (int argc , VALUE * argv , struct StringIO * ptr , VALUE self )
267334{
@@ -294,6 +361,7 @@ strio_init(int argc, VALUE *argv, struct StringIO *ptr, VALUE self)
294361 ptr -> enc = convconfig .enc ;
295362 ptr -> pos = 0 ;
296363 ptr -> lineno = 0 ;
364+ if (ptr -> flags & FMODE_SETENC_BY_BOM ) set_encoding_by_bom (ptr );
297365 RBASIC (self )-> flags |= (ptr -> flags & FMODE_READWRITE ) * (STRIO_READABLE / FMODE_READABLE );
298366 return self ;
299367}
@@ -1677,6 +1745,18 @@ strio_set_encoding(int argc, VALUE *argv, VALUE self)
16771745 return self ;
16781746}
16791747
1748+ static VALUE
1749+ strio_set_encoding_by_bom (VALUE self )
1750+ {
1751+ struct StringIO * ptr = StringIO (self );
1752+
1753+ if (ptr -> enc ) {
1754+ rb_raise (rb_eArgError , "encoding conversion is set" );
1755+ }
1756+ if (!set_encoding_by_bom (ptr )) return Qnil ;
1757+ return rb_enc_from_encoding (ptr -> enc );
1758+ }
1759+
16801760/*
16811761 * Pseudo I/O on String object.
16821762 *
@@ -1778,6 +1858,7 @@ Init_stringio(void)
17781858 rb_define_method (StringIO , "external_encoding" , strio_external_encoding , 0 );
17791859 rb_define_method (StringIO , "internal_encoding" , strio_internal_encoding , 0 );
17801860 rb_define_method (StringIO , "set_encoding" , strio_set_encoding , -1 );
1861+ rb_define_method (StringIO , "set_encoding_by_bom" , strio_set_encoding_by_bom , 0 );
17811862
17821863 {
17831864 VALUE mReadable = rb_define_module_under (rb_cIO , "generic_readable" );
0 commit comments