@@ -34,46 +34,35 @@ const utf8_trailing = [
3434# # required core functionality ##
3535
# endof(s::String): scan backward from the last byte of `s`, skipping every byte
# for which `is_valid_continuation` is true, and return the index where the scan
# stops. Returns 0 when the loop runs out of bytes (`i` reaches 0).
# Diff: the removed (`-`) lines index `s.data` under `@inbounds`; the added (`+`)
# lines read byte `i` with `unsafe_load(pointer(s), i)` and take the byte count
# from `s.len`. The `i > 0` test short-circuits before the load, so the load is
# never issued with index 0.
3636function endof (s:: String )
37- d = s . data
38- i = length (d)
39- @inbounds while i > 0 && is_valid_continuation (d[i] )
37+ p = pointer (s)
38+ i = s . len
39+ while i > 0 && is_valid_continuation (unsafe_load (p,i) )
4040 i -= 1
4141 end
4242 i
4343end
4444
# length(s::String): count the bytes of `s` for which `is_valid_continuation` is
# false, by adding the negated result to `cnum` for every byte index, and return
# that count.
# Diff: the removed (`-`) lines iterate `1:length(s.data)` and index `d[i]` under
# `@inbounds`; the added (`+`) lines iterate `1:s.len` and read each byte with
# `unsafe_load(pointer(s), i)`.
4545function length (s:: String )
46- d = s . data
46+ p = pointer (s)
4747 cnum = 0
48- for i = 1 : length (d)
49- @inbounds cnum += ! is_valid_continuation (d[i] )
48+ for i = 1 : s . len
49+ cnum += ! is_valid_continuation (unsafe_load (p,i) )
5050 end
5151 cnum
5252end
5353
54- @noinline function slow_utf8_next (d:: Vector{UInt8} , b:: UInt8 , i:: Int )
55- # potentially faster version
56- # d = s.data
57- # a::UInt32 = d[i]
58- # if a < 0x80; return Char(a); end
59- # #if a&0xc0==0x80; return '\ufffd'; end
60- # b::UInt32 = a<<6 + d[i+1]
61- # if a < 0xe0; return Char(b - 0x00003080); end
62- # c::UInt32 = b<<6 + d[i+2]
63- # if a < 0xf0; return Char(c - 0x000e2080); end
64- # return Char(c<<6 + d[i+3] - 0x03c82080)
65-
54+ @noinline function slow_utf8_next (p:: Ptr{UInt8} , b:: UInt8 , i:: Int , l:: Int )
6655 if is_valid_continuation (b)
67- throw (UnicodeError (UTF_ERR_INVALID_INDEX, i, d[i] ))
56+ throw (UnicodeError (UTF_ERR_INVALID_INDEX, i, unsafe_load (p,i) ))
6857 end
6958 trailing = utf8_trailing[b + 1 ]
70- if length (d) < i + trailing
59+ if l < i + trailing
7160 return ' \u fffd' , i+ 1
7261 end
7362 c:: UInt32 = 0
7463 for j = 1 : (trailing + 1 )
7564 c <<= 6
76- c += d[i]
65+ c += unsafe_load (p,i)
7766 i += 1
7867 end
7968 c -= utf8_offset[trailing + 1 ]
8473 # function is split into this critical fast-path
8574 # for pure ascii data, such as parsing numbers,
8675 # and a longer function that can handle any utf8 data
87- d = s. data
88- b = d[i]
76+ if i < 1 || i > s. len
77+ throw (BoundsError (s,i))
78+ end
79+ p = pointer (s)
80+ b = unsafe_load (p, i)
8981 if b < 0x80
90- return Char (b), i + 1
82+ return Char (b), i+ 1
9183 end
92- return slow_utf8_next (d , b, i)
84+ return slow_utf8_next (p , b, i, s . len )
9385end
9486
9587function first_utf8_byte (ch:: Char )
@@ -102,20 +94,20 @@ function first_utf8_byte(ch::Char)
10294end
10395
# reverseind(s::String, i::Integer): start at byte index `j = s.len + 1 - i`
# (added lines; the removed lines used `length(s.data) + 1 - i`), step `j`
# backward while byte `j` satisfies `is_valid_continuation`, and return `j`.
# NOTE(review): the added lines read with `unsafe_load(p, j)` and no check on `j`
# is visible in this function, either for the initial value or while it is
# decremented. The removed lines used array indexing `d[j]` (presumably
# bounds-checked). Confirm that callers guarantee 1 <= i <= s.len and that the
# scan always stops at a non-continuation byte before `j` reaches 0.
10496function reverseind (s:: String , i:: Integer )
105- j = length (s . data) + 1 - i
106- d = s . data
107- while is_valid_continuation (d[j] )
97+ j = s . len + 1 - i
98+ p = pointer (s)
99+ while is_valid_continuation (unsafe_load (p,j) )
108100 j -= 1
109101 end
110102 return j
111103end
112104
113105# # overload methods for efficiency ##
114106
# sizeof(s::String): the removed (`-`) line returns `sizeof(s.data)`; the added
# (`+`) line returns the `len` field of `s` directly.
115- sizeof (s:: String ) = sizeof (s . data)
107+ sizeof (s:: String ) = s . len
116108
# isvalid(s::String, i::Integer): true when `i` lies within the byte range of `s`
# and byte `i` is not a continuation byte according to `is_valid_continuation`.
# Diff: the removed (`-`) line bounds `i` by `endof(s.data)` and indexes
# `s.data[i]`; the added (`+`) line bounds `i` by `s.len` and reads the byte with
# `unsafe_load(pointer(s), i)`. The range test is the left operand of `&&`, so the
# load only runs for an in-range `i`.
117109isvalid (s:: String , i:: Integer ) =
118- (1 <= i <= endof (s . data)) && ! is_valid_continuation (s . data[i] )
110+ (1 <= i <= s . len) && ! is_valid_continuation (unsafe_load ( pointer (s),i) )
119111
120112const empty_utf8 = String (UInt8[])
121113
@@ -237,10 +229,10 @@ function reverse(s::String)
237229 String (buf)
238230end
239231
# write(io::IO, s::String): forward the contents of `s` to `write` on `io`.
# Diff: the removed (`-`) line passes `s.data`; the added (`+`) line passes
# `pointer(s)` together with `s.len`.
240- write (io:: IO , s:: String ) = write (io, s . data )
232+ write (io:: IO , s:: String ) = write (io, pointer (s), s . len )
241233
# pointer(s::String): the removed (`-`) line returns `pointer(x.data)`; the added
# (`+`) line converts `pointer_from_objref(s) + sizeof(Int)` to `Ptr{UInt8}`.
# NOTE(review): the `sizeof(Int)` offset presumably skips a length field stored at
# the start of the String object, ahead of the bytes; confirm against the String
# layout. Whether `s` must be kept reachable while the raw pointer is in use is
# not shown here; confirm at call sites.
# pointer(x::String, i::Integer): the pointer above advanced by `i - 1`; the added
# line builds on `pointer(x)` where the removed line used `pointer(x.data)`.
242- pointer (x :: String ) = pointer (x . data )
243- pointer (x:: String , i:: Integer ) = pointer (x. data )+ (i- 1 )
234+ pointer (s :: String ) = convert (Ptr{UInt8}, pointer_from_objref (s) + sizeof (Int) )
235+ pointer (x:: String , i:: Integer ) = pointer (x)+ (i- 1 )
244236
# convert(::Type{String}, s::String): returns `s` itself.
# convert(::Type{String}, v::Vector{UInt8}): returns `String(v)`.
# Both lines are unchanged context in this diff.
245237convert (:: Type{String} , s:: String ) = s
246238convert (:: Type{String} , v:: Vector{UInt8} ) = String (v)
0 commit comments