@@ -17,55 +17,64 @@ internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
1717
1818 // NOTE: Avoiding for-in syntax to avoid bounds checks
1919 //
20- // TODO(String performance): SIMD-ize
21- //
20+ // TODO(String performance): remove this manual SIMD-ization once the Swift compiler is smart enough
21+ // to vectorize this simple loop on its own:
22+ // for i in 0..<count where ptr[i] & byteASCIIMask != 0 {
23+ // return false
24+ // }
25+
26+ let ptr = input. baseAddress. _unsafelyUnwrappedUnchecked
27+ var i = 0
28+
2229 let count = input. count
23- var ptr = unsafe UnsafeRawPointer( input. baseAddress. _unsafelyUnwrappedUnchecked)
24-
25- let asciiMask64 = 0x8080_8080_8080_8080 as UInt64
26- let asciiMask32 = UInt32 ( truncatingIfNeeded: asciiMask64)
27- let asciiMask16 = UInt16 ( truncatingIfNeeded: asciiMask64)
28- let asciiMask8 = UInt8 ( truncatingIfNeeded: asciiMask64)
29-
30- let end128 = unsafe ptr + count & ~ ( MemoryLayout < ( UInt64 , UInt64 ) > . stride &- 1 )
31- let end64 = unsafe ptr + count & ~ ( MemoryLayout < UInt64 > . stride &- 1 )
32- let end32 = unsafe ptr + count & ~ ( MemoryLayout < UInt32 > . stride &- 1 )
33- let end16 = unsafe ptr + count & ~ ( MemoryLayout < UInt16 > . stride &- 1 )
34- let end = unsafe ptr + count
35-
36-
37- while unsafe ptr < end128 {
38- let pair = unsafe ptr. loadUnaligned ( as: ( UInt64, UInt64) . self)
39- let result = ( pair. 0 | pair. 1 ) & asciiMask64
40- guard result == 0 else { return false }
41- unsafe ptr = unsafe ptr + MemoryLayout< ( UInt64, UInt64) > . stride
30+ let stride = MemoryLayout< UInt> . stride
31+ let simd4UintStride = MemoryLayout< SIMD4< UInt>>. stride
32+ assert ( simd4UintStride == stride * 4 ) // Memory layout of SIMD4<UInt> should match one of 4 UInt words
33+ let address = Int ( bitPattern: ptr)
34+
35+ let wordASCIIMask = UInt ( truncatingIfNeeded: 0x8080_8080_8080_8080 as UInt64 )
36+ let byteASCIIMask = UInt8 ( truncatingIfNeeded: 0x80 as UInt8 )
37+ let simd4ASCIIMask = SIMD4 < UInt > ( repeating: wordASCIIMask)
38+ let simd4Zero = SIMD4 < UInt > ( repeating: 0 )
39+
40+ // Leading bytes up to the first word-aligned address
41+ while ( address &+ i) % stride != 0 && i < count {
42+ guard ptr [ i] & byteASCIIMask == 0 else { return false }
43+ i &+= 1
4244 }
43-
44- // If we had enough bytes for two iterations of this, we would have hit
45- // the loop above, so we only need to do this once
46- if unsafe ptr < end64 {
47- let value = unsafe ptr. loadUnaligned ( as: UInt64 . self)
48- guard value & asciiMask64 == 0 else { return false }
49- unsafe ptr = unsafe ptr + MemoryLayout< UInt64> . stride
45+
46+ // Single words up to the first SIMD4<UInt>-aligned address
47+ while ( address &+ i) % simd4UintStride != 0 && ( i &+ stride) <= count {
48+ let word : UInt = UnsafePointer (
49+ bitPattern: address &+ i
50+ ) . _unsafelyUnwrappedUnchecked. pointee
51+ guard word & wordASCIIMask == 0 else { return false }
52+ i &+= stride
5053 }
51-
52- if unsafe ptr < end32 {
53- let value = unsafe ptr. loadUnaligned ( as: UInt32 . self)
54- guard value & asciiMask32 == 0 else { return false }
55- unsafe ptr = unsafe ptr + MemoryLayout< UInt32> . stride
54+
55+ // Full 4-words
56+ while ( i &+ simd4UintStride) <= count {
57+ let simd4 : SIMD4 < UInt > = UnsafePointer < SIMD4 < UInt > > (
58+ bitPattern: address &+ i
59+ ) . _unsafelyUnwrappedUnchecked. pointee
60+ guard simd4 & simd4ASCIIMask == simd4Zero else { return false }
61+ i &+= simd4UintStride
5662 }
57-
58- if unsafe ptr < end16 {
59- let value = unsafe ptr. loadUnaligned ( as: UInt16 . self)
60- guard value & asciiMask16 == 0 else { return false }
61- unsafe ptr = unsafe ptr + MemoryLayout< UInt16> . stride
63+
64+ // Full words
65+ while ( i &+ stride) <= count {
66+ let word : UInt = UnsafePointer (
67+ bitPattern: address &+ i
68+ ) . _unsafelyUnwrappedUnchecked. pointee
69+ guard word & wordASCIIMask == 0 else { return false }
70+ i &+= stride
6271 }
6372
64- if unsafe ptr < end {
65- let value = unsafe ptr. loadUnaligned ( fromByteOffset: 0 , as: UInt8 . self)
66- guard value & asciiMask8 == 0 else { return false }
73+ // Remaining bytes up to the end
74+ while i < count {
75+ guard ptr [ i] & byteASCIIMask == 0 else { return false }
76+ i &+= 1
6777 }
68- unsafe _internalInvariant ( ptr == end || ptr + 1 == end)
6978 return true
7079}
7180
0 commit comments