Skip to content

Commit 9500d87

Browse files
committed
[stdlib] Implement StaticString.UnicodeScalarView
Add a new type `StaticString.UnicodeScalarView`, accessible via a property `.unicodeScalars` on `StaticString`. Also add initializers for `StaticString` that take a `UnicodeScalarView` or a `Slice<UnicodeScalarView>`. The motivating reason for this is to make it possible to slice a `StaticString` down to a substring that's still typed as `StaticString`. Fixes rdar://problem/23382521
1 parent a42ce37 commit 9500d87

File tree

3 files changed

+293
-1
lines changed

3 files changed

+293
-1
lines changed

stdlib/public/core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ set(SWIFTLIB_SOURCES
127127
Mirror.swift
128128
Process.swift
129129
SliceBuffer.swift
130+
StaticStringUnicodeScalarView.swift
130131
VarArgs.swift
131132
Zip.swift
132133
Prespecialized.swift
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See http://swift.org/LICENSE.txt for license information
9+
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
extension StaticString {
  /// The value of `self` as a collection of [Unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value).
  public var unicodeScalars: UnicodeScalarView {
    get {
      return UnicodeScalarView(self)
    }
  }

  /// Construct the `StaticString` corresponding to the given
  /// `UnicodeScalarView`.
  ///
  /// A pointer-backed view yields a `StaticString` over the same bytes with
  /// the same `isASCII` flag; a scalar-backed view yields a `StaticString`
  /// holding that single scalar.
  public init(_ unicodeScalars: UnicodeScalarView) {
    switch unicodeScalars._data {
    case let .Pointer(ptr, isASCII):
      self.init(
        start: ptr.baseAddress._rawValue,
        byteSize: ptr.count._builtinWordValue,
        isASCII: isASCII._value)
    case .Scalar(let scalar):
      self.init(unicodeScalar: unsafeBitCast(scalar.value, Int32.self)._value)
    }
  }

  /// Construct the `StaticString` corresponding to the given
  /// `UnicodeScalarView` slice.
  ///
  /// The slice bounds are first checked against the bounds of the base view.
  /// For a pointer-backed base the result covers the bytes between the
  /// slice's start and end positions and inherits the base's `isASCII` flag.
  /// For a scalar-backed base an empty slice yields `StaticString()` and a
  /// non-empty slice yields the scalar itself.
  public init(_ unicodeScalars: Slice<UnicodeScalarView>) {
    UnicodeScalarView.Index._failEarlyRangeCheck2(
      unicodeScalars.startIndex, rangeEnd: unicodeScalars.endIndex,
      boundsStart: unicodeScalars._base.startIndex, boundsEnd: unicodeScalars._base.endIndex)
    switch unicodeScalars._base._data {
    case let .Pointer(ptr, isASCII):
      self.init(
        start: (ptr.baseAddress + unicodeScalars.startIndex._position)._rawValue,
        byteSize: (unicodeScalars.endIndex._position - unicodeScalars.startIndex._position)._builtinWordValue,
        isASCII: isASCII._value)
    case .Scalar(let scalar):
      if unicodeScalars.isEmpty {
        self.init()
      } else {
        self.init(unicodeScalar: unsafeBitCast(scalar.value, Int32.self)._value)
      }
    }
  }

  /// A collection of [Unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value) that
  /// encode a `StaticString`.
  public struct UnicodeScalarView : CollectionType, _Reflectable,
    CustomStringConvertible, CustomDebugStringConvertible {
    /// The storage behind the view: either a buffer of UTF-8 bytes together
    /// with an `isASCII` flag, or a single Unicode scalar.
    enum Data {
      case Pointer(UnsafeBufferPointer<UInt8>, isASCII: Bool)
      case Scalar(UnicodeScalar)
    }

    let _data: Data

    /// Capture the representation of `_base`: its UTF-8 buffer when it has a
    /// pointer representation, otherwise its single Unicode scalar.
    init(_ _base: StaticString) {
      if _base.hasPointerRepresentation {
        let ptr = UnsafeBufferPointer(start: _base.utf8Start, count: Int(_base.byteSize))
        _data = .Pointer(ptr, isASCII: _base.isASCII)
      } else {
        _data = .Scalar(_base.unicodeScalar)
      }
    }

    /// A position in a `StaticString.UnicodeScalarView`.
    public struct Index : BidirectionalIndexType, Comparable {
      /// An index into the UTF-8 data of _base. If _base does not have a
      /// pointer representation, then a position of 0 is startIndex and 1 is
      /// endIndex.
      let _position: Int
      let _data: Data

      init(_ _position: Int, _ _data: Data) {
        self._position = _position
        self._data = _data
      }

      /// Returns the next consecutive value after `self`.
      ///
      /// - Requires: The next value is representable.
      @warn_unused_result
      public func successor() -> Index {
        switch _data {
        case .Pointer(let ptr, _):
          // The lead byte is read to find the scalar's length, so trap
          // before touching memory at or beyond the end of the buffer.
          // This mirrors the check performed in the `.Scalar` branch.
          _precondition(_position < ptr.endIndex, "index points past StaticString end")
          let count = Int(UTF8._numTrailingBytes(ptr[_position]))
          return Index(_position + count + 1, _data)
        case .Scalar:
          _precondition(_position == 0, "index points past StaticString end")
          return Index(1, _data)
        }
      }

      /// Returns the previous consecutive value before `self`.
      ///
      /// - Requires: The previous value is representable.
      @warn_unused_result
      public func predecessor() -> Index {
        _precondition(_position > 0, "index precedes StaticString start")
        var position = _position - 1
        if case .Pointer(let ptr, _) = _data {
          // Step back over continuation bytes to reach the lead byte, but
          // never move below offset 0 so the scan cannot read before the
          // start of the buffer.
          while position > 0 && UTF8.isContinuation(ptr[position]) {
            position -= 1
          }
        }
        return Index(position, _data)
      }
    }

    /// The position of the first `UnicodeScalar` if the `StaticString` is
    /// non-empty; identical to `endIndex` otherwise.
    public var startIndex: Index {
      return Index(0, _data)
    }

    /// The "past the end" position.
    ///
    /// `endIndex` is not a valid argument to `subscript`, and is always
    /// reachable from `startIndex` by zero or more applications of
    /// `successor()`.
    public var endIndex: Index {
      switch _data {
      case .Pointer(let ptr, _):
        return Index(ptr.endIndex, _data)
      case .Scalar:
        return Index(1, _data)
      }
    }

    /// Returns `true` iff `self` is empty.
    ///
    /// A scalar-backed view always holds exactly one scalar and is therefore
    /// never empty.
    public var isEmpty: Bool {
      switch _data {
      case .Pointer(let ptr, _):
        return ptr.isEmpty
      case .Scalar:
        return false
      }
    }

    /// Access the `UnicodeScalar` at `position`.
    ///
    /// - Requires: `position` is not `endIndex`.
    public subscript(position: Index) -> UnicodeScalar {
      switch _data {
      case let .Pointer(ptr, isASCII):
        _precondition(position._position < ptr.endIndex, "subscript: index cannot be endIndex")
        let start = ptr.baseAddress + position._position
        if isASCII {
          // Every scalar of ASCII data is a single byte; no decoding needed.
          return UnicodeScalar(UInt32(start.memory))
        }
        // Decode one scalar from the bytes between `position` and the end
        // of the buffer.
        let slice = UnsafeBufferPointer<UInt8>(start: start, count: ptr.endIndex - position._position)
        var gen = slice.generate()
        var decoder = UTF8()
        switch decoder.decode(&gen) {
        case .Result(let scalar): return scalar
        default:
          _sanityCheckFailure("StaticString UTF-8 decoding failed")
        }
      case .Scalar(let scalar):
        _precondition(position._position == 0, "subscript: index cannot be endIndex")
        return scalar
      }
    }

    /// A textual representation of `self`.
    public var description: String {
      return StaticString(self).stringValue
    }

    /// A textual representation of `self`, suitable for debugging.
    public var debugDescription: String {
      return "StaticString.UnicodeScalarView(\(StaticString(self).debugDescription))"
    }

    /// Returns a mirror that reflects the `String` value of `self`.
    public func _getMirror() -> _MirrorType {
      return _reflect(StaticString(self).stringValue)
    }
  }
}
184+
185+
/// Returns `true` iff `lhs` and `rhs` refer to the same position.
///
/// Only the stored `_position` offsets are compared; the `_data` carried by
/// each index is not consulted.
@warn_unused_result
public func ==(
  lhs: StaticString.UnicodeScalarView.Index,
  rhs: StaticString.UnicodeScalarView.Index
) -> Bool {
  let leftOffset = lhs._position
  let rightOffset = rhs._position
  return leftOffset == rightOffset
}
192+
193+
/// Returns `true` iff `lhs` refers to a position strictly before `rhs`.
///
/// Only the stored `_position` offsets are compared; the `_data` carried by
/// each index is not consulted.
@warn_unused_result
public func <(
  lhs: StaticString.UnicodeScalarView.Index,
  rhs: StaticString.UnicodeScalarView.Index
) -> Bool {
  let leftOffset = lhs._position
  let rightOffset = rhs._position
  return leftOffset < rightOffset
}

test/1_stdlib/StaticString.swift

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,5 +144,97 @@ StaticStringTestSuite.test("UnicodeScalarRepresentation/byteSize")
144144
strOpaque.byteSize
145145
}
146146

147-
runAllTests()
147+
// Converting a pointer-backed, non-ASCII StaticString to its scalar view and
// back must preserve the pointer, the byte size and the ASCII flag.
StaticStringTestSuite.test("UnicodeScalarView/round-trip/UTF8") {
  let original: StaticString = "абв"
  let view = original.unicodeScalars
  let rebuilt = StaticString(view)
  expectEqual(original.utf8Start, rebuilt.utf8Start)
  expectEqual(original.byteSize, rebuilt.byteSize)
  expectEqual(original.isASCII, rebuilt.isASCII)
}

// Same round-trip check for a pointer-backed ASCII StaticString.
StaticStringTestSuite.test("UnicodeScalarView/round-trip/ASCII") {
  let original: StaticString = "abc"
  let view = original.unicodeScalars
  let rebuilt = StaticString(view)
  expectEqual(original.utf8Start, rebuilt.utf8Start)
  expectEqual(original.byteSize, rebuilt.byteSize)
  expectEqual(original.isASCII, rebuilt.isASCII)
}

// Same round-trip check for a StaticString built from a single scalar.
StaticStringTestSuite.test("UnicodeScalarView/round-trip/Scalar") {
  let original: StaticString = StaticString(_builtinUnicodeScalarLiteral: UInt32(0x5a)._value)
  let view = original.unicodeScalars
  let rebuilt = StaticString(view)
  expectEqual(original.hasPointerRepresentation, rebuilt.hasPointerRepresentation)
  expectEqual(original.unicodeScalar, rebuilt.unicodeScalar)
}

// Iterating a non-ASCII view produces one element per scalar.
StaticStringTestSuite.test("UnicodeScalarView/generate/NonEmpty") {
  let source: StaticString = "абв"
  let collected = Array(source.unicodeScalars)
  expectEqual(["а", "б", "в"], collected)
}

// Iterating the view of an empty string produces nothing.
StaticStringTestSuite.test("UnicodeScalarView/generate/Empty") {
  let source: StaticString = ""
  let collected = Array(source.unicodeScalars)
  expectEqual([], collected)
}

// Iterating a scalar-backed view produces exactly that scalar.
StaticStringTestSuite.test("UnicodeScalarView/generate/Scalar") {
  let source: StaticString = StaticString(_builtinUnicodeScalarLiteral: UInt32(0x5a)._value)
  let collected = Array(source.unicodeScalars)
  expectEqual(["Z"], collected)
}

// Subscripting a pointer-backed view by advanced indices, and reaching
// endIndex after advancing past the last scalar.
StaticStringTestSuite.test("UnicodeScalarView/subscript/Pointer") {
  let source: StaticString = "абв"
  let view = source.unicodeScalars
  let first = view.startIndex
  expectEqual("а", view[first])
  expectEqual("б", view[first.advancedBy(1)])
  expectEqual("в", view[first.advancedBy(2)])
  expectEqual(view.endIndex, first.advancedBy(3))
}

// Subscripting a scalar-backed view, and reaching endIndex after one step.
StaticStringTestSuite.test("UnicodeScalarView/subscript/Scalar") {
  let source: StaticString = StaticString(_builtinUnicodeScalarLiteral: UInt32(0x5a)._value)
  let view = source.unicodeScalars
  let first = view.startIndex
  expectEqual("Z", view[first])
  expectEqual(view.endIndex, first.advancedBy(1))
}

// For a default-constructed StaticString the view's bounds coincide.
StaticStringTestSuite.test("UnicodeScalarView/subscript/Empty") {
  let source = StaticString()
  let view = source.unicodeScalars
  expectEqual(view.startIndex, view.endIndex)
}

// A one-scalar slice taken from the middle of a pointer-backed view converts
// back to a StaticString with just that scalar.
StaticStringTestSuite.test("UnicodeScalarView/Slice/round-trip/NonEmpty/Pointer") {
  let source: StaticString = "абв"
  let view = source.unicodeScalars
  let second = view.startIndex.successor()
  let middle = view[second...second]
  expectEqual("б", StaticString(middle).stringValue)
}

// A slice covering all of a scalar-backed view converts back to that scalar.
StaticStringTestSuite.test("UnicodeScalarView/Slice/round-trip/NonEmpty/Scalar") {
  let source: StaticString = StaticString(_builtinUnicodeScalarLiteral: UInt32(0x5a)._value)
  let view = source.unicodeScalars
  let whole = view[view.indices]
  expectEqual("Z", StaticString(whole).stringValue)
}

// An empty slice of a pointer-backed view converts to an empty StaticString
// whose utf8Start is non-nil.
StaticStringTestSuite.test("UnicodeScalarView/Slice/round-trip/Empty/Pointer") {
  let source: StaticString = "abc"
  let view = source.unicodeScalars
  let first = view.startIndex
  let nothing = view[first..<first]
  expectEqual("", StaticString(nothing).stringValue)
  expectNotEqual(nil, StaticString(nothing).utf8Start)
}

// An empty slice of a scalar-backed view converts to an empty StaticString
// whose utf8Start is non-nil.
StaticStringTestSuite.test("UnicodeScalarView/Slice/round-trip/Empty/Scalar") {
  let source: StaticString = StaticString(_builtinUnicodeScalarLiteral: UInt32(0x5a)._value)
  let view = source.unicodeScalars
  let first = view.startIndex
  let nothing = view[first..<first]
  expectEqual("", StaticString(nothing).stringValue)
  expectNotEqual(nil, StaticString(nothing).utf8Start)
}

runAllTests()

0 commit comments

Comments
 (0)