From 10706b7863eec5b8ddaa822b3eea24bc1c9d0943 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 6 Apr 2016 13:03:46 -0500 Subject: [PATCH 1/2] [stdlib] Revise documentation for string-related types This documentation revision covers a large number of types & protocols: String, its views and their indices, the Unicode codec types and protocol, as well as Character, UnicodeScalar, and StaticString, among others. This also includes a few small changes across the standard library for consistency. Cherry-picked and merged from 44b2d56 --- stdlib/public/core/Arrays.swift.gyb | 8 +- stdlib/public/core/Bool.swift | 2 +- stdlib/public/core/CString.swift | 107 +++- stdlib/public/core/Character.swift | 107 +++- stdlib/public/core/ClosedRange.swift | 4 +- stdlib/public/core/Collection.swift | 11 +- stdlib/public/core/CollectionOfOne.swift | 7 +- stdlib/public/core/CompilerProtocols.swift | 156 +++++- .../core/ExistentialCollection.swift.gyb | 8 +- stdlib/public/core/Filter.swift.gyb | 10 +- .../public/core/HashedCollections.swift.gyb | 18 +- .../core/ImplicitlyUnwrappedOptional.swift | 4 +- stdlib/public/core/LazyCollection.swift.gyb | 8 +- stdlib/public/core/LifetimeManager.swift | 13 +- stdlib/public/core/Mirror.swift | 97 +++- stdlib/public/core/MutableCollection.swift | 4 +- stdlib/public/core/Optional.swift | 2 +- stdlib/public/core/OutputStream.swift | 215 ++++++-- stdlib/public/core/Policy.swift | 5 +- stdlib/public/core/SetAlgebra.swift | 2 +- stdlib/public/core/SliceBuffer.swift | 8 +- stdlib/public/core/StaticString.swift | 82 ++- stdlib/public/core/String.swift | 442 +++++++++++---- stdlib/public/core/StringBridge.swift | 6 +- stdlib/public/core/StringCharacterView.swift | 190 +++++-- .../public/core/StringIndexConversions.swift | 165 +++++- .../public/core/StringInterpolation.swift.gyb | 35 +- stdlib/public/core/StringLegacy.swift | 158 +++++- ...StringRangeReplaceableCollection.swift.gyb | 186 +++++-- stdlib/public/core/StringUTF16.swift | 310 +++++++++-- 
stdlib/public/core/StringUTF8.swift | 319 +++++++++-- .../public/core/StringUnicodeScalarView.swift | 329 ++++++++--- stdlib/public/core/Unicode.swift | 520 +++++++++++++++--- stdlib/public/core/UnicodeScalar.swift | 157 +++++- .../public/core/UnsafeBufferPointer.swift.gyb | 8 +- 35 files changed, 3015 insertions(+), 688 deletions(-) diff --git a/stdlib/public/core/Arrays.swift.gyb b/stdlib/public/core/Arrays.swift.gyb index 2f7c913da78e0..5795b31a89301 100644 --- a/stdlib/public/core/Arrays.swift.gyb +++ b/stdlib/public/core/Arrays.swift.gyb @@ -482,8 +482,8 @@ public struct ${Self} %end } - /// The array's "past the end" position, or one greater than the last valid - /// subscript argument. + /// The array's "past the end" position---that is, the position one greater + /// than the last valid subscript argument. /// /// When you need a range that includes the last element of an array, use the /// half-open range operator (`..<`) with `endIndex`. The `..<` operator @@ -946,7 +946,7 @@ extension ${Self} : ArrayLiteralConvertible { // Optimized implementation for Array /// Creates an array from the given array literal. /// - /// Don't directly call this initializer, which is used by the compiler + /// Do not call this initializer directly. It is used by the compiler /// when you use an array literal. Instead, create a new array by using an /// array literal as its value. To do this, enclose a comma-separated list of /// values in square brackets. @@ -963,7 +963,7 @@ extension ${Self} : ArrayLiteralConvertible { %else: /// Creates an array from the given array literal. /// - /// Don't directly call this initializer, which is used by the compiler when + /// Do not call this initializer directly. It is used by the compiler when /// you use an array literal. Instead, create a new array by using an array /// literal as its value. To do this, enclose a comma-separated list of /// values in square brackets. 
diff --git a/stdlib/public/core/Bool.swift b/stdlib/public/core/Bool.swift index d94903ef669ab..719c98a9ec396 100644 --- a/stdlib/public/core/Bool.swift +++ b/stdlib/public/core/Bool.swift @@ -79,7 +79,7 @@ extension Bool : _BuiltinBooleanLiteralConvertible, BooleanLiteralConvertible { /// Creates an instance initialized to the specified Boolean literal. /// - /// Don't directly call this initializer, which is used by the compiler when + /// Do not call this initializer directly. It is used by the compiler when /// you use a Boolean literal. Instead, create a new `Bool` instance by /// using one of the Boolean literals `true` and `false`. /// diff --git a/stdlib/public/core/CString.swift b/stdlib/public/core/CString.swift index 15d6095714ed6..0240e02a13016 100644 --- a/stdlib/public/core/CString.swift +++ b/stdlib/public/core/CString.swift @@ -16,25 +16,64 @@ import SwiftShims extension String { - /// Create a new `String` by copying the nul-terminated UTF-8 data - /// referenced by a `cString`. + /// Creates a new string by copying the null-terminated UTF-8 data referenced + /// by the given pointer. /// - /// If `cString` contains ill-formed UTF-8 code unit sequences, replaces them - /// with replacement characters (U+FFFD). + /// If `cString` contains ill-formed UTF-8 code unit sequences, this + /// initializer replaces them with the Unicode replacement character + /// (`"\u{FFFD}"`). /// - /// - Precondition: `cString != nil` + /// The following example calls this initializer with pointers to the + /// contents of two different `CChar` arrays---the first with well-formed + /// UTF-8 code unit sequences and the second with an ill-formed sequence at + /// the end. + /// + /// let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0] + /// validUTF8.withUnsafeBufferPointer { ptr in + /// let s = String(cString: ptr.baseAddress!) 
+ /// print(s) + /// } + /// // Prints "Café" + /// + /// let invalidUTF8: [CChar] = [67, 97, 102, -61, 0] + /// invalidUTF8.withUnsafeBufferPointer { ptr in + /// let s = String(cString: ptr.baseAddress!) + /// print(s) + /// } + /// // Prints "Caf�" + /// + /// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence. public init(cString: UnsafePointer) { self = String.decodeCString(UnsafePointer(cString), as: UTF8.self, repairingInvalidCodeUnits: true)!.result } - /// Create a new `String` by copying the nul-terminated UTF-8 data - /// referenced by a `cString`. + /// Creates a new string by copying and validating the null-terminated UTF-8 + /// data referenced by the given pointer. + /// + /// This initializer does not try to repair ill-formed UTF-8 code unit + /// sequences. If any are found, the result of the initializer is `nil`. + /// + /// The following example calls this initializer with pointers to the + /// contents of two different `CChar` arrays---the first with well-formed + /// UTF-8 code unit sequences and the second with an ill-formed sequence at + /// the end. + /// + /// let validUTF8: [CChar] = [67, 97, 102, -61, -87, 0] + /// validUTF8.withUnsafeBufferPointer { ptr in + /// let s = String(validatingUTF8: ptr.baseAddress!) + /// print(s) + /// } + /// // Prints "Optional(Café)" /// - /// Does not try to repair ill-formed UTF-8 code unit sequences, fails if any - /// such sequences are found. + /// let invalidUTF8: [CChar] = [67, 97, 102, -61, 0] + /// invalidUTF8.withUnsafeBufferPointer { ptr in + /// let s = String(validatingUTF8: ptr.baseAddress!) + /// print(s) + /// } + /// // Prints "nil" /// - /// - Precondition: `cString != nil` + /// - Parameter cString: A pointer to a null-terminated UTF-8 code sequence. 
public init?(validatingUTF8 cString: UnsafePointer) { guard let (result, _) = String.decodeCString( UnsafePointer(cString), @@ -45,12 +84,50 @@ extension String { self = result } - /// Create a new `String` by copying the nul-terminated data - /// referenced by a `cString` using `encoding`. + /// Creates a new string by copying the null-terminated data referenced by + /// the given pointer using the specified encoding. + /// + /// When you pass `true` as `isRepairing`, this method replaces ill-formed + /// sequences with the Unicode replacement character (`"\u{FFFD}"`); + /// otherwise, an ill-formed sequence causes this method to stop decoding + /// and return `nil`. + /// + /// The following example calls this method with pointers to the contents of + /// two different `UInt8` arrays---the first with well-formed UTF-8 code + /// unit sequences and the second with an ill-formed sequence at the end. + /// + /// let validUTF8: [UInt8] = [67, 97, 102, 195, 169, 0] + /// validUTF8.withUnsafeBufferPointer { ptr in + /// let s = String.decodeCString(ptr.baseAddress, + /// as: UTF8.self, + /// repairingInvalidCodeUnits: true) + /// print(s) + /// } + /// // Prints "Optional((Café, false))" + /// + /// let invalidUTF8: [UInt8] = [67, 97, 102, 195, 0] + /// invalidUTF8.withUnsafeBufferPointer { ptr in + /// let s = String.decodeCString(ptr.baseAddress, + /// as: UTF8.self, + /// repairingInvalidCodeUnits: true) + /// print(s) + /// } + /// // Prints "Optional((Caf�, true))" + /// + /// - Parameters: + /// - cString: A pointer to a null-terminated code sequence encoded in + /// `encoding`. + /// - encoding: The Unicode encoding of the data referenced by `cString`. + /// - isRepairing: Pass `true` to create a new string, even when the data + /// referenced by `cString` contains ill-formed sequences. Ill-formed + /// sequences are replaced with the Unicode replacement character + /// (`"\u{FFFD}"`).
Pass `false` to interrupt the creation of the new + /// string if an ill-formed sequence is detected. + /// - Returns: A tuple with the new string and a Boolean value that indicates + /// whether any repairs were made. Returns `nil` if `cString` is `nil`, or + /// if `isRepairing` is `false` and an ill-formed sequence is detected. /// - /// Returns `nil` if the `cString` is `nil` or if it contains ill-formed code - /// units and no repairing has been requested. Otherwise replaces - /// ill-formed code units with replacement characters (U+FFFD). + /// - SeeAlso: `UnicodeCodec` @warn_unused_result public static func decodeCString( _ cString: UnsafePointer?, diff --git a/stdlib/public/core/Character.swift b/stdlib/public/core/Character.swift index 9126ddf83c1d7..d7d035ee5b03f 100644 --- a/stdlib/public/core/Character.swift +++ b/stdlib/public/core/Character.swift @@ -10,9 +10,56 @@ // //===----------------------------------------------------------------------===// -/// `Character` represents some Unicode grapheme cluster as -/// defined by a canonical, localized, or otherwise tailored -/// segmentation algorithm. +/// A single extended grapheme cluster, which approximates a user-perceived +/// character. +/// +/// The `Character` type represents a character made up of one or more Unicode +/// scalar values, grouped by a Unicode boundary algorithm. Generally, a +/// `Character` instance matches what the reader of a string will perceive as +/// a single character. The number of visible characters is generally the most +/// natural way to count the length of a string. +/// +/// let greeting = "Hello! 🐥" +/// print("Character count: \(greeting.characters.count)") +/// // Prints "Character count: 8" +/// +/// Because each character in a string can be made up of one or more Unicode +/// code points, the number of characters in a string may not match the length +/// of the Unicode code point representation or the length of the string in a +/// particular binary representation.
+/// +/// print("Unicode code point count: \(greeting.unicodeScalars.count)") +/// // Prints "Unicode code point count: 8" +/// +/// print("UTF-8 representation count: \(greeting.utf8.count)") +/// // Prints "UTF-8 representation count: 11" +/// +/// Every `Character` instance is composed of one or more Unicode code points +/// that are grouped together as an *extended grapheme cluster*. The way these +/// code points are grouped is defined by a canonical, localized, or otherwise +/// tailored Unicode segmentation algorithm. +/// +/// For example, a country's Unicode flag character is made up of two regional +/// indicator code points that correspond to that country's ISO 3166-1 alpha-2 +/// code. The alpha-2 code for the United States is "US", so its flag +/// character is made up of the Unicode code points `"\u{1F1FA}"` (REGIONAL +/// INDICATOR SYMBOL LETTER U) and `"\u{1F1F8}"` (REGIONAL INDICATOR SYMBOL +/// LETTER S). When placed next to each other in a Swift string literal, these +/// two code points are combined into a single grapheme cluster, represented +/// by a `Character` instance in Swift. +/// +/// let usFlag: Character = "\u{1F1FA}\u{1F1F8}" +/// print(usFlag) +/// // Prints "🇺🇸" +/// +/// For more information about the Unicode terms used in this discussion, see +/// the [Unicode.org glossary][glossary]. In particular, this discussion +/// mentions [extended grapheme clusters][clusters] and [Unicode scalar +/// values][scalars]. +/// +/// [glossary]: http://www.unicode.org/glossary/ +/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster +/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value public struct Character : _BuiltinExtendedGraphemeClusterLiteralConvertible, ExtendedGraphemeClusterLiteralConvertible, Equatable, Hashable, Comparable { @@ -33,7 +80,9 @@ public struct Character : case small(Builtin.Int63) } - /// Construct a `Character` containing just the given `scalar`.
+ /// Creates a character containing the given Unicode scalar value. + /// + /// - Parameter scalar: The Unicode scalar value to convert into a character. public init(_ scalar: UnicodeScalar) { var asInt: UInt64 = 0 var shift: UInt64 = 0 @@ -55,7 +104,17 @@ public struct Character : UTF32.self, input: CollectionOfOne(UInt32(value)))) } - /// Create an instance initialized to `value`. + /// Creates a character with the specified value. + /// + /// Do not call this initializer directly. It is used by the compiler when you + /// use a string literal to initialize a `Character` instance. For example: + /// + /// let snowflake: Character = "❄︎" + /// print(snowflake) + /// // Prints "❄︎" + /// + /// The assignment to the `snowflake` constant calls this initializer behind + /// the scenes. public init(unicodeScalarLiteral value: Character) { self = value } @@ -73,14 +132,31 @@ public struct Character : isASCII: isASCII)) } - /// Create an instance initialized to `value`. + /// Creates a character with the specified value. + /// + /// Do not call this initializer directly. It is used by the compiler when you + /// use a string literal to initialize a `Character` instance. For example: + /// + /// let oBreve: Character = "o\u{306}" + /// print(oBreve) + /// // Prints "ŏ" + /// + /// The assignment to the `oBreve` constant calls this initializer behind the + /// scenes. public init(extendedGraphemeClusterLiteral value: Character) { self = value } - /// Create an instance from a single-character `String`. + /// Creates a character from a single-character string. + /// + /// The following example creates a new character from the uppercase version + /// of a string that only holds one character. + /// + /// let a = "a" + /// let capitalA = Character(a.uppercased()) /// - /// - Precondition: `s` contains exactly one extended grapheme cluster. + /// - Parameter s: The single-character string to convert to a `Character` + /// instance.
`s` must contain exactly one extended grapheme cluster. public init(_ s: String) { // The small representation can accept up to 8 code units as long // as the last one is a continuation. Since the high bit of the @@ -260,13 +336,10 @@ public struct Character : var data: UInt64 } - /// The hash value. + /// The character's hash value. /// - /// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`. - /// - /// - Note: The hash value is not guaranteed to be stable across - /// different invocations of the same program. Do not persist the - /// hash value across program runs. + /// Hash values are not guaranteed to be equal across different executions of + /// your program. Do not save hash values to use during a future execution. public var hashValue: Int { // FIXME(performance): constructing a temporary string is extremely // wasteful and inefficient. @@ -283,14 +356,16 @@ public struct Character : } extension Character : CustomDebugStringConvertible { - /// A textual representation of `self`, suitable for debugging. + /// A textual representation of the character, suitable for debugging. public var debugDescription: String { return String(self).debugDescription } } extension String { - /// Construct an instance containing just the given `Character`. + /// Creates a string containing the given character. + /// + /// - Parameter c: The character to convert to a string. public init(_ c: Character) { switch c._representation { case let .small(_63bits): diff --git a/stdlib/public/core/ClosedRange.swift b/stdlib/public/core/ClosedRange.swift index 6edb67a32c92d..062bab9b0d8dc 100644 --- a/stdlib/public/core/ClosedRange.swift +++ b/stdlib/public/core/ClosedRange.swift @@ -212,8 +212,8 @@ public struct CountableClosedRange< return ClosedRangeIndex(lowerBound) } - /// The range's "past the end" position, or one greater than the last valid - /// subscript argument. 
+ /// The range's "past the end" position---that is, the position one greater + /// than the last valid subscript argument. public var endIndex: ClosedRangeIndex { return ClosedRangeIndex() } diff --git a/stdlib/public/core/Collection.swift b/stdlib/public/core/Collection.swift index 24266df003711..cc439927d4846 100644 --- a/stdlib/public/core/Collection.swift +++ b/stdlib/public/core/Collection.swift @@ -38,8 +38,8 @@ public protocol IndexableBase { /// If the collection is empty, `startIndex` is equal to `endIndex`. var startIndex: Index { get } - /// The collection's "past the end" position, or one greater than the last - /// valid subscript argument. + /// The collection's "past the end" position---that is, the position one + /// greater than the last valid subscript argument. /// /// When you need a range that includes the last element of a collection, use /// the half-open range operator (`..<`) with `endIndex`. The `..<` operator @@ -158,8 +158,11 @@ public protocol IndexableBase { /// In most cases, it's best to ignore this protocol and use the `Collection` /// protocol instead, because it has a more complete interface. public protocol Indexable : IndexableBase { - /// A type that can represent the number of steps between a pair of - /// indices. + /// A type used to represent the number of steps between two indices, where + /// one value is reachable from the other. + /// + /// In Swift, *reachability* refers to the ability to produce one value from + /// the other through zero or more applications of `index(after:)`. associatedtype IndexDistance : SignedInteger = Int /// Returns an index that is the specified distance from the given index. 
diff --git a/stdlib/public/core/CollectionOfOne.swift b/stdlib/public/core/CollectionOfOne.swift index bdd53ec40059f..92ce3e19b1eac 100644 --- a/stdlib/public/core/CollectionOfOne.swift +++ b/stdlib/public/core/CollectionOfOne.swift @@ -50,10 +50,11 @@ public struct CollectionOfOne return 0 } - /// The "past the end" position; always identical to - /// `index(after: startIndex)`. + /// The "past the end" position---that is, the position one greater than the + /// last valid subscript argument. /// - /// - Note: `endIndex` is not a valid argument to `subscript`. + /// In a `CollectionOfOne` instance, `endIndex` is always identical to + /// `index(after: startIndex)`. public var endIndex: Int { return 1 } diff --git a/stdlib/public/core/CompilerProtocols.swift b/stdlib/public/core/CompilerProtocols.swift index 9157989bb0208..eea8f0329503d 100644 --- a/stdlib/public/core/CompilerProtocols.swift +++ b/stdlib/public/core/CompilerProtocols.swift @@ -300,11 +300,31 @@ public protocol _BuiltinUnicodeScalarLiteralConvertible { init(_builtinUnicodeScalarLiteral value: Builtin.Int32) } -/// Conforming types can be initialized with string literals -/// containing a single [Unicode scalar value](http://www.unicode.org/glossary/#unicode_scalar_value). +/// A type that can be initialized with a string literal containing a single +/// Unicode scalar value. +/// +/// The `String`, `StaticString`, `Character`, and `UnicodeScalar` types all +/// conform to the `UnicodeScalarLiteralConvertible` protocol. You can +/// initialize a variable of any of these types using a string literal that +/// holds a single Unicode scalar. +/// +/// let ñ: UnicodeScalar = "ñ" +/// print(ñ) +/// // Prints "ñ" +/// +/// Conforming to UnicodeScalarLiteralConvertible +/// ============================================= +/// +/// To add `UnicodeScalarLiteralConvertible` conformance to your custom type, +/// implement the required initializer. 
public protocol UnicodeScalarLiteralConvertible { + /// A type that can represent a Unicode scalar literal. + /// + /// Valid types for `UnicodeScalarLiteralType` are `UnicodeScalar`, + /// `String`, and `StaticString`. associatedtype UnicodeScalarLiteralType : _BuiltinUnicodeScalarLiteralConvertible - /// Create an instance initialized to `value`. + + /// Creates an instance initialized to the given value. init(unicodeScalarLiteral value: UnicodeScalarLiteralType) } @@ -317,14 +337,40 @@ public protocol _BuiltinExtendedGraphemeClusterLiteralConvertible isASCII: Builtin.Int1) } -/// Conforming types can be initialized with string literals -/// containing a single [Unicode extended grapheme cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster). +/// A type that can be initialized with a string literal containing a single +/// extended grapheme cluster. +/// +/// An *extended grapheme cluster* is a group of one or more Unicode code +/// points that approximates a single user-perceived character. Many +/// individual characters, such as "é", "김", and "🇮🇳", can be made up of +/// multiple Unicode code points. These code points are combined by Unicode's +/// boundary algorithms into extended grapheme clusters. +/// +/// The `String`, `StaticString`, and `Character` types conform to the +/// `ExtendedGraphemeClusterLiteralConvertible` protocol. You can initialize a +/// variable or constant of any of these types using a string literal that +/// holds a single character. +/// +/// let snowflake: Character = "❄︎" +/// print(snowflake) +/// // Prints "❄︎" +/// +/// Conforming to ExtendedGraphemeClusterLiteralConvertible +/// ======================================================= +/// +/// To add `ExtendedGraphemeClusterLiteralConvertible` conformance to your +/// custom type, implement the required initializer. 
public protocol ExtendedGraphemeClusterLiteralConvertible : UnicodeScalarLiteralConvertible { + /// A type that can represent an extended grapheme cluster literal. + /// + /// Valid types for `ExtendedGraphemeClusterLiteralType` are `Character`, + /// `String`, and `StaticString`. associatedtype ExtendedGraphemeClusterLiteralType : _BuiltinExtendedGraphemeClusterLiteralConvertible - /// Create an instance initialized to `value`. + + /// Creates an instance initialized to the given value. init(extendedGraphemeClusterLiteral value: ExtendedGraphemeClusterLiteralType) } @@ -345,14 +391,30 @@ public protocol _BuiltinUTF16StringLiteralConvertible utf16CodeUnitCount: Builtin.Word) } -/// Conforming types can be initialized with arbitrary string literals. +/// A type that can be initialized with a string literal. +/// +/// The `String` and `StaticString` types conform to the +/// `StringLiteralConvertible` protocol. You can initialize a variable or +/// constant of either of these types using a string literal of any length. +/// +/// let picnicGuest = "Deserving porcupine" +/// +/// Conforming to StringLiteralConvertible +/// ====================================== +/// +/// To add `StringLiteralConvertible` conformance to your custom type, +/// implement the required initializer. public protocol StringLiteralConvertible : ExtendedGraphemeClusterLiteralConvertible { // FIXME: when we have default function implementations in protocols, provide // an implementation of init(extendedGraphemeClusterLiteral:). - + + /// A type that can represent a string literal. + /// + /// Valid types for `StringLiteralType` are `String` and `StaticString`. associatedtype StringLiteralType : _BuiltinStringLiteralConvertible - /// Create an instance initialized to `value`. + + /// Creates an instance initialized to the given string value. 
init(stringLiteral value: StringLiteralType) } @@ -540,12 +602,80 @@ public protocol DictionaryLiteralConvertible { init(dictionaryLiteral elements: (Key, Value)...) } -/// Conforming types can be initialized with string interpolations -/// containing `\(`...`)` clauses. +/// A type that can be initialized by string interpolation with a string +/// literal that includes expressions. +/// +/// Use string interpolation to include one or more expressions in a string +/// literal, wrapped in a set of parentheses and prefixed by a backslash. For +/// example: +/// +/// let price = 2 +/// let number = 3 +/// let message = "One cookie: $\(price), \(number) cookies: $\(price * number)." +/// print(message) +/// // Prints "One cookie: $2, 3 cookies: $6." +/// +/// Conforming to the StringInterpolationConvertible Protocol +/// ========================================================= +/// +/// To use string interpolation to initialize instances of your custom type, +/// implement the required initializers for `StringInterpolationConvertible` +/// conformance. String interpolation is a multiple-step initialization +/// process. When you use string interpolation, the following steps occur: +/// +/// 1. The string literal is broken into pieces. Each segment of the string +/// literal before, between, and after any included expressions, along with +/// the individual expressions themselves, is passed to the +/// `init(stringInterpolationSegment:)` initializer. +/// 2. The results of those calls are passed to the +/// `init(stringInterpolation:)` initializer in the order in which they +/// appear in the string literal.
+/// +/// In other words, initializing the `message` constant in the example above +/// using string interpolation is equivalent to the following code: +/// +/// let message = String(stringInterpolation: +/// String(stringInterpolationSegment: "One cookie: $"), +/// String(stringInterpolationSegment: price), +/// String(stringInterpolationSegment: ", "), +/// String(stringInterpolationSegment: number), +/// String(stringInterpolationSegment: " cookies: $"), +/// String(stringInterpolationSegment: price * number), +/// String(stringInterpolationSegment: ".")) public protocol StringInterpolationConvertible { - /// Create an instance by concatenating the elements of `strings`. + /// Creates an instance by concatenating the given values. + /// + /// Do not call this initializer directly. It is used by the compiler when + /// you use string interpolation. For example: + /// + /// let s = "\(5) x \(2) = \(5 * 2)" + /// print(s) + /// // Prints "5 x 2 = 10" + /// + /// After calling `init(stringInterpolationSegment:)` with each segment of + /// the string literal, this initializer is called with their string + /// representations. + /// + /// - Parameter strings: An array of instances of the conforming type. init(stringInterpolation strings: Self...) - /// Create an instance containing `expr`'s `print` representation. + + /// Creates an instance containing the appropriate representation for the + /// given value. + /// + /// Do not call this initializer directly. It is used by the compiler for + /// each string interpolation segment when you use string interpolation. For + /// example: + /// + /// let s = "\(5) x \(2) = \(5 * 2)" + /// print(s) + /// // Prints "5 x 2 = 10" + /// + /// This initializer is called five times when processing the string literal + /// in the example above; once each for the following: the integer `5`, the + /// string `" x "`, the integer `2`, the string `" = "`, and the result of + /// the expression `5 * 2`. 
+ /// + /// - Parameter expr: The expression to represent. init(stringInterpolationSegment expr: T) } diff --git a/stdlib/public/core/ExistentialCollection.swift.gyb b/stdlib/public/core/ExistentialCollection.swift.gyb index a15d0ccb981f7..b8c741a455096 100644 --- a/stdlib/public/core/ExistentialCollection.swift.gyb +++ b/stdlib/public/core/ExistentialCollection.swift.gyb @@ -968,11 +968,11 @@ public struct ${Self} return AnyIndex(_box: _box._startIndex) } - /// The collection's "past the end" position. + /// The collection's "past the end" position---that is, the position one + /// greater than the last valid subscript argument. /// - /// `endIndex` is not a valid argument to `subscript`, and is always - /// reachable from `startIndex` by zero or more applications of - /// `index(after:)`. + /// `endIndex` is always reachable from `startIndex` by zero or more + /// applications of `index(after:)`. public var endIndex: AnyIndex { return AnyIndex(_box: _box._endIndex) } diff --git a/stdlib/public/core/Filter.swift.gyb b/stdlib/public/core/Filter.swift.gyb index 38a0265262848..624a34f1c94cc 100644 --- a/stdlib/public/core/Filter.swift.gyb +++ b/stdlib/public/core/Filter.swift.gyb @@ -215,13 +215,11 @@ public struct ${Self}< return LazyFilterIndex(base: index) } - /// The collection's "past the end" position. + /// The collection's "past the end" position---that is, the position one + /// greater than the last valid subscript argument. /// - /// `endIndex` is not a valid argument to `subscript`, and is always - /// reachable from `startIndex` by zero or more applications of - /// `index(after:)`. - /// - /// - Complexity: O(1). + /// `endIndex` is always reachable from `startIndex` by zero or more + /// applications of `index(after:)`. 
public var endIndex: Index { return LazyFilterIndex(base: _base.endIndex) } diff --git a/stdlib/public/core/HashedCollections.swift.gyb b/stdlib/public/core/HashedCollections.swift.gyb index d33548dc5507e..ed24bb4ee5a1b 100644 --- a/stdlib/public/core/HashedCollections.swift.gyb +++ b/stdlib/public/core/HashedCollections.swift.gyb @@ -503,10 +503,10 @@ public struct Set : return _variantStorage.startIndex } - /// The "past the end" position for iterating members of the set. + /// The "past the end" position for the set---that is, the position one + /// greater than the last valid subscript argument. /// - /// The `endIndex` property is never a valid subscript argument. If the set - /// is empty, `endIndex` is equal to `startIndex`. + /// If the set is empty, `endIndex` is equal to `startIndex`. public var endIndex: Index { return _variantStorage.endIndex } @@ -690,7 +690,7 @@ public struct Set : // /// Creates a set containing the elements of the given array literal. /// - /// Don't directly call this initializer, which is used by the compiler when + /// Do not call this initializer directly. It is used by the compiler when /// you use an array literal. Instead, create a new set using an array /// literal as its value by enclosing a comma-separated list of values in /// square brackets. You can use an array literal anywhere a set is expected @@ -1105,9 +1105,8 @@ public struct Set : /// /// Two sets that are equal will always have equal hash values. /// - /// - Note: The hash value is not guaranteed to be stable across - /// different invocations of the same program. Do not persist the hash value - /// across program runs. + /// Hash values are not guaranteed to be equal across different executions of + /// your program. Do not save hash values to use during a future execution. 
public var hashValue: Int { // FIXME: Cache Set hashValue var result: Int = _mixInt(0) @@ -1712,7 +1711,8 @@ public struct Dictionary : return _variantStorage.startIndex } - /// The dictionary's "past the end" position. + /// The dictionary's "past the end" position---that is, the position one + /// greater than the last valid subscript argument. /// /// If the collection is empty, `endIndex` is equal to `startIndex`. /// @@ -1988,7 +1988,7 @@ public struct Dictionary : /// Creates a dictionary initialized with a dictionary literal. /// - /// Don't directly call this initializer, which is called by the compiler to + /// Do not call this initializer directly. It is called by the compiler to /// handle dictionary literals. To use a dictionary literal as the initial /// value of a dictionary, enclose a comma-separated list of key-value pairs /// in square brackets. diff --git a/stdlib/public/core/ImplicitlyUnwrappedOptional.swift b/stdlib/public/core/ImplicitlyUnwrappedOptional.swift index 5c2a441a53203..99175f39e4828 100644 --- a/stdlib/public/core/ImplicitlyUnwrappedOptional.swift +++ b/stdlib/public/core/ImplicitlyUnwrappedOptional.swift @@ -30,8 +30,8 @@ public enum ImplicitlyUnwrappedOptional : NilLiteralConvertible { /// Creates an instance initialized with `nil`. /// - /// Don't use this initializer directly; it is used by the compiler when you - /// initialize an `Optional` instance with a `nil` literal. For example: + /// Do not call this initializer directly. It is used by the compiler when + /// you initialize an implicitly unwrapped optional with `nil`. For example: /// /// let i: Index! 
= nil @_transparent diff --git a/stdlib/public/core/LazyCollection.swift.gyb b/stdlib/public/core/LazyCollection.swift.gyb index 4e91a37b3924f..c07d2691c9593 100644 --- a/stdlib/public/core/LazyCollection.swift.gyb +++ b/stdlib/public/core/LazyCollection.swift.gyb @@ -116,11 +116,11 @@ extension ${Self} : ${TraversalCollection} { return _base.startIndex } - /// The collection's "past the end" position. + /// The collection's "past the end" position---that is, the position one + /// greater than the last valid subscript argument. /// - /// `endIndex` is not a valid argument to `subscript`, and is always - /// reachable from `startIndex` by zero or more applications of - /// `index(after:)`. + /// `endIndex` is always reachable from `startIndex` by zero or more + /// applications of `index(after:)`. public var endIndex: Base.Index { return _base.endIndex } diff --git a/stdlib/public/core/LifetimeManager.swift b/stdlib/public/core/LifetimeManager.swift index 26aca644f00b8..519e721369c8e 100644 --- a/stdlib/public/core/LifetimeManager.swift +++ b/stdlib/public/core/LifetimeManager.swift @@ -30,9 +30,16 @@ public func withExtendedLifetime( extension String { - /// Invoke `f` on the contents of this string, represented as - /// a nul-terminated array of char, ensuring that the array's - /// lifetime extends through the execution of `f`. + /// Invokes the given closure on the contents of the string, represented as a + /// pointer to a null-terminated sequence of UTF-8 code units. + /// + /// The `withCString(_:)` method ensures that the sequence's lifetime extends + /// through the execution of `f`. + /// + /// - Parameter f: A closure that takes a pointer to the string's UTF-8 code + /// unit sequence as its sole argument. If the closure has a return value, + /// it is used as the return value of the `withCString(_:)` method. + /// - Returns: The return value of the `f` closure, if any. 
public func withCString( _ f: @noescape (UnsafePointer) throws -> Result ) rethrows -> Result { diff --git a/stdlib/public/core/Mirror.swift b/stdlib/public/core/Mirror.swift index f11ed0cb2f61f..6de2fb5b02d3f 100644 --- a/stdlib/public/core/Mirror.swift +++ b/stdlib/public/core/Mirror.swift @@ -821,7 +821,7 @@ extension DictionaryLiteral : RandomAccessCollection { /// `endIndex`. public var startIndex: Int { return 0 } - /// The collection's "past the end" position, or one + /// The collection's "past the end" position---that is, the position one /// greater than the last valid subscript argument. /// /// If the `DictionaryLiteral` instance is empty, `endIndex` is equal to @@ -845,16 +845,44 @@ extension DictionaryLiteral : RandomAccessCollection { } extension String { - /// Initialize `self` with the textual representation of `instance`. + /// Creates a string representing the given value. + /// + /// Use this initializer to convert an instance of any type to its preferred + /// representation as a `String` instance. The initializer creates the + /// string representation of `instance` in the first of the following ways + /// that applies, depending on its protocol conformance: + /// + /// - If `instance` conforms to the `Streamable` protocol, the result is + /// obtained by calling `instance.write(to: s)` on an empty string `s`. + /// - If `instance` conforms to the `CustomStringConvertible` protocol, the + /// result is `instance.description`. + /// - If `instance` conforms to the `CustomDebugStringConvertible` protocol, + /// the result is `instance.debugDescription`. + /// - An unspecified result is supplied automatically by the Swift standard + /// library. + /// + /// For example, this custom `Point` struct uses the default representation + /// supplied by the standard library. 
+ /// + /// struct Point { + /// let x: Int, y: Int + /// } + /// + /// let p = Point(x: 21, y: 30) + /// print(String(p)) + /// // Prints "Point(x: 21, y: 30)" + /// + /// After adding `CustomStringConvertible` conformance by implementing the + /// `description` property, `Point` provides its own custom representation. + /// + /// extension Point: CustomStringConvertible { + /// var description: String { + /// return "(\(x), \(y))" + /// } + /// } /// - /// * If `Subject` conforms to `Streamable`, the result is obtained by - /// calling `instance.write(to: s)` on an empty string `s`. - /// * Otherwise, if `Subject` conforms to `CustomStringConvertible`, the - /// result is `instance`'s `description` - /// * Otherwise, if `Subject` conforms to `CustomDebugStringConvertible`, - /// the result is `instance`'s `debugDescription` - /// * Otherwise, an unspecified result is supplied automatically by - /// the Swift standard library. + /// print(String(p)) + /// // Prints "(21, 30)" /// /// - SeeAlso: `String.init(reflecting: Subject)` public init(_ instance: Subject) { @@ -862,20 +890,49 @@ extension String { _print_unlocked(instance, &self) } - /// Initialize `self` with a detailed textual representation of - /// `subject`, suitable for debugging. + /// Creates a string with a detailed representation of the given value, + /// suitable for debugging. /// - /// * If `Subject` conforms to `CustomDebugStringConvertible`, the result - /// is `subject`'s `debugDescription`. + /// Use this initializer to convert an instance of any type to its custom + /// debugging representation. The initializer creates the string + /// representation of `subject` in the first of the following ways that + /// applies, depending on its protocol conformance: /// - /// * Otherwise, if `Subject` conforms to `CustomStringConvertible`, - /// the result is `subject`'s `description`. 
- /// - /// * Otherwise, if `Subject` conforms to `Streamable`, the result is + /// - If `subject` conforms to the `CustomDebugStringConvertible` protocol, + /// the result is `subject.debugDescription`. + /// - If `subject` conforms to the `CustomStringConvertible` protocol, the + /// result is `subject.description`. + /// - If `subject` conforms to the `Streamable` protocol, the result is /// obtained by calling `subject.write(to: s)` on an empty string `s`. + /// - An unspecified result is supplied automatically by the Swift standard + /// library. + /// + /// For example, this custom `Point` struct uses the default representation + /// supplied by the standard library. + /// + /// struct Point { + /// let x: Int, y: Int + /// } + /// + /// let p = Point(x: 21, y: 30) + /// print(String(reflecting: p)) + /// // Prints "p: Point = { + /// // x = 21 + /// // y = 30 + /// // }" + /// + /// After adding `CustomDebugStringConvertible` conformance by implementing + /// the `debugDescription` property, `Point` provides its own custom + /// debugging representation. + /// + /// extension Point: CustomDebugStringConvertible { + /// var debugDescription: String { + /// return "Point(x: \(x), y: \(y))" + /// } + /// } /// - /// * Otherwise, an unspecified result is supplied automatically by - /// the Swift standard library. + /// print(String(reflecting: p)) + /// // Prints "Point(x: 21, y: 30)" /// /// - SeeAlso: `String.init(Subject)` public init(reflecting subject: Subject) { diff --git a/stdlib/public/core/MutableCollection.swift b/stdlib/public/core/MutableCollection.swift index ac9575251d7d8..dc60e8d6418dd 100644 --- a/stdlib/public/core/MutableCollection.swift +++ b/stdlib/public/core/MutableCollection.swift @@ -36,8 +36,8 @@ public protocol MutableIndexable : Indexable { /// If the collection is empty, `startIndex` is equal to `endIndex`. 
var startIndex: Index { get } - /// The collection's "past the end" position, or one greater than the last - /// valid subscript argument. + /// The collection's "past the end" position---that is, the position one + /// greater than the last valid subscript argument. /// /// When you need a range that includes the last element of a collection, use /// the half-open range operator (`..<`) with `endIndex`. The `..<` operator diff --git a/stdlib/public/core/Optional.swift b/stdlib/public/core/Optional.swift index 88c85f8330e1c..10cebd1028385 100644 --- a/stdlib/public/core/Optional.swift +++ b/stdlib/public/core/Optional.swift @@ -196,7 +196,7 @@ public enum Optional : NilLiteralConvertible { /// Creates an instance initialized with `nil`. /// - /// Don't use this initializer directly; it is used by the compiler when you + /// Do not call this initializer directly. It is used by the compiler when you /// initialize an `Optional` instance with a `nil` literal. For example: /// /// var i: Index? = nil diff --git a/stdlib/public/core/OutputStream.swift b/stdlib/public/core/OutputStream.swift index e42669cf9bb89..86820d21ac53e 100644 --- a/stdlib/public/core/OutputStream.swift +++ b/stdlib/public/core/OutputStream.swift @@ -16,12 +16,60 @@ import SwiftShims // Input/Output interfaces //===----------------------------------------------------------------------===// -/// A target of text streaming operations. +/// A type that can be the target of text-streaming operations. +/// +/// You can send the output of the standard library's `print(_:to:)` and +/// `dump(_:to:)` functions to an instance of a type that conforms to the +/// `OutputStream` protocol instead of to standard output. Swift's `String` +/// type conforms to `OutputStream` already, so you can capture the output +/// from `print(_:to:)` and `dump(_:to:)` in a string instead of logging it to +/// standard output. +/// +/// var s = "" +/// for n in 1 ... 
5 { +/// print(n, terminator: "", to: &s) +/// } +/// // s == "12345" +/// +/// Conforming to the OutputStream Protocol +/// ======================================= +/// +/// To make your custom type conform to the `OutputStream` protocol, implement +/// the required `write(_:)` method. Functions that use an `OutputStream` +/// target may call `write(_:)` multiple times per writing operation. +/// +/// As an example, here's an implementation of an output stream that converts +/// any input to its plain ASCII representation before sending it to standard +/// output. +/// +/// struct ASCIILogger: OutputStream { +/// mutating func write(_ string: String) { +/// let ascii = string.unicodeScalars.lazy.map { scalar in +/// scalar == "\n" +/// ? "\n" +/// : scalar.escaped(asASCII: true) +/// } +/// print(ascii.joined(separator: ""), terminator: "") +/// } +/// } +/// +/// The `ASCIILogger` type's `write(_:)` method processes its string input by +/// escaping each Unicode scalar, with the exception of `"\n"` line returns. +/// By sending the output of the `print(_:to:)` function to an instance of +/// `ASCIILogger`, you invoke its `write(_:)` method. +/// +/// let s = "Hearts ♡ and Diamonds ♢" +/// print(s) +/// // Prints "Hearts ♡ and Diamonds ♢" +/// +/// var asciiLogger = ASCIILogger() +/// print(s, to: &asciiLogger) +/// // Prints "Hearts \u{2661} and Diamonds \u{2662}" public protocol OutputStream { mutating func _lock() mutating func _unlock() - /// Append the given `string` to this stream. + /// Appends the given string to the stream. mutating func write(_ string: String) } @@ -30,51 +78,143 @@ extension OutputStream { public mutating func _unlock() {} } -/// A source of text streaming operations. `Streamable` instances can -/// be written to any *output stream*. +/// A source of text-streaming operations. 
+/// +/// Instances of types that conform to the `Streamable` protocol can write +/// their value to instances of any type that conforms to the `OutputStream` +/// protocol. The Swift standard library's text-related types, `String`, +/// `Character`, and `UnicodeScalar`, all conform to `Streamable`. /// -/// For example: `String`, `Character`, `UnicodeScalar`. +/// Conforming to the Streamable Protocol +/// ===================================== +/// +/// To add `Streamable` conformance to a custom type, implement the required +/// `write(to:)` method. Call the given output stream's `write(_:)` method in +/// your implementation. public protocol Streamable { - /// Write a textual representation of `self` into `target`. + /// Writes a textual representation of this instance into the given output + /// stream. func write(to target: inout Target) } /// A type with a customized textual representation. /// -/// This textual representation is used when values are written to an -/// *output stream*, for example, by `print`. +/// Types that conform to the `CustomStringConvertible` protocol can provide +/// their own representation to be used when converting an instance to a +/// string. The `String(_:)` initializer is the preferred way to convert an +/// instance of *any* type to a string. If the passed instance conforms to +/// `CustomStringConvertible`, the `String(_:)` initializer and the +/// `print(_:)` function use the instance's custom `description` property. +/// +/// Accessing a type's `description` property directly or using +/// `CustomStringConvertible` as a generic constraint is discouraged. +/// +/// Conforming to the CustomStringConvertible Protocol +/// ================================================== /// -/// - Note: `String(instance)` will work for an `instance` of *any* -/// type, returning its `description` if the `instance` happens to be -/// `CustomStringConvertible`. 
Using `CustomStringConvertible` as a -/// generic constraint, or accessing a conforming type's `description` -/// directly, is therefore discouraged. +/// Add `CustomStringConvertible` conformance to your custom types by defining +/// a `description` property. +/// +/// For example, this custom `Point` struct uses the default representation +/// supplied by the standard library: +/// +/// struct Point { +/// let x: Int, y: Int +/// } +/// +/// let p = Point(x: 21, y: 30) +/// print(p) +/// // Prints "Point(x: 21, y: 30)" +/// +/// After implementing the `description` property and declaring +/// `CustomStringConvertible` conformance, the `Point` type provides its own +/// custom representation. +/// +/// extension Point: CustomStringConvertible { +/// var description: String { +/// return "(\(x), \(y))" +/// } +/// } +/// +/// print(p) +/// // Prints "(21, 30)" /// /// - SeeAlso: `String.init(T)`, `CustomDebugStringConvertible` public protocol CustomStringConvertible { - /// A textual representation of the instance. + /// A textual representation of this instance. + /// + /// Instead of accessing this property directly, convert an instance of any + /// type to a string by using the `String(_:)` initializer. For example: + /// + /// struct Point: CustomStringConvertible { + /// let x: Int, y: Int + /// + /// var description: String { + /// return "(\(x), \(y))" + /// } + /// } + /// + /// let p = Point(x: 21, y: 30) + /// let s = String(p) + /// print(s) + /// // Prints "(21, 30)" + /// + /// The conversion of `p` to a string in the assignment to `s` uses the + /// `Point` type's `description` property. var description: String { get } } -/// A type with a customized textual representation suitable for -/// debugging purposes. +/// A type with a customized textual representation suitable for debugging +/// purposes. +/// +/// Swift provides a default debugging textual representation for any type. 
+/// That default representation is used by the `String(reflecting:)` +/// initializer and the `debugPrint(_:)` function for types that don't provide +/// their own. To customize that representation, make your type conform to the +/// `CustomDebugStringConvertible` protocol. +/// +/// Because the `String(reflecting:)` initializer works for instances of *any* +/// type, returning an instance's `debugDescription` if the value passed +/// conforms to `CustomDebugStringConvertible`, accessing a type's +/// `debugDescription` property directly or using +/// `CustomDebugStringConvertible` as a generic constraint is discouraged. +/// +/// Conforming to the CustomDebugStringConvertible Protocol +/// ======================================================= +/// +/// Add `CustomDebugStringConvertible` conformance to your custom types by +/// defining a `debugDescription` property. +/// +/// For example, this custom `Point` struct uses the default representation +/// supplied by the standard library: +/// +/// struct Point { +/// let x: Int, y: Int +/// } +/// +/// let p = Point(x: 21, y: 30) +/// print(String(reflecting: p)) +/// // Prints "p: Point = { +/// // x = 21 +/// // y = 30 +/// // }" +/// +/// After adding `CustomDebugStringConvertible` conformance by implementing the +/// `debugDescription` property, `Point` provides its own custom debugging +/// representation. /// -/// This textual representation is used when values are written to an -/// *output stream* by `debugPrint`, and is -/// typically more verbose than the text provided by a -/// `CustomStringConvertible`'s `description` property. +/// extension Point: CustomDebugStringConvertible { +/// var debugDescription: String { +/// return "Point(x: \(x), y: \(y))" +/// } +/// } /// -/// - Note: `String(reflecting: instance)` will work for an `instance` -/// of *any* type, returning its `debugDescription` if the `instance` -/// happens to be `CustomDebugStringConvertible`. 
Using -/// `CustomDebugStringConvertible` as a generic constraint, or -/// accessing a conforming type's `debugDescription` directly, is -/// therefore discouraged. +/// print(String(reflecting: p)) +/// // Prints "Point(x: 21, y: 30)" /// -/// - SeeAlso: `String.init(reflecting: T)`, -/// `CustomStringConvertible` +/// - SeeAlso: `String.init(reflecting: T)`, `CustomStringConvertible` public protocol CustomDebugStringConvertible { - /// A textual representation of the instance, suitable for debugging. + /// A textual representation of this instance, suitable for debugging. var debugDescription: String { get } } @@ -350,7 +490,9 @@ internal struct _Stdout : OutputStream { } extension String : OutputStream { - /// Append `other` to this stream. + /// Appends the given string to this string. + /// + /// - Parameter other: A string to append. public mutating func write(_ other: String) { self += other } @@ -361,21 +503,28 @@ extension String : OutputStream { //===----------------------------------------------------------------------===// extension String : Streamable { - /// Write a textual representation of `self` into `target`. + /// Writes the string into the given output stream. + /// + /// - Parameter target: An output stream. public func write(to target: inout Target) { target.write(self) } } extension Character : Streamable { - /// Write a textual representation of `self` into `target`. + /// Writes the character into the given output stream. + /// + /// - Parameter target: An output stream. public func write(to target: inout Target) { target.write(String(self)) } } extension UnicodeScalar : Streamable { - /// Write a textual representation of `self` into `target`. + /// Writes the textual representation of the Unicode scalar into the given + /// output stream. + /// + /// - Parameter target: An output stream. 
public func write(to target: inout Target) { target.write(String(Character(self))) } diff --git a/stdlib/public/core/Policy.swift b/stdlib/public/core/Policy.swift index f4b81bbbcc9e8..6a932dc7d1fc4 100644 --- a/stdlib/public/core/Policy.swift +++ b/stdlib/public/core/Policy.swift @@ -1075,9 +1075,8 @@ public func ^= (lhs: inout T, rhs: T) { public protocol Hashable : Equatable { /// The hash value. /// - /// - Important: Hash values are not guaranteed to be equal across different - /// executions of your program. Do not save hash values to use during a - /// future execution. + /// Hash values are not guaranteed to be equal across different executions of + /// your program. Do not save hash values to use during a future execution. var hashValue: Int { get } } diff --git a/stdlib/public/core/SetAlgebra.swift b/stdlib/public/core/SetAlgebra.swift index 01ff13dce6129..2f462dac9993c 100644 --- a/stdlib/public/core/SetAlgebra.swift +++ b/stdlib/public/core/SetAlgebra.swift @@ -400,7 +400,7 @@ extension SetAlgebra { /// Creates a set containing the elements of the given array literal. /// - /// Don't directly call this initializer, which is used by the compiler when + /// Do not call this initializer directly. It is used by the compiler when /// you use an array literal. Instead, create a new set using an array /// literal as its value by enclosing a comma-separated list of values in /// square brackets. You can use an array literal anywhere a set is expected diff --git a/stdlib/public/core/SliceBuffer.swift b/stdlib/public/core/SliceBuffer.swift index 078d45a249e4b..6b00094d72f95 100644 --- a/stdlib/public/core/SliceBuffer.swift +++ b/stdlib/public/core/SliceBuffer.swift @@ -294,11 +294,11 @@ struct _SliceBuffer : _ArrayBufferProtocol, RandomAccessCollection { /// In an empty collection, `startIndex == endIndex`. public var startIndex: Int - /// The collection's "past the end" position. 
+ /// The collection's "past the end" position---that is, the position one + /// greater than the last valid subscript argument. /// - /// `endIndex` is not a valid argument to `subscript`, and is always - /// reachable from `startIndex` by zero or more applications of - /// `index(after:)`. + /// `endIndex` is always reachable from `startIndex` by zero or more + /// applications of `index(after:)`. public var endIndex: Int { get { return Int(endIndexAndFlags >> 1) diff --git a/stdlib/public/core/StaticString.swift b/stdlib/public/core/StaticString.swift index 20fca0185b3de..ee74749b2eb5f 100644 --- a/stdlib/public/core/StaticString.swift +++ b/stdlib/public/core/StaticString.swift @@ -17,16 +17,13 @@ // are involved in its construction. This feature is crucial for // preventing infinite recursion even in non-asserting cases. -/// A simple string designed to represent text that is "knowable at -/// compile-time". +/// A string type designed to represent text that is known at compile time. /// -/// Logically speaking, each instance looks something like this: -/// -/// enum StaticString { -/// case ascii(start: UnsafePointer, count: Int) -/// case utf8(start: UnsafePointer, count: Int) -/// case scalar(UnicodeScalar) -/// } +/// Instances of the `StaticString` type are immutable. `StaticString` provides +/// limited, pointer-based access to its contents, unlike Swift's more +/// commonly used `String` type. A static string can store its value as a +/// pointer to an ASCII code unit sequence, as a pointer to a UTF-8 code unit +/// sequence, or as a single Unicode scalar value. @_fixed_layout public struct StaticString : _BuiltinUnicodeScalarLiteralConvertible, @@ -56,10 +53,11 @@ public struct StaticString /// ASCII. internal var _flags: Builtin.Int8 - /// A pointer to the beginning of UTF-8 code units. + /// A pointer to the beginning of the string's UTF-8 encoded representation. 
/// - /// - Precondition: `self` stores a pointer to either ASCII or UTF-8 code - /// units. + /// The static string must store a pointer to either ASCII or UTF-8 code + /// units. Accessing this property when `hasPointerRepresentation` is + /// `false` triggers a runtime error. @_transparent public var utf8Start: UnsafePointer { _precondition( @@ -70,7 +68,9 @@ public struct StaticString /// The stored Unicode scalar value. /// - /// - Precondition: `self` stores a single Unicode scalar value. + /// The static string must store a single Unicode scalar value. Accessing + /// this property when `hasPointerRepresentation` is `true` triggers a + /// runtime error. @_transparent public var unicodeScalar: UnicodeScalar { _precondition( @@ -79,11 +79,10 @@ public struct StaticString return UnicodeScalar(UInt32(UInt(_startPtrOrData))) } - /// If `self` stores a pointer to ASCII or UTF-8 code units, the - /// length in bytes of that data. + /// The length in bytes of the static string's ASCII or UTF-8 representation. /// - /// If `self` stores a single Unicode scalar value, the value of - /// `utf8CodeUnitCount` is unspecified. + /// - Warning: If the static string stores a single Unicode scalar value, the + /// value of `utf8CodeUnitCount` is unspecified. @_transparent public var utf8CodeUnitCount: Int { _precondition( @@ -92,25 +91,38 @@ public struct StaticString return Int(_utf8CodeUnitCount) } - /// `true` iff `self` stores a pointer to ASCII or UTF-8 code units. + /// A Boolean value indicating whether the static string stores a pointer to + /// ASCII or UTF-8 code units. @_transparent public var hasPointerRepresentation: Bool { return (UInt8(_flags) & 0x1) == 0 } - /// `true` if `self` stores a pointer to ASCII code units. + /// A Boolean value that is `true` if the static string stores a pointer to + /// ASCII code units. 
+ /// + /// Use this property in conjunction with `hasPointerRepresentation` to + /// determine whether a static string with pointer representation stores an + /// ASCII or UTF-8 code unit sequence. /// - /// If `self` stores a single Unicode scalar value, the value of - /// `isASCII` is unspecified. + /// - Warning: If the static string stores a single Unicode scalar value, the + /// value of `isASCII` is unspecified. @_transparent public var isASCII: Bool { return (UInt8(_flags) & 0x2) != 0 } - /// Invoke `body` with a buffer containing the UTF-8 code units of - /// `self`. + /// Invokes the given closure with a buffer containing the static string's + /// UTF-8 code unit sequence. + /// + /// This method works regardless of whether the static string stores a + /// pointer or a single Unicode scalar value. /// - /// This method works regardless of what `self` stores. + /// - Parameter body: A closure that takes a buffer pointer to the static + /// string's UTF-8 code unit sequence as its sole argument. If the closure + /// has a return value, it is used as the return value of the + /// `withUTF8Buffer(invoke:)` method. + /// - Returns: The return value of the `body` closure, if any. public func withUTF8Buffer( invoke body: @noescape (UnsafeBufferPointer) -> R) -> R { if hasPointerRepresentation { @@ -130,7 +142,7 @@ public struct StaticString } } - /// Create an empty instance. + /// Creates an empty static string. @_transparent public init() { self = "" @@ -169,7 +181,10 @@ public struct StaticString self = StaticString(unicodeScalar: value) } - /// Create an instance initialized to `value`. + /// Creates an instance initialized to a single Unicode scalar. + /// + /// Do not call this initializer directly. It may be used by the compiler + /// when you initialize a static string with a Unicode scalar. 
@effects(readonly) @_transparent public init(unicodeScalarLiteral value: StaticString) { @@ -190,7 +205,11 @@ public struct StaticString ) } - /// Create an instance initialized to `value`. + /// Creates an instance initialized to a single character that is made up of + /// one or more Unicode code points. + /// + /// Do not call this initializer directly. It may be used by the compiler + /// when you initialize a static string using an extended grapheme cluster. @effects(readonly) @_transparent public init(extendedGraphemeClusterLiteral value: StaticString) { @@ -210,14 +229,17 @@ public struct StaticString isASCII: isASCII) } - /// Create an instance initialized to `value`. + /// Creates an instance initialized to the value of a string literal. + /// + /// Do not call this initializer directly. It may be used by the compiler + /// when you initialize a static string using a string literal. @effects(readonly) @_transparent public init(stringLiteral value: StaticString) { self = value } - /// A textual representation of `self`. + /// A string representation of the static string. public var description: String { return withUTF8Buffer { (buffer) in @@ -225,7 +247,7 @@ public struct StaticString } } - /// A textual representation of `self`, suitable for debugging. + /// A textual representation of the static string, suitable for debugging. public var debugDescription: String { return self.description.debugDescription } diff --git a/stdlib/public/core/String.swift b/stdlib/public/core/String.swift index 214e27ce2c068..57985b57a9030 100644 --- a/stdlib/public/core/String.swift +++ b/stdlib/public/core/String.swift @@ -15,101 +15,276 @@ import SwiftShims // FIXME: complexity documentation for most of methods on String is ought to be // qualified with "amortized" at least, as Characters are variable-length. -/// An arbitrary Unicode string value. -/// -/// Unicode-Correct -/// =============== -/// -/// Swift strings are designed to be Unicode-correct. 
In particular, -/// the APIs make it easy to write code that works correctly, and does -/// not surprise end-users, regardless of where you venture in the -/// Unicode character space. For example, the `==` operator checks -/// for [Unicode canonical -/// equivalence](http://www.unicode.org/glossary/#deterministic_comparison), -/// so two different representations of the same string will always -/// compare equal. -/// -/// Locale-Insensitive -/// ================== -/// -/// The fundamental operations on Swift strings are not sensitive to -/// locale settings. That's because, for example, the validity of a -/// `Dictionary` in a running program depends on a given -/// string comparison having a single, stable result. Therefore, -/// Swift always uses the default, -/// un-[tailored](http://www.unicode.org/glossary/#tailorable) Unicode -/// algorithms for basic string operations. -/// -/// Importing `Foundation` endows swift strings with the full power of -/// the `NSString` API, which allows you to choose more complex -/// locale-sensitive operations explicitly. -/// -/// Value Semantics -/// =============== -/// -/// Each string variable, `let` binding, or stored property has an -/// independent value, so mutations to the string are not observable -/// through its copies: -/// -/// var a = "foo" -/// var b = a -/// b.append("bar") -/// print("a=\(a), b=\(b)") // a=foo, b=foobar -/// -/// Strings use Copy-on-Write so that their data is only copied -/// lazily, upon mutation, when more than one string instance is using -/// the same buffer. Therefore, the first in any sequence of mutating -/// operations may cost `O(N)` time and space, where `N` is the length -/// of the string's (unspecified) underlying representation. -/// -/// Views -/// ===== -/// -/// `String` is not itself a collection of anything. 
Instead, it has -/// properties that present the string's contents as meaningful -/// collections: -/// -/// - `characters`: a collection of `Character` ([extended grapheme -/// cluster](http://www.unicode.org/glossary/#extended_grapheme_cluster)) -/// elements, a unit of text that is meaningful to most humans. -/// -/// - `unicodeScalars`: a collection of `UnicodeScalar` ([Unicode -/// scalar -/// values](http://www.unicode.org/glossary/#unicode_scalar_value)) -/// the 21-bit codes that are the basic unit of Unicode. These -/// values are equivalent to UTF-32 code units. -/// -/// - `utf16`: a collection of `UTF16.CodeUnit`, the 16-bit -/// elements of the string's UTF-16 encoding. -/// -/// - `utf8`: a collection of `UTF8.CodeUnit`, the 8-bit -/// elements of the string's UTF-8 encoding. -/// -/// Growth and Capacity -/// =================== -/// -/// When a string's contiguous storage fills up, new storage must be -/// allocated and characters must be moved to the new storage. -/// `String` uses an exponential growth strategy that makes `append` a -/// constant time operation *when amortized over many invocations*. -/// -/// Objective-C Bridge -/// ================== -/// -/// `String` is bridged to Objective-C as `NSString`, and a `String` -/// that originated in Objective-C may store its characters in an -/// `NSString`. Since any arbitrary subclass of `NSString` can -/// become a `String`, there are no guarantees about representation or -/// efficiency in this case. Since `NSString` is immutable, it is -/// just as though the storage was shared by some copy: the first in -/// any sequence of mutating operations causes elements to be copied -/// into unique, contiguous storage which may cost `O(N)` time and -/// space, where `N` is the length of the string representation (or -/// more, if the underlying `NSString` has unusual performance -/// characteristics). +/// A Unicode string value. +/// +/// A string is a series of characters, such as `"Swift"`. 
Strings in Swift are +/// Unicode correct, locale insensitive, and designed to be efficient. The +/// `String` type bridges with the Objective-C class `NSString` and offers +/// interoperability with C functions that work with strings. +/// +/// You can create new strings using string literals or string interpolations. +/// A string literal is a series of characters enclosed in quotes. +/// +/// let greeting = "Welcome!" +/// +/// String interpolations are string literals that evaluate any included +/// expressions and convert the results to string form. String interpolations +/// are an easy way to build a string from multiple pieces. Wrap each +/// expression in a string interpolation in parentheses, prefixed by a +/// backslash. +/// +/// let name = "Rosa" +/// let personalizedGreeting = "Welcome, \(name)!" +/// +/// let price = 2 +/// let number = 3 +/// let cookiePrice = "\(number) cookies: $\(price * number)." +/// +/// Combine strings using the concatenation operator (`+`). +/// +/// let longerGreeting = greeting + " We're glad you're here!" +/// print(longerGreeting) +/// // Prints "Welcome! We're glad you're here!" +/// +/// Modifying and Comparing Strings +/// =============================== +/// +/// Strings always have value semantics. Modifying a copy of a string leaves +/// the original unaffected. +/// +/// var otherGreeting = greeting +/// otherGreeting += " Have a nice time!" +/// print(otherGreeting) +/// // Prints "Welcome! Have a nice time!" +/// +/// print(greeting) +/// // Prints "Welcome!" +/// +/// Comparing strings for equality using the is-equal-to operator (`==`) or a +/// relational operator (like `<` and `>=`) is always performed using the +/// Unicode canonical representation. This means that different +/// representations of a string compare as being equal.
+/// +/// let cafe1 = "Cafe\u{301}" +/// let cafe2 = "Café" +/// print(cafe1 == cafe2) +/// // Prints "true" +/// +/// The Unicode code point `"\u{301}"` modifies the preceding character to +/// include an accent, so `"e\u{301}"` has the same canonical representation +/// as the single Unicode code point `"é"`. +/// +/// Basic string operations are not sensitive to locale settings. This ensures +/// that string comparisons and other operations always have a single, stable +/// result, allowing strings to be used as keys in `Dictionary` instances and +/// for other purposes. +/// +/// Representing Strings: Views +/// =========================== +/// +/// A string is not itself a collection. Instead, it has properties that +/// present its contents as meaningful collections. Each of these collections +/// is a particular type of *view* of the string's visible and data +/// representation. +/// +/// To demonstrate the different views available for every string, the +/// following examples use this `String` instance: +/// +/// let cafe = "Cafe\u{301} du 🌍" +/// print(cafe) +/// // Prints "Café du 🌍" +/// +/// Character View +/// -------------- +/// +/// A string's `characters` property is a collection of *extended grapheme +/// clusters*, which approximate human-readable characters. Many individual +/// characters, such as "é", "김", and "🇮🇳", can be made up of multiple Unicode +/// code points. These code points are combined by Unicode's boundary +/// algorithms into extended grapheme clusters, represented by Swift's +/// `Character` type. Each element of the `characters` view is represented by +/// a `Character` instance. +/// +/// print(cafe.characters.count) +/// // Prints "9" +/// print(Array(cafe.characters)) +/// // Prints "["C", "a", "f", "é", " ", "d", "u", " ", "🌍"]" +/// +/// Each visible character in the `cafe` string is a separate element of the +/// `characters` view. 
+/// +/// Unicode Scalar View +/// ------------------- +/// +/// A string's `unicodeScalars` property is a collection of Unicode scalar +/// values, the 21-bit codes that are the basic unit of Unicode. Each scalar +/// value is represented by a `UnicodeScalar` instance and is equivalent to a +/// UTF-32 code unit. +/// +/// print(cafe.unicodeScalars.count) +/// // Prints "10" +/// print(Array(cafe.unicodeScalars)) +/// // Prints "["C", "a", "f", "e", "\u{0301}", " ", "d", "u", " ", "\u{0001F30D}"]" +/// print(cafe.unicodeScalars.map { $0.value }) +/// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 127757]" +/// +/// The `unicodeScalars` view's elements comprise each Unicode scalar value in +/// the `cafe` string. In particular, because `cafe` was declared using the +/// decomposed form of the `"é"` character, `unicodeScalars` contains the code +/// points for both the letter `"e"` (101) and the accent character `"´"` +/// (769). +/// +/// UTF-16 View +/// ----------- +/// +/// A string's `utf16` property is a collection of UTF-16 code units, the +/// 16-bit encoding form of the string's Unicode scalar values. Each code unit +/// is stored as a `UInt16` instance. +/// +/// print(cafe.utf16.count) +/// // Prints "11" +/// print(Array(cafe.utf16)) +/// // Prints "[67, 97, 102, 101, 769, 32, 100, 117, 32, 55356, 57101]" +/// +/// The elements of the `utf16` view are the code units for the string when +/// encoded in UTF-16. +/// +/// The elements of this collection match those accessed through indexed +/// `NSString` APIs. +/// +/// let nscafe = cafe as NSString +/// print(nscafe.length) +/// // Prints "11" +/// print(nscafe.character(at: 3)) +/// // Prints "101" +/// +/// UTF-8 View +/// ---------- +/// +/// A string's `utf8` property is a collection of UTF-8 code units, the 8-bit +/// encoding form of the string's Unicode scalar values. Each code unit is +/// stored as a `UInt8` instance. 
+/// +/// print(cafe.utf8.count) +/// // Prints "14" +/// print(Array(cafe.utf8)) +/// // Prints "[67, 97, 102, 101, 204, 129, 32, 100, 117, 32, 240, 159, 140, 141]" +/// +/// The elements of the `utf8` view are the code units for the string when +/// encoded in UTF-8. This representation matches the one used when `String` +/// instances are passed to C APIs. +/// +/// let cLength = strlen(cafe) +/// print(cLength) +/// // Prints "14" +/// +/// Counting the Length of a String +/// =============================== +/// +/// When you need to know the length of a string, you must first consider what +/// you'll use the length for. Are you measuring the number of characters that +/// will be displayed on the screen, or are you measuring the amount of +/// storage needed for the string in a particular encoding? A single string +/// can have greatly differing lengths when measured by its different views. +/// +/// For example, an ASCII character like the capital letter *A* is represented +/// by a single element in each of its four views. The Unicode scalar value of +/// *A* is `65`, which is small enough to fit in a single code unit in both +/// UTF-16 and UTF-8. +/// +/// let capitalA = "A" +/// print(capitalA.characters.count) +/// // Prints "1" +/// print(capitalA.unicodeScalars.count) +/// // Prints "1" +/// print(capitalA.utf16.count) +/// // Prints "1" +/// print(capitalA.utf8.count) +/// // Prints "1" +/// +/// +/// On the other hand, an emoji flag character is constructed from a pair of +/// Unicode scalar values, like `"\u{1F1F5}"` and `"\u{1F1F7}"`. Each of +/// these scalar values, in turn, is too large to fit into a single UTF-16 or +/// UTF-8 code unit. As a result, each view of the string `"🇵🇷"` reports a +/// different length.
+/// +/// let flag = "🇵🇷" +/// print(flag.characters.count) +/// // Prints "1" +/// print(flag.unicodeScalars.count) +/// // Prints "2" +/// print(flag.utf16.count) +/// // Prints "4" +/// print(flag.utf8.count) +/// // Prints "8" +/// +/// Accessing String View Elements +/// ============================== +/// +/// To find individual elements of a string, use the appropriate view for your +/// task. For example, to retrieve the first word of a longer string, you can +/// search the `characters` view for a space and then create a new string from +/// a prefix of the `characters` view up to that point. +/// +/// let name = "Marie Curie" +/// let firstSpace = name.characters.index(of: " ")! +/// let firstName = String(name.characters.prefix(upTo: firstSpace)) +/// print(firstName) +/// // Prints "Marie" +/// +/// You can convert an index into one of a string's views to an index into +/// another view. +/// +/// let firstSpaceUTF8 = firstSpace.samePosition(in: name.utf8) +/// print(Array(name.utf8.prefix(upTo: firstSpaceUTF8))) +/// // Prints "[77, 97, 114, 105, 101]" +/// +/// Performance Optimizations +/// ========================= +/// +/// Although strings in Swift have value semantics, strings use a copy-on-write +/// strategy to store their data in a buffer. This buffer can then be shared +/// by different copies of a string. A string's data is only copied lazily, +/// upon mutation, when more than one string instance is using the same +/// buffer. Therefore, the first in any sequence of mutating operations may +/// cost O(*n*) time and space. +/// +/// When a string's contiguous storage fills up, a new buffer must be allocated +/// and data must be moved to the new storage. String buffers use an +/// exponential growth strategy that makes appending to a string a constant +/// time operation when averaged over many append operations. 
+/// +/// Bridging between String and NSString +/// ==================================== +/// +/// Any `String` instance can be bridged to `NSString` using the type-cast +/// operator (`as`), and any `String` instance that originates in Objective-C +/// may use an `NSString` instance as its storage. Because any arbitrary +/// subclass of `NSString` can become a `String` instance, there are no +/// guarantees about representation or efficiency when a `String` instance is +/// backed by `NSString` storage. Because `NSString` is immutable, it is just +/// as though the storage was shared by a copy: The first in any sequence of +/// mutating operations causes elements to be copied into unique, contiguous +/// storage which may cost O(*n*) time and space, where *n* is the length of +/// the string's encoded representation (or more, if the underlying `NSString` +/// has unusual performance characteristics). +/// +/// For more information about the Unicode terms used in this discussion, see +/// the [Unicode.org glossary][glossary]. In particular, this discussion +/// mentions [extended grapheme clusters][clusters], +/// [Unicode scalar values][scalars], and [canonical equivalence][equivalence]. +/// +/// [glossary]: http://www.unicode.org/glossary/ +/// [clusters]: http://www.unicode.org/glossary/#extended_grapheme_cluster +/// [scalars]: http://www.unicode.org/glossary/#unicode_scalar_value +/// [equivalence]: http://www.unicode.org/glossary/#canonical_equivalent +/// +/// - SeeAlso: `String.CharacterView`, `String.UnicodeScalarView`, +/// `String.UTF16View`, `String.UTF8View` @_fixed_layout public struct String { - /// An empty `String`. + /// Creates an empty string. public init() { _core = _StringCore() } @@ -178,7 +353,11 @@ extension String : _BuiltinUnicodeScalarLiteralConvertible { } extension String : UnicodeScalarLiteralConvertible { - /// Create an instance initialized to `value`. + /// Creates an instance initialized to the given Unicode scalar value. 
+ /// + /// Don't call this initializer directly. It may be used by the compiler when + /// you initialize a string using a string literal that contains a single + /// Unicode scalar value. public init(unicodeScalarLiteral value: String) { self = value } @@ -200,7 +379,12 @@ extension String : _BuiltinExtendedGraphemeClusterLiteralConvertible { } extension String : ExtendedGraphemeClusterLiteralConvertible { - /// Create an instance initialized to `value`. + /// Creates an instance initialized to the given extended grapheme cluster + /// literal. + /// + /// Don't call this initializer directly. It may be used by the compiler when + /// you initialize a string using a string literal containing a single + /// extended grapheme cluster. public init(extendedGraphemeClusterLiteral value: String) { self = value } @@ -250,14 +434,22 @@ extension String : _BuiltinStringLiteralConvertible { } extension String : StringLiteralConvertible { - /// Create an instance initialized to `value`. + /// Creates an instance initialized to the given string value. + /// + /// Don't call this initializer directly. It is used by the compiler when you + /// initialize a string using a string literal. For example: + /// + /// let nextStop = "Clark & Lake" + /// + /// This assignment to the `nextStop` constant calls this string literal + /// initializer behind the scenes. public init(stringLiteral value: String) { self = value } } extension String : CustomDebugStringConvertible { - /// A textual representation of `self`, suitable for debugging. + /// A representation of the string that is suitable for debugging. public var debugDescription: String { var result = "\"" for us in self.unicodeScalars { @@ -298,8 +490,8 @@ extension String { #if _runtime(_ObjC) /// Compare two strings using the Unicode collation algorithm in the -/// deterministic comparison mode. (The strings which are equivalent according -/// to their NFD form are considered equal. 
Strings which are equivalent +/// deterministic comparison mode. (The strings which are equivalent according +/// to their NFD form are considered equal. Strings which are equivalent /// according to the plain Unicode collation algorithm are additionally ordered /// based on their NFD.) /// @@ -440,14 +632,31 @@ public func <(lhs: String, rhs: String) -> Bool { // Support for copy-on-write extension String { - /// Append the elements of `other` to `self`. + /// Appends the given string to this string. + /// + /// The following example builds a customized greeting by using the + /// `append(_:)` method: + /// + /// var greeting = "Hello, " + /// if let name = getUserName() { + /// greeting.append(name) + /// } else { + /// greeting.append("friend") + /// } + /// print(greeting) + /// // Prints "Hello, friend" + /// + /// - Parameter other: Another string. public mutating func append(_ other: String) { _core.append(other._core) } - /// Append `x` to `self`. + /// Appends the given Unicode scalar to the string. + /// + /// - Parameter x: A Unicode scalar value. /// - /// - Complexity: Amortized O(1). + /// - Complexity: Appending a Unicode scalar to a string averages to O(1) + /// over many additions. public mutating func append(_ x: UnicodeScalar) { _core.append(x) } @@ -469,13 +678,10 @@ func _stdlib_NSStringHashValuePointer(_ str: OpaquePointer, _ isASCII: Bool) -> #endif extension String : Hashable { - /// The hash value. - /// - /// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`. + /// The string's hash value. /// - /// - Note: The hash value is not guaranteed to be stable across - /// different invocations of the same program. Do not persist the - /// hash value across program runs. + /// Hash values are not guaranteed to be equal across different executions of + /// your program. Do not save hash values to use during a future execution. 
public var hashValue: Int { #if _runtime(_ObjC) // Mix random bits into NSString's hash so that clients don't rely on @@ -694,7 +900,15 @@ extension String { } } - /// Return `self` converted to lower case. + /// Returns a lowercase version of the string. + /// + /// Here's an example of transforming a string to all lowercase letters. + /// + /// let cafe = "Café 🍵" + /// print(cafe.lowercased()) + /// // Prints "café 🍵" + /// + /// - Returns: A lowercase copy of the string. /// /// - Complexity: O(n) public func lowercased() -> String { @@ -735,7 +949,15 @@ extension String { #endif } - /// Return `self` converted to upper case. + /// Returns an uppercase version of the string. + /// + /// The following example transforms a string to uppercase letters: + /// + /// let cafe = "Café 🍵" + /// print(cafe.uppercased()) + /// // Prints "CAFÉ 🍵" + /// + /// - Returns: An uppercase copy of the string. /// /// - Complexity: O(n) public func uppercased() -> String { diff --git a/stdlib/public/core/StringBridge.swift b/stdlib/public/core/StringBridge.swift index 0a6b64c10f8f1..027e9ebc7410b 100644 --- a/stdlib/public/core/StringBridge.swift +++ b/stdlib/public/core/StringBridge.swift @@ -71,7 +71,7 @@ internal func _cocoaStringToContiguous( source: _CocoaString, range: Range, minimumCapacity: Int ) -> _StringBuffer { _sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(source) == nil, - "Known contiguously-stored strings should already be converted to Swift") + "Known contiguously stored strings should already be converted to Swift") let startIndex = range.lowerBound let count = range.upperBound - startIndex @@ -107,7 +107,7 @@ internal func _cocoaStringSlice( _sanityCheck( _swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil, - "Known contiguously-stored strings should already be converted to Swift") + "Known contiguously stored strings should already be converted to Swift") let cfResult: AnyObject = _swift_stdlib_CFStringCreateWithSubstring( nil, cfSelf, 
_swift_shims_CFRange( @@ -124,7 +124,7 @@ internal func _cocoaStringSubscript( let cfSelf: _swift_shims_CFStringRef = target.cocoaBuffer.unsafelyUnwrapped _sanityCheck(_swift_stdlib_CFStringGetCharactersPtr(cfSelf) == nil, - "Known contiguously-stored strings should already be converted to Swift") + "Known contiguously stored strings should already be converted to Swift") return _swift_stdlib_CFStringGetCharacterAtIndex(cfSelf, position) } diff --git a/stdlib/public/core/StringCharacterView.swift b/stdlib/public/core/StringCharacterView.swift index 82c81e89f0afb..336e8b78208db 100644 --- a/stdlib/public/core/StringCharacterView.swift +++ b/stdlib/public/core/StringCharacterView.swift @@ -19,13 +19,42 @@ // allow performance optimizations of linear traversals. extension String { - /// A `String`'s collection of `Character`s ([extended grapheme - /// clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster)) - /// elements. + /// A view of a string's contents as a collection of characters. + /// + /// In Swift, every string provides a view of its contents as characters. In + /// this view, many individual characters---for example, "é", "김", and + /// "🇮🇳"---can be made up of multiple Unicode code points. These code points + /// are combined by Unicode's boundary algorithms into *extended grapheme + /// clusters*, represented by the `Character` type. Each element of a + /// `CharacterView` collection is a `Character` instance. + /// + /// let flowers = "Flowers 💐" + /// for c in flowers.characters { + /// print(c) + /// } + /// // F + /// // l + /// // o + /// // w + /// // e + /// // r + /// // s + /// // + /// // 💐 + /// + /// You can convert a `String.CharacterView` instance back into a string + /// using the `String` type's `init(_:)` initializer.
+ /// + /// let name = "Marie Curie" + /// if let firstSpace = name.characters.index(of: " ") { + /// let firstName = String(name.characters.prefix(upTo: firstSpace)) + /// print(firstName) + /// } + /// // Prints "Marie" public struct CharacterView { internal var _core: _StringCore - /// Create a view of the `Character`s in `text`. + /// Creates a view of the given string. public init(_ text: String) { self._core = text._core } @@ -36,9 +65,7 @@ extension String { } } - /// A collection of `Characters` representing the `String`'s - /// [extended grapheme - /// clusters](http://www.unicode.org/glossary/#extended_grapheme_cluster). + /// A view of the string's contents as a collection of characters. public var characters: CharacterView { get { return CharacterView(self) @@ -48,12 +75,34 @@ extension String { } } - /// Efficiently mutate `self` by applying `body` to its `characters`. + /// Applies the given closure to a mutable view of the string's characters. + /// + /// Do not use the string that is the target of this method inside the + /// closure passed to `body`, as it may not have its correct value. + /// Instead, use the closure's `String.CharacterView` argument. + /// + /// The example below uses the `withMutableCharacters(_:)` method to truncate + /// the string `str` at the first space and to return the remainder of the + /// string. /// - /// - Warning: Do not rely on anything about `self` (the `String` - /// that is the target of this method) during the execution of - /// `body`: it may not appear to have its correct value. Instead, - /// use only the `String.CharacterView` argument to `body`. + /// var str = "All this happened, more or less."
+ /// let afterSpace = str.withMutableCharacters { chars -> String.CharacterView in + /// if let i = chars.index(of: " ") { + /// let result = chars.suffix(from: chars.index(after: i)) + /// chars.removeSubrange(i..<chars.endIndex) + /// return result + /// } + /// return String.CharacterView() + /// } + /// + /// print(str) + /// // Prints "All" + /// print(String(afterSpace)) + /// // Prints "this happened, more or less." + /// + /// - Parameter body: A closure that takes a character view as its argument. + /// - Returns: The return value of the `body` closure, if any, is the return + /// value of this method. public mutating func withMutableCharacters<R>(_ body: (inout CharacterView) -> R) -> R { // Naively mutating self.characters forces multiple references to // exist at the point of mutation. Instead, temporarily move the @@ -65,8 +114,20 @@ extension String { return r } - /// Construct the `String` corresponding to the given sequence of - /// Unicode scalars. + /// Creates a string from the given character view. + /// + /// Use this initializer to recover a string after performing a collection + /// slicing operation on a character view. + /// + /// let poem = "'Twas brillig, and the slithy toves / " + + /// "Did gyre and gimbal in the wabe: / " + + /// "All mimsy were the borogoves / " + + /// "And the mome raths outgrabe." + /// let excerpt = String(poem.characters.prefix(22)) + "..." + /// print(excerpt) + /// // Prints "'Twas brillig, and the..." + /// + /// - Parameter characters: A character view to convert to a string. public init(_ characters: CharacterView) { self.init(characters._core) } @@ -79,7 +140,20 @@ extension String.CharacterView : BidirectionalCollection { return UnicodeScalarView(_core) } - /// A character position. + /// A position in a string's `CharacterView` instance. + /// + /// You can convert between indices of the different string views by using + /// conversion initializers and the `samePosition(in:)` method overloads.
+ /// The following example finds the index of the first space in the string's + /// character view and then converts that to the same position in the UTF-8 + /// view: + /// + /// let hearts = "Hearts <3 ♥︎ 💘" + /// if let i = hearts.characters.index(of: " ") { + /// let j = i.samePosition(in: hearts.utf8) + /// print(Array(hearts.utf8.prefix(upTo: j))) + /// } + /// // Prints "[72, 101, 97, 114, 116, 115]" public struct Index : Comparable, CustomPlaygroundQuickLookable { public // SPI(Foundation) init(_base: String.UnicodeScalarView.Index) { @@ -223,17 +297,17 @@ extension String.CharacterView : BidirectionalCollection { public typealias IndexDistance = Int - /// The position of the first `Character` if `self` is - /// non-empty; identical to `endIndex` otherwise. + /// The position of the first character in a nonempty character view. + /// + /// In an empty character view, `startIndex` is equal to `endIndex`. public var startIndex: Index { return Index(_base: unicodeScalars.startIndex) } - /// The "past the end" position. + /// A character view's "past the end" position---that is, the position one + /// greater than the last valid subscript argument. /// - /// `endIndex` is not a valid argument to `subscript`, and is always - /// reachable from `startIndex` by zero or more applications of - /// `index(after:)`. + /// In an empty character view, `endIndex` is equal to `startIndex`. public var endIndex: Index { return Index(_base: unicodeScalars.endIndex) } @@ -251,27 +325,44 @@ extension String.CharacterView : BidirectionalCollection { return i._predecessor() } - /// Access the `Character` at `position`. + /// Accesses the character at the given position. + /// + /// The following example searches a string's character view for a capital + /// letter and then prints the character at the found index: /// - /// - Precondition: `position` is a valid position in `self` and - /// `position != endIndex`. + /// let greeting = "Hello, friend!" 
+ /// if let i = greeting.characters.index(where: { "A"..."Z" ~= $0 }) { + /// print("First capital letter: \(greeting.characters[i])") + /// } + /// // Prints "First capital letter: H" + /// + /// - Parameter i: A valid index of the character view. `i` + /// must be less than the view's end index. public subscript(i: Index) -> Character { return Character(String(unicodeScalars[i._base..( @@ -284,16 +375,25 @@ extension String.CharacterView : RangeReplaceableCollection { _core.replaceSubrange(rawSubRange, with: lazyUTF16) } - /// Reserve enough space to store `n` ASCII characters. + /// Reserves enough space in the character view's underlying storage to store + /// the specified number of ASCII characters. /// - /// - Complexity: O(`n`). + /// Because each element of a character view can require more than a single + /// ASCII character's worth of storage, additional allocation may be + /// necessary when adding characters to the character view after a call to + /// `reserveCapacity(_:)`. + /// + /// - Parameter n: The minimum number of ASCII characters' worth of storage + /// to allocate. + /// + /// - Complexity: O(*n*), where *n* is the capacity being reserved. public mutating func reserveCapacity(_ n: Int) { _core.reserveCapacity(n) } - /// Append `c` to `self`. + /// Appends the given character to the character view. /// - /// - Complexity: Amortized O(1). + /// - Parameter c: The character to append to the character view. public mutating func append(_ c: Character) { switch c._representation { case .small(let _63bits): @@ -304,7 +404,9 @@ extension String.CharacterView : RangeReplaceableCollection { } } - /// Append the elements of `newElements` to `self`. + /// Appends the characters in the given sequence to the character view. + /// + /// - Parameter newElements: A sequence of characters.
public mutating func append< S : Sequence where S.Iterator.Element == Character >(contentsOf newElements: S) { @@ -314,7 +416,10 @@ extension String.CharacterView : RangeReplaceableCollection { } } - /// Create an instance containing `characters`. + /// Creates a new character view containing the characters in the given + /// sequence. + /// + /// - Parameter characters: A sequence of characters. public init< S : Sequence where S.Iterator.Element == Character >(_ characters: S) { @@ -325,10 +430,19 @@ extension String.CharacterView : RangeReplaceableCollection { // Algorithms extension String.CharacterView { - /// Access the characters in `bounds`. + /// Accesses the characters in the given range. + /// + /// The example below uses this subscript to access the characters up to, but + /// not including, the first comma (`","`) in the string. + /// + /// let str = "All this happened, more or less." + /// let i = str.characters.index(of: ",")! + /// let substring = str.characters[str.characters.startIndex ..< i] + /// print(String(substring)) + /// // Prints "All this happened" /// - /// - Complexity: O(1) unless bridging from Objective-C requires an - /// O(N) conversion. + /// - Complexity: O(*n*) if the underlying string is bridged from + /// Objective-C, where *n* is the length of the string; otherwise, O(1). public subscript(bounds: Range) -> String.CharacterView { let unicodeScalarRange = bounds.lowerBound._base..(stringInterpolationSegment expr: T) { self = String(expr) } % for Type in StreamableTypes: + /// Creates a string containing the given value's textual representation. + /// + /// Do not call this initializer directly. It is used by the compiler when + /// interpreting string interpolations. + /// + /// - SeeAlso: `StringInterpolationConvertible` public init(stringInterpolationSegment expr: ${Type}) { self = _toStringReadOnlyStreamable(expr) } % end % for Type in PrintableTypes: + /// Creates a string containing the given value's textual representation. 
+ /// + /// Do not call this initializer directly. It is used by the compiler when + /// interpreting string interpolations. + /// + /// - SeeAlso: `StringInterpolationConvertible` public init(stringInterpolationSegment expr: ${Type}) { self = _toStringReadOnlyPrintable(expr) } diff --git a/stdlib/public/core/StringLegacy.swift b/stdlib/public/core/StringLegacy.swift index c4ad99442c464..ab12018cbc520 100644 --- a/stdlib/public/core/StringLegacy.swift +++ b/stdlib/public/core/StringLegacy.swift @@ -13,8 +13,15 @@ import SwiftShims extension String { - /// Construct an instance that is the concatenation of `count` copies - /// of `repeatedValue`. + /// Creates a string representing the given character repeated the specified + /// number of times. + /// + /// For example, use this initializer to create a string with ten `"0"` + /// characters in a row. + /// + /// let zeroes = String(repeating: "0" as Character, count: 10) + /// print(zeroes) + /// // Prints "0000000000" public init(repeating repeatedValue: Character, count: Int) { let s = String(repeatedValue) self = String(_storage: _StringBuffer( @@ -26,8 +33,15 @@ extension String { } } - /// Construct an instance that is the concatenation of `count` copies - /// of `Character(repeatedValue)`. + /// Creates a string representing the given Unicode scalar repeated the + /// specified number of times. + /// + /// For example, use this initializer to create a string with ten `"0"` + /// scalars in a row. + /// + /// let zeroes = String(repeating: "0" as UnicodeScalar, count: 10) + /// print(zeroes) + /// // Prints "0000000000" public init(repeating repeatedValue: UnicodeScalar, count: Int) { self = String._fromWellFormedCodeUnitSequence( UTF32.self, @@ -44,7 +58,7 @@ extension String { return scalarSlices.map { String($0) } } - /// `true` iff `self` contains no characters. + /// A Boolean value indicating whether a string has no characters.
public var isEmpty : Bool { return _core.count == 0 } @@ -73,7 +87,36 @@ func _stdlib_NSStringHasSuffixNFD(_ theString: AnyObject, _ suffix: AnyObject) - func _stdlib_NSStringHasSuffixNFDPointer(_ theString: OpaquePointer, _ suffix: OpaquePointer) -> Bool extension String { - /// Returns `true` iff `self` begins with `prefix`. + /// Returns a Boolean value indicating whether the string begins with the + /// specified prefix. + /// + /// The comparison is both case sensitive and Unicode safe. The + /// case-sensitive comparison will only match strings whose corresponding + /// characters have the same case. + /// + /// let cafe = "Café du Monde" + /// + /// // Case sensitive + /// print(cafe.hasPrefix("café")) + /// // Prints "false" + /// + /// The Unicode-safe comparison matches equivalent characters rather than the + /// code points used to compose them. The example below uses two strings + /// with different forms of the `"é"` character---the first uses the composed + /// form and the second uses the decomposed form. + /// + /// // Unicode safe + /// let composedCafe = "Café" + /// let decomposedCafe = "Cafe\u{0301}" + /// + /// print(cafe.hasPrefix(composedCafe)) + /// // Prints "true" + /// print(cafe.hasPrefix(decomposedCafe)) + /// // Prints "true" + /// + /// - Parameter prefix: A possible prefix to test against this string. + /// Passing an empty string (`""`) as `prefix` always results in `false`. + /// - Returns: `true` if the string begins with `prefix`; otherwise, `false`. public func hasPrefix(_ prefix: String) -> Bool { let selfCore = self._core let prefixCore = prefix._core @@ -97,7 +140,36 @@ extension String { self._bridgeToObjectiveCImpl(), prefix._bridgeToObjectiveCImpl()) } - /// Returns `true` iff `self` ends with `suffix`. + /// Returns a Boolean value indicating whether the string ends with the + /// specified suffix. + /// + /// The comparison is both case sensitive and Unicode safe.
The + /// case-sensitive comparison will only match strings whose corresponding + /// characters have the same case. + /// + /// let plans = "Let's meet at the café" + /// + /// // Case sensitive + /// print(plans.hasSuffix("Café")) + /// // Prints "false" + /// + /// The Unicode-safe comparison matches equivalent characters rather than the + /// code points used to compose them. The example below uses two strings + /// with different forms of the `"é"` character---the first uses the composed + /// form and the second uses the decomposed form. + /// + /// // Unicode safe + /// let composedCafe = "café" + /// let decomposedCafe = "cafe\u{0301}" + /// + /// print(plans.hasSuffix(composedCafe)) + /// // Prints "true" + /// print(plans.hasSuffix(decomposedCafe)) + /// // Prints "true" + /// + /// - Parameter suffix: A possible suffix to test against this string. + /// Passing an empty string (`""`) as `suffix` always results in `false`. + /// - Returns: `true` if the string ends with `suffix`; otherwise, `false`. public func hasSuffix(_ suffix: String) -> Bool { let selfCore = self._core let suffixCore = suffix._core @@ -134,38 +206,86 @@ extension String { // FIXME: can't just use a default arg for radix below; instead we // need these single-arg overloads - /// Create an instance representing `v` in base 10. + /// Creates a string representing the given value in base 10. + /// + /// The following example converts the maximal `Int` value to a string and + /// prints its length: + /// + /// let max = String(Int.max) + /// print("\(max) has \(max.utf16.count) digits.") + /// // Prints "9223372036854775807 has 19 digits." public init(_ v: T) { self = _int64ToString(v.toIntMax()) } - /// Create an instance representing `v` in base 10. + /// Creates a string representing the given value in base 10.
+ /// + /// The following example converts the maximal `UInt` value to a string and + /// prints its length: + /// + /// let max = String(UInt.max) + /// print("\(max) has \(max.utf16.count) digits.") + /// // Prints "18446744073709551615 has 20 digits." public init(_ v: T) { self = _uint64ToString(v.toUIntMax()) } - /// Create an instance representing `v` in the given `radix` (base). + /// Creates a string representing the given value in the specified base. + /// + /// Numerals greater than 9 are represented as Roman letters. These letters + /// start with `"A"` if `uppercase` is `true`; otherwise, with `"a"`. + /// + /// let v = 999_999 + /// print(String(v, radix: 2)) + /// // Prints "11110100001000111111" + /// + /// print(String(v, radix: 16)) + /// // Prints "f423f" + /// print(String(v, radix: 16, uppercase: true)) + /// // Prints "F423F" /// - /// Numerals greater than 9 are represented as roman letters, - /// starting with `a` if `uppercase` is `false` or `A` otherwise. + /// - Parameters: + /// - value: The value to convert to a string. + /// - radix: The base to use for the string representation. `radix` must be + /// at least 2 and at most 36. + /// - uppercase: Pass `true` to use uppercase letters to represent numerals + /// greater than 9, or `false` to use lowercase letters. The default is + /// `false`. public init( - _ v: T, radix: Int, uppercase: Bool = false + _ value: T, radix: Int, uppercase: Bool = false ) { _precondition(radix > 1, "Radix must be greater than 1") self = _int64ToString( - v.toIntMax(), radix: Int64(radix), uppercase: uppercase) + value.toIntMax(), radix: Int64(radix), uppercase: uppercase) } - /// Create an instance representing `v` in the given `radix` (base). + /// Creates a string representing the given value in the specified base. + /// + /// Numerals greater than 9 are represented as Roman letters. These letters + /// start with `"A"` if `uppercase` is `true`; otherwise, with `"a"`. 
+ /// + /// let v: UInt = 999_999 + /// print(String(v, radix: 2)) + /// // Prints "11110100001000111111" + /// + /// print(String(v, radix: 16)) + /// // Prints "f423f" + /// print(String(v, radix: 16, uppercase: true)) + /// // Prints "F423F" /// - /// Numerals greater than 9 are represented as roman letters, - /// starting with `a` if `uppercase` is `false` or `A` otherwise. + /// - Parameters: + /// - value: The value to convert to a string. + /// - radix: The base to use for the string representation. `radix` must be + /// at least 2 and at most 36. + /// - uppercase: Pass `true` to use uppercase letters to represent numerals + /// greater than 9, or `false` to use lowercase letters. The default is + /// `false`. public init( - _ v: T, radix: Int, uppercase: Bool = false + _ value: T, radix: Int, uppercase: Bool = false ) { _precondition(radix > 1, "Radix must be greater than 1") self = _uint64ToString( - v.toUIntMax(), radix: Int64(radix), uppercase: uppercase) + value.toUIntMax(), radix: Int64(radix), uppercase: uppercase) } } diff --git a/stdlib/public/core/StringRangeReplaceableCollection.swift.gyb b/stdlib/public/core/StringRangeReplaceableCollection.swift.gyb index 2bccb1984e77c..85c90d47b570a 100644 --- a/stdlib/public/core/StringRangeReplaceableCollection.swift.gyb +++ b/stdlib/public/core/StringRangeReplaceableCollection.swift.gyb @@ -14,18 +14,25 @@ // similar API. extension String { + /// The index type for subscripting a string. public typealias Index = CharacterView.Index + + /// A type used to represent the number of steps between two `String.Index` + /// values, where one value is reachable from the other. + /// + /// In Swift, *reachability* refers to the ability to produce one value from + /// the other through zero or more applications of `index(after:)`. public typealias IndexDistance = CharacterView.IndexDistance - /// The position of the first `Character` in `self.characters` if - /// `self` is non-empty; identical to `endIndex` otherwise. 
+ /// The position of the first character in a nonempty string. + /// + /// In an empty string, `startIndex` is equal to `endIndex`. public var startIndex: Index { return characters.startIndex } - /// The "past the end" position in `self.characters`. + /// A string's "past the end" position---that is, the position one greater + /// than the last valid subscript argument. /// - /// `endIndex` is not a valid argument to `subscript`, and is always - /// reachable from `startIndex` by zero or more applications of - /// `index(after:)`. + /// In an empty string, `endIndex` is equal to `startIndex`. public var endIndex: Index { return characters.endIndex } // TODO: swift-3-indexing-model - add docs @@ -60,16 +67,25 @@ extension String { return characters.distance(from: start, to: end) } - /// Access the `Character` at `position`. + /// Accesses the character at the given position. /// - /// - Precondition: `position` is a valid position in `self.characters` - /// and `position != endIndex`. + /// Indices for subscripting a string are shared with the string's + /// `characters` view. For example: + /// + /// let greeting = "Hello, friend!" + /// if let i = greeting.characters.index(where: { $0 >= "A" && $0 <= "Z" }) { + /// print("First capital letter: \(greeting[i])") + /// } + /// // Prints "First capital letter: H" + /// + /// - Parameter i: A valid index of the string. `i` must be less than the + /// string's end index. public subscript(i: Index) -> Character { return characters[i] } - /// Return the characters within the given `bounds`. + /// Accesses the text in the given range. /// - /// - Complexity: O(1) unless bridging from Objective-C requires an - /// O(N) conversion. + /// - Complexity: O(*n*) if the underlying string is bridged from + /// Objective-C, where *n* is the length of the string; otherwise, O(1).
public subscript(bounds: Range) -> String { return String(characters[bounds]) } @@ -86,24 +102,63 @@ public func < (lhs: String.Index, rhs: String.Index) -> Bool { } extension String { - /// Create an instance containing `characters`. + /// Creates a new string containing the characters in the given sequence. + /// + /// You can use this initializer to create a new string from the result of + /// one or more operations on a string's `characters` view. For example: + /// + /// let str = "The rain in Spain stays mainly in the plain." + /// + /// let vowels: Set = ["a", "e", "i", "o", "u"] + /// let disemvowelled = String(str.characters.lazy.filter { !vowels.contains($0) }) + /// + /// print(disemvowelled) + /// // Prints "Th rn n Spn stys mnly n th pln." + /// + /// - Parameter characters: A sequence of characters. public init< S : Sequence where S.Iterator.Element == Character >(_ characters: S) { self._core = CharacterView(characters)._core } + /// Reserves enough space in the string's underlying storage to store the + /// specified number of ASCII characters. + /// + /// Because each character in a string can require more than a single ASCII + /// character's worth of storage, additional allocation may be necessary + /// when adding characters to a string after a call to + /// `reserveCapacity(_:)`. + /// + /// - Parameter n: The minimum number of ASCII character's worth of storage + /// to allocate. + /// + /// - Complexity: O(*n*) public mutating func reserveCapacity(_ n: Int) { withMutableCharacters { (v: inout CharacterView) in v.reserveCapacity(n) } } + + /// Appends the given character to the string. + /// + /// The following example adds an emoji globe to the end of a string. + /// + /// var globe = "Globe " + /// globe.append("🌍") + /// print(globe) + /// // Prints "Globe 🌍" + /// + /// - Parameter c: The character to append to the string. 
public mutating func append(_ c: Character) { withMutableCharacters { (v: inout CharacterView) in v.append(c) } } + /// Appends the characters in the given sequence to the string. + /// + /// - Parameter newElements: A sequence of characters. public mutating func append< S : Sequence where S.Iterator.Element == Character >(contentsOf newElements: S) { @@ -113,13 +168,20 @@ extension String { } % for Range in ['Range', 'ClosedRange']: - /// Replace the characters within `bounds` with the elements of - /// `replacement`. + /// Replaces the text within the specified bounds with the given characters. + /// + /// Calling this method invalidates any existing indices for use with this + /// string. /// - /// Invalidates all indices with respect to `self`. + /// - Parameters: + /// - bounds: The range of text to replace. The bounds of the range must be + /// valid indices of the string. + /// - newElements: The new characters to add to the string. /// - /// - Complexity: O(`bounds.count`) if `bounds.upperBound - /// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise. + /// - Complexity: O(*m*), where *m* is the combined length of the string and + /// `newElements`. If the call to `replaceSubrange(_:with:)` simply + /// removes text at the end of the string, the complexity is O(*n*), where + /// *n* is equal to `bounds.count`. public mutating func replaceSubrange< C : Collection where C.Iterator.Element == Character >( @@ -132,12 +194,20 @@ extension String { } } - /// Replace the text in `bounds` with `replacement`. + /// Replaces the text within the specified bounds with the given string. /// - /// Invalidates all indices with respect to `self`. + /// Calling this method invalidates any existing indices for use with this + /// string. /// - /// - Complexity: O(`bounds.count`) if `bounds.upperBound - /// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise. + /// - Parameters: + /// - bounds: The range of text to replace. 
The bounds of the range must be + /// valid indices of the string. + /// - newElements: The new text to add to the string. + /// + /// - Complexity: O(*m*), where *m* is the combined length of the string and + /// `newElements`. If the call to `replaceSubrange(_:with:)` simply + /// removes text at the end of the string, the complexity is O(*n*), where + /// *n* is equal to `bounds.count`. public mutating func replaceSubrange( _ bounds: ${Range}, with newElements: String ) { @@ -146,22 +216,37 @@ extension String { } % end - /// Insert `newElement` at position `i`. + /// Inserts a new character at the specified position. + /// + /// Calling this method invalidates any existing indices for use with this + /// string. /// - /// Invalidates all indices with respect to `self`. + /// - Parameters: + /// - newElement: The new character to insert into the string. + /// - i: A valid index of the string. If `i` is equal to the string's end + /// index, this method appends `newElement` to the string. /// - /// - Complexity: O(`self.count`). + /// - Complexity: O(*n*), where *n* is the length of the string. public mutating func insert(_ newElement: Character, at i: Index) { withMutableCharacters { (v: inout CharacterView) in v.insert(newElement, at: i) } } - /// Insert `newElements` at position `i`. + /// Inserts a collection of characters at the specified position. + /// + /// Calling this method invalidates any existing indices for use with this + /// string. /// - /// Invalidates all indices with respect to `self`. + /// - Parameters: + /// - newElements: A collection of `Character` elements to insert into the + /// string. + /// - i: A valid index of the string. If `i` is equal to the string's end + /// index, this method appends the contents of `newElements` to the + /// string. /// - /// - Complexity: O(`self.count + newElements.count`). + /// - Complexity: O(*n*), where *n* is the combined length of the string and + /// `newElements`.
public mutating func insert< S : Collection where S.Iterator.Element == Character >(contentsOf newElements: S, at i: Index) { @@ -170,11 +255,24 @@ extension String { } } - /// Remove and return the `Character` at position `i`. + /// Removes and returns the character at the specified position. /// - /// Invalidates all indices with respect to `self`. + /// All the elements following `i` are moved to close the gap. This example + /// removes the hyphen from the middle of a string. /// - /// - Complexity: O(`self.count`). + /// var nonempty = "non-empty" + /// if let i = nonempty.characters.index(of: "-") { + /// nonempty.remove(at: i) + /// } + /// print(nonempty) + /// // Prints "nonempty" + /// + /// Calling this method invalidates any existing indices for use with this + /// string. + /// + /// - Parameter i: The position of the character to remove. `i` must be a + /// valid index of the string that is not equal to the string's end index. + /// - Returns: The character that was removed. @discardableResult public mutating func remove(at i: Index) -> Character { return withMutableCharacters { @@ -183,11 +281,19 @@ extension String { } % for Range in ['Range', 'ClosedRange']: - /// Remove the characters in `bounds`. + /// Removes the characters in the given range. /// - /// Invalidates all indices with respect to `self`. + /// Calling this method invalidates any existing indices for use with this + /// string. /// - /// - Complexity: O(`self.count`). +% if Range == 'ClosedRange': + /// - Parameter bounds: The range of the elements to remove. The upper and + /// lower bounds of `bounds` must be valid indices of the string and not + /// equal to the string's end index. +% else: + /// - Parameter bounds: The range of the elements to remove. The upper and + /// lower bounds of `bounds` must be valid indices of the string. +% end public mutating func removeSubrange(_ bounds: ${Range}) { // FIXME: swift-3-indexing-model: tests. 
withMutableCharacters { @@ -196,13 +302,15 @@ extension String { } % end - /// Replace `self` with the empty string. + /// Replaces this string with the empty string. /// - /// Invalidates all indices with respect to `self`. + /// Calling this method invalidates any existing indices for use with this + /// string. /// - /// - parameter keepCapacity: If `true`, prevents the release of - /// allocated storage, which can be a useful optimization - /// when `self` is going to be grown again. + /// - Parameter keepCapacity: Pass `true` to prevent the release of the + /// string's allocated storage. Retaining the storage can be a useful + /// optimization when you're planning to grow the string again. The + /// default value is `false`. public mutating func removeAll(keepingCapacity keepCapacity: Bool = false) { withMutableCharacters { (v: inout CharacterView) in v.removeAll(keepingCapacity: keepCapacity) diff --git a/stdlib/public/core/StringUTF16.swift b/stdlib/public/core/StringUTF16.swift index b3ab5a3dc0c76..5e3bdaa256006 100644 --- a/stdlib/public/core/StringUTF16.swift +++ b/stdlib/public/core/StringUTF16.swift @@ -14,13 +14,123 @@ // allow performance optimizations of linear traversals. extension String { - /// A collection of UTF-16 code units that encodes a `String` value. + /// A view of a string's contents as a collection of UTF-16 code units. + /// + /// You can access a string's view of UTF-16 code units by using its `utf16` + /// property. A string's UTF-16 view encodes the string's Unicode scalar + /// values as 16-bit integers. + /// + /// let flowers = "Flowers 💐" + /// for v in flowers.utf16 { + /// print(v) + /// } + /// // 70 + /// // 108 + /// // 111 + /// // 119 + /// // 101 + /// // 114 + /// // 115 + /// // 32 + /// // 55357 + /// // 56464 + /// + /// Unicode scalar values that make up a string's contents can be up to 21 + /// bits long. The longer scalar values may need two `UInt16` values for + /// storage. 
Those "pairs" of code units are called *surrogate pairs*. + /// + /// let flowermoji = "💐" + /// for v in flowermoji.unicodeScalars { + /// print(v, v.value) + /// } + /// // 💐 128144 + /// + /// for v in flowermoji.utf16 { + /// print(v) + /// } + /// // 55357 + /// // 56464 + /// + /// To convert a `String.UTF16View` instance back into a string, use the + /// `String` type's `init(_:)` initializer. + /// + /// let favemoji = "My favorite emoji is 🎉" + /// if let i = favemoji.utf16.index(where: { $0 >= 128 }) { + /// let asciiPrefix = String(favemoji.utf16.prefix(upTo: i)) + /// print(asciiPrefix) + /// } + /// // Prints "My favorite emoji is " + /// + /// UTF16View Elements Match NSString Characters + /// ============================================ + /// + /// The UTF-16 code units of a string's `utf16` view match the elements + /// accessed through indexed `NSString` APIs. + /// + /// print(flowers.utf16.count) + /// // Prints "10" + /// + /// let nsflowers = flowers as NSString + /// print(nsflowers.length) + /// // Prints "10" + /// + /// Unlike `NSString`, however, `String.UTF16View` does not use integer + /// indices. If you need to access a specific position in a UTF-16 view, use + /// Swift's index manipulation methods. The following example accesses the + /// fourth code unit in both the `flowers` and `nsflowers` strings: + /// + /// print(nsflowers.character(at: 3)) + /// // Prints "119" + /// + /// let i = flowers.utf16.index(flowers.utf16.startIndex, offsetBy: 3) + /// print(flowers.utf16[i]) + /// // Prints "119" + /// + /// Although the Swift overlay updates many Objective-C methods to return + /// native Swift indices and index ranges, some still return instances of + /// `NSRange`. To convert an `NSRange` instance to a range of + /// `String.UTF16View.Index`, follow these steps: + /// + /// 1. Use the `NSRange` type's `toRange` method to convert the instance to + /// an optional range of `Int` values. + /// 2. 
Use your string's `utf16` view's index manipulation methods to convert + /// the integer bounds to `String.UTF16View.Index` values. + /// 3. Create a new `Range` instance from the new index values. + /// + /// Here's an implementation of those steps, showing how to retrieve a + /// substring described by an `NSRange` instance from the middle of a + /// string. + /// + /// let snowy = "❄️ Let it snow! ☃️" + /// let nsrange = NSRange(location: 3, length: 12) + /// if let r = nsrange.toRange() { + /// let start = snowy.utf16.index(snowy.utf16.startIndex, offsetBy: r.lowerBound) + /// let end = snowy.utf16.index(snowy.utf16.startIndex, offsetBy: r.upperBound) + /// let substringRange = start.. UTF16.CodeUnit { let position = i._offset _precondition(position >= 0 && position < _length, @@ -158,10 +275,11 @@ extension String { } #endif - /// Get the contiguous subrange of elements enclosed by `bounds`. + /// Accesses the contiguous subrange of elements enclosed by the specified + /// range. /// - /// - Complexity: O(1) unless bridging from Objective-C requires an - /// O(N) conversion. + /// - Complexity: O(*n*) if the underlying string is bridged from + /// Objective-C, where *n* is the length of the string; otherwise, O(1). public subscript(bounds: Range) -> UTF16View { return UTF16View( _core, @@ -206,9 +324,24 @@ extension String { } } - /// Construct the `String` corresponding to the given sequence of - /// UTF-16 code units. If `utf16` contains unpaired surrogates, the - /// result is `nil`. + /// Creates a string corresponding to the given sequence of UTF-16 code units. + /// + /// If `utf16` contains unpaired UTF-16 surrogates, the result is `nil`. + /// + /// You can use this initializer to create a new string from a slice of + /// another string's `utf16` view.
+ /// + /// let picnicGuest = "Deserving porcupine" + /// if let i = picnicGuest.utf16.index(of: 32) { + /// let adjective = String(picnicGuest.utf16.prefix(upTo: i)) + /// print(adjective) + /// } + /// // Prints "Optional("Deserving")" + /// + /// The `adjective` constant is created by calling this initializer with a + /// slice of the `picnicGuest.utf16` view. + /// + /// - Parameter utf16: A UTF-16 code sequence. public init?(_ utf16: UTF16View) { let wholeString = String(utf16._core) @@ -225,7 +358,7 @@ extension String { return nil } - /// The index type for subscripting a `String`'s `utf16` view. + /// The index type for subscripting a string's `utf16` view. public typealias UTF16Index = UTF16View.Index } @@ -247,11 +380,31 @@ public func < ( // Index conversions extension String.UTF16View.Index { - /// Construct the position in `utf16` that corresponds exactly to - /// `utf8Index`. If no such position exists, the result is `nil`. + /// Creates an index in the given UTF-16 view that corresponds exactly to the + /// specified `UTF8View` position. + /// + /// The following example finds the position of a space in a string's `utf8` + /// view and then converts that position to an index in the string's + /// `utf16` view. + /// + /// let cafe = "Café 🍵" + /// + /// let utf8Index = cafe.utf8.index(of: 32)! + /// let utf16Index = String.UTF16View.Index(utf8Index, within: cafe.utf16)! + /// + /// print(cafe.utf16.prefix(upTo: utf16Index)) + /// // Prints "Café" /// - /// - Precondition: `utf8Index` is an element of - /// `String(utf16)!.utf8.indices`. + /// If the position passed as `utf8Index` doesn't have an exact corresponding + /// position in `utf16`, the result of the initializer is `nil`. For + /// example, because UTF-8 and UTF-16 represent high Unicode code points + /// differently, an attempt to convert the position of a UTF-8 continuation + /// byte fails. + /// + /// - Parameters: + /// - utf8Index: A position in a `UTF8View` instance.
`utf8Index` must be + /// an element in `String(utf16).utf8.indices`. + /// - utf16: The `UTF16View` in which to find the new position. public init?( _ utf8Index: String.UTF8Index, within utf16: String.UTF16View ) { @@ -268,31 +421,76 @@ extension String.UTF16View.Index { _offset = utf8Index._coreIndex } - /// Construct the position in `utf16` that corresponds exactly to - /// `unicodeScalarIndex`. + /// Creates an index in the given UTF-16 view that corresponds exactly to the + /// specified `UnicodeScalarView` position. + /// + /// The following example finds the position of `"é"` in a string's + /// `unicodeScalars` view and then converts that position to an index in the + /// string's `utf16` view. + /// + /// let cafe = "Café 🍵" /// - /// - Precondition: `unicodeScalarIndex` is an element of - /// `String(utf16)!.unicodeScalars.indices`. + /// let scalarIndex = cafe.unicodeScalars.index(of: "é")! + /// let utf16Index = String.UTF16View.Index(scalarIndex, within: cafe.utf16) + /// + /// print(cafe.utf16.prefix(through: utf16Index)) + /// // Prints "Café" + /// + /// - Parameters: + /// - unicodeScalarIndex: A position in a `UnicodeScalarView` instance. + /// `unicodeScalarIndex` must be an element in + /// `String(utf16).unicodeScalars.indices`. + /// - utf16: The `UTF16View` in which to find the new position. public init( _ unicodeScalarIndex: String.UnicodeScalarIndex, within utf16: String.UTF16View) { _offset = unicodeScalarIndex._position } - /// Construct the position in `utf16` that corresponds exactly to - /// `characterIndex`. + /// Creates an index in the given UTF-16 view that corresponds exactly to the + /// specified `CharacterView` position. + /// + /// The following example finds the position of `"é"` in a string's `characters` + /// view and then converts that position to an index in the string's + /// `utf16` view. /// - /// - Precondition: `characterIndex` is an element of - /// `String(utf16)!.indices`.
+ /// let cafe = "Café 🍵" + /// + /// let characterIndex = cafe.characters.index(of: "é")! + /// let utf16Index = String.UTF16View.Index(characterIndex, within: cafe.utf16) + /// + /// print(cafe.utf16.prefix(through: utf16Index)) + /// // Prints "Café" + /// + /// - Parameters: + /// - characterIndex: A position in a `CharacterView` instance. + /// `characterIndex` must be an element in + /// `String(utf16).characters.indices`. + /// - utf16: The `UTF16View` in which to find the new position. public init(_ characterIndex: String.Index, within utf16: String.UTF16View) { _offset = characterIndex._utf16Index } - /// Returns the position in `utf8` that corresponds exactly - /// to `self`, or if no such position exists, `nil`. + /// Returns the position in the given UTF-8 view that corresponds exactly to + /// this index. + /// + /// The index must be a valid index of `String(utf8).utf16`. + /// + /// This example first finds the position of a space (UTF-16 code point `32`) + /// in a string's `utf16` view and then uses this method to find the same + /// position in the string's `utf8` view. + /// + /// let cafe = "Café 🍵" + /// let i = cafe.utf16.index(of: 32)! + /// let j = i.samePosition(in: cafe.utf8)! + /// print(Array(cafe.utf8.prefix(upTo: j))) + /// // Prints "[67, 97, 102, 195, 169]" /// - /// - Precondition: `self` is an element of - /// `String(utf8)!.utf16.indices`. + /// - Parameter utf8: The view to use for the index conversion. + /// - Returns: The position in `utf8` that corresponds exactly to this index. + /// If this index does not have an exact corresponding position in `utf8`, + /// this method returns `nil`. For example, an attempt to convert the + /// position of a UTF-16 trailing surrogate returns `nil`. 
@warn_unused_result public func samePosition( in utf8: String.UTF8View @@ -300,11 +498,27 @@ extension String.UTF16View.Index { return String.UTF8View.Index(self, within: utf8) } - /// Returns the position in `unicodeScalars` that corresponds exactly - /// to `self`, or if no such position exists, `nil`. + /// Returns the position in the given view of Unicode scalars that + /// corresponds exactly to this index. /// - /// - Precondition: `self` is an element of - /// `String(unicodeScalars).utf16.indices`. + /// This index must be a valid index of `String(unicodeScalars).utf16`. + /// + /// This example first finds the position of a space (UTF-16 code point `32`) + /// in a string's `utf16` view and then uses this method to find the same + /// position in the string's `unicodeScalars` view. + /// + /// let cafe = "Café 🍵" + /// let i = cafe.utf16.index(of: 32)! + /// let j = i.samePosition(in: cafe.unicodeScalars)! + /// print(cafe.unicodeScalars.prefix(upTo: j)) + /// // Prints "Café" + /// + /// - Parameter unicodeScalars: The view to use for the index conversion. + /// - Returns: The position in `unicodeScalars` that corresponds exactly to + /// this index. If this index does not have an exact corresponding + /// position in `unicodeScalars`, this method returns `nil`. For example, + /// an attempt to convert the position of a UTF-16 trailing surrogate + /// returns `nil`. @warn_unused_result public func samePosition( in unicodeScalars: String.UnicodeScalarView @@ -312,10 +526,26 @@ extension String.UTF16View.Index { return String.UnicodeScalarIndex(self, within: unicodeScalars) } - /// Returns the position in `characters` that corresponds exactly - /// to `self`, or if no such position exists, `nil`. + /// Returns the position in the given string that corresponds exactly to this + /// index. + /// + /// This index must be a valid index of `characters.utf16`. 
+ /// + /// This example first finds the position of a space (UTF-16 code point `32`) + /// in a string's `utf16` view and then uses this method to find the same + /// position in the string. + /// + /// let cafe = "Café 🍵" + /// let i = cafe.utf16.index(of: 32)! + /// let j = i.samePosition(in: cafe)! + /// print(cafe[cafe.startIndex ..< j]) + /// // Prints "Café" /// - /// - Precondition: `self` is an element of `characters.utf16.indices`. + /// - Parameter characters: The string to use for the index conversion. + /// - Returns: The position in `characters` that corresponds exactly to this + /// index. If this index does not have an exact corresponding position in + /// `characters`, this method returns `nil`. For example, an attempt to + /// convert the position of a UTF-16 trailing surrogate returns `nil`. @warn_unused_result public func samePosition( in characters: String @@ -326,7 +556,7 @@ extension String.UTF16View.Index { // Reflection extension String.UTF16View : CustomReflectable { - /// Returns a mirror that reflects `self`. + /// Returns a mirror that reflects the UTF-16 view of a string. public var customMirror: Mirror { return Mirror(self, unlabeledChildren: self) } diff --git a/stdlib/public/core/StringUTF8.swift b/stdlib/public/core/StringUTF8.swift index cc35b8a6c67c5..baa049ab5f53b 100644 --- a/stdlib/public/core/StringUTF8.swift +++ b/stdlib/public/core/StringUTF8.swift @@ -91,7 +91,80 @@ extension _StringCore { } extension String { - /// A collection of UTF-8 code units that encodes a `String` value. + /// A view of a string's contents as a collection of UTF-8 code units. + /// + /// You can access a string's view of UTF-8 code units by using its `utf8` + /// property. A string's UTF-8 view encodes the string's Unicode scalar + /// values as 8-bit integers.
+ /// + /// let flowers = "Flowers 💐" + /// for v in flowers.utf8 { + /// print(v) + /// } + /// // 70 + /// // 108 + /// // 111 + /// // 119 + /// // 101 + /// // 114 + /// // 115 + /// // 32 + /// // 240 + /// // 159 + /// // 146 + /// // 144 + /// + /// A string's Unicode scalar values can be up to 21 bits in length. To + /// represent those scalar values using 8-bit integers, more than one UTF-8 + /// code unit is often required. + /// + /// let flowermoji = "💐" + /// for v in flowermoji.unicodeScalars { + /// print(v, v.value) + /// } + /// // 💐 128144 + /// + /// for v in flowermoji.utf8 { + /// print(v) + /// } + /// // 240 + /// // 159 + /// // 146 + /// // 144 + /// + /// In the encoded representation of a Unicode scalar value, each UTF-8 code + /// unit after the first is called a *continuation byte*. + /// + /// UTF8View Elements Match Encoded C Strings + /// ========================================= + /// + /// Swift streamlines interoperation with C string APIs by letting you pass a + /// `String` instance to a function as an `Int8` or `UInt8` pointer. When you + /// call a C function using a `String`, Swift automatically creates a buffer + /// of UTF-8 code units and passes a pointer to that buffer. The code units + /// of that buffer match the code units in the string's `utf8` view. + /// + /// The following example uses the C `strncmp` function to compare the + /// beginning of two Swift strings. The `strncmp` function takes two + /// `const char*` pointers and an integer specifying the number of characters + /// to compare. Because the strings are identical up to the 14th character, + /// comparing only those characters results in a return value of `0`. 
+ /// + /// let s1 = "They call me 'Bell'" + /// let s2 = "They call me 'Stacey'" + /// + /// print(strncmp(s1, s2, 14)) + /// // Prints "0" + /// print(String(s1.utf8.prefix(14))!) + /// // Prints "They call me '" + /// + /// Extending the compared character count to 15 includes the differing + /// characters, so a nonzero result is returned. + /// + /// print(strncmp(s1, s2, 15)) + /// // Prints "-17" + /// print(String(s1.utf8.prefix(15))!) + /// // Prints "They call me 'B" public struct UTF8View : Collection, CustomStringConvertible, @@ -117,7 +190,22 @@ extension String { self._endIndex = e } - /// A position in a `String.UTF8View`. + /// A position in a string's `UTF8View` instance. + /// + /// You can convert between indices of the different string views by using + /// conversion initializers and the `samePosition(in:)` method overloads. + /// For example, the following code sample finds the index of the first + /// space in the string's character view and then converts that to the same + /// position in the UTF-8 view. + /// + /// let hearts = "Hearts <3 ♥︎ 💘" + /// if let i = hearts.characters.index(of: " ") { + /// let j = i.samePosition(in: hearts.utf8) + /// print(Array(hearts.utf8.prefix(upTo: j))) + /// print(hearts.utf8.prefix(upTo: j)) + /// } + /// // Prints "[72, 101, 97, 114, 116, 115]" + /// // Prints "Hearts" public struct Index : Comparable { internal typealias Buffer = _StringCore._UTF8Chunk @@ -209,17 +297,18 @@ extension String { public typealias IndexDistance = Int - /// The position of the first code unit if the `String` is - /// non-empty; identical to `endIndex` otherwise. + /// The position of the first code unit if the UTF-8 view is + /// nonempty. + /// + /// If the UTF-8 view is empty, `startIndex` is equal to `endIndex`. public var startIndex: Index { return self._startIndex } - /// The "past the end" position. + /// The "past the end" position---that is, the position one + /// greater than the last valid subscript argument.
/// - /// `endIndex` is not a valid argument to `subscript`, and is always - /// reachable from `startIndex` by zero or more applications of - /// `index(after:)`. + /// In an empty UTF-8 view, `endIndex` is equal to `startIndex`. public var endIndex: Index { return self._endIndex } @@ -231,20 +320,29 @@ extension String { return i._successor() } - /// Access the element at `position`. + /// Accesses the code unit at the given position. + /// + /// The following example uses the subscript to print the value of a + /// string's first UTF-8 code unit. + /// + /// let greeting = "Hello, friend!" + /// let i = greeting.utf8.startIndex + /// print("First character's UTF-8 code unit: \(greeting.utf8[i])") + /// // Prints "First character's UTF-8 code unit: 72" /// - /// - Precondition: `position` is a valid position in `self` and - /// `position != endIndex`. + /// - Parameter position: A valid index of the view. `position` + /// must be less than the view's end index. public subscript(position: Index) -> UTF8.CodeUnit { let result = UTF8.CodeUnit(truncatingBitPattern: position._buffer & 0xFF) _precondition(result != 0xFF, "cannot subscript using endIndex") return result } - /// Access the contiguous subrange of elements enclosed by `bounds`. + /// Accesses the contiguous subrange of elements enclosed by the specified + /// range. /// - /// - Complexity: O(1) unless bridging from Objective-C requires an - /// O(N) conversion. + /// - Complexity: O(*n*) if the underlying string is bridged from + /// Objective-C, where *n* is the length of the string; otherwise, O(1). public subscript(bounds: Range) -> UTF8View { return UTF8View(_core, bounds.lowerBound, bounds.upperBound) } @@ -272,11 +370,21 @@ extension String { return _core.elementWidth == 1 ? _core.startASCII : nil } - /// A contiguously-stored nul-terminated UTF-8 representation of - /// `self`. + /// A contiguously stored null-terminated UTF-8 representation of + /// the string. 
/// /// To access the underlying memory, invoke - /// `withUnsafeBufferPointer` on the `ContiguousArray`. + /// `withUnsafeBufferPointer` on the array. + /// + /// let s = "Hello!" + /// let bytes = s.nulTerminatedUTF8 + /// print(bytes) + /// // Prints "[72, 101, 108, 108, 111, 33, 0]" + /// + /// bytes.withUnsafeBufferPointer { ptr in + /// print(strlen(UnsafePointer(ptr.baseAddress!))) + /// } + /// // Prints "6" public var nulTerminatedUTF8: ContiguousArray { var result = ContiguousArray() result.reserveCapacity(utf8.count + 1) @@ -295,9 +403,24 @@ extension String { return try nulTerminatedUTF8.withUnsafeBufferPointer(body) } - /// Construct the `String` corresponding to the given sequence of - /// UTF-8 code units. If `utf8` contains unpaired surrogates, the - /// result is `nil`. + /// Creates a string corresponding to the given sequence of UTF-8 code units. + /// + /// If `utf8` is an ill-formed UTF-8 code sequence, the result is `nil`. + /// + /// You can use this initializer to create a new string from a slice of + /// another string's `utf8` view. + /// + /// let picnicGuest = "Deserving porcupine" + /// if let i = picnicGuest.utf8.index(of: 32) { + /// let adjective = String(picnicGuest.utf8.prefix(upTo: i)) + /// print(adjective) + /// } + /// // Prints "Optional("Deserving")" + /// + /// The `adjective` constant is created by calling this initializer with a + /// slice of the `picnicGuest.utf8` view. + /// + /// - Parameter utf8: A UTF-8 code sequence. public init?(_ utf8: UTF8View) { let wholeString = String(utf8._core) @@ -309,7 +432,7 @@ extension String { return nil } - /// The index type for subscripting a `String`'s `.utf8` view. + /// The index type for subscripting a string's `utf8` view. public typealias UTF8Index = UTF8View.Index } @@ -370,11 +493,44 @@ extension String.UTF8View.Index { self.init(core, _utf16Offset, buffer) } - /// Construct the position in `utf8` that corresponds exactly to - /// `utf16Index`.
If no such position exists, the result is `nil`. + /// Creates an index in the given UTF-8 view that corresponds exactly to the + /// specified `UTF16View` position. /// - /// - Precondition: `utf8Index` is an element of - /// `String(utf16)!.utf8.indices`. + /// The following example finds the position of a space in a string's `utf16` + /// view and then converts that position to an index in the string's + /// `utf8` view. + /// + /// let cafe = "Café 🍵" + /// + /// let utf16Index = cafe.utf16.index(of: 32)! + /// let utf8Index = String.UTF8View.Index(utf16Index, within: cafe.utf8)! + /// + /// print(Array(cafe.utf8.prefix(upTo: utf8Index))) + /// // Prints "[67, 97, 102, 195, 169]" + /// + /// If the position passed in `utf16Index` doesn't have an exact + /// corresponding position in `utf8`, the result of the initializer is + /// `nil`. For example, because UTF-8 and UTF-16 represent high Unicode code + /// points differently, an attempt to convert the position of the trailing + /// surrogate of a UTF-16 surrogate pair fails. + /// + /// The next example attempts to convert the indices of the two UTF-16 code + /// units that represent the teacup emoji (`"🍵"`). The index of the lead + /// surrogate is successfully converted to a position in `utf8`, but the + /// index of the trailing surrogate is not. + /// + /// let emojiHigh = cafe.utf16.index(after: utf16Index) + /// print(String.UTF8View.Index(emojiHigh, within: cafe.utf8)) + /// // Prints "Optional(String.Index(...))" + /// + /// let emojiLow = cafe.utf16.index(after: emojiHigh) + /// print(String.UTF8View.Index(emojiLow, within: cafe.utf8)) + /// // Prints "nil" + /// + /// - Parameters: + /// - utf16Index: A position in a `UTF16View` instance. `utf16Index` must + /// be an element in `String(utf8).utf16.indices`. + /// - utf8: The `UTF8View` in which to find the new position.
public init?(_ utf16Index: String.UTF16Index, within utf8: String.UTF8View) { let utf16 = String.UTF16View(utf8._core) @@ -397,11 +553,24 @@ extension String.UTF8View.Index { self.init(utf8._core, _utf16Offset: utf16Index._offset) } - /// Construct the position in `utf8` that corresponds exactly to - /// `unicodeScalarIndex`. + /// Creates an index in the given UTF-8 view that corresponds exactly to the + /// specified `UnicodeScalarView` position. + /// + /// The following example converts the position of the Unicode scalar `"e"` + /// into its corresponding position in the string's `utf8` view. /// - /// - Precondition: `unicodeScalarIndex` is an element of - /// `String(utf8)!.unicodeScalars.indices`. + /// let cafe = "Cafe\u{0301}" + /// let scalarsIndex = cafe.unicodeScalars.index(of: "e")! + /// let utf8Index = String.UTF8View.Index(scalarsIndex, within: cafe.utf8) + /// + /// print(Array(cafe.utf8.prefix(through: utf8Index))) + /// // Prints "[67, 97, 102, 101]" + /// + /// - Parameters: + /// - unicodeScalarIndex: A position in a `UnicodeScalarView` instance. + /// `unicodeScalarIndex` must be an element of + /// `String(utf8).unicodeScalars.indices`. + /// - utf8: The `UTF8View` in which to find the new position. public init( _ unicodeScalarIndex: String.UnicodeScalarIndex, within utf8: String.UTF8View @@ -409,19 +578,48 @@ extension String.UTF8View.Index { self.init(utf8._core, _utf16Offset: unicodeScalarIndex._position) } - /// Construct the position in `utf8` that corresponds exactly to - /// `characterIndex`. + /// Creates an index in the given UTF-8 view that corresponds exactly to the + /// specified string position. + /// + /// The following example converts the position of the teacup emoji (`"🍵"`) + /// into its corresponding position in the string's `utf8` view. /// - /// - Precondition: `characterIndex` is an element of - /// `String(utf8)!.indices`. + /// let cafe = "Café 🍵" + /// let characterIndex = cafe.characters.index(of: "🍵")! 
+ /// let utf8Index = String.UTF8View.Index(characterIndex, within: cafe.utf8) + /// + /// print(Array(cafe.utf8.suffix(from: utf8Index))) + /// // Prints "[240, 159, 141, 181]" + /// + /// - Parameters: + /// - characterIndex: A position in a `CharacterView` instance. + /// `characterIndex` must be an element of + /// `String(utf8).characters.indices`. + /// - utf8: The `UTF8View` in which to find the new position. public init(_ characterIndex: String.Index, within utf8: String.UTF8View) { self.init(utf8._core, _utf16Offset: characterIndex._base._position) } - /// Returns the position in `utf16` that corresponds exactly - /// to `self`, or if no such position exists, `nil`. + /// Returns the position in the given UTF-16 view that corresponds exactly to + /// this index. + /// + /// The index must be a valid index of `String(utf16).utf8`. /// - /// - Precondition: `self` is an element of `String(utf16)!.utf8.indices`. + /// This example first finds the position of a space (UTF-8 code point `32`) + /// in a string's `utf8` view and then uses this method to find the same + /// position in the string's `utf16` view. + /// + /// let cafe = "Café 🍵" + /// let i = cafe.utf8.index(of: 32)! + /// let j = i.samePosition(in: cafe.utf16)! + /// print(cafe.utf16.prefix(upTo: j)) + /// // Prints "Café" + /// + /// - Parameter utf16: The view to use for the index conversion. + /// - Returns: The position in `utf16` that corresponds exactly to this + /// index. If this index does not have an exact corresponding position in + /// `utf16`, this method returns `nil`. For example, an attempt to convert + /// the position of a UTF-8 continuation byte returns `nil`. @warn_unused_result public func samePosition( in utf16: String.UTF16View @@ -429,11 +627,27 @@ extension String.UTF8View.Index { return String.UTF16View.Index(self, within: utf16) } - /// Returns the position in `unicodeScalars` that corresponds exactly - /// to `self`, or if no such position exists, `nil`. 
+ /// Returns the position in the given view of Unicode scalars that + /// corresponds exactly to this index. + /// + /// This index must be a valid index of `String(unicodeScalars).utf8`. /// - /// - Precondition: `self` is an element of - /// `String(unicodeScalars).utf8.indices`. + /// This example first finds the position of a space (UTF-8 code point `32`) + /// in a string's `utf8` view and then uses this method to find the same position + /// in the string's `unicodeScalars` view. + /// + /// let cafe = "Café 🍵" + /// let i = cafe.utf8.index(of: 32)! + /// let j = i.samePosition(in: cafe.unicodeScalars)! + /// print(cafe.unicodeScalars.prefix(upTo: j)) + /// // Prints "Café" + /// + /// - Parameter unicodeScalars: The view to use for the index conversion. + /// - Returns: The position in `unicodeScalars` that corresponds exactly to + /// this index. If this index does not have an exact corresponding + /// position in `unicodeScalars`, this method returns `nil`. For example, + /// an attempt to convert the position of a UTF-8 continuation byte + /// returns `nil`. @warn_unused_result public func samePosition( in unicodeScalars: String.UnicodeScalarView @@ -441,10 +655,27 @@ extension String.UTF8View.Index { return String.UnicodeScalarIndex(self, within: unicodeScalars) } - /// Returns the position in `characters` that corresponds exactly - /// to `self`, or if no such position exists, `nil`. + /// Returns the position in the given string that corresponds exactly to this + /// index. + /// + /// This index must be a valid index of `characters.utf8`. + /// + /// This example first finds the position of a space (UTF-8 code point `32`) + /// in a string's `utf8` view and then uses this method to find the same + /// position in the string. + /// + /// let cafe = "Café 🍵" + /// let i = cafe.utf8.index(of: 32)! + /// let j = i.samePosition(in: cafe)!
+ /// print(cafe[cafe.startIndex ..< j]) + /// // Prints "Café" /// - /// - Precondition: `self` is an element of `characters.utf8.indices`. + /// - Parameter characters: The string to use for the index conversion. + /// - Returns: The position in `characters` that corresponds exactly to + /// this index. If this index does not have an exact corresponding + /// position in `characters`, this method returns `nil`. For example, + /// an attempt to convert the position of a UTF-8 continuation byte + /// returns `nil`. @warn_unused_result public func samePosition( in characters: String @@ -455,7 +686,7 @@ extension String.UTF8View.Index { // Reflection extension String.UTF8View : CustomReflectable { - /// Returns a mirror that reflects `self`. + /// Returns a mirror that reflects the UTF-8 view of a string. public var customMirror: Mirror { return Mirror(self, unlabeledChildren: self) } diff --git a/stdlib/public/core/StringUnicodeScalarView.swift b/stdlib/public/core/StringUnicodeScalarView.swift index 7bd45308b7b0a..51e7fc2d2ced6 100644 --- a/stdlib/public/core/StringUnicodeScalarView.swift +++ b/stdlib/public/core/StringUnicodeScalarView.swift @@ -27,8 +27,52 @@ public func < ( } extension String { - /// A collection of [Unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value) that - /// encodes a `String` value. + /// A view of a string's contents as a collection of Unicode scalar values. + /// + /// You can access a string's view of Unicode scalar values by using its + /// `unicodeScalars` property. Unicode scalar values are the 21-bit codes + /// that are the basic unit of Unicode. Each scalar value is represented by + /// a `UnicodeScalar` instance and is equivalent to a UTF-32 code unit. 
+ /// + /// let flowers = "Flowers 💐" + /// for v in flowers.unicodeScalars { + /// print(v.value) + /// } + /// // 70 + /// // 108 + /// // 111 + /// // 119 + /// // 101 + /// // 114 + /// // 115 + /// // 32 + /// // 128144 + /// + /// Some characters that are visible in a string are made up of more than one + /// Unicode scalar value. In that case, a string's `unicodeScalars` view + /// contains more values than its `characters` view. + /// + /// let flag = "🇵🇷" + /// for c in flag.characters { + /// print(c) + /// } + /// // 🇵🇷 + /// + /// for v in flag.unicodeScalars { + /// print(v.value) + /// } + /// // 127477 + /// // 127479 + /// + /// You can convert a `String.UnicodeScalarView` instance back into a string + /// using the `String` type's `init(_:)` initializer. + /// + /// let favemoji = "My favorite emoji is 🎉" + /// if let i = favemoji.unicodeScalars.index(where: { $0.value >= 128 }) { + /// let asciiPrefix = String(favemoji.unicodeScalars.prefix(upTo: i)) + /// print(asciiPrefix) + /// } + /// // Prints "My favorite emoji is " public struct UnicodeScalarView : BidirectionalCollection, CustomStringConvertible, @@ -56,7 +100,22 @@ extension String { } } - /// A position in a `String.UnicodeScalarView`. + /// A position in a string's `UnicodeScalars` view. + /// + /// You can convert between indices of the different string views by using + /// conversion initializers and the `samePosition(in:)` method overloads. + /// The following example finds the index of the solid heart pictograph in + /// the string's character view and then converts that to the same + /// position in the Unicode scalars view: + /// + /// let hearts = "Hearts <3 ♥︎ 💘" + /// let i = hearts.characters.index(of: "♥︎")! 
+ /// + /// let j = i.samePosition(in: hearts.unicodeScalars) + /// print(hearts.unicodeScalars.suffix(from: j)) + /// // Prints "♥︎ 💘" + /// print(hearts.unicodeScalars[j].value) + /// // Prints "9829" public struct Index : Comparable { public init(_ _position: Int, _ _core: _StringCore) { self._position = _position @@ -77,17 +136,18 @@ extension String { @_versioned internal var _core: _StringCore } - /// The position of the first `UnicodeScalar` if the `String` is - /// non-empty; identical to `endIndex` otherwise. + /// The position of the first Unicode scalar value if the string is + /// nonempty. + /// + /// If the string is empty, `startIndex` is equal to `endIndex`. public var startIndex: Index { return Index(_core.startIndex, _core) } - /// The "past the end" position. + /// The "past the end" position---that is, the position one greater than + /// the last valid subscript argument. /// - /// `endIndex` is not a valid argument to `subscript`, and is always - /// reachable from `startIndex` by zero or more applications of - /// `index(after:)`. + /// In an empty Unicode scalars view, `endIndex` is equal to `startIndex`. public var endIndex: Index { return Index(_core.endIndex, _core) } @@ -118,10 +178,22 @@ extension String { return Index(i, _core) } - /// Access the element at `position`. + /// Accesses the Unicode scalar value at the given position. /// - /// - Precondition: `position` is a valid position in `self` and - /// `position != endIndex`. + /// The following example searches a string's Unicode scalars view for a + /// capital letter and then prints the character and Unicode scalar value + /// at the found index: + /// + /// let greeting = "Hello, friend!" 
+ /// if let i = greeting.unicodeScalars.index(where: { "A"..."Z" ~= $0 }) { + /// print("First capital letter: \(greeting.unicodeScalars[i])") + /// print("Unicode scalar value: \(greeting.unicodeScalars[i].value)") + /// } + /// // Prints "First capital letter: H" + /// // Prints "Unicode scalar value: 72" + /// + /// - Parameter position: A valid index of the character view. `position` + /// must be less than the view's end index. public subscript(position: Index) -> UnicodeScalar { var scratch = _ScratchIterator(_core, position._position) var decoder = UTF16() @@ -135,17 +207,26 @@ extension String { } } - /// Access the contiguous subrange of elements enclosed by `bounds`. + /// Accesses the Unicode scalar values in the given range. + /// + /// The example below uses this subscript to access the scalar values up + /// to, but not including, the first comma (`","`) in the string. /// - /// - Complexity: O(1) unless bridging from Objective-C requires an - /// O(N) conversion. + /// let str = "All this happened, more or less." + /// let i = str.unicodeScalars.index(of: ",")! + /// let substring = str.unicodeScalars[str.unicodeScalars.startIndex ..< i] + /// print(String(substring)) + /// // Prints "All this happened" + /// + /// - Complexity: O(*n*) if the underlying string is bridged from + /// Objective-C, where *n* is the length of the string; otherwise, O(1). public subscript(r: Range) -> UnicodeScalarView { return UnicodeScalarView( _core[r.lowerBound._position.. UnicodeScalar? { var result: UnicodeDecodingResult if _baseSet { @@ -206,10 +289,9 @@ extension String { internal var _iterator: IndexingIterator<_StringCore>! } - /// Returns an iterator over the `UnicodeScalar`s that comprise - /// this sequence. + /// Returns an iterator over the Unicode scalars that make up this view. /// - /// - Complexity: O(1). + /// - Returns: An iterator over this collection's `UnicodeScalar` elements. 
@warn_unused_result public func makeIterator() -> Iterator { return Iterator(_core) @@ -226,19 +308,33 @@ extension String { internal var _core: _StringCore } - /// Construct the `String` corresponding to the given sequence of - /// Unicode scalars. + /// Creates a string corresponding to the given collection of Unicode + /// scalars. + /// + /// You can use this initializer to create a new string from a slice of + /// another string's `unicodeScalars` view. + /// + /// let picnicGuest = "Deserving porcupine" + /// if let i = picnicGuest.unicodeScalars.index(of: " ") { + /// let adjective = String(picnicGuest.unicodeScalars.prefix(upTo: i)) + /// print(adjective) + /// } + /// // Prints "Deserving" + /// + /// The `adjective` constant is created by calling this initializer with a + /// slice of the `picnicGuest.unicodeScalars` view. + /// + /// - Parameter unicodeScalars: A collection of Unicode scalar values. public init(_ unicodeScalars: UnicodeScalarView) { self.init(unicodeScalars._core) } - /// The index type for subscripting a `String`'s `.unicodeScalars` - /// view. + /// The index type for a string's `unicodeScalars` view. public typealias UnicodeScalarIndex = UnicodeScalarView.Index } extension String { - /// The value of `self` as a collection of [Unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value). + /// The string's value represented as a collection of Unicode scalar values. public var unicodeScalars : UnicodeScalarView { get { return UnicodeScalarView(_core) @@ -250,36 +346,60 @@ extension String { } extension String.UnicodeScalarView : RangeReplaceableCollection { - /// Construct an empty instance. + /// Creates an empty view instance. public init() { self = String.UnicodeScalarView(_StringCore()) } - /// Reserve enough space to store `n` ASCII characters. + + /// Reserves enough space in the view's underlying storage to store the + /// specified number of ASCII characters. 
+ /// + /// Because a Unicode scalar value can require more than a single ASCII + /// character's worth of storage, additional allocation may be necessary + /// when adding to a Unicode scalar view after a call to + /// `reserveCapacity(_:)`. + /// + /// - Parameter n: The minimum number of ASCII characters' worth of storage + /// to allocate. /// - /// - Complexity: O(`n`). + /// - Complexity: O(*n*), where *n* is the capacity being reserved. public mutating func reserveCapacity(_ n: Int) { _core.reserveCapacity(n) } - /// Append `x` to `self`. + + /// Appends the given Unicode scalar to the view. /// - /// - Complexity: Amortized O(1). + /// - Parameter x: The Unicode scalar value to append to the view. public mutating func append(_ x: UnicodeScalar) { _core.append(x) } - /// Append the elements of `newElements` to `self`. + + /// Appends the Unicode scalar values in the given sequence to the view. + /// + /// - Parameter newElements: A sequence of Unicode scalar values. /// - /// - Complexity: O(*length of result*). + /// - Complexity: O(*n*), where *n* is the length of the resulting view. public mutating func append< S : Sequence where S.Iterator.Element == UnicodeScalar >(contentsOf newElements: S) { _core.append(contentsOf: newElements.lazy.flatMap { $0.utf16 }) } - /// Replace the elements within `bounds` with `newElements`. + + /// Replaces the elements within the specified bounds with the given Unicode + /// scalar values. + /// + /// Calling this method invalidates any existing indices for use with this + /// string. /// - /// Invalidates all indices with respect to `self`. + /// - Parameters: + /// - bounds: The range of elements to replace. The bounds of the range + /// must be valid indices of the view. + /// - newElements: The new Unicode scalar values to add to the string. /// - /// - Complexity: O(`bounds.count`) if `bounds.upperBound - /// == self.endIndex` and `newElements.isEmpty`, O(N) otherwise.
+ /// - Complexity: O(*m*), where *m* is the combined length of the view and + /// `newElements`. If the call to `replaceSubrange(_:with:)` simply + /// removes elements at the end of the string, the complexity is O(*n*), + /// where *n* is equal to `bounds.count`. public mutating func replaceSubrange< C: Collection where C.Iterator.Element == UnicodeScalar >( @@ -295,11 +415,31 @@ extension String.UnicodeScalarView : RangeReplaceableCollection { // Index conversions extension String.UnicodeScalarIndex { - /// Construct the position in `unicodeScalars` that corresponds exactly to - /// `utf16Index`. If no such position exists, the result is `nil`. + /// Creates an index in the given Unicode scalars view that corresponds + /// exactly to the specified `UTF16View` position. + /// + /// The following example finds the position of a space in a string's `utf16` + /// view and then converts that position to an index in the string's + /// `unicodeScalars` view: + /// + /// let cafe = "Café 🍵" + /// + /// let utf16Index = cafe.utf16.index(of: 32)! + /// let scalarIndex = String.UnicodeScalarView.Index(utf16Index, within: cafe.unicodeScalars)! /// - /// - Precondition: `utf16Index` is an element of - /// `String(unicodeScalars).utf16.indices`. + /// print(String(cafe.unicodeScalars.prefix(upTo: scalarIndex))) + /// // Prints "Café" + /// + /// If the position passed in `utf16Index` doesn't have an exact + /// corresponding position in `unicodeScalars`, the result of the + /// initializer is `nil`. For example, an attempt to convert the position of + /// the trailing surrogate of a UTF-16 surrogate pair fails. + /// + /// - Parameters: + /// - utf16Index: A position in the `utf16` view of the `unicodeScalars` + /// parameter. + /// - unicodeScalars: The `UnicodeScalarView` instance referenced by both + /// `utf16Index` and the resulting index.
public init?( _ utf16Index: String.UTF16Index, within unicodeScalars: String.UnicodeScalarView @@ -325,11 +465,19 @@ extension String.UnicodeScalarIndex { self.init(utf16Index._offset, unicodeScalars._core) } - /// Construct the position in `unicodeScalars` that corresponds exactly to - /// `utf8Index`. If no such position exists, the result is `nil`. + /// Creates an index in the given Unicode scalars view that corresponds + /// exactly to the specified `UTF8View` position. + /// + /// If the position passed as `utf8Index` doesn't have an exact corresponding + /// position in `unicodeScalars`, the result of the initializer is `nil`. + /// For example, an attempt to convert the position of a UTF-8 continuation + /// byte returns `nil`. /// - /// - Precondition: `utf8Index` is an element of - /// `String(unicodeScalars).utf8.indices`. + /// - Parameters: + /// - utf8Index: A position in the `utf8` view of the `unicodeScalars` + /// parameter. + /// - unicodeScalars: The `UnicodeScalarView` instance referenced by both + /// `utf8Index` and the resulting index. public init?( _ utf8Index: String.UTF8Index, within unicodeScalars: String.UnicodeScalarView @@ -347,11 +495,24 @@ extension String.UnicodeScalarIndex { self.init(utf8Index._coreIndex, core) } - /// Construct the position in `unicodeScalars` that corresponds - /// exactly to `characterIndex`. + /// Creates an index in the given Unicode scalars view that corresponds + /// exactly to the specified string position. + /// + /// The following example converts the position of the teacup emoji (`"🍵"`) + /// into its corresponding position in the string's `unicodeScalars` view. + /// + /// let cafe = "Café 🍵" + /// let characterIndex = cafe.characters.index(of: "🍵")! + /// let scalarIndex = String.UnicodeScalarView.Index(characterIndex, within: cafe.unicodeScalars) /// - /// - Precondition: `characterIndex` is an element of - /// `String(unicodeScalars).indices`. 
+ /// print(cafe.unicodeScalars.suffix(from: scalarIndex)) + /// // Prints "🍵" + /// + /// - Parameters: + /// - characterIndex: A position in a `CharacterView` instance. + /// `characterIndex` must be an element of + /// `String(unicodeScalars).characters.indices`. + /// - unicodeScalars: The `UnicodeScalarView` in which to find the new position. public init( _ characterIndex: String.Index, within unicodeScalars: String.UnicodeScalarView @@ -359,19 +520,45 @@ extension String.UnicodeScalarIndex { self.init(characterIndex._base._position, unicodeScalars._core) } - /// Returns the position in `utf8` that corresponds exactly - /// to `self`. + /// Returns the position in the given UTF-8 view that corresponds exactly to + /// this index. + /// + /// The index must be a valid index of `String(utf8).unicodeScalars`. + /// + /// This example first finds the position of the character `"é"` and then uses + /// this method to find the same position in the string's `utf8` view. /// - /// - Precondition: `self` is an element of `String(utf8)!.indices`. + /// let cafe = "Café" + /// if let i = cafe.unicodeScalars.index(of: "é") { + /// let j = i.samePosition(in: cafe.utf8) + /// print(Array(cafe.utf8.suffix(from: j))) + /// } + /// // Prints "[195, 169]" + /// + /// - Parameter utf8: The view to use for the index conversion. + /// - Returns: The position in `utf8` that corresponds exactly to this index. @warn_unused_result public func samePosition(in utf8: String.UTF8View) -> String.UTF8View.Index { return String.UTF8View.Index(self, within: utf8) } - /// Returns the position in `utf16` that corresponds exactly - /// to `self`. + /// Returns the position in the given UTF-16 view that corresponds exactly to + /// this index. + /// + /// The index must be a valid index of `String(utf16).unicodeScalars`. /// - /// - Precondition: `self` is an element of `String(utf16)!.indices`. 
+ /// This example first finds the position of the character `"é"` and then uses + /// this method to find the same position in the string's `utf16` view. + /// + /// let cafe = "Café" + /// if let i = cafe.unicodeScalars.index(of: "é") { + /// let j = i.samePosition(in: cafe.utf16) + /// print(cafe.utf16[j]) + /// } + /// // Prints "233" + /// + /// - Parameter utf16: The view to use for the index conversion. + /// - Returns: The position in `utf16` that corresponds exactly to this index. @warn_unused_result public func samePosition( in utf16: String.UTF16View @@ -379,11 +566,27 @@ extension String.UnicodeScalarIndex { return String.UTF16View.Index(self, within: utf16) } - /// Returns the position in `characters` that corresponds exactly - /// to `self`, or if no such position exists, `nil`. + /// Returns the position in the given string that corresponds exactly to this + /// index. + /// + /// This index must be a valid index of `characters.unicodeScalars`. + /// + /// This example first finds the position of the teacup emoji (`"🍵"`) in a + /// string's `unicodeScalars` view and then uses this method to find the same + /// position in the string. + /// + /// let cafe = "Café 🍵" + /// let i = cafe.unicodeScalars.index(of: "🍵")! + /// let j = i.samePosition(in: cafe)! + /// print(cafe.suffix(from: j)) + /// // Prints "🍵" /// - /// - Precondition: `self` is an element of - /// `characters.unicodeScalars.indices`. + /// - Parameter characters: The string to use for the index conversion. + /// - Returns: The position in `characters` that corresponds exactly to + /// this index. If this index does not have an exact corresponding + /// position in `characters`, this method returns `nil`. For example, + /// an attempt to convert the position of a scalar in the middle of an + /// extended grapheme cluster returns `nil`. @warn_unused_result public func samePosition(in characters: String) -> String.Index? 
{ return String.Index(self, within: characters) @@ -416,7 +619,7 @@ extension String.UnicodeScalarIndex { // Reflection extension String.UnicodeScalarView : CustomReflectable { - /// Returns a mirror that reflects `self`. + /// Returns a mirror that reflects the Unicode scalars view of a string. public var customMirror: Mirror { return Mirror(self, unlabeledChildren: self) } diff --git a/stdlib/public/core/Unicode.swift b/stdlib/public/core/Unicode.swift index 842662fb10186..2817c5d0a3d0c 100644 --- a/stdlib/public/core/Unicode.swift +++ b/stdlib/public/core/Unicode.swift @@ -16,11 +16,19 @@ /// The result of one Unicode decoding step. /// -/// A unicode scalar value, an indication that no more unicode scalars -/// are available, or an indication of a decoding error. +/// Each `UnicodeDecodingResult` instance can represent a Unicode scalar value, +/// an indication that no more Unicode scalars are available, or an indication +/// of a decoding error. +/// +/// - SeeAlso: `UnicodeCodec.decode(next:)` public enum UnicodeDecodingResult : Equatable { + /// A decoded Unicode scalar value. case scalarValue(UnicodeScalar) + + /// An indication that no more Unicode scalars are available in the input. case emptyInput + + /// An indication of a decoding error. case error } @@ -40,56 +48,102 @@ public func == ( } } -/// A Unicode [encoding scheme](http://www.unicode.org/glossary/#character_encoding_scheme). +/// A Unicode encoding form that translates between Unicode scalar values and +/// form-specific code units. /// -/// Consists of an underlying [code unit](http://www.unicode.org/glossary/#code_unit) -/// and functions to translate between sequences of these code units and -/// [unicode scalar values](http://www.unicode.org/glossary/#unicode_scalar_value). +/// The `UnicodeCodec` protocol declares methods that decode code unit +/// sequences into Unicode scalar values and encode Unicode scalar values +/// into code unit sequences. 
The standard library implements codecs for the +/// UTF-8, UTF-16, and UTF-32 encoding schemes as the `UTF8`, `UTF16`, and +/// `UTF32` types, respectively. Use the `UnicodeScalar` type to work with +/// decoded Unicode scalar values. +/// +/// - SeeAlso: `UTF8`, `UTF16`, `UTF32`, `UnicodeScalar` public protocol UnicodeCodec { - /// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit) - /// values for this encoding. + /// A type that can hold code unit values for this encoding. associatedtype CodeUnit + /// Creates an instance of the codec. init() - /// Start or continue decoding a UTF sequence. + /// Starts or continues decoding a code unit sequence into Unicode scalar + /// values. /// - /// In order to decode a code unit sequence completely, this function should - /// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`. - /// Checking that the iterator was exhausted is not sufficient. The decoder - /// can have an internal buffer that is pre-filled with data from the input - /// iterator. + /// To decode a code unit sequence completely, call this method repeatedly + /// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the + /// iterator was exhausted is not sufficient, because the decoder can store + /// buffered data from the input iterator. /// /// Because of buffering, it is impossible to find the corresponding position /// in the iterator for a given returned `UnicodeScalar` or an error. /// - /// - Parameter next: An iterator of code units to be decoded. Repeated - /// calls to this method on the same instance should always pass the same - /// iterator and the iterator or copies thereof should not be used for - /// anything else between calls. Failing to do so will yield unspecified - /// results. 
+ /// The following example decodes the UTF-8 encoded bytes of a string into an + /// array of `UnicodeScalar` instances: + /// + /// let str = "✨Unicode✨" + /// print(Array(str.utf8)) + /// // Prints "[226, 156, 168, 85, 110, 105, 99, 111, 100, 101, 226, 156, 168]" + /// + /// var bytesIterator = str.utf8.makeIterator() + /// var scalars: [UnicodeScalar] = [] + /// var utf8Decoder = UTF8() + /// Decode: while true { + /// switch utf8Decoder.decode(&bytesIterator) { + /// case .scalarValue(let v): scalars.append(v) + /// case .emptyInput: break Decode + /// case .error: + /// print("Decoding error") + /// break Decode + /// } + /// } + /// print(scalars) + /// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]" + /// + /// - Parameter next: An iterator of code units to be decoded. `next` must be + /// the same iterator instance in repeated calls to this method. Do not + /// advance the iterator or any copies of the iterator outside this + /// method. + /// - Returns: A `UnicodeDecodingResult` instance, representing the next + /// Unicode scalar, an indication of an error, or an indication that the + /// UTF sequence has been fully decoded. mutating func decode< I : IteratorProtocol where I.Element == CodeUnit >(_ next: inout I) -> UnicodeDecodingResult - /// Encode a `UnicodeScalar` as a series of `CodeUnit`s by - /// calling `processCodeUnit` on each `CodeUnit`. + /// Encodes a Unicode scalar as a series of code units by calling the given + /// closure on each code unit. + /// + /// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar + /// value (`\u{1D110}`) but requires four code units for its UTF-8 + /// representation. 
The following code uses the `UTF8` codec to encode a + /// fermata in UTF-8: + /// + /// var bytes: [UTF8.CodeUnit] = [] + /// UTF8.encode("𝄐", sendingOutputTo: { bytes.append($0) }) + /// print(bytes) + /// // Prints "[240, 157, 132, 144]" + /// + /// - Parameters: + /// - input: The Unicode scalar value to encode. + /// - processCodeUnit: A closure that processes one code unit argument at a + /// time. static func encode( _ input: UnicodeScalar, sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void ) } -/// A codec for [UTF-8](http://www.unicode.org/glossary/#UTF_8). +/// A codec for translating between Unicode scalar values and UTF-8 code +/// units. public struct UTF8 : UnicodeCodec { // See Unicode 8.0.0, Ch 3.9, UTF-8. // http://www.unicode.org/versions/Unicode8.0.0/ch03.pdf - /// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit) - /// values for this encoding. + /// A type that can hold code unit values for this encoding. public typealias CodeUnit = UInt8 + /// Creates an instance of the UTF-8 codec. public init() {} /// Lookahead buffer used for UTF-8 decoding. New bytes are inserted at MSB, @@ -105,22 +159,47 @@ public struct UTF8 : UnicodeCodec { /// we are done decoding, as there might still be bytes left in the buffer. internal var _didExhaustIterator: Bool = false - /// Start or continue decoding a UTF-8 sequence. + /// Starts or continues decoding a UTF-8 sequence. /// - /// In order to decode a code unit sequence completely, this function should - /// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`. - /// Checking that the iterator was exhausted is not sufficient. The decoder - /// can have an internal buffer that is pre-filled with data from the input - /// iterator. + /// To decode a code unit sequence completely, call this method repeatedly + /// until it returns `UnicodeDecodingResult.emptyInput`. 
Checking that the + /// iterator was exhausted is not sufficient, because the decoder can store + /// buffered data from the input iterator. /// /// Because of buffering, it is impossible to find the corresponding position /// in the iterator for a given returned `UnicodeScalar` or an error. /// - /// - Parameter next: An iterator of code units to be decoded. Repeated - /// calls to this method on the same instance should always pass the same - /// iterator and the iterator or copies thereof should not be used for - /// anything else between calls. Failing to do so will yield unspecified - /// results. + /// The following example decodes the UTF-8 encoded bytes of a string into an + /// array of `UnicodeScalar` instances. This is a demonstration only---if + /// you need the Unicode scalar representation of a string, use its + /// `unicodeScalars` view. + /// + /// let str = "✨Unicode✨" + /// print(Array(str.utf8)) + /// // Prints "[226, 156, 168, 85, 110, 105, 99, 111, 100, 101, 226, 156, 168]" + /// + /// var bytesIterator = str.utf8.makeIterator() + /// var scalars: [UnicodeScalar] = [] + /// var utf8Decoder = UTF8() + /// Decode: while true { + /// switch utf8Decoder.decode(&bytesIterator) { + /// case .scalarValue(let v): scalars.append(v) + /// case .emptyInput: break Decode + /// case .error: + /// print("Decoding error") + /// break Decode + /// } + /// } + /// print(scalars) + /// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]" + /// + /// - Parameter next: An iterator of code units to be decoded. `next` must be + /// the same iterator instance in repeated calls to this method. Do not + /// advance the iterator or any copies of the iterator outside this + /// method. + /// - Returns: A `UnicodeDecodingResult` instance, representing the next + /// Unicode scalar, an indication of an error, or an indication that the + /// UTF sequence has been fully decoded. 
public mutating func decode< I : IteratorProtocol where I.Element == CodeUnit >(_ next: inout I) -> UnicodeDecodingResult { @@ -281,8 +360,22 @@ public struct UTF8 : UnicodeCodec { } } - /// Encode a `UnicodeScalar` as a series of `CodeUnit`s by - /// calling `processCodeUnit` on each `CodeUnit`. + /// Encodes a Unicode scalar as a series of code units by calling the given + /// closure on each code unit. + /// + /// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar + /// value (`\u{1D110}`) but requires four code units for its UTF-8 + /// representation. The following code encodes a fermata in UTF-8: + /// + /// var bytes: [UTF8.CodeUnit] = [] + /// UTF8.encode("𝄐", sendingOutputTo: { bytes.append($0) }) + /// print(bytes) + /// // Prints "[240, 157, 132, 144]" + /// + /// - Parameters: + /// - input: The Unicode scalar value to encode. + /// - processCodeUnit: A closure that processes one code unit argument at a + /// time. public static func encode( _ input: UnicodeScalar, sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void @@ -316,20 +409,36 @@ public struct UTF8 : UnicodeCodec { processCodeUnit(buf3) } - /// Returns `true` if `byte` is a continuation byte of the form - /// `0b10xxxxxx`. + /// Returns a Boolean value indicating whether the specified code unit is a + /// UTF-8 continuation byte. + /// + /// Continuation bytes take the form `0b10xxxxxx`. For example, a lowercase + /// "e" with an acute accent above it (`"é"`) uses 2 bytes for its UTF-8 + /// representation: `0b11000011` (195) and `0b10101001` (169). The second + /// byte is a continuation byte. + /// + /// let eAcute = "é" + /// for codePoint in eAcute.utf8 { + /// print(codePoint, UTF8.isContinuation(codePoint)) + /// } + /// // Prints "195 false" + /// // Prints "169 true" + /// + /// - Parameter byte: A UTF-8 code unit. + /// - Returns: `true` if `byte` is a continuation byte; otherwise, `false`. 
@warn_unused_result public static func isContinuation(_ byte: CodeUnit) -> Bool { return byte & 0b11_00__0000 == 0b10_00__0000 } } -/// A codec for [UTF-16](http://www.unicode.org/glossary/#UTF_16). +/// A codec for translating between Unicode scalar values and UTF-16 code +/// units. public struct UTF16 : UnicodeCodec { - /// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit) - /// values for this encoding. + /// A type that can hold code unit values for this encoding. public typealias CodeUnit = UInt16 + /// Creates an instance of the UTF-16 codec. public init() {} /// A lookahead buffer for one UTF-16 code unit. @@ -342,22 +451,47 @@ public struct UTF16 : UnicodeCodec { /// `x` is set when `_decodeLookahead` contains a code unit. internal var _lookaheadFlags: UInt8 = 0 - /// Start or continue decoding a UTF sequence. + /// Starts or continues decoding a UTF-16 sequence. /// - /// In order to decode a code unit sequence completely, this function should - /// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`. - /// Checking that the iterator was exhausted is not sufficient. The decoder - /// can have an internal buffer that is pre-filled with data from the input - /// iterator. + /// To decode a code unit sequence completely, call this method repeatedly + /// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the + /// iterator was exhausted is not sufficient, because the decoder can store + /// buffered data from the input iterator. /// /// Because of buffering, it is impossible to find the corresponding position /// in the iterator for a given returned `UnicodeScalar` or an error. /// - /// - Parameter next: An iterator of code units to be decoded. Repeated - /// calls to this method on the same instance should always pass the same - /// iterator and the iterator or copies thereof should not be used for - /// anything else between calls. Failing to do so will yield unspecified - /// results. 
+ /// The following example decodes the UTF-16 code units of a string into an + /// array of `UnicodeScalar` instances. This is a demonstration only---if + /// you need the Unicode scalar representation of a string, use its + /// `unicodeScalars` view. + /// + /// let str = "✨Unicode✨" + /// print(Array(str.utf16)) + /// // Prints "[10024, 85, 110, 105, 99, 111, 100, 101, 10024]" + /// + /// var codeUnitIterator = str.utf16.makeIterator() + /// var scalars: [UnicodeScalar] = [] + /// var utf16Decoder = UTF16() + /// Decode: while true { + /// switch utf16Decoder.decode(&codeUnitIterator) { + /// case .scalarValue(let v): scalars.append(v) + /// case .emptyInput: break Decode + /// case .error: + /// print("Decoding error") + /// break Decode + /// } + /// } + /// print(scalars) + /// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]" + /// + /// - Parameter input: An iterator of code units to be decoded. `input` must be + /// the same iterator instance in repeated calls to this method. Do not + /// advance the iterator or any copies of the iterator outside this + /// method. + /// - Returns: A `UnicodeDecodingResult` instance, representing the next + /// Unicode scalar, an indication of an error, or an indication that the + /// UTF sequence has been fully decoded. public mutating func decode< I : IteratorProtocol where I.Element == CodeUnit >(_ input: inout I) -> UnicodeDecodingResult { @@ -453,8 +587,22 @@ public struct UTF16 : UnicodeCodec { } } - /// Encode a `UnicodeScalar` as a series of `CodeUnit`s by - /// calling `processCodeUnit` on each `CodeUnit`. + /// Encodes a Unicode scalar as a series of code units by calling the given + /// closure on each code unit. + /// + /// For example, the musical fermata symbol ("𝄐") is a single Unicode scalar + /// value (`\u{1D110}`) but requires two code units for its UTF-16 + /// representation. 
The following code encodes a fermata in UTF-16: + /// + /// var codeUnits: [UTF16.CodeUnit] = [] + /// UTF16.encode("𝄐", sendingOutputTo: { codeUnits.append($0) }) + /// print(codeUnits) + /// // Prints "[55348, 56592]" + /// + /// - Parameters: + /// - input: The Unicode scalar value to encode. + /// - processCodeUnit: A closure that processes one code unit argument at a + /// time. public static func encode( _ input: UnicodeScalar, sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void @@ -472,30 +620,56 @@ public struct UTF16 : UnicodeCodec { } } -/// A codec for [UTF-32](http://www.unicode.org/glossary/#UTF_32). +/// A codec for translating between Unicode scalar values and UTF-32 code +/// units. public struct UTF32 : UnicodeCodec { - /// A type that can hold [code unit](http://www.unicode.org/glossary/#code_unit) - /// values for this encoding. + /// A type that can hold code unit values for this encoding. public typealias CodeUnit = UInt32 + /// Creates an instance of the UTF-32 codec. public init() {} - /// Start or continue decoding a UTF sequence. + /// Starts or continues decoding a UTF-32 sequence. /// - /// In order to decode a code unit sequence completely, this function should - /// be called repeatedly until it returns `UnicodeDecodingResult.emptyInput`. - /// Checking that the iterator was exhausted is not sufficient. The decoder - /// can have an internal buffer that is pre-filled with data from the input - /// iterator. + /// To decode a code unit sequence completely, call this method repeatedly + /// until it returns `UnicodeDecodingResult.emptyInput`. Checking that the + /// iterator was exhausted is not sufficient, because the decoder can store + /// buffered data from the input iterator. /// /// Because of buffering, it is impossible to find the corresponding position /// in the iterator for a given returned `UnicodeScalar` or an error. /// - /// - Parameter next: An iterator of code units to be decoded. 
Repeated - /// calls to this method on the same instance should always pass the same - /// iterator and the iterator or copies thereof should not be used for - /// anything else between calls. Failing to do so will yield unspecified - /// results. + /// The following example decodes a sequence of UTF-32 code units + /// into an array of `UnicodeScalar` instances. This is a demonstration + /// only---if you need the Unicode scalar representation of a string, use + /// its `unicodeScalars` view. + /// + /// // UTF-32 representation of "✨Unicode✨" + /// let codeUnits: [UTF32.CodeUnit] = + /// [10024, 85, 110, 105, 99, 111, 100, 101, 10024] + /// + /// var codeUnitIterator = codeUnits.makeIterator() + /// var scalars: [UnicodeScalar] = [] + /// var utf32Decoder = UTF32() + /// Decode: while true { + /// switch utf32Decoder.decode(&codeUnitIterator) { + /// case .scalarValue(let v): scalars.append(v) + /// case .emptyInput: break Decode + /// case .error: + /// print("Decoding error") + /// break Decode + /// } + /// } + /// print(scalars) + /// // Prints "["\u{2728}", "U", "n", "i", "c", "o", "d", "e", "\u{2728}"]" + /// + /// - Parameter input: An iterator of code units to be decoded. `input` must be + /// the same iterator instance in repeated calls to this method. Do not + /// advance the iterator or any copies of the iterator outside this + /// method. + /// - Returns: A `UnicodeDecodingResult` instance, representing the next + /// Unicode scalar, an indication of an error, or an indication that the + /// UTF sequence has been fully decoded. public mutating func decode< I : IteratorProtocol where I.Element == CodeUnit >(_ input: inout I) -> UnicodeDecodingResult { @@ -513,8 +687,22 @@ public struct UTF32 : UnicodeCodec { } } - /// Encode a `UnicodeScalar` as a series of `CodeUnit`s by - /// calling `processCodeUnit` on each `CodeUnit`. + /// Encodes a Unicode scalar as a UTF-32 code unit by calling the given + /// closure. 
+ /// + /// For example, like every Unicode scalar, the musical fermata symbol ("𝄐") + /// can be represented in UTF-32 as a single code unit. The following code + /// encodes a fermata in UTF-32: + /// + /// var codeUnit: UTF32.CodeUnit = 0 + /// UTF32.encode("𝄐", sendingOutputTo: { codeUnit = $0 }) + /// print(codeUnit) + /// // Prints "119056" + /// + /// - Parameters: + /// - input: The Unicode scalar value to encode. + /// - processCodeUnit: A closure that processes one code unit argument at a + /// time. public static func encode( _ input: UnicodeScalar, sendingOutputTo processCodeUnit: @noescape (CodeUnit) -> Void @@ -523,12 +711,41 @@ public struct UTF32 : UnicodeCodec { } } -/// Translate `input`, in the given `InputEncoding`, into `processCodeUnit`, in -/// the given `OutputEncoding`. +/// Translates the given input from one Unicode encoding to another by calling +/// the given closure. /// -/// - Parameter stopOnError: Causes encoding to stop when an encoding -/// error is detected in `input`, if `true`. Otherwise, U+FFFD -/// replacement characters are inserted for each detected error. +/// The following example transcodes the UTF-8 representation of the string +/// `"Fermata 𝄐"` into UTF-32. +/// +/// let fermata = "Fermata 𝄐" +/// let bytes = fermata.utf8 +/// print(Array(bytes)) +/// // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 240, 157, 132, 144]" +/// +/// var codeUnits: [UTF32.CodeUnit] = [] +/// let sink = { codeUnits.append($0) } +/// transcode(bytes.makeIterator(), from: UTF8.self, to: UTF32.self, +/// stoppingOnError: false, sendingOutputTo: sink) +/// print(codeUnits) +/// // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 119056]" +/// +/// The `sink` closure is called with each resulting UTF-32 code unit as the +/// function iterates over its input. +/// +/// - Parameters: +/// - input: An iterator of code units to be translated, encoded as +/// `inputEncoding`. If `stopOnError` is `false`, the entire iterator will +/// be exhausted. 
Otherwise, iteration will stop if an encoding error is +/// detected. +/// - inputEncoding: The Unicode encoding of `input`. +/// - outputEncoding: The destination Unicode encoding. +/// - stopOnError: Pass `true` to stop translation when an encoding error is +/// detected in `input`. Otherwise, a Unicode replacement character +/// (`"\u{FFFD}"`) is inserted for each detected error. +/// - processCodeUnit: A closure that processes one `outputEncoding` code +/// unit at a time. +/// - Returns: `true` if the translation detected encoding errors in `input`; +/// otherwise, `false`. public func transcode< Input : IteratorProtocol, InputEncoding : UnicodeCodec, @@ -702,26 +919,78 @@ extension UTF8.CodeUnit : _StringElement { } extension UTF16 { - /// Returns the number of code units required to encode `x`. + /// Returns the number of code units required to encode the given Unicode + /// scalar. + /// + /// Because a Unicode scalar value can require up to 21 bits to store its + /// value, some Unicode scalars are represented in UTF-16 by a pair of + /// 16-bit code units. The first and second code units of the pair, + /// designated *leading* and *trailing* surrogates, make up a *surrogate + /// pair*. + /// + /// let anA: UnicodeScalar = "A" + /// print(anA.value) + /// // Prints "65" + /// print(UTF16.width(anA)) + /// // Prints "1" + /// + /// let anApple: UnicodeScalar = "🍎" + /// print(anApple.value) + /// // Prints "127822" + /// print(UTF16.width(anApple)) + /// // Prints "2" + /// + /// - Parameter x: A Unicode scalar value. + /// - Returns: The width of `x` when encoded in UTF-16, either `1` or `2`. @warn_unused_result public static func width(_ x: UnicodeScalar) -> Int { return x.value <= 0xFFFF ? 1 : 2 } - /// Returns the high surrogate code unit of a [surrogate pair](http://www.unicode.org/glossary/#surrogate_pair) representing - /// `x`. + /// Returns the high-surrogate code unit of the surrogate pair representing + /// the specified Unicode scalar. 
+ /// + /// Because a Unicode scalar value can require up to 21 bits to store its + /// value, some Unicode scalars are represented in UTF-16 by a pair of + /// 16-bit code units. The first and second code units of the pair, + /// designated *leading* and *trailing* surrogates, make up a *surrogate + /// pair*. + /// + /// let apple: UnicodeScalar = "🍎" + /// print(UTF16.leadSurrogate(apple)) + /// // Prints "55356" + /// + /// - Parameter x: A Unicode scalar value. `x` must be represented by a + /// surrogate pair when encoded in UTF-16. To check whether `x` is + /// represented by a surrogate pair, use `UTF16.width(x) == 2`. + /// - Returns: The leading surrogate code unit of `x` when encoded in UTF-16. /// - /// - Precondition: `width(x) == 2`. + /// - SeeAlso: `UTF16.width(_:)`, `UTF16.trailSurrogate(_:)` @warn_unused_result public static func leadSurrogate(_ x: UnicodeScalar) -> UTF16.CodeUnit { _precondition(width(x) == 2) return UTF16.CodeUnit((x.value - 0x1_0000) >> (10 as UInt32)) + 0xD800 } - /// Returns the low surrogate code unit of a [surrogate pair](http://www.unicode.org/glossary/#surrogate_pair) representing - /// `x`. + /// Returns the low-surrogate code unit of the surrogate pair representing + /// the specified Unicode scalar. + /// + /// Because a Unicode scalar value can require up to 21 bits to store its + /// value, some Unicode scalars are represented in UTF-16 by a pair of + /// 16-bit code units. The first and second code units of the pair, + /// designated *leading* and *trailing* surrogates, make up a *surrogate + /// pair*. + /// + /// let apple: UnicodeScalar = "🍎" + /// print(UTF16.trailSurrogate(apple)) + /// // Prints "57166" + /// + /// - Parameter x: A Unicode scalar value. `x` must be represented by a + /// surrogate pair when encoded in UTF-16. To check whether `x` is + /// represented by a surrogate pair, use `UTF16.width(x) == 2`. + /// - Returns: The trailing surrogate code unit of `x` when encoded in UTF-16. 
/// - /// - Precondition: `width(x) == 2`. + /// - SeeAlso: `UTF16.width(_:)`, `UTF16.leadSurrogate(_:)` @warn_unused_result public static func trailSurrogate(_ x: UnicodeScalar) -> UTF16.CodeUnit { _precondition(width(x) == 2) @@ -730,11 +999,57 @@ extension UTF16 { ) + 0xDC00 } + /// Returns a Boolean value indicating whether the specified code unit is a + /// high-surrogate code unit. + /// + /// Here's an example of checking whether each code unit in a string's + /// `utf16` view is a lead surrogate. The `apple` string contains a single + /// emoji character made up of a surrogate pair when encoded in UTF-16. + /// + /// let apple = "🍎" + /// for unit in apple.utf16 { + /// print(UTF16.isLeadSurrogate(unit)) + /// } + /// // Prints "true" + /// // Prints "false" + /// + /// This method does not validate the encoding of a UTF-16 sequence beyond + /// the specified code unit. Specifically, it does not validate that a + /// low-surrogate code unit follows `x`. + /// + /// - Parameter x: A UTF-16 code unit. + /// - Returns: `true` if `x` is a high-surrogate code unit; otherwise, + /// `false`. + /// @warn_unused_result public static func isLeadSurrogate(_ x: CodeUnit) -> Bool { return 0xD800...0xDBFF ~= x } + /// Returns a Boolean value indicating whether the specified code unit is a + /// low-surrogate code unit. + /// + /// Here's an example of checking whether each code unit in a string's + /// `utf16` view is a trailing surrogate. The `apple` string contains a + /// single emoji character made up of a surrogate pair when encoded in + /// UTF-16. + /// + /// let apple = "🍎" + /// for unit in apple.utf16 { + /// print(UTF16.isTrailSurrogate(unit)) + /// } + /// // Prints "false" + /// // Prints "true" + /// + /// This method does not validate the encoding of a UTF-16 sequence beyond + /// the specified code unit. Specifically, it does not validate that a + /// high-surrogate code unit precedes `x`. + /// + /// - Parameter x: A UTF-16 code unit. 
+ /// - Returns: `true` if `x` is a low-surrogate code unit; otherwise, + /// `false`. + /// + /// - SeeAlso: `UTF16.width(_:)`, `UTF16.leadSurrogate(_:)` @warn_unused_result public static func isTrailSurrogate(_ x: CodeUnit) -> Bool { return 0xDC00...0xDFFF ~= x @@ -761,12 +1076,39 @@ extension UTF16 { } /// Returns the number of UTF-16 code units required for the given code unit - /// sequence when transcoded to UTF-16, and a bit describing if the sequence - /// was found to contain only ASCII characters. + /// sequence when transcoded to UTF-16, and a Boolean value indicating + /// whether the sequence was found to contain only ASCII characters. + /// + /// The following example finds the length of the UTF-16 encoding of the + /// string `"Fermata 𝄐"`, starting with its UTF-8 representation. + /// + /// let fermata = "Fermata 𝄐" + /// let bytes = fermata.utf8 + /// print(Array(bytes)) + /// // Prints "[70, 101, 114, 109, 97, 116, 97, 32, 240, 157, 132, 144]" + /// + /// let result = UTF16.transcodedLength(of: bytes.makeIterator(), + /// decodedAs: UTF8.self, + /// repairingIllFormedSequences: false) + /// print(result) + /// // Prints "Optional((10, false))" /// - /// If `repairIllFormedSequences` is `true`, the function always succeeds. - /// If it is `false`, `nil` is returned if an ill-formed code unit sequence is - /// found in `input`. + /// - Parameters: + /// - input: An iterator of code units to be translated, encoded as + /// `sourceEncoding`. If `repairingIllFormedSequences` is `true`, the + /// entire iterator will be exhausted. Otherwise, iteration will stop if + /// an ill-formed sequence is detected. + /// - sourceEncoding: The Unicode encoding of `input`. + /// - repairingIllFormedSequences: Pass `true` to measure the length of + /// `input` even when `input` contains ill-formed sequences. Each + /// ill-formed sequence is replaced with a Unicode replacement character + /// (`"\u{FFFD}"`) and is measured as such.
Pass `false` to immediately + /// stop measuring `input` when an ill-formed sequence is encountered. + /// - Returns: A tuple containing the number of UTF-16 code units required to + /// encode `input` and a Boolean value that indicates whether the `input` + /// contained only ASCII characters. If `repairingIllFormedSequences` is + /// `false` and an ill-formed sequence is detected, this method returns + /// `nil`. @warn_unused_result public static func transcodedLength< Encoding : UnicodeCodec, Input : IteratorProtocol @@ -803,7 +1145,7 @@ extension UTF16 { } } -// Unchecked init to avoid precondition branches in hot code paths were we +// Unchecked init to avoid precondition branches in hot code paths where we // already know the value is a valid unicode scalar. extension UnicodeScalar { /// Create an instance with numeric value `value`, bypassing the regular diff --git a/stdlib/public/core/UnicodeScalar.swift b/stdlib/public/core/UnicodeScalar.swift index 0ea4abe447f00..f4b74b388117c 100644 --- a/stdlib/public/core/UnicodeScalar.swift +++ b/stdlib/public/core/UnicodeScalar.swift @@ -12,7 +12,25 @@ // UnicodeScalar Type //===----------------------------------------------------------------------===// -/// A [Unicode scalar value](http://www.unicode.org/glossary/#unicode_scalar_value). +/// A Unicode scalar value. +/// +/// The `UnicodeScalar` type, representing a single Unicode scalar value, is +/// the element type of a string's `unicodeScalars` collection. +/// +/// You can create a `UnicodeScalar` instance by using a string literal that +/// contains a single character representing exactly one Unicode scalar value. +/// +/// let letterK: UnicodeScalar = "K" +/// let kim: UnicodeScalar = "김" +/// print(letterK, kim) +/// // Prints "K 김" +/// +/// You can also create Unicode scalar values directly from their numeric +/// representation. 
+/// +/// let airplane = UnicodeScalar(9992) +/// print(airplane) +/// // Prints "✈︎" @_fixed_layout public struct UnicodeScalar : _BuiltinUnicodeScalarLiteralConvertible, @@ -20,7 +38,7 @@ public struct UnicodeScalar : var _value: UInt32 - /// A numeric representation of `self`. + /// A numeric representation of the Unicode scalar. public var value: UInt32 { return _value } @_transparent @@ -28,15 +46,35 @@ public struct UnicodeScalar : self._value = UInt32(value) } - /// Create an instance initialized to `value`. + /// Creates a Unicode scalar with the specified value. + /// + /// Do not call this initializer directly. It may be used by the compiler + /// when you use a string literal to initialize a `UnicodeScalar` instance. + /// + /// let letterK: UnicodeScalar = "K" + /// print(letterK) + /// // Prints "K" + /// + /// In this example, the assignment to the `letterK` constant is handled by + /// this initializer behind the scenes. @_transparent public init(unicodeScalarLiteral value: UnicodeScalar) { self = value } - /// Create an instance with numeric value `v`. + /// Creates a Unicode scalar with the specified numeric value. + /// + /// For example, the following code sample creates a `UnicodeScalar` instance + /// with a value of an emoji character: + /// + /// let codepoint: UInt32 = 127881 + /// let emoji = UnicodeScalar(codepoint) + /// print(emoji) + /// // Prints "🎉" /// - /// - Precondition: `v` is a valid Unicode scalar value. + /// - Parameter v: The Unicode code point to use for the scalar. `v` must be + /// a valid Unicode scalar value, in the range `0...0xD7FF` or + /// `0xE000...0x10FFFF`. public init(_ v: UInt32) { // Unicode 6.3.0: // @@ -55,29 +93,70 @@ public struct UnicodeScalar : self._value = v } - /// Create an instance with numeric value `v`. + /// Creates a Unicode scalar with the specified numeric value. /// - /// - Precondition: `v` is a valid Unicode scalar value. 
+ /// For example, the following code sample creates a `UnicodeScalar` instance + /// with a value of `밥`, the Korean word for rice: + /// + /// let codepoint: UInt16 = 48165 + /// let bap = UnicodeScalar(codepoint) + /// print(bap) + /// // Prints "밥" + /// + /// - Parameter v: The Unicode code point to use for the scalar. `v` must be + /// a valid Unicode scalar value, in the range `0...0xD7FF` or + /// `0xE000...0xFFFF`. public init(_ v: UInt16) { self = UnicodeScalar(UInt32(v)) } - /// Create an instance with numeric value `v`. + /// Creates a Unicode scalar with the specified numeric value. + /// + /// For example, the following code sample creates a `UnicodeScalar` instance + /// with a value of `7`: + /// + /// let codepoint: UInt8 = 55 + /// let seven = UnicodeScalar(codepoint) + /// print(seven) + /// // Prints "7" + /// + /// - Parameter v: The code point to use for the scalar. public init(_ v: UInt8) { self = UnicodeScalar(UInt32(v)) } - /// Create a duplicate of `v`. + /// Creates a duplicate of the given Unicode scalar. public init(_ v: UnicodeScalar) { // This constructor allows one to provide necessary type context to // disambiguate between function overloads on 'String' and 'UnicodeScalar'. self = v } - /// Returns a String representation of `self` . + /// Returns a string representation of the Unicode scalar. /// - /// - parameter forceASCII: If `true`, forces most values into a numeric - /// representation. + /// Scalar values representing characters that are normally unprintable or + /// that otherwise require escaping are escaped with a backslash. + /// + /// let tab = UnicodeScalar(9) + /// print(tab) + /// // Prints " " + /// print(tab.escaped(asASCII: false)) + /// // Prints "\t" + /// + /// When the `forceASCII` parameter is `true`, a `UnicodeScalar` instance + /// with a value greater than 127 is represented using an escaped numeric + /// value; otherwise, non-ASCII characters are represented using their + /// typical string value. 
+ /// + /// let bap = UnicodeScalar(48165) + /// print(bap.escaped(asASCII: false)) + /// // Prints "밥" + /// print(bap.escaped(asASCII: true)) + /// // Prints "\u{BC25}" + /// + /// - Parameter forceASCII: Pass `true` if you need the result to use only + /// ASCII characters; otherwise, pass `false`. + /// - Returns: A string representation of the scalar. @warn_unused_result public func escaped(asASCII forceASCII: Bool) -> String { func lowNibbleAsHex(_ v: UInt32) -> String { @@ -138,8 +217,21 @@ public struct UnicodeScalar : } } - /// Returns `true` if this is an ASCII character (code point 0 to 127 - /// inclusive). + /// A Boolean value indicating whether the Unicode scalar is an ASCII + /// character. + /// + /// ASCII characters have a scalar value between 0 and 127, inclusive. For + /// example: + /// + /// let canyon = "Cañón" + /// for scalar in canyon.unicodeScalars { + /// print(scalar, scalar.isASCII, scalar.value) + /// } + /// // Prints "C true 67" + /// // Prints "a true 97" + /// // Prints "ñ false 241" + /// // Prints "ó false 243" + /// // Prints "n true 110" public var isASCII: Bool { return value <= 127 } @@ -156,33 +248,41 @@ public struct UnicodeScalar : } extension UnicodeScalar : CustomStringConvertible, CustomDebugStringConvertible { - /// A textual representation of `self`. + /// An escaped textual representation of the Unicode scalar. public var description: String { return "\"\(escaped(asASCII: false))\"" } - /// A textual representation of `self`, suitable for debugging. + /// An escaped textual representation of the Unicode scalar, suitable for + /// debugging. public var debugDescription: String { return "\"\(escaped(asASCII: true))\"" } } extension UnicodeScalar : Hashable { - /// The hash value. - /// - /// **Axiom:** `x == y` implies `x.hashValue == y.hashValue`. + /// The Unicode scalar's hash value. /// - /// - Note: The hash value is not guaranteed to be stable across - /// different invocations of the same program. 
Do not persist the - /// hash value across program runs. + /// Hash values are not guaranteed to be equal across different executions of + /// your program. Do not save hash values to use during a future execution. public var hashValue: Int { return Int(self.value) } } extension UnicodeScalar { - /// Construct with value `v`. + /// Creates a Unicode scalar with the specified numeric value. + /// + /// For example, the following code sample creates a `UnicodeScalar` instance + /// with a value of an emoji character: + /// + /// let codepoint = 127881 + /// let emoji = UnicodeScalar(codepoint) + /// print(emoji) + /// // Prints "🎉" /// - /// - Precondition: `v` is a valid unicode scalar value. + /// - Parameter v: The Unicode code point to use for the scalar. `v` must be + /// a valid Unicode scalar value, in the ranges `0...0xD7FF` or + /// `0xE000...0x10FFFF`. public init(_ v: Int) { self = UnicodeScalar(UInt32(v)) } @@ -247,11 +347,10 @@ extension UnicodeScalar.UTF16View : RandomAccessCollection { return 0 } - /// The "past the end" position. + /// The "past the end" position---that is, the position one + /// greater than the last valid subscript argument. /// - /// `endIndex` is not a valid argument to `subscript`, and is always - /// reachable from `startIndex` by zero or more applications of - /// `index(after:)`. + /// If the collection is empty, `endIndex` is equal to `startIndex`. var endIndex: Int { return 0 + UTF16.width(value) } @@ -277,7 +376,7 @@ func _ascii16(_ c: UnicodeScalar) -> UTF16.CodeUnit { extension UnicodeScalar { /// Creates an instance of the NUL scalar value. 
- @available(*, unavailable, message: "use the 'UnicodeScalar(\"\\0\")'") + @available(*, unavailable, message: "use 'UnicodeScalar(0)'") public init() { Builtin.unreachable() } diff --git a/stdlib/public/core/UnsafeBufferPointer.swift.gyb b/stdlib/public/core/UnsafeBufferPointer.swift.gyb index 6717e3fa03b60..ca98e2008310f 100644 --- a/stdlib/public/core/UnsafeBufferPointer.swift.gyb +++ b/stdlib/public/core/UnsafeBufferPointer.swift.gyb @@ -48,11 +48,11 @@ public struct Unsafe${Mutable}BufferPointer return 0 } - /// The "past the end" position; always identical to `count`. + /// The "past the end" position---that is, the position one greater than the + /// last valid subscript argument. /// - /// `endIndex` is not a valid argument to `subscript`, and is always - /// reachable from `startIndex` by zero or more applications of - /// `index(after:)`. + /// The `endIndex` property of an `Unsafe${Mutable}BufferPointer` instance is + /// always identical to `count`. public var endIndex: Int { return count } From d61435ed080ca8cb40212881ff5f60ba850092fe Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Sat, 21 May 2016 13:26:41 -0500 Subject: [PATCH 2/2] [stdlib] Update tests for new String documentation In addition to a new test fixture for 'SourceKit/DocSupport/ doc_clang_module.swift', this removes the check for an ASCII-only generated standard library interface from 'SourceKit/InterfaceGen/ gen_stdlib.swift'. 
Cherry-picked from dee12dc --- test/SourceKit/DocSupport/doc_clang_module.swift.response | 2 +- test/SourceKit/InterfaceGen/gen_stdlib.swift | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/SourceKit/DocSupport/doc_clang_module.swift.response b/test/SourceKit/DocSupport/doc_clang_module.swift.response index b08d0750af3f9..f2c1c46b149ce 100644 --- a/test/SourceKit/DocSupport/doc_clang_module.swift.response +++ b/test/SourceKit/DocSupport/doc_clang_module.swift.response @@ -5708,7 +5708,7 @@ var FooSubUnnamedEnumeratorA1: Int { get } key.name: "init(arrayLiteral:)", key.usr: "s:FEsPs10SetAlgebracFt12arrayLiteralGSawx7Element__x::SYNTHESIZED::c:@E@FooRuncingOptions", key.original_usr: "s:FEsPs10SetAlgebracFt12arrayLiteralGSawx7Element__x", - key.doc.full_as_xml: "init(arrayLiteral:)s:FEsPs10SetAlgebracFt12arrayLiteralGSawx7Element__xconvenience init(arrayLiteral: Self.Element...)Creates a set containing the elements of the given array literal.arrayLiteralinA list of elements of the new set.Don’t directly call this initializer, which is used by the compiler when you use an array literal. Instead, create a new set using an array literal as its value by enclosing a comma-separated list of values in square brackets. You can use an array literal anywhere a set is expected by the type context.Here, a set of strings is created from an array literal holding only strings:", + key.doc.full_as_xml: "init(arrayLiteral:)s:FEsPs10SetAlgebracFt12arrayLiteralGSawx7Element__xconvenience init(arrayLiteral: Self.Element...)Creates a set containing the elements of the given array literal.arrayLiteralinA list of elements of the new set.Do not call this initializer directly. It is used by the compiler when you use an array literal. Instead, create a new set using an array literal as its value by enclosing a comma-separated list of values in square brackets. 
You can use an array literal anywhere a set is expected by the type context.Here, a set of strings is created from an array literal holding only strings:", key.offset: 2240, key.length: 65, key.fully_annotated_decl: "convenience init(arrayLiteral: FooRuncingOptions...)", diff --git a/test/SourceKit/InterfaceGen/gen_stdlib.swift b/test/SourceKit/InterfaceGen/gen_stdlib.swift index bdf188f3341fb..45374afae4e95 100644 --- a/test/SourceKit/InterfaceGen/gen_stdlib.swift +++ b/test/SourceKit/InterfaceGen/gen_stdlib.swift @@ -1,7 +1,7 @@ var x: Int -// RUN: %sourcekitd-test -req=interface-gen -module Swift -check-interface-ascii > %t.response +// RUN: %sourcekitd-test -req=interface-gen -module Swift > %t.response // RUN: FileCheck -check-prefix=CHECK-STDLIB -input-file %t.response %s // RUN: FileCheck -check-prefix=CHECK-MUTATING-ATTR -input-file %t.response %s // RUN: FileCheck -check-prefix=CHECK-HIDE-ATTR -input-file %t.response %s