diff --git a/stdlib/public/SwiftShims/UnicodeShims.h b/stdlib/public/SwiftShims/UnicodeShims.h index d0c2c76bb6a6f..5b07dc78ee630 100644 --- a/stdlib/public/SwiftShims/UnicodeShims.h +++ b/stdlib/public/SwiftShims/UnicodeShims.h @@ -62,27 +62,6 @@ SWIFT_RUNTIME_STDLIB_INTERFACE const __swift_uint16_t * _swift_stdlib_ExtendedGraphemeClusterNoBoundaryRulesMatrix; -SWIFT_RUNTIME_STDLIB_INTERFACE -SWIFT_READONLY __swift_int32_t -_swift_stdlib_unicode_compare_utf16_utf16(const __swift_uint16_t *Left, - __swift_int32_t LeftLength, - const __swift_uint16_t *Right, - __swift_int32_t RightLength); - -SWIFT_RUNTIME_STDLIB_INTERFACE -SWIFT_READONLY __swift_int32_t -_swift_stdlib_unicode_compare_utf8_utf16(const unsigned char *Left, - __swift_int32_t LeftLength, - const __swift_uint16_t *Right, - __swift_int32_t RightLength); - -SWIFT_RUNTIME_STDLIB_INTERFACE -SWIFT_READONLY __swift_int32_t -_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *Left, - __swift_int32_t LeftLength, - const unsigned char *Right, - __swift_int32_t RightLength); - SWIFT_RUNTIME_STDLIB_INTERFACE void *_swift_stdlib_unicodeCollationIterator_create( const __swift_uint16_t *Str, @@ -109,6 +88,123 @@ __swift_int32_t _swift_stdlib_unicode_strToLower( __swift_uint16_t *Destination, __swift_int32_t DestinationCapacity, const __swift_uint16_t *Source, __swift_int32_t SourceLength); +typedef enum __swift_stdlib_UProperty { + __swift_stdlib_UCHAR_ALPHABETIC = 0, + __swift_stdlib_UCHAR_BINARY_START = __swift_stdlib_UCHAR_ALPHABETIC, + __swift_stdlib_UCHAR_ASCII_HEX_DIGIT = 1, + __swift_stdlib_UCHAR_BIDI_CONTROL = 2, + __swift_stdlib_UCHAR_BIDI_MIRRORED = 3, + __swift_stdlib_UCHAR_DASH = 4, + __swift_stdlib_UCHAR_DEFAULT_IGNORABLE_CODE_POINT = 5, + __swift_stdlib_UCHAR_DEPRECATED = 6, + __swift_stdlib_UCHAR_DIACRITIC = 7, + __swift_stdlib_UCHAR_EXTENDER = 8, + __swift_stdlib_UCHAR_FULL_COMPOSITION_EXCLUSION = 9, + __swift_stdlib_UCHAR_GRAPHEME_BASE = 10, + __swift_stdlib_UCHAR_GRAPHEME_EXTEND = 11, + 
__swift_stdlib_UCHAR_GRAPHEME_LINK = 12, + __swift_stdlib_UCHAR_HEX_DIGIT = 13, + __swift_stdlib_UCHAR_HYPHEN = 14, + __swift_stdlib_UCHAR_ID_CONTINUE = 15, + __swift_stdlib_UCHAR_ID_START = 16, + __swift_stdlib_UCHAR_IDEOGRAPHIC = 17, + __swift_stdlib_UCHAR_IDS_BINARY_OPERATOR = 18, + __swift_stdlib_UCHAR_IDS_TRINARY_OPERATOR = 19, + __swift_stdlib_UCHAR_JOIN_CONTROL = 20, + __swift_stdlib_UCHAR_LOGICAL_ORDER_EXCEPTION = 21, + __swift_stdlib_UCHAR_LOWERCASE = 22, + __swift_stdlib_UCHAR_MATH = 23, + __swift_stdlib_UCHAR_NONCHARACTER_CODE_POINT = 24, + __swift_stdlib_UCHAR_QUOTATION_MARK = 25, + __swift_stdlib_UCHAR_RADICAL = 26, + __swift_stdlib_UCHAR_SOFT_DOTTED = 27, + __swift_stdlib_UCHAR_TERMINAL_PUNCTUATION = 28, + __swift_stdlib_UCHAR_UNIFIED_IDEOGRAPH = 29, + __swift_stdlib_UCHAR_UPPERCASE = 30, + __swift_stdlib_UCHAR_WHITE_SPACE = 31, + __swift_stdlib_UCHAR_XID_CONTINUE = 32, + __swift_stdlib_UCHAR_XID_START = 33, + __swift_stdlib_UCHAR_CASE_SENSITIVE = 34, + __swift_stdlib_UCHAR_S_TERM = 35, + __swift_stdlib_UCHAR_VARIATION_SELECTOR = 36, + __swift_stdlib_UCHAR_NFD_INERT = 37, + __swift_stdlib_UCHAR_NFKD_INERT = 38, + __swift_stdlib_UCHAR_NFC_INERT = 39, + __swift_stdlib_UCHAR_NFKC_INERT = 40, + __swift_stdlib_UCHAR_SEGMENT_STARTER = 41, + __swift_stdlib_UCHAR_PATTERN_SYNTAX = 42, + __swift_stdlib_UCHAR_PATTERN_WHITE_SPACE = 43, + __swift_stdlib_UCHAR_POSIX_ALNUM = 44, + __swift_stdlib_UCHAR_POSIX_BLANK = 45, + __swift_stdlib_UCHAR_POSIX_GRAPH = 46, + __swift_stdlib_UCHAR_POSIX_PRINT = 47, + __swift_stdlib_UCHAR_POSIX_XDIGIT = 48, + __swift_stdlib_UCHAR_CASED = 49, + __swift_stdlib_UCHAR_CASE_IGNORABLE = 50, + __swift_stdlib_UCHAR_CHANGES_WHEN_LOWERCASED = 51, + __swift_stdlib_UCHAR_CHANGES_WHEN_UPPERCASED = 52, + __swift_stdlib_UCHAR_CHANGES_WHEN_TITLECASED = 53, + __swift_stdlib_UCHAR_CHANGES_WHEN_CASEFOLDED = 54, + __swift_stdlib_UCHAR_CHANGES_WHEN_CASEMAPPED = 55, + __swift_stdlib_UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED = 56, + __swift_stdlib_UCHAR_EMOJI = 
57, + __swift_stdlib_UCHAR_EMOJI_PRESENTATION = 58, + __swift_stdlib_UCHAR_EMOJI_MODIFIER = 59, + __swift_stdlib_UCHAR_EMOJI_MODIFIER_BASE = 60, + + __swift_stdlib_UCHAR_BIDI_CLASS = 0x1000, + __swift_stdlib_UCHAR_INT_START = __swift_stdlib_UCHAR_BIDI_CLASS, + __swift_stdlib_UCHAR_BLOCK = 0x1001, + __swift_stdlib_UCHAR_CANONICAL_COMBINING_CLASS = 0x1002, + __swift_stdlib_UCHAR_DECOMPOSITION_TYPE = 0x1003, + __swift_stdlib_UCHAR_EAST_ASIAN_WIDTH = 0x1004, + __swift_stdlib_UCHAR_GENERAL_CATEGORY = 0x1005, + __swift_stdlib_UCHAR_JOINING_GROUP = 0x1006, + __swift_stdlib_UCHAR_JOINING_TYPE = 0x1007, + __swift_stdlib_UCHAR_LINE_BREAK = 0x1008, + __swift_stdlib_UCHAR_NUMERIC_TYPE = 0x1009, + __swift_stdlib_UCHAR_SCRIPT = 0x100A, + __swift_stdlib_UCHAR_HANGUL_SYLLABLE_TYPE = 0x100B, + __swift_stdlib_UCHAR_NFD_QUICK_CHECK = 0x100C, + __swift_stdlib_UCHAR_NFKD_QUICK_CHECK = 0x100D, + __swift_stdlib_UCHAR_NFC_QUICK_CHECK = 0x100E, + __swift_stdlib_UCHAR_NFKC_QUICK_CHECK = 0x100F, + __swift_stdlib_UCHAR_LEAD_CANONICAL_COMBINING_CLASS = 0x1010, + __swift_stdlib_UCHAR_TRAIL_CANONICAL_COMBINING_CLASS = 0x1011, + __swift_stdlib_UCHAR_GRAPHEME_CLUSTER_BREAK = 0x1012, + __swift_stdlib_UCHAR_SENTENCE_BREAK = 0x1013, + __swift_stdlib_UCHAR_WORD_BREAK = 0x1014, + __swift_stdlib_UCHAR_BIDI_PAIRED_BRACKET_TYPE = 0x1015, + + __swift_stdlib_UCHAR_GENERAL_CATEGORY_MASK = 0x2000, + __swift_stdlib_UCHAR_MASK_START = __swift_stdlib_UCHAR_GENERAL_CATEGORY_MASK, + + __swift_stdlib_UCHAR_NUMERIC_VALUE = 0x3000, + __swift_stdlib_UCHAR_DOUBLE_START = __swift_stdlib_UCHAR_NUMERIC_VALUE, + + __swift_stdlib_UCHAR_AGE = 0x4000, + __swift_stdlib_UCHAR_STRING_START = __swift_stdlib_UCHAR_AGE, + __swift_stdlib_UCHAR_BIDI_MIRRORING_GLYPH = 0x4001, + __swift_stdlib_UCHAR_CASE_FOLDING = 0x4002, + + __swift_stdlib_UCHAR_LOWERCASE_MAPPING = 0x4004, + __swift_stdlib_UCHAR_NAME = 0x4005, + __swift_stdlib_UCHAR_SIMPLE_CASE_FOLDING = 0x4006, + __swift_stdlib_UCHAR_SIMPLE_LOWERCASE_MAPPING = 0x4007, + 
__swift_stdlib_UCHAR_SIMPLE_TITLECASE_MAPPING = 0x4008, + __swift_stdlib_UCHAR_SIMPLE_UPPERCASE_MAPPING = 0x4009, + __swift_stdlib_UCHAR_TITLECASE_MAPPING = 0x400A, + + __swift_stdlib_UCHAR_UPPERCASE_MAPPING = 0x400C, + __swift_stdlib_UCHAR_BIDI_PAIRED_BRACKET = 0x400D, + + __swift_stdlib_UCHAR_SCRIPT_EXTENSIONS = 0x7000, + __swift_stdlib_UCHAR_OTHER_PROPERTY_START = + __swift_stdlib_UCHAR_SCRIPT_EXTENSIONS, + + __swift_stdlib_UCHAR_INVALID_CODE = -1 +} __swift_stdlib_UProperty; + typedef enum __swift_stdlib_UErrorCode { __swift_stdlib_U_USING_FALLBACK_WARNING = -128, __swift_stdlib_U_ERROR_WARNING_START = -128, @@ -294,6 +390,9 @@ typedef enum __swift_stdlib_UBreakIteratorType { } __swift_stdlib_UBreakIteratorType; typedef struct __swift_stdlib_UBreakIterator __swift_stdlib_UBreakIterator; +typedef struct __swift_stdlib_UNormalizer2 __swift_stdlib_UNormalizer2; +typedef __swift_int8_t __swift_stdlib_UBool; +typedef __swift_int32_t __swift_stdlib_UChar32; #if defined(__APPLE__) typedef __swift_uint16_t __swift_stdlib_UChar; #else @@ -329,6 +428,37 @@ SWIFT_RUNTIME_STDLIB_INTERFACE __swift_int32_t __swift_stdlib_ubrk_following(__swift_stdlib_UBreakIterator *bi, __swift_int32_t offset); +SWIFT_RUNTIME_STDLIB_INTERFACE +__swift_stdlib_UBool +__swift_stdlib_unorm2_hasBoundaryBefore(const __swift_stdlib_UNormalizer2 *, + __swift_stdlib_UChar32); + +SWIFT_RUNTIME_STDLIB_INTERFACE +const __swift_stdlib_UNormalizer2 * +__swift_stdlib_unorm2_getNFCInstance(__swift_stdlib_UErrorCode *); + +SWIFT_RUNTIME_STDLIB_INTERFACE +__swift_int32_t +__swift_stdlib_unorm2_normalize(const __swift_stdlib_UNormalizer2 *, + const __swift_uint16_t *, __swift_int32_t, + __swift_uint16_t *, __swift_int32_t, + __swift_stdlib_UErrorCode *); + +SWIFT_RUNTIME_STDLIB_INTERFACE +__swift_int32_t __swift_stdlib_unorm2_spanQuickCheckYes( + const __swift_stdlib_UNormalizer2 *, const __swift_stdlib_UChar *, + __swift_int32_t, __swift_stdlib_UErrorCode *); + +SWIFT_RUNTIME_STDLIB_INTERFACE 
+__swift_stdlib_UBool + __swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32, + __swift_stdlib_UProperty); +SWIFT_RUNTIME_STDLIB_INTERFACE +__swift_stdlib_UBool + __swift_stdlib_u_isdefined(__swift_stdlib_UChar32); + + + #ifdef __cplusplus }} // extern "C", namespace swift #endif diff --git a/stdlib/public/core/CMakeLists.txt b/stdlib/public/core/CMakeLists.txt index 09d6b387ea57a..cf65fce6fb391 100644 --- a/stdlib/public/core/CMakeLists.txt +++ b/stdlib/public/core/CMakeLists.txt @@ -88,6 +88,7 @@ set(SWIFTLIB_ESSENTIAL Misc.swift MutableCollection.swift NewtypeWrapper.swift.gyb + NormalizedCodeUnitIterator.swift ObjCMirrors.swift ObjectIdentifier.swift Optional.swift @@ -123,6 +124,7 @@ set(SWIFTLIB_ESSENTIAL String.swift StringBridge.swift StringComparable.swift + StringComparison.swift StringGuts.swift StringObject.swift StringIndex.swift @@ -132,6 +134,7 @@ set(SWIFTLIB_ESSENTIAL StringStorage.swift StringSwitch.swift StringIndexConversions.swift + StringNormalization.swift StringUnicodeScalarView.swift StringUTF16.swift StringUTF8.swift diff --git a/stdlib/public/core/FixedArray.swift.gyb b/stdlib/public/core/FixedArray.swift.gyb index ad029d3cc67fb..d93dbfdbcacdf 100644 --- a/stdlib/public/core/FixedArray.swift.gyb +++ b/stdlib/public/core/FixedArray.swift.gyb @@ -16,7 +16,7 @@ %{ # The sizes to generate code for. 
- sizes = [16] + sizes = [2, 8, 16] }% % for N in sizes: @@ -56,7 +56,7 @@ extension _FixedArray${N} { @_inlineable // FIXME(sil-serialize-all) @_versioned // FIXME(sil-serialize-all) - internal var count : Int { + internal var count: Int { @inline(__always) get { return Int(truncatingIfNeeded: _count) } @inline(__always) set { _count = Int8(newValue) } } diff --git a/stdlib/public/core/GroupInfo.json b/stdlib/public/core/GroupInfo.json index 5270de6d38f26..0f3089259794e 100644 --- a/stdlib/public/core/GroupInfo.json +++ b/stdlib/public/core/GroupInfo.json @@ -9,11 +9,13 @@ "Character.swift", "CharacterUnicodeScalars.swift", "ICU.swift", + "NormalizedCodeUnitIterator.swift", "StaticString.swift", "String.swift", "StringBridge.swift", "StringCharacterView.swift", "StringComparable.swift", + "StringComparison.swift", "StringObject.swift", "StringGuts.swift", "StringGraphemeBreaking.swift", @@ -22,6 +24,7 @@ "StringIndexConversions.swift", "StringInterpolation.swift", "StringLegacy.swift", + "StringNormalization.swift", "StringRangeReplaceableCollection.swift", "StringStorage.swift", "StringSwitch.swift", diff --git a/stdlib/public/core/NormalizedCodeUnitIterator.swift b/stdlib/public/core/NormalizedCodeUnitIterator.swift new file mode 100644 index 0000000000000..cc614524ae420 --- /dev/null +++ b/stdlib/public/core/NormalizedCodeUnitIterator.swift @@ -0,0 +1,270 @@ +//===--- StringNormalization.swift ----------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2018 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +internal +struct _NormalizedCodeUnitIterator: IteratorProtocol { + var segmentBuffer = _FixedArray16(allZeros:()) + var overflowBuffer: [CodeUnit]? = nil + var normalizationBuffer: [CodeUnit]? = nil + var source: _SegmentSource + var segmentBufferIndex = 0 + var segmentBufferCount = 0 + var overflowBufferIndex = 0 + var overflowBufferCount = 0 + + typealias CodeUnit = UInt16 + + init(_ opaqueString: _UnmanagedOpaqueString, startIndex: Int = 0) { + source = _UnmanagedOpaqueStringSource(opaqueString, start: startIndex) + } + + init(_ unmanagedString: _UnmanagedString, startIndex: Int = 0) { + source = _UnmanagedStringSource(unmanagedString, start: startIndex) + } + + init(_ guts: _StringGuts, _ range: Range, startIndex: Int = 0) { + source = _StringGutsSource(guts, range, start: startIndex) + } + + mutating func compare(with other: _NormalizedCodeUnitIterator) -> _Ordering { + var mutableOther = other + for cu in IteratorSequence(self) { + if let otherCU = mutableOther.next() { + let result = _lexicographicalCompare(cu, otherCU) + if result == .equal { + continue + } else { + return result + } + } else { + //other returned nil, we are greater + return .greater + } + } + + //we ran out of code units, either we are equal, or only we ran out and + //other is greater + if let _ = mutableOther.next() { + return .less + } else { + return .equal + } + } + + struct _UnmanagedOpaqueStringSource: _SegmentSource { + var remaining: Int { + return opaqueString.count - index + } + var opaqueString: _UnmanagedOpaqueString + var index: Int + + init(_ opaqueString: _UnmanagedOpaqueString, start: Int = 0) { + self.opaqueString = opaqueString + index = 
start + } + + mutating func tryFill(buffer: UnsafeMutableBufferPointer) -> Int? { + var bufferIndex = 0 + let originalIndex = index + repeat { + guard index < opaqueString.count else { + break + } + + guard bufferIndex < buffer.count else { + //The buffer isn't big enough for the current segment + index = originalIndex + return nil + } + + let cu = opaqueString[index] + buffer[bufferIndex] = cu + index += 1 + bufferIndex += 1 + } while !opaqueString.hasNormalizationBoundary(after: index - 1) + + return bufferIndex + } + } + + struct _UnmanagedStringSource: _SegmentSource { + var remaining: Int { + return unmanagedString.count - index + } + + var unmanagedString: _UnmanagedString + var index: Int + + init(_ unmanagedString: _UnmanagedString, start: Int = 0) { + self.unmanagedString = unmanagedString + index = start + } + + mutating func tryFill(buffer: UnsafeMutableBufferPointer) -> Int? { + var bufferIndex = 0 + let originalIndex = index + repeat { + guard index < unmanagedString.count else { + break + } + + guard bufferIndex < buffer.count else { + //The buffer isn't big enough for the current segment + index = originalIndex + return nil + } + + let cu = unmanagedString[index] + buffer[bufferIndex] = cu + index += 1 + bufferIndex += 1 + } while unmanagedString.hasNormalizationBoundary( + after: index - 1, + count: unmanagedString.count) == false + + return bufferIndex + } + } + + struct _StringGutsSource: _SegmentSource { + var remaining: Int { + return range.count - index + } + var guts: _StringGuts + var index: Int + var range: Range + + init(_ guts: _StringGuts, _ range: Range, start: Int = 0) { + self.guts = guts + self.range = range + index = range.lowerBound + start + } + + mutating func tryFill(buffer: UnsafeMutableBufferPointer) -> Int? 
{ + var bufferIndex = 0 + let originalIndex = index + repeat { + guard index < range.count else { + break + } + + guard bufferIndex < buffer.count else { + //The buffer isn't big enough for the current segment + index = originalIndex + return nil + } + + let cu = guts[index] + buffer[bufferIndex] = cu + index += 1 + bufferIndex += 1 + } while !guts.hasNormalizationBoundary(after: index - 1) + + return bufferIndex + } + } + + mutating func next() -> CodeUnit? { + if segmentBufferCount == segmentBufferIndex { + segmentBuffer = _FixedArray16(allZeros:()) + segmentBufferCount = 0 + segmentBufferIndex = 0 + } + + if overflowBufferCount == overflowBufferIndex { + overflowBufferCount = 0 + overflowBufferIndex = 0 + } + + if source.remaining <= 0 + && segmentBufferCount == 0 + && overflowBufferCount == 0 { + // Our source of code units to normalize is empty and our buffers from + // previous normalizations are also empty. + return nil + } + + if segmentBufferCount == 0 && overflowBufferCount == 0 { + //time to fill a buffer if possible. Otherwise we are done, return nil + // Normalize segment, and then compare first code unit + var intermediateBuffer = _FixedArray16(allZeros:()) + if overflowBuffer == nil, + let filled = source.tryFill(buffer: &intermediateBuffer) + { + guard let count = _tryNormalize( + _castOutputBuffer(&intermediateBuffer, + endingAt: filled), + into: &segmentBuffer + ) + else { + fatalError("Output buffer was not big enough, this should not happen") + } + segmentBufferCount = count + } else { + let size = source.remaining * _Normalization._maxNFCExpansionFactor + if overflowBuffer == nil { + overflowBuffer = Array(repeating: 0, count: size) + normalizationBuffer = Array(repeating:0, count: size) + } + + guard let count = normalizationBuffer!.withUnsafeMutableBufferPointer({ + (normalizationBufferPtr) -> Int? 
in + guard let filled = source.tryFill(buffer: normalizationBufferPtr) + else { + fatalError("Invariant broken, buffer should have space") + } + return overflowBuffer!.withUnsafeMutableBufferPointer { + (overflowBufferPtr) -> Int? in + return _tryNormalize( + UnsafeBufferPointer( rebasing: normalizationBufferPtr[..) -> Int? +} + +extension _SegmentSource { + mutating func tryFill( + buffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer> + ) -> Int? { + return tryFill(buffer: _castOutputBuffer(buffer)) + } +} \ No newline at end of file diff --git a/stdlib/public/core/StringComparable.swift b/stdlib/public/core/StringComparable.swift index 89c2fe2a5e482..d19d61b6b0f10 100644 --- a/stdlib/public/core/StringComparable.swift +++ b/stdlib/public/core/StringComparable.swift @@ -12,148 +12,6 @@ import SwiftShims -#if _runtime(_ObjC) -/// Compare two strings using the Unicode collation algorithm in the -/// deterministic comparison mode. (The strings which are equivalent according -/// to their NFD form are considered equal. Strings which are equivalent -/// according to the plain Unicode collation algorithm are additionally ordered -/// based on their NFD.) -/// -/// See Unicode Technical Standard #10. -/// -/// The behavior is equivalent to `NSString.compare()` with default options. -/// -/// - returns: -/// * an unspecified value less than zero if `lhs < rhs`, -/// * zero if `lhs == rhs`, -/// * an unspecified value greater than zero if `lhs > rhs`. 
-@_inlineable // FIXME(sil-serialize-all) -@_silgen_name("swift_stdlib_compareNSStringDeterministicUnicodeCollation") -public func _stdlib_compareNSStringDeterministicUnicodeCollation( - _ lhs: AnyObject, _ rhs: AnyObject -) -> Int32 - -@_inlineable // FIXME(sil-serialize-all) -@_silgen_name("swift_stdlib_compareNSStringDeterministicUnicodeCollationPtr") -public func _stdlib_compareNSStringDeterministicUnicodeCollationPointer( - _ lhs: OpaquePointer, _ rhs: OpaquePointer -) -> Int32 -#endif - -#if _runtime(_ObjC) -extension _UnmanagedString where CodeUnit == UInt8 { - /// This is consistent with Foundation, but incorrect as defined by Unicode. - /// Unicode weights some ASCII punctuation in a different order than ASCII - /// value. Such as: - /// - /// 0022 ; [*02FF.0020.0002] # QUOTATION MARK - /// 0023 ; [*038B.0020.0002] # NUMBER SIGN - /// 0025 ; [*038C.0020.0002] # PERCENT SIGN - /// 0026 ; [*0389.0020.0002] # AMPERSAND - /// 0027 ; [*02F8.0020.0002] # APOSTROPHE - /// - @_inlineable // FIXME(sil-serialize-all) - @_versioned - internal func compareASCII(to other: _UnmanagedString) -> Int { - // FIXME Results should be the same across all platforms. 
- if self.start == other.start { - return (self.count &- other.count).signum() - } - var cmp = Int(truncatingIfNeeded: - _stdlib_memcmp( - self.rawStart, other.rawStart, - Swift.min(self.count, other.count))) - if cmp == 0 { - cmp = self.count &- other.count - } - return cmp.signum() - } -} -#endif - -extension _StringGuts { - - // - // FIXME(TODO: JIRA): HACK HACK HACK: Work around for ARC :-( - // - @inline(never) - @effects(readonly) - public - static func _compareDeterministicUnicodeCollation( - _leftUnsafeStringGutsBitPattern leftBits: _RawBitPattern, - _rightUnsafeStringGutsBitPattern rightBits: _RawBitPattern - ) -> Int { - let left = _StringGuts(rawBits: leftBits) - let right = _StringGuts(rawBits: rightBits) - return _compareDeterministicUnicodeCollation( - left, 0.., - _rightUnsafeStringGutsBitPattern rightBits: _RawBitPattern, - _ rightRange: Range - ) -> Int { - let left = _StringGuts(rawBits: leftBits) - let right = _StringGuts(rawBits: rightBits) - return _compareDeterministicUnicodeCollation( - left, leftRange, to: right, rightRange) - } - - /// Compares two slices of strings with the Unicode Collation Algorithm. - @inline(never) // Hide the CF/ICU dependency - @effects(readonly) - public // @testable - static func _compareDeterministicUnicodeCollation( - _ left: _StringGuts, _ leftRange: Range, - to right: _StringGuts, _ rightRange: Range) -> Int { - // Note: this operation should be consistent with equality comparison of - // Character. 
-#if _runtime(_ObjC) - if _fastPath(left._isContiguous && right._isContiguous) { - let l = _NSContiguousString(_unmanaged: left, range: leftRange) - let r = _NSContiguousString(_unmanaged: right, range: rightRange) - return l._unsafeWithNotEscapedSelfPointerPair(r) { - return Int( - _stdlib_compareNSStringDeterministicUnicodeCollationPointer($0, $1)) - } - } else { - let l = left._ephemeralCocoaString(leftRange) - let r = right._ephemeralCocoaString(rightRange) - return Int(_stdlib_compareNSStringDeterministicUnicodeCollation(l, r)) - } -#else - switch (left.isASCII, right.isASCII) { - case (true, false): - let l = left._unmanagedASCIIView[leftRange] - let r = right._unmanagedUTF16View[rightRange] - return Int(_swift_stdlib_unicode_compare_utf8_utf16( - l.start, Int32(l.count), - r.start, Int32(r.count))) - case (false, true): - // Just invert it and recurse for this case. - return -_compareDeterministicUnicodeCollation( - right, rightRange, to: left, leftRange) - case (false, false): - let l = left._unmanagedUTF16View[leftRange] - let r = right._unmanagedUTF16View[rightRange] - return Int(_swift_stdlib_unicode_compare_utf16_utf16( - l.start, Int32(l.count), - r.start, Int32(r.count))) - case (true, true): - let l = left._unmanagedASCIIView[leftRange] - let r = right._unmanagedASCIIView[rightRange] - return Int(_swift_stdlib_unicode_compare_utf8_utf8( - l.start, Int32(l.count), - r.start, Int32(r.count))) - } -#endif - } -} - extension _StringGuts { @inline(__always) @_inlineable @@ -191,6 +49,10 @@ extension _StringGuts { internal static func isLess( _ left: _StringGuts, than right: _StringGuts ) -> Bool { + // Bitwise equality implies string equality + if left._bitwiseEqualTo(right) { + return false + } return compare(left, to: right) == -1 } @@ -200,6 +62,10 @@ extension _StringGuts { _ left: _StringGuts, _ leftRange: Range, than right: _StringGuts, _ rightRange: Range ) -> Bool { + // Bitwise equality implies string equality + if left._bitwiseEqualTo(right) && 
leftRange == rightRange { + return false + } return compare(left, leftRange, to: right, rightRange) == -1 } @@ -211,22 +77,18 @@ extension _StringGuts { ) -> Int { defer { _fixLifetime(left) } defer { _fixLifetime(right) } -#if _runtime(_ObjC) - // We only want to perform this optimization on objc runtimes. Elsewhere, - // we will make it follow the unicode collation algorithm even for ASCII. - // This is consistent with Foundation, but incorrect as defined by Unicode. - // - // FIXME: String ordering should be consistent across all platforms. + if left.isASCII && right.isASCII { let leftASCII = left._unmanagedASCIIView[leftRange] let rightASCII = right._unmanagedASCIIView[rightRange] let result = leftASCII.compareASCII(to: rightASCII) return result } -#endif - return _compareDeterministicUnicodeCollation( - _leftUnsafeStringGutsBitPattern: left.rawBits, leftRange, - _rightUnsafeStringGutsBitPattern: right.rawBits, rightRange) + + let leftBits = left.rawBits + let rightBits = right.rawBits + + return _compareUnicode(leftBits, leftRange, rightBits, rightRange) } @_inlineable @@ -236,22 +98,18 @@ extension _StringGuts { ) -> Int { defer { _fixLifetime(left) } defer { _fixLifetime(right) } -#if _runtime(_ObjC) - // We only want to perform this optimization on objc runtimes. Elsewhere, - // we will make it follow the unicode collation algorithm even for ASCII. - // This is consistent with Foundation, but incorrect as defined by Unicode. - // - // FIXME: String ordering should be consistent across all platforms. 
+ if left.isASCII && right.isASCII { let leftASCII = left._unmanagedASCIIView let rightASCII = right._unmanagedASCIIView let result = leftASCII.compareASCII(to: rightASCII) return result } -#endif - return _compareDeterministicUnicodeCollation( - _leftUnsafeStringGutsBitPattern: left.rawBits, - _rightUnsafeStringGutsBitPattern: right.rawBits) + + let leftBits = left.rawBits + let rightBits = right.rawBits + + return _compareUnicode(leftBits, rightBits) } } diff --git a/stdlib/public/core/StringComparison.swift b/stdlib/public/core/StringComparison.swift new file mode 100644 index 0000000000000..d94153c8455ee --- /dev/null +++ b/stdlib/public/core/StringComparison.swift @@ -0,0 +1,1166 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +import SwiftShims + +//HACK: This gets rid of some retains/releases that was slowing down the +//memcmp fast path for comparing ascii strings. 
rdar://problem/37473470 +@inline(never) // @outlined +@effects(readonly) +@_versioned internal +func _compareUnicode( + _ lhs: _StringGuts._RawBitPattern, _ rhs: _StringGuts._RawBitPattern +) -> Int { + let left = _StringGuts(rawBits: lhs) + let right = _StringGuts(rawBits: rhs) + return left._compare(right) +} + +@inline(never) // @outlined +@effects(readonly) +@_versioned internal +func _compareUnicode( + _ lhs: _StringGuts._RawBitPattern, _ leftRange: Range, + _ rhs: _StringGuts._RawBitPattern, _ rightRange: Range +) -> Int { + let left = _StringGuts(rawBits: lhs) + let right = _StringGuts(rawBits: rhs) + return left._compare(leftRange, right, rightRange) +} + +// +// Pointer casting helpers +// +@inline(__always) +private func _unsafeMutableBufferPointerCast( + _ ptr: UnsafeMutablePointer, + _ count: Int, + to: U.Type = U.self +) -> UnsafeMutableBufferPointer { + return UnsafeMutableBufferPointer( + start: UnsafeMutableRawPointer(ptr).assumingMemoryBound(to: U.self), + count: count + ) +} +@inline(__always) +private func _unsafeBufferPointerCast( + _ ptr: UnsafePointer, + _ count: Int, + to: U.Type = U.self +) -> UnsafeBufferPointer { + return UnsafeBufferPointer( + start: UnsafeRawPointer(ptr).assumingMemoryBound(to: U.self), + count: count + ) +} + +internal let _leadingSurrogateBias: UInt16 = 0xd800 +internal let _trailingSurrogateBias: UInt16 = 0xdc00 +internal let _surrogateMask: UInt16 = 0xfc00 + +@inline(__always) +internal func _isSurrogate(_ cu: UInt16) -> Bool { + return _isLeadingSurrogate(cu) || _isTrailingSurrogate(cu) +} + +@inline(__always) +internal func _isLeadingSurrogate(_ cu: UInt16) -> Bool { + // NOTE: Specifically match against the trailing surrogate mask, as it matches + // more cases. 
+ return cu & _surrogateMask == _leadingSurrogateBias +} + +@inline(__always) +internal func _isTrailingSurrogate(_ cu: UInt16) -> Bool { + return cu & _surrogateMask == _trailingSurrogateBias +} +@inline(__always) +internal func _decodeSurrogatePair( + leading high: UInt16, trailing low: UInt16 +) -> UInt32 { + _sanityCheck(_isLeadingSurrogate(high) && _isTrailingSurrogate(low)) + let hi10: UInt32 = UInt32(high) &- UInt32(_leadingSurrogateBias) + _sanityCheck(hi10 < 1<<10, "I said high 10. Not high, like, 20 or something") + let lo10: UInt32 = UInt32(low) &- UInt32(_trailingSurrogateBias) + _sanityCheck(lo10 < 1<<10, "I said low 10. Not low, like, 20 or something") + + return ((hi10 &<< 10) | lo10) &+ 0x1_00_00 +} + +internal func _hasNormalizationBoundary(before cu: UInt16) -> Bool { + guard !_isSurrogate(cu) else { return false } + return UnicodeScalar(_unchecked: UInt32(cu))._hasNormalizationBoundaryBefore +} + +// +// Pointer casting helpers +// +internal func _castOutputBuffer( + _ ptr: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>, + endingAt endIdx: Int = _Normalization._SegmentOutputBuffer.capacity +) -> UnsafeMutableBufferPointer { + let bufPtr: UnsafeMutableBufferPointer = + _unsafeMutableBufferPointerCast( + ptr, _Normalization._SegmentOutputBuffer.capacity) + return UnsafeMutableBufferPointer(rebasing: bufPtr[.., + endingAt endIdx: Int = _Normalization._SegmentOutputBuffer.capacity +) -> UnsafeBufferPointer { + let bufPtr: UnsafeBufferPointer = + _unsafeBufferPointerCast( + ptr, _Normalization._SegmentOutputBuffer.capacity) + return UnsafeBufferPointer(rebasing: bufPtr[..) { + _sanityCheck(other.count < _FixedArray16.capacity, + "out of bounds fill") + for i in 0..) 
-> Int { + let count = Swift.min(self.count, other.count) + for idx in 0.., + _ right: UnsafeBufferPointer +) -> Int { + let count = Swift.min(left.count, right.count) + for idx in 0..( + _ left: UnsafeBufferPointer, + _ right: UnsafeBufferPointer +) -> Int where CodeUnit : FixedWidthInteger & UnsignedInteger { + let count = Swift.min(left.count, right.count) + for idx in 0..( + _ other: _UnmanagedString + ) -> Int { + let count = Swift.min(self.count, other.count) + for idx in 0.. + ) -> Int { + let count = Swift.min(self.count, otherRange.count) + for idx in 0.. _Ordering { + // TODO: inspect code quality + return lhs < rhs ? .less : (lhs > rhs ? .greater : .equal) +} + +internal func _lexicographicalCompare( + _ lhs: UInt16, _ rhs: UInt16 +) -> _Ordering { + return lhs < rhs ? .less : (lhs > rhs ? .greater : .equal) +} + +internal func _lexicographicalCompare( + _ leftHS: UnsafeBufferPointer, + _ rightHS: UnsafeBufferPointer +) -> _Ordering { + let count = Swift.min(leftHS.count, rightHS.count) + + let idx = _findDiffIdx(leftHS, rightHS) + guard idx < count else { + return _lexicographicalCompare(leftHS.count, rightHS.count) + } + let leftHSPtr = leftHS.baseAddress._unsafelyUnwrappedUnchecked + let rightHSPtr = rightHS.baseAddress._unsafelyUnwrappedUnchecked + return _lexicographicalCompare(leftHSPtr[idx], rightHSPtr[idx]) +} + +internal func _lexicographicalCompare( + _ leftHS: UnsafeBufferPointer, + _ rightHS: UnsafeBufferPointer +) -> _Ordering { + let count = Swift.min(leftHS.count, rightHS.count) + + let idx = _findDiffIdx(leftHS, rightHS) + guard idx < count else { + return _lexicographicalCompare(leftHS.count, rightHS.count) + } + let leftHSPtr = leftHS.baseAddress._unsafelyUnwrappedUnchecked + let rightHSPtr = rightHS.baseAddress._unsafelyUnwrappedUnchecked + return _lexicographicalCompare(UInt16(leftHSPtr[idx]), rightHSPtr[idx]) +} +@inline(__always) +internal func _lexicographicalCompare( + _ leftHS: UnsafePointer<_Normalization._SegmentOutputBuffer>, 
+ leftCount: Int, + _ rightHS: UnsafePointer<_Normalization._SegmentOutputBuffer>, + rightCount: Int +) -> _Ordering { + return _lexicographicalCompare( + _castOutputBuffer(leftHS, endingAt: leftCount), + _castOutputBuffer(rightHS, endingAt: rightCount)) +} +@inline(__always) +internal func _lexicographicalCompare( + _ leftHS: Array, + _ rightHS: Array +) -> _Ordering { + return leftHS.withUnsafeBufferPointer { leftPtr in + return rightHS.withUnsafeBufferPointer { rightPtr in + return _lexicographicalCompare(leftPtr, rightPtr) + } + } +} + +internal func _parseRawScalar( + _ buf: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>, + startingFrom idx: Int = 0 +) -> (UnicodeScalar, scalarEndIndex: Int) { + return Swift._parseRawScalar(buffer: _castOutputBuffer(buf), startingFrom: idx) +} + +internal func _parseRawScalar( + buffer buf: UnsafeBufferPointer, + startingFrom idx: Int = 0 +) -> (UnicodeScalar, scalarEndIndex: Int) { + let ptr = buf.baseAddress._unsafelyUnwrappedUnchecked + _sanityCheck(idx >= 0 && idx < buf.count, "out of bounds index") + let cu: UInt16 = ptr[idx] + if _slowPath(idx+1 == buf.count) { + return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1) + } + guard _isLeadingSurrogate(cu) else { + return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1) + } + let nextCu: UInt16 = ptr[idx+1] + guard _isTrailingSurrogate(nextCu) else { + // Invalid surrogate pair: just return the invalid value + return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1) + } + + // Decode + let value: UInt32 = _decodeSurrogatePair(leading: cu, trailing: nextCu) + _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set") + return (UnicodeScalar(_unchecked: value), idx+2) +} + +extension _UnmanagedOpaqueString { + internal func _parseRawScalar( + startingFrom idx: Int = 0 + ) -> (UnicodeScalar, scalarEndIndex: Int) { + var buffer = _FixedArray2(allZeros:()) + if idx+1 < self.count { + buffer[0] = self[idx] + buffer[1] = self[idx+1] + + let bufferPointer = 
_unsafeBufferPointerCast( + &buffer, 2, to: UInt16.self + ) + return Swift._parseRawScalar(buffer: bufferPointer, startingFrom: 0) + } else { + buffer[0] = self[idx] + + let bufferPointer = _unsafeBufferPointerCast( + &buffer, 1, to: UInt16.self + ) + return Swift._parseRawScalar(buffer: bufferPointer, startingFrom: 0) + } + } +} + +extension _UnmanagedString where CodeUnit == UInt16 { + internal func _parseRawScalar( + startingFrom idx: Int = 0 + ) -> (UnicodeScalar, scalarEndIndex: Int) { + _sanityCheck(idx >= 0 && idx < self.count, "out of bounds index") + let cu = self[idx] + if _slowPath(idx+1 == self.count) { + return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1) + } + guard _isLeadingSurrogate(cu) else { + return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1) + } + let nextCu = self[idx+1] + guard _isTrailingSurrogate(nextCu) else { + // Invalid surrogate pair: just return the invalid value + return (UnicodeScalar(_unchecked: UInt32(cu)), idx+1) + } + + // Decode + let value: UInt32 = _decodeSurrogatePair(leading: cu, trailing: nextCu) + _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set") + return (UnicodeScalar(_unchecked: value), idx+2) + } + + internal func _reverseParseRawScalar( + endingAt idx: Int // one-past-the-end + ) -> (UnicodeScalar, scalarStartIndex: Int) { + _sanityCheck(idx > 0 && idx <= self.count, "out of bounds end index") + + // Corner case: leading un-paired surrogate + if _slowPath(idx == 1) { + return (UnicodeScalar(_unchecked: UInt32(self[0])), 0) + } + + let cu = self[idx-1] + guard _isTrailingSurrogate(cu) else { + return (UnicodeScalar(_unchecked: UInt32(cu)), idx-1) + } + let priorCU = self[idx-2] + guard _isLeadingSurrogate(priorCU) else { + return (UnicodeScalar(_unchecked: UInt32(cu)), idx-1) + } + + // Decode + let value: UInt32 = _decodeSurrogatePair(leading: priorCU, trailing: cu) + _sanityCheck(Int32(exactly: value) != nil, "top bit shouldn't be set") + return (UnicodeScalar(_unchecked: value), idx-2) + } 
+ + internal func _tryNormalize( + into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer> + ) -> Int? { + return self._tryNormalize(into: _castOutputBuffer(outputBuffer)) + } + + internal func _tryNormalize( + into outputBuffer: UnsafeMutableBufferPointer + ) -> Int? { + var err = __swift_stdlib_U_ZERO_ERROR + let count = __swift_stdlib_unorm2_normalize( + _Normalization._nfcNormalizer, + self.start, + numericCast(self.count), + outputBuffer.baseAddress._unsafelyUnwrappedUnchecked, + numericCast(outputBuffer.count), + &err + ) + guard err.isSuccess else { + // The output buffer needs to grow + return nil + } + return numericCast(count) + } + + internal func _slowNormalize() -> [UInt16] { + _sanityCheck(self.count > 0, "called on empty string") + + let canary = self.count * _Normalization._maxNFCExpansionFactor + var count = self.count + while true { + var result = Array(repeating: 0, count: count) + if let length = result.withUnsafeMutableBufferPointer({ (bufPtr) -> Int? in + return self._tryNormalize(into: bufPtr) + }) { + result.removeLast(count - length) + return result + } + // Otherwise, we need to grow + guard count <= canary else { + fatalError("Invariant broken: Max decomposition factor insufficient") + } + count *= 2 + } + } +} + +internal func _tryNormalize( + _ input: UnsafeBufferPointer, + into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer> +) -> Int? { + return _tryNormalize(input, into: _castOutputBuffer(outputBuffer)) +} +internal func _tryNormalize( + _ input: UnsafeBufferPointer, + into outputBuffer: UnsafeMutableBufferPointer +) -> Int? 
{ + var err = __swift_stdlib_U_ZERO_ERROR + let count = __swift_stdlib_unorm2_normalize( + _Normalization._nfcNormalizer, + input.baseAddress._unsafelyUnwrappedUnchecked, + numericCast(input.count), + outputBuffer.baseAddress._unsafelyUnwrappedUnchecked, + numericCast(outputBuffer.count), + &err + ) + guard err.isSuccess else { + // The output buffer needs to grow + return nil + } + return numericCast(count) +} + +extension _UnmanagedString where CodeUnit == UInt8 { + @_inlineable // FIXME(sil-serialize-all) + @_versioned + internal func compareASCII(to other: _UnmanagedString) -> Int { + // FIXME Results should be the same across all platforms. + if self.start == other.start { + return (self.count &- other.count).signum() + } + var cmp = Int(truncatingIfNeeded: + _stdlib_memcmp( + self.rawStart, other.rawStart, + Swift.min(self.count, other.count))) + if cmp == 0 { + cmp = self.count &- other.count + } + return cmp.signum() + } +} + +public extension _StringGuts { + @inline(__always) + public + func _compare(_ other: _StringGuts) -> Int { + let selfRange = Range(uncheckedBounds: (0, self.count)) + let otherRange = Range(uncheckedBounds: (0, other.count)) + return _compare(selfRange, other, otherRange) + } + + @inline(__always) + public + func _compare( + _ selfRange: Range, + _ other: _StringGuts, + _ otherRange: Range + ) -> Int { + if _slowPath( + !self._isContiguous || !other._isContiguous + ) { + if !self._isContiguous { + return self._asOpaque()._compareOpaque( + selfRange, other, otherRange + ).rawValue + } else { + return other._asOpaque()._compareOpaque( + otherRange, self, selfRange + ).flipped.rawValue + } + } + + switch (self.isASCII, other.isASCII) { + case (true, true): + fatalError("Should have hit the ascii comp in StringComparable.compare()") + case (true, false): + return self._unmanagedASCIIView[selfRange]._compareStringsPreLoop( + other: other._unmanagedUTF16View[otherRange] + ).rawValue + case (false, true): + // Same compare, just invert 
result + return other._unmanagedASCIIView[otherRange]._compareStringsPreLoop( + other: self._unmanagedUTF16View[selfRange] + ).flipped.rawValue + case (false, false): + return self._unmanagedUTF16View[selfRange]._compareStringsPreLoop( + other: other._unmanagedUTF16View[otherRange] + ).rawValue + } + } +} + +extension _UnmanagedOpaqueString { + @inline(never) + @_versioned + internal + func _compareOpaque( + _ selfRange: Range, + _ other: _StringGuts, + _ otherRange: Range + ) -> _Ordering { + // + // Do a fast Latiny comparison loop; bail if that proves insufficient. + // + // The vast majority of the time, seemingly-non-contiguous Strings are + // really ASCII strings that were bridged improperly. E.g., unknown nul- + // termination of an all-ASCII file loaded by String.init(contentsOfFile:). + // + + + let selfCount = selfRange.count + let otherCount = otherRange.count + let count = Swift.min(selfCount, otherCount) + let idx = self[selfRange]._findDiffIdx(other, otherRange) + if idx == count { + return _lexicographicalCompare(selfCount, otherCount) + } + + let selfCU = self[idx] + let otherCU = other[idx + otherRange.lowerBound] + + // + // Fast path: if one is ASCII, we can often compare the code units directly. + // + let selfIsASCII = selfCU <= 0x7F + let otherIsASCII = otherCU <= 0x7F + + let selfIsSingleSegmentScalar = + self.hasNormalizationBoundary(after: idx) + && _hasNormalizationBoundary(before: selfCU) + let otherIsSingleSegmentScalar = + other.hasNormalizationBoundary(after: idx) + && _hasNormalizationBoundary(before: otherCU) + + if _fastPath(selfIsASCII || otherIsASCII) { + _sanityCheck(idx < selfCount && idx < otherCount, + "Should be caught by check against min-count") + // Check if next CU is <0x300, or if we're in a + // "_isNormalizedSuperASCII" case. 99.9% of the time, we're here because + // the non-contig string is ASCII. We never want to hit the pathological + // path for those. 
+ + if selfIsASCII && otherIsASCII { + if selfIsSingleSegmentScalar && otherIsSingleSegmentScalar { + return _lexicographicalCompare(selfCU, otherCU) + } + + return self._compareOpaquePathological( + other, otherRange, startingFrom: Swift.max(0, idx-1)) + } + + if selfIsASCII && selfIsSingleSegmentScalar + && self._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII { + return .less + } else if otherIsASCII && otherIsSingleSegmentScalar + && self._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII { + return .greater + } + } + + return self._compareOpaquePathological( + other, otherRange, startingFrom: Swift.max(0, idx-1) + ) + } + + @inline(never) + func _compareOpaquePathological( + _ other: _StringGuts, _ otherRange: Range, + startingFrom: Int + ) -> _Ordering { + // Compare by pulling in a segment at a time, normalizing then comparing + // individual code units + var selfIterator = _NormalizedCodeUnitIterator(self, startIndex: startingFrom) + return selfIterator.compare(with: + _NormalizedCodeUnitIterator(other, otherRange, startIndex: startingFrom) + ) + } +} + +extension UnicodeScalar { + internal func _normalize( + into outputBuffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer> + ) -> Int { + // Implementation: Perform the normalization on an input buffer and output + // buffer. 
+ func impl( + _ input: UnsafeMutablePointer<_FixedArray2>, + count: Int, + into output: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer> + ) -> Int { + let inputBuffer = _unsafeBufferPointerCast( + input, count, to: UInt16.self + ) + let outputBuffer = _unsafeMutableBufferPointerCast( + output, _FixedArray8.capacity, to: UInt16.self + ) + return _tryNormalize( + inputBuffer, into: outputBuffer + )._unsafelyUnwrappedUnchecked + } + + var inBuffer = _FixedArray2(allZeros:()) + var inLength = 0 + for cu in self.utf16 { + inBuffer[inLength] = cu + inLength += 1 + } + + return impl(&inBuffer, count: inLength, into: outputBuffer) + } + + static internal let maxValue = 0x0010_FFFF +} + +private struct _UnicodeScalarExceptions { + fileprivate let _multiSegmentExpanders: Set + fileprivate let _normalizedASCIIStarter: Array + + @inline(__always) + init() { + var msExpanders = Set() + msExpanders.reserveCapacity(16) + var normalizedASCIIStarter = Array() + normalizedASCIIStarter.reserveCapacity(8) + + for rawValue in 0.. 0x7F + } +} + +extension _UnmanagedString where CodeUnit == UInt8 { + @_versioned + internal func _compareStringsPreLoop( + other: _UnmanagedString + ) -> _Ordering { + let count = Swift.min(self.count, other.count) + + // + // Fast scan until we find a difference + // + let idx = self._findDiffIdx(other) + guard idx < count else { + return _lexicographicalCompare(self.count, other.count) + } + let otherCU = other[idx] + + // + // Fast path: if other is super-ASCII post-normalization, we must be less. If + // other is ASCII and a single-scalar segment, we have our answer. 
+ // + if otherCU > 0x7F { + if _fastPath( + other._parseRawScalar(startingFrom: idx).0._isNormalizedSuperASCII + ) { + return .less + } + } else { + let selfASCIIChar = UInt16(self[idx]) + _sanityCheck(selfASCIIChar != otherCU, "should be different") + if idx+1 == other.count { + return _lexicographicalCompare(selfASCIIChar, otherCU) + } + if _fastPath(other.hasNormalizationBoundary(after: idx, count: other.count)) { + return _lexicographicalCompare(selfASCIIChar, otherCU) + } + } + + // + // Otherwise, need to normalize the segment and then compare + // + let selfASCIIChar = UInt16(self[idx]) + return _compareStringsPostSuffix( + selfASCIIChar: selfASCIIChar, otherUTF16: other[idx...] + ) + } +} + +extension _StringGuts { + func hasNormalizationBoundary(after index: Int) -> Bool { + let nextIndex = index + 1 + if nextIndex >= self.count { + return true + } + + let nextCU = self[nextIndex] + return _hasNormalizationBoundary(before: nextCU) + } +} + +extension _UnmanagedOpaqueString { + func hasNormalizationBoundary(after index: Int) -> Bool { + let nextIndex = index + 1 + if nextIndex >= self.count { + return true + } + + let nextCU = self[nextIndex] + return _hasNormalizationBoundary(before: nextCU) + } +} + +extension _UnmanagedString where CodeUnit == UInt16 { + func hasNormalizationBoundary(after index: Int, count: Int) -> Bool { + let nextIndex = index + 1 + if nextIndex >= count { + return true + } + + let nextCU = self[nextIndex] + return _hasNormalizationBoundary(before: nextCU) + } +} + +private func _compareStringsPostSuffix( + selfASCIIChar: UInt16, + otherUTF16: _UnmanagedString +) -> _Ordering { + let otherCU = otherUTF16[0] + _sanityCheck(otherCU <= 0x7F, "should be ASCII, otherwise no need to call") + + let segmentEndIdx = otherUTF16._findNormalizationSegmentEnd(startingFrom: 0) + let segment = otherUTF16[.. 
Int { + let count = self.count + _sanityCheck(idx < count, "out of bounds") + + // Normalization boundaries are best queried before known starters. Advance + // past one scalar first. + var (_, segmentEndIdx) = self._parseRawScalar(startingFrom: idx) + while segmentEndIdx < count { + let (scalar, nextIdx) = self._parseRawScalar(startingFrom: segmentEndIdx) + if scalar._hasNormalizationBoundaryBefore { + break + } + segmentEndIdx = nextIdx + } + return segmentEndIdx + } + + internal func _findNormalizationSegmentStart( + endingAt idx: Int // one-past-the-end + ) -> Int { + var idx = idx + let count = self.count + _sanityCheck(idx > 0 && idx <= count, "out of bounds") + + while idx > 0 { + let (scalar, priorIdx) = _reverseParseRawScalar(endingAt: idx) + idx = priorIdx + if scalar._hasNormalizationBoundaryBefore { + break + } + } + return idx + } + + internal func _findNormalizationSegment(spanning idx: Int) -> (Int, Int) { + var idx = idx + + // Corner case: if we're sub-surrogate, back up + if _slowPath( + idx > 0 + && _isTrailingSurrogate(self[idx]) + && _isLeadingSurrogate(self[idx-1]) + ) { + idx -= 1 + } + let segmentEnd = self._findNormalizationSegmentEnd(startingFrom: idx) + + // Find the start + if _slowPath(idx == 0) { + return (0, segmentEnd) + } + + // Check current scalar + if self._parseRawScalar(startingFrom: idx).0._hasNormalizationBoundaryBefore { + return (idx, segmentEnd) + } + + // Reverse parse until we find the segment start + let segmentStart = self._findNormalizationSegmentStart(endingAt: idx) + + return (segmentStart, segmentEnd) + } + + // Whether the segment identified by `idx` is prenormal. + // + // Scalar values below 0x300 are special: normalization segments containing only + // one such scalar are trivially prenormal under NFC. Most Latin-derived scripts + // can be represented entirely by <0x300 scalar values, meaning that many user + // strings satisfy this prenormal check. 
We call sub-0x300 scalars "Latiny" (not + // official terminology). + // + // The check is effectively: + // 1) Whether the current scalar <0x300, AND + // 2) Whether the current scalar comprises the entire segment + // + internal func _isLatinyPrenormal(idx: Int + ) -> Bool { + _sanityCheck(idx < self.count, "out of bounds") + + let cu = self[idx] + if _slowPath(cu >= 0x300) { + return false + } + if _slowPath(idx+1 == self.count) { + return true + } + + let nextCU = self[idx+1] + return nextCU < 0x300 || _hasNormalizationBoundary(before: nextCU) + } + + @_versioned + internal + func _compareStringsPreLoop( + other: _UnmanagedString + ) -> _Ordering { + let count = Swift.min(self.count, other.count) + + // + // Fast scan until we find a diff + // + let idx = _findDiffIdx(other) + guard idx < count else { + return _lexicographicalCompare(self.count, other.count) + } + let selfCU = self[idx] + let otherCU = other[idx] + + // + // Fast path: sub-0x300 single-scalar segments can be compared directly + // + if _fastPath( + _isLatinyPrenormal(idx: idx) + && other._isLatinyPrenormal(idx: idx) + ) { + return _lexicographicalCompare(selfCU, otherCU) + } + + return self._compareStringsSuffix(other: other, randomIndex: idx) + } + + //Is the shorter of the two parameters a prefix of the other parameter? + private func shorterPrefixesOther( + _ other: _UnmanagedString + ) -> Bool { + if self.count == other.count { + return false + } + + let minimumLength = Swift.min(self.count, other.count) + for i in 0.., + randomIndex: Int + ) -> _Ordering { + let count = Swift.min(self.count, other.count) + let selfCU = self[randomIndex] + let otherCU = other[randomIndex] + _sanityCheck(randomIndex >= 0 && randomIndex < count, "out of bounds") + _sanityCheck(selfCU != otherCU, "should be called at a point of difference") + + // + // Find the segment surrounding the random index passed in. This may involve + // some back tracking to the nearest normalization boundary. 
Once we've + // identified the segment, we can normalize and continue comparison. + // + // NOTE: We need to back-track for both self and other. Even though prefixes + // are binary equal, the point of difference might be at the start of a new + // segment for one and in the middle of the prior segment for the other. In + // which case, we will want to effectively compare the two consecutive + // segments together. + // + let (selfSegmentStartIdx, selfSegmentEndIdx) = + self._findNormalizationSegment(spanning: randomIndex) + let (otherSegmentStartIdx, otherSegmentEndIdx) = + other._findNormalizationSegment(spanning: randomIndex) + let comparisonStartIdx = Swift.min(selfSegmentStartIdx, otherSegmentStartIdx) + + + // + // Fast path: if both are prenormal, we have our answer + // + let selfSegment = self[comparisonStartIdx.. + ) -> _Ordering { + var selfIterator = _NormalizedCodeUnitIterator(self) + return selfIterator.compare(with: + _NormalizedCodeUnitIterator(other) + ) + } +} + +private func shorterPrefixesOther( + _ selfBuffer: UnsafePointer<_Normalization._SegmentOutputBuffer>, + _ selfLength: Int, + _ otherBuffer: UnsafePointer<_Normalization._SegmentOutputBuffer>, + _ otherLength: Int +) -> Bool { + return shorterPrefixesOther( + _castOutputBuffer(selfBuffer, endingAt: selfLength), + _castOutputBuffer(otherBuffer, endingAt: otherLength) + ) +} + +//Is the shorter of the two parameters a prefix of the other parameter? +private func shorterPrefixesOther( + _ selfBuffer: UnsafeBufferPointer, + _ otherBuffer: UnsafeBufferPointer +) -> Bool { + if selfBuffer.count == otherBuffer.count { + return false + } + + let minimumLength = Swift.min(selfBuffer.count, otherBuffer.count) + for i in 0.. 
+ ) -> Bool { + var err = __swift_stdlib_U_ZERO_ERROR + let length = __swift_stdlib_unorm2_spanQuickCheckYes( + _Normalization._nfcNormalizer, + buffer.baseAddress._unsafelyUnwrappedUnchecked, + Int32(buffer.count), + &err) + + guard err.isSuccess else { + // This shouldn't be possible unless some deep (unrecoverable) system + // invariants are violated + fatalError("Unable to talk to ICU") + } + return length == buffer.count + } + internal static func _prenormalQuickCheckYes( + _ string: _UnmanagedString + ) -> Bool { + var err = __swift_stdlib_U_ZERO_ERROR + let length = __swift_stdlib_unorm2_spanQuickCheckYes( + _Normalization._nfcNormalizer, + string.start, + Int32(string.count), + &err) + + guard err.isSuccess else { + // This shouldn't be possible unless some deep (unrecoverable) system + // invariants are violated + fatalError("Unable to talk to ICU") + } + return length == string.count + } +} + +extension UnicodeScalar { + // Normalization boundary - a place in a string where everything left of the + // boundary can be normalized independently from everything right of the + // boundary. The concatenation of each result is the same as if the entire + // string had been normalized as a whole. + // + // Normalization segment - a sequence of code units between two normalization + // boundaries (without any boundaries in the middle). Note that normalization + // segments can, as a process of normalization, expand, contract, and even + // produce new sub-segments. + + // Whether this scalar value always has a normalization boundary before it. + internal var _hasNormalizationBoundaryBefore: Bool { + _sanityCheck(Int32(exactly: self.value) != nil, "top bit shouldn't be set") + let value = Int32(bitPattern: self.value) + return 0 != __swift_stdlib_unorm2_hasBoundaryBefore( + _Normalization._nfcNormalizer, value) + } + + // Whether the supported version of Unicode has assigned a code point to this + // value. 
+ internal var _isDefined: Bool { + return __swift_stdlib_u_isdefined(Int32(self.value)) != 0 + } + + // A property tracked in ICU regarding the scalar's potential non-normality; + // this is equivalent to whether quickCheck=NO. A subset of such scalars may + // expand under NFC normalization, and a subset of those may expand into + // multiple segments. + internal var _hasFullCompExclusion: Bool { + _sanityCheck(Int32(exactly: self.value) != nil, "top bit shouldn't be set") + let value = Int32(bitPattern: self.value) + let prop = __swift_stdlib_UCHAR_FULL_COMPOSITION_EXCLUSION + return __swift_stdlib_u_hasBinaryProperty(value, prop) != 0 + } +} + +extension _Normalization { + // When normalized in NFC, some segments may expand in size (e.g. some non-BMP + // musical notes). This expansion is capped by the maximum expansion factor of + // the normal form. For NFC, that is 3x. + internal static let _maxNFCExpansionFactor = 3 + + // A small output buffer to use for normalizing a single normalization + // segment. Fits all but pathological arbitrary-length segments (i.e. 
zalgo- + // segments) + internal typealias _SegmentOutputBuffer = _FixedArray16 +} diff --git a/stdlib/public/core/UnmanagedString.swift b/stdlib/public/core/UnmanagedString.swift index db0c5efcf7eb4..11512d307b43d 100644 --- a/stdlib/public/core/UnmanagedString.swift +++ b/stdlib/public/core/UnmanagedString.swift @@ -173,6 +173,36 @@ extension _UnmanagedString : _StringVariant { start: start + offsetRange.lowerBound, count: offsetRange.count) } + + @_inlineable // FIXME(sil-serialize-all) + @_versioned // FIXME(sil-serialize-all) + internal subscript(offsetRange: PartialRangeFrom) -> SubSequence { + _sanityCheck(offsetRange.lowerBound >= 0) + return _UnmanagedString( + start: start + offsetRange.lowerBound, + count: self.count - offsetRange.lowerBound + ) + } + + @_inlineable // FIXME(sil-serialize-all) + @_versioned // FIXME(sil-serialize-all) + internal subscript(offsetRange: PartialRangeUpTo) -> SubSequence { + _sanityCheck(offsetRange.upperBound <= count) + return _UnmanagedString( + start: start, + count: offsetRange.upperBound + ) + } + + @_inlineable // FIXME(sil-serialize-all) + @_versioned // FIXME(sil-serialize-all) + internal subscript(offsetRange: PartialRangeThrough) -> SubSequence { + _sanityCheck(offsetRange.upperBound < count) + return _UnmanagedString( + start: start, + count: offsetRange.upperBound + 1 + ) + } @_inlineable // FIXME(sil-serialize-all) @_versioned // FIXME(sil-serialize-all) diff --git a/stdlib/public/stubs/UnicodeNormalization.cpp b/stdlib/public/stubs/UnicodeNormalization.cpp index d31c3e9916709..6dd98e2e987f9 100644 --- a/stdlib/public/stubs/UnicodeNormalization.cpp +++ b/stdlib/public/stubs/UnicodeNormalization.cpp @@ -22,17 +22,34 @@ // Declare a few external functions to avoid a dependency on ICU headers. 
extern "C" { + +// Types typedef struct UBreakIterator UBreakIterator; +typedef struct UBreakIterator UNormalizer2; typedef enum UBreakIteratorType {} UBreakIteratorType; typedef enum UErrorCode {} UErrorCode; typedef uint16_t UChar; +typedef int32_t UChar32; +typedef int8_t UBool; +typedef swift::__swift_stdlib_UProperty UProperty; +// Grapheme breaking APIs void ubrk_close(UBreakIterator *); UBreakIterator *ubrk_open(UBreakIteratorType, const char *, const UChar *, int32_t, UErrorCode *); int32_t ubrk_preceding(UBreakIterator *, int32_t); int32_t ubrk_following(UBreakIterator *, int32_t); void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *); + +// Comparison, normalization, and character property APIs +int32_t unorm2_spanQuickCheckYes(const UNormalizer2 *, const UChar *, int32_t, + UErrorCode *); +int32_t unorm2_normalize(const UNormalizer2 *, const UChar *, int32_t, UChar *, + int32_t, UErrorCode *); +const UNormalizer2 *unorm2_getNFCInstance(UErrorCode *); +UBool unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); +UBool u_hasBinaryProperty(UChar32, UProperty); +UBool u_isdefined(UChar32); } #else @@ -45,6 +62,7 @@ void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *); #include #include #include +#include #pragma clang diagnostic pop @@ -137,76 +155,6 @@ class ASCIICollation { ASCIICollation(const ASCIICollation &) = delete; }; -/// Compares the strings via the Unicode Collation Algorithm on the root locale. -/// Results are the usual string comparison results: -/// <0 the left string is less than the right string. -/// ==0 the strings are equal according to their collation. -/// >0 the left string is greater than the right string. -int32_t -swift::_swift_stdlib_unicode_compare_utf16_utf16(const uint16_t *LeftString, - int32_t LeftLength, - const uint16_t *RightString, - int32_t RightLength) { - // ICU UChar type is platform dependent. In Cygwin, it is defined - // as wchar_t which size is 2. 
It seems that the underlying binary - // representation is same with swift utf16 representation. - // On Clang 4.0 under a recent Linux, ICU uses the built-in char16_t type. - return ucol_strcoll(GetRootCollator(), - reinterpret_cast(LeftString), LeftLength, - reinterpret_cast(RightString), RightLength); -} - -/// Compares the strings via the Unicode Collation Algorithm on the root locale. -/// Results are the usual string comparison results: -/// <0 the left string is less than the right string. -/// ==0 the strings are equal according to their collation. -/// >0 the left string is greater than the right string. -int32_t -swift::_swift_stdlib_unicode_compare_utf8_utf16(const unsigned char *LeftString, - int32_t LeftLength, - const uint16_t *RightString, - int32_t RightLength) { - UCharIterator LeftIterator; - UCharIterator RightIterator; - UErrorCode ErrorCode = U_ZERO_ERROR; - - uiter_setUTF8(&LeftIterator, reinterpret_cast(LeftString), LeftLength); - uiter_setString(&RightIterator, reinterpret_cast(RightString), - RightLength); - - uint32_t Diff = ucol_strcollIter(GetRootCollator(), - &LeftIterator, &RightIterator, &ErrorCode); - if (U_FAILURE(ErrorCode)) { - swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf16 string comparison."); - } - return Diff; -} - -/// Compares the strings via the Unicode Collation Algorithm on the root locale. -/// Results are the usual string comparison results: -/// <0 the left string is less than the right string. -/// ==0 the strings are equal according to their collation. -/// >0 the left string is greater than the right string. 
-int32_t -swift::_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *LeftString, - int32_t LeftLength, - const unsigned char *RightString, - int32_t RightLength) { - UCharIterator LeftIterator; - UCharIterator RightIterator; - UErrorCode ErrorCode = U_ZERO_ERROR; - - uiter_setUTF8(&LeftIterator, reinterpret_cast(LeftString), LeftLength); - uiter_setUTF8(&RightIterator, reinterpret_cast(RightString), RightLength); - - uint32_t Diff = ucol_strcollIter(GetRootCollator(), - &LeftIterator, &RightIterator, &ErrorCode); - if (U_FAILURE(ErrorCode)) { - swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf8 string comparison."); - } - return Diff; -} - void *swift::_swift_stdlib_unicodeCollationIterator_create( const __swift_uint16_t *Str, __swift_uint32_t Length) { UErrorCode ErrorCode = U_ZERO_ERROR; @@ -225,7 +173,8 @@ __swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next( auto Result = ucol_next( static_cast(CollationIterator), &ErrorCode); if (U_FAILURE(ErrorCode)) { - swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed."); + swift::crash( + "_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed."); } *HitEnd = (Result == UCOL_NULLORDER); return Result; @@ -328,6 +277,44 @@ void swift::__swift_stdlib_ubrk_setText( textLength, ptr_cast(status)); } +swift::__swift_stdlib_UBool swift::__swift_stdlib_unorm2_hasBoundaryBefore( + const __swift_stdlib_UNormalizer2 *ptr, __swift_stdlib_UChar32 char32) { + return unorm2_hasBoundaryBefore(ptr_cast(ptr), char32); +} +const swift::__swift_stdlib_UNormalizer2 * +swift::__swift_stdlib_unorm2_getNFCInstance(__swift_stdlib_UErrorCode *err) { + return ptr_cast<__swift_stdlib_UNormalizer2>( + unorm2_getNFCInstance(ptr_cast(err))); +} + +int32_t swift::__swift_stdlib_unorm2_normalize( + const __swift_stdlib_UNormalizer2 *norm, const __swift_uint16_t *src, + __swift_int32_t len, __swift_uint16_t *dst, __swift_int32_t capacity, + __swift_stdlib_UErrorCode *err) { + 
return unorm2_normalize(ptr_cast(norm), src, len, dst, capacity, + ptr_cast(err)); +} + +__swift_int32_t swift::__swift_stdlib_unorm2_spanQuickCheckYes( + const __swift_stdlib_UNormalizer2 *norm, const __swift_stdlib_UChar *ptr, + __swift_int32_t len, __swift_stdlib_UErrorCode *err) { + return unorm2_spanQuickCheckYes(ptr_cast(norm), + ptr_cast(ptr), len, + ptr_cast(err)); +} + +swift::__swift_stdlib_UBool +swift::__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32 c, + __swift_stdlib_UProperty p) { + return u_hasBinaryProperty(c, static_cast(p)); +} + +swift::__swift_stdlib_UBool +swift::__swift_stdlib_u_isdefined(UChar32 c) { + return u_isdefined(c); +} + + // Force an autolink with ICU #if defined(__MACH__) asm(".linker_option \"-licucore\"\n"); diff --git a/test/SILOptimizer/string_switch.swift b/test/SILOptimizer/string_switch.swift index 933c78d3fca41..2c0beef0e2f7a 100644 --- a/test/SILOptimizer/string_switch.swift +++ b/test/SILOptimizer/string_switch.swift @@ -1,6 +1,7 @@ // RUN: %target-build-swift -O %s -module-name=test -Xllvm -sil-disable-pass=FunctionSignatureOpts -o %t.out // RUN: %target-build-swift -O %s -module-name=test -Xllvm -sil-disable-pass=FunctionSignatureOpts -emit-sil | %FileCheck %s // RUN: %target-run %t.out +// REQUIRES: swift_stdlib_no_asserts,optimized_stdlib // UNSUPPORTED: nonatomic_rc import StdlibUnittest diff --git a/test/stdlib/CodableTests.swift b/test/stdlib/CodableTests.swift index 626afd8f09eb2..84981454e0f24 100644 --- a/test/stdlib/CodableTests.swift +++ b/test/stdlib/CodableTests.swift @@ -694,7 +694,7 @@ class TestCodable : TestCodableSuper { } func test_URLComponents_Plist() { - for (testLine, components) in urlComponentsValues { + for (testLine, components) in urlComponentsValues { expectRoundTripEqualityThroughPlist(for: components, lineNumber: testLine) } } diff --git a/test/stdlib/RuntimeObjC.swift b/test/stdlib/RuntimeObjC.swift index 64060aad5719d..795fefb374da4 100644 --- a/test/stdlib/RuntimeObjC.swift 
+++ b/test/stdlib/RuntimeObjC.swift @@ -470,34 +470,6 @@ var nsStringCanaryCount = 0 } } -RuntimeFoundationWrappers.test( - "_stdlib_compareNSStringDeterministicUnicodeCollation/NoLeak" -) { - nsStringCanaryCount = 0 - autoreleasepool { - let a = NSStringCanary() - let b = NSStringCanary() - expectEqual(2, nsStringCanaryCount) - _stdlib_compareNSStringDeterministicUnicodeCollation(a, b) - } - expectEqual(0, nsStringCanaryCount) -} - -RuntimeFoundationWrappers.test( - "_stdlib_compareNSStringDeterministicUnicodeCollationPtr/NoLeak" -) { - nsStringCanaryCount = 0 - autoreleasepool { - let a = NSStringCanary() - let b = NSStringCanary() - expectEqual(2, nsStringCanaryCount) - let ptrA = unsafeBitCast(a, to: OpaquePointer.self) - let ptrB = unsafeBitCast(b, to: OpaquePointer.self) - _stdlib_compareNSStringDeterministicUnicodeCollationPointer(ptrA, ptrB) - } - expectEqual(0, nsStringCanaryCount) -} - RuntimeFoundationWrappers.test("_stdlib_NSStringHashValue/NoLeak") { nsStringCanaryCount = 0 autoreleasepool { diff --git a/test/stdlib/StringAPI.swift b/test/stdlib/StringAPI.swift index a51a6866d42d3..6c7662e2b7ab9 100644 --- a/test/stdlib/StringAPI.swift +++ b/test/stdlib/StringAPI.swift @@ -92,7 +92,7 @@ let tests = [ ComparisonTest(.eq, "\u{212b}", "A\u{30a}"), ComparisonTest(.eq, "\u{212b}", "\u{c5}"), ComparisonTest(.eq, "A\u{30a}", "\u{c5}"), - ComparisonTest(.lt, "A\u{30a}", "a"), + ComparisonTest(.gt, "A\u{30a}", "a"), ComparisonTest(.lt, "A", "A\u{30a}"), // U+2126 OHM SIGN @@ -176,25 +176,7 @@ func checkStringComparison( // Mark the test cases that are expected to fail in checkStringComparison -let comparisonTests = tests.map { - (test: ComparisonTest) -> ComparisonTest in - switch (test.expectedUnicodeCollation, test.lhs, test.rhs) { - case (.gt, "t", "Tt"), (.lt, "A\u{30a}", "a"): - return test.replacingPredicate(.nativeRuntime( - "Comparison reversed between ICU and CFString, https://bugs.swift.org/browse/SR-530")) - - case (.gt, "\u{0}", ""), (.lt, "\u{0}", 
"\u{0}\u{0}"): - return test.replacingPredicate(.nativeRuntime( - "Null-related issue: https://bugs.swift.org/browse/SR-630")) - - case (.lt, "\u{0301}", "\u{0954}"), (.lt, "\u{0341}", "\u{0954}"): - return test.replacingPredicate(.nativeRuntime( - "Compares as equal with ICU")) - - default: - return test - } -} +let comparisonTests = tests for test in comparisonTests { StringTests.test("String.{Equatable,Hashable,Comparable}: line \(test.loc.line)") diff --git a/test/stdlib/StringOrderRelation.swift b/test/stdlib/StringOrderRelation.swift index 7df4802fb7576..b804c628d3c5e 100644 --- a/test/stdlib/StringOrderRelation.swift +++ b/test/stdlib/StringOrderRelation.swift @@ -6,10 +6,7 @@ import StdlibUnittest var StringOrderRelationTestSuite = TestSuite("StringOrderRelation") -StringOrderRelationTestSuite.test("StringOrderRelation/ASCII/NullByte") - .xfail(.nativeRuntime("String comparison: ICU vs. Foundation " + - "https://bugs.swift.org/browse/SR-630")) - .code { +StringOrderRelationTestSuite.test("StringOrderRelation/ASCII/NullByte") { let baseString = "a" let nullbyteString = "a\0" expectTrue(baseString < nullbyteString) diff --git a/validation-test/stdlib/Algorithm.swift b/validation-test/stdlib/Algorithm.swift index 90076bd5e5d0c..8df47d2184d94 100644 --- a/validation-test/stdlib/Algorithm.swift +++ b/validation-test/stdlib/Algorithm.swift @@ -74,10 +74,7 @@ Algorithm.test("min,max") { expectEqual(c1.identity, max(a1, b1, c2, c1).identity) } -Algorithm.test("sorted/strings") - .xfail(.nativeRuntime("String comparison: ICU vs. 
Foundation " + - "https://bugs.swift.org/browse/SR-530")) - .code { +Algorithm.test("sorted/strings") { expectEqual( ["Banana", "apple", "cherry"], ["apple", "Banana", "cherry"].sorted()) diff --git a/validation-test/stdlib/String.swift b/validation-test/stdlib/String.swift index 082b30e557625..f4698d3d9663f 100644 --- a/validation-test/stdlib/String.swift +++ b/validation-test/stdlib/String.swift @@ -1,12 +1,21 @@ -// RUN: %target-run-simple-swift +// RUN: %empty-directory(%t) +// RUN: if [ %target-runtime == "objc" ]; \ +// RUN: then \ +// RUN: %target-clang -fobjc-arc %S/Inputs/NSSlowString/NSSlowString.m -c -o %t/NSSlowString.o && \ +// RUN: %target-build-swift -I %S/Inputs/NSSlowString/ %t/NSSlowString.o %s -Xfrontend -disable-access-control -o %t/String; \ +// RUN: else \ +// RUN: %target-build-swift %s -Xfrontend -disable-access-control -o %t/String; \ +// RUN: fi + +// RUN: %target-run %t/String // REQUIRES: executable_test - // XFAIL: interpret import StdlibUnittest import StdlibCollectionUnittest #if _runtime(_ObjC) +import NSSlowString import Foundation // For NSRange #endif @@ -1138,7 +1147,7 @@ StringTests.test("growth") { s2 = s } expectEqual(s2, s) - expectLE(s.nativeCapacity, 34) + expectLE(s.nativeCapacity, 40) } StringTests.test("Construction") { @@ -1165,24 +1174,6 @@ StringTests.test("Conversions") { } } -// Check the internal functions are correct for ASCII values -StringTests.test( - "forall x: Int8, y: Int8 . 
x < 128 ==> x Void + var comparison: _Ordering + + init(_ strings: [String], _ comparison: _Ordering) { + self.strings = strings + self.comparison = comparison + } + + func test() { + for pair in zip(strings, strings[1...]) { + switch comparison { + case .less: + expectLT(pair.0, pair.1) + case .greater: + expectGT(pair.0, pair.1) + case .equal: + expectEqual(pair.0, pair.1) + } + } + } + + func testOpaqueStrings() { +#if _runtime(_ObjC) + let opaqueStrings = strings.map { NSSlowString(string: $0) as String } + for pair in zip(opaqueStrings, opaqueStrings[1...]) { + switch comparison { + case .less: + expectLT(pair.0, pair.1) + case .greater: + expectGT(pair.0, pair.1) + case .equal: + expectEqual(pair.0, pair.1) + } + } +#endif + } + + func testOpaqueSubstrings() { +#if _runtime(_ObjC) + for pair in zip(strings, strings[1...]) { + let string1 = pair.0.dropLast() + let string2 = pair.1 + let opaqueString = (NSSlowString(string: pair.0) as String).dropLast() + + guard string1.count > 0 else { return } + + let expectedResult: _Ordering = string1 < string2 ? .less : (string1 > string2 ? .greater : .equal) + let opaqueResult: _Ordering = opaqueString < string2 ? .less : (opaqueString > string2 ? 
.greater : .equal) + + expectEqual(opaqueResult, expectedResult) + } +#endif + } +} + +let simpleComparisonTestCases = [ + ComparisonTestCase(["a", "a"], .equal), + ComparisonTestCase(["abcdefg", "abcdefg"], .equal), + ComparisonTestCase(["", "Z", "a", "b", "c", "\u{00c5}", "á"], .less), + + ComparisonTestCase(["ábcdefg", "ábcdefgh", "ábcdefghi"], .less), + ComparisonTestCase(["abcdefg", "abcdefgh", "abcdefghi"], .less), +] + +let complexComparisonTestCases = [ + ComparisonTestCase(["á", "\u{0061}\u{0301}"], .equal), + ComparisonTestCase(["à", "\u{0061}\u{0301}", "â", "\u{e3}", "a\u{0308}"], .less), + + // Exploding scalars AND exploding segments + ComparisonTestCase(["\u{fa2}", "\u{fa1}\u{fb7}"], .equal), + ComparisonTestCase([ + "\u{fa2}\u{fa2}\u{fa2}\u{fa2}", + "\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}" + ], .equal), + ComparisonTestCase([ + "\u{fa2}\u{fa2}\u{fa2}\u{fa2}\u{fa2}\u{fa2}\u{fa2}\u{fa2}", + "\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}" + ], .equal), + ComparisonTestCase([ + "a\u{fa2}\u{fa2}a\u{fa2}\u{fa2}\u{fa2}\u{fa2}\u{fa2}\u{fa2}", + "a\u{fa1}\u{fb7}\u{fa1}\u{fb7}a\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}\u{fa1}\u{fb7}" + ], .equal), + + ComparisonTestCase(["😀", "😀"], .equal), + ComparisonTestCase(["\u{2f9df}", "\u{8f38}"], .equal), + ComparisonTestCase([ + "a", + "\u{2f9df}", // D87E DDDF as written, but normalizes to 8f38 + "\u{2f9df}\u{2f9df}", // D87E DDDF as written, but normalizes to 8f38 + "👨🏻", // D83D DC68 D83C DFFB + "👨🏻‍⚕️", // D83D DC68 D83C DFFB 200D 2695 FE0F + "👩‍⚕️", // D83D DC69 200D 2695 FE0F + "👩🏾", // D83D DC69 D83C DFFE + "👩🏾‍⚕", // D83D DC69 D83C DFFE 200D 2695 FE0F + "😀", // D83D DE00 + "😅", // D83D DE05 + "🧀" // D83E DDC0 -- aka a really big scalar + ], .less), + + + ComparisonTestCase(["f̛̗̘̙̜̹̺̻̼͇͈͉͍͎̽̾̿̀́͂̓̈́͆͊͋͌̚ͅ͏͓͔͕͖͙͚͐͑͒͗͛ͣͤͥͦ͘͜͟͢͝͞͠͡", "ơ̗̘̙̜̹̺̻̼͇͈͉͍͎̽̾̿̀́͂̓̈́͆͊͋͌̚ͅ͏͓͔͕͖͙͚͐͑͒͗͛ͥͦͧͨͩͪͫͬͭͮ͘"], .less), + 
ComparisonTestCase(["\u{f90b}", "\u{5587}"], .equal), + + ComparisonTestCase(["a\u{1D160}a", "a\u{1D158}\u{1D1C7}"], .less), + + ComparisonTestCase(["\u{212b}", "\u{00c5}"], .equal), + ComparisonTestCase([ + "A", + "a", + "aa", + "ae", + "ae🧀", + "az", + "aze\u{300}", + "ae\u{301}", + "ae\u{301}ae\u{301}", + "ae\u{301}ae\u{301}ae\u{301}", + "ae\u{301}ae\u{301}ae\u{301}ae\u{301}", + "ae\u{301}ae\u{301}ae\u{301}ae\u{301}ae\u{301}", + "ae\u{301}ae\u{301}ae\u{301}ae\u{301}ae\u{301}ae\u{301}", + "ae\u{301}ae\u{301}ae\u{301}ae\u{301}ae\u{301}ae\u{301}ae\u{301}", + "ae\u{301}ae\u{301}ae\u{301}ae\u{301}ae\u{301}ae\u{301}ae\u{301}ae\u{301}", + "ae\u{302}", + "ae\u{302}{303}", + "ae\u{302}🧀", + "ae\u{303}", + "\u{f90b}\u{f90c}\u{f90d}", // Normalizes to BMP scalars + "🧀", // D83E DDC0 -- aka a really big scalar + "\u{FFEE}" // half width CJK dot + ], .less), + + ComparisonTestCase(["ư̴̵̶̷̸̗̘̙̜̹̺̻̼͇͈͉͍͎̽̾̿̀́͂̓̈́͆͊͋͌̚ͅ͏͓͔͕͖͙͚͐͑͒͗͛ͣͤͥͦͧͨͩͪͫͬͭͮ͘͜͟͢͝͞͠͡", "ì̡̢̧̨̝̞̟̠̣̤̥̦̩̪̫̬̭̮̯̰̹̺̻̼͇͈͉͍͎́̂̃̄̉̊̋̌̍̎̏̐̑̒̓̽̾̿̀́͂̓̈́͆͊͋͌ͅ͏͓͔͕͖͙͐͑͒͗ͬͭͮ͘"], .greater), + ComparisonTestCase(["ư̴̵̶̷̸̗̘̙̜̹̺̻̼͇͈͉͍͎̽̾̿̀́͂̓̈́͆͊͋͌̚ͅ͏͓͔͕͖͙͚͐͑͒͗͛ͣͤͥͦͧͨͩͪͫͬͭͮ͘͜͟͢͝͞͠͡", "aì̡̢̧̨̝̞̟̠̣̤̥̦̩̪̫̬̭̮̯̰̹̺̻̼͇͈͉͍͎́̂̃̄̉̊̋̌̍̎̏̐̑̒̓̽̾̿̀́͂̓̈́͆͊͋͌ͅ͏͓͔͕͖͙͐͑͒͗ͬͭͮ͘"], .greater), + ComparisonTestCase(["ì̡̢̧̨̝̞̟̠̣̤̥̦̩̪̫̬̭̮̯̰̹̺̻̼͇͈͉͍͎́̂̃̄̉̊̋̌̍̎̏̐̑̒̓̽̾̿̀́͂̓̈́͆͊͋͌ͅ͏͓͔͕͖͙͐͑͒͗ͬͭͮ͘", "ì̡̢̧̨̝̞̟̠̣̤̥̦̩̪̫̬̭̮̯̰̹̺̻̼͇͈͉͍͎́̂̃̄̉̊̋̌̍̎̏̐̑̒̓̽̾̿̀́͂̓̈́͆͊͋͌ͅ͏͓͔͕͖͙͐͑͒͗ͬͭͮ͘"], .equal) +] + +let comparisonTestCases = simpleComparisonTestCases + complexComparisonTestCases + +for test in comparisonTestCases { + StringTests.test("Comparison.\(test.strings)") { + test.test() + } + + StringTests.test("Comparison.OpaqueString.\(test.strings)") + .skip(.linuxAny(reason: "NSSlowString requires ObjC interop")) + .code { + test.testOpaqueStrings() + } +} + +for test in simpleComparisonTestCases { + StringTests.test("Comparison.OpaqueSubstring.\(test.strings)") + .skip(.linuxAny(reason: "NSSlowString requires ObjC interop")) + .code { + test.testOpaqueSubstrings() + } +} + 
runAllTests() diff --git a/validation-test/stdlib/StringHashableComparable.swift.gyb b/validation-test/stdlib/StringHashableComparable.swift.gyb deleted file mode 100644 index 63d9715d74824..0000000000000 --- a/validation-test/stdlib/StringHashableComparable.swift.gyb +++ /dev/null @@ -1,97 +0,0 @@ -// RUN: %target-run-simple-swiftgyb -// REQUIRES: executable_test - -// This test requires that the standard library calls ICU -// directly. It is not specific to Linux, it is just that on -// Apple platforms we are using the NSString bridge right now. - -// REQUIRES: OS=linux-gnu - -import StdlibUnittest -import StdlibUnicodeUnittest - -func assertASCIIRepresentationIfPossible(_ s: String) { - for us in s.unicodeScalars { - if !us.isASCII { - return - } - } - precondition(s._guts.isASCII) -} - -func forceUTF16Representation(_ s: String) -> String { - var s = s - s += "\u{fffd}" - s.removeSubrange(s.index(before: s.endIndex).. ExpectedComparisonResult { - return tests[i].order! <=> tests[j].order! - } - - checkHashable( - tests.map { $0.string }, - equalityOracle: { comparisonOracle($0, $1).isEQ() }, - stackTrace: stackTrace.pushIf(true, file: file, line: line)) - - checkComparable( - tests.map { $0.string }, - oracle: comparisonOracle, - stackTrace: stackTrace.pushIf(true, file: file, line: line)) -} - -var StringTests = TestSuite("StringTests") - -StringTests.test("StringComparisonTest.allTests: tests are in ASCII representation") - .forEach(in: StringComparisonTest.allTests) { - test in - assertASCIIRepresentationIfPossible(test.string) -} - -StringTests.test("Comparable") { - let allTestsInUTF16Representation = StringComparisonTest.allTests.map { - test -> StringComparisonTest in - return StringComparisonTest( - forceUTF16Representation(test.string), - test.collationElements, - sourceLocation: SourceLoc( - test.loc.file, - test.loc.line, - comment: (test.loc.comment ?? 
"") + "\nin Unicode representation")) - } - checkStringHashableComparable(StringComparisonTest.allTests + allTestsInUTF16Representation) -} - -runAllTests() -