Patch: add a testable String._deconstructUTF8(scratch:) and a test for it.
Text before the first "diff --git" header is commentary, not patch payload.

  stdlib/public/core/StringObject.swift
      _StringObject.owner: nil unless isMortal, otherwise largeAddressBits
      reinterpret-cast to AnyObject.
  stdlib/public/core/StringTesting.swift
      String._deconstructUTF8 forwards to _StringGuts._deconstructUTF8, which
      returns (owner, pointer, length, usesScratch, allocatedMemory) as laid
      out in the table inside the hunk. _allocateForDeconstruct is the
      copying slow path (NUL-terminated Array of the UTF-8 bytes).
  test/stdlib/StringDeconstruction.swift
      New executable test covering small, large, and (ObjC-only) Cocoa strings.

NOTE(review): this copy was rebuilt from a whitespace-collapsed paste in which
generic clauses had been stripped. Line breaks, indentation, blank lines and
comment-table padding are reconstructed; <ToPointer: _Pointer>, Array<UInt8>,
UnsafePointer<UInt8> and id<T> were restored from how the values are used.
Confirm against the upstream commit. If context indentation does not match
the target tree, apply with `git apply --ignore-whitespace`.

diff --git a/stdlib/public/core/StringObject.swift b/stdlib/public/core/StringObject.swift
index 41398bc292011..30f3989df4699 100644
--- a/stdlib/public/core/StringObject.swift
+++ b/stdlib/public/core/StringObject.swift
@@ -856,6 +856,14 @@ extension _StringObject {
     return Builtin.reinterpretCast(largeAddressBits)
 #endif
   }
+
+  @_alwaysEmitIntoClient
+  @inlinable
+  @inline(__always)
+  internal var owner: AnyObject? {
+    guard self.isMortal else { return nil }
+    return Builtin.reinterpretCast(largeAddressBits)
+  }
 }
 
 // Aggregate queries / abstractions
diff --git a/stdlib/public/core/StringTesting.swift b/stdlib/public/core/StringTesting.swift
index dee9f7bf8cf18..999fac1a64a4c 100644
--- a/stdlib/public/core/StringTesting.swift
+++ b/stdlib/public/core/StringTesting.swift
@@ -39,6 +39,20 @@ struct _StringRepresentation {
 extension String {
   public // @testable
   func _classify() -> _StringRepresentation { return _guts._classify() }
+
+  @_alwaysEmitIntoClient
+  public // @testable
+  func _deconstructUTF8<ToPointer: _Pointer>(
+    scratch: UnsafeMutableRawBufferPointer?
+  ) -> (
+    owner: AnyObject?,
+    ToPointer,
+    length: Int,
+    usesScratch: Bool,
+    allocatedMemory: Bool
+  ) {
+    _guts._deconstructUTF8(scratch: scratch)
+  }
 }
 
 extension _StringGuts {
@@ -72,5 +86,92 @@ extension _StringGuts {
     }
     fatalError()
   }
-}
+
+/*
+
+ Deconstruct the string into contiguous UTF-8, allocating memory if necessary
+
+┌────────────────────╥───────────────────────┬─────────────────────┬─────────────┬─────────────────┐
+│ Form               ║ owner                 │ pointer+length      │ usesScratch │ allocatedMemory │
+├────────────────────╫───────────────────────┼─────────────────────┼─────────────┼─────────────────┤
+│ small with scratch ║ nil                   │ `scratch`           │ true        │ false           │
+├────────────────────╫───────────────────────┼─────────────────────┼─────────────┼─────────────────┤
+│ small w/o scratch  ║ extra allocation      │ `owner` pointer     │ false       │ true            │
+╞════════════════════╬═══════════════════════╪═════════════════════╪═════════════╪═════════════════╡
+│ immortal, large    ║ nil                   │ literal pointer     │ false       │ false           │
+├────────────────────╫───────────────────────┼─────────────────────┼─────────────┼─────────────────┤
+│ native             ║ __StringStorage       │ tail alloc pointer  │ false       │ false           │
+╞════════════════════╬═══════════════════════╪═════════════════════╪═════════════╪═════════════════╡
+│ shared             ║ __SharedStringStorage │ shared pointer      │ false       │ false           │
+├────────────────────╫───────────────────────┼─────────────────────┼─────────────┼─────────────────┤
+│ shared, bridged    ║ _CocoaString          │ cocoa ASCII pointer │ false       │ false           │
+╞════════════════════╬═══════════════════════╪═════════════════════╪═════════════╪═════════════════╡
+│ foreign            ║ extra allocation      │ `owner` pointer     │ false       │ true            │
+└────────────────────╨───────────────────────┴─────────────────────┴─────────────┴─────────────────┘
+
+*/
+  @_alwaysEmitIntoClient
+  internal // TODO: figure out if this works as a compiler intrinsic
+  func _deconstructUTF8<ToPointer: _Pointer>(
+    scratch: UnsafeMutableRawBufferPointer?
+  ) -> (
+    owner: AnyObject?,
+    ToPointer,
+    length: Int,
+    usesScratch: Bool,
+    allocatedMemory: Bool
+  ) {
+
+    // If we're small, try to copy into the scratch space provided
+    if self.isSmall {
+      let smol = self.asSmall
+      if let scratch = scratch, scratch.count > smol.count {
+        let scratchStart =
+          scratch.baseAddress!
+        smol.withUTF8 { smolUTF8 -> () in
+          scratchStart.initializeMemory(
+            as: UInt8.self, from: smolUTF8.baseAddress!, count: smolUTF8.count)
+        }
+        scratch[smol.count] = 0
+        return (
+          owner: nil,
+          _convertPointerToPointerArgument(scratchStart),
+          length: smol.count,
+          usesScratch: true, allocatedMemory: false)
+      }
+    } else if _fastPath(self.isFastUTF8) {
+      let ptr: ToPointer =
+        _convertPointerToPointerArgument(self._object.fastUTF8.baseAddress!)
+      return (
+        owner: self._object.owner,
+        ptr,
+        length: self._object.count,
+        usesScratch: false, allocatedMemory: false)
+    }
+
+    let (object, ptr, len) = self._allocateForDeconstruct()
+    return (
+      owner: object,
+      _convertPointerToPointerArgument(ptr),
+      length: len,
+      usesScratch: false,
+      allocatedMemory: true)
+  }
+
+  @_alwaysEmitIntoClient
+  @inline(never) // slow path
+  internal
+  func _allocateForDeconstruct() -> (
+    owner: AnyObject,
+    UnsafeRawPointer,
+    length: Int
+  ) {
+    let utf8 = Array(String(self).utf8) + [0]
+    let (owner, ptr): (AnyObject?, UnsafeRawPointer) =
+      _convertConstArrayToPointerArgument(utf8)
+
+    // Array's owner cannot be nil, even though it is declared optional...
+    return (owner: owner!, ptr, length: utf8.count - 1)
+  }
+}
 
diff --git a/test/stdlib/StringDeconstruction.swift b/test/stdlib/StringDeconstruction.swift
new file mode 100644
index 0000000000000..2e42bb8ee764d
--- /dev/null
+++ b/test/stdlib/StringDeconstruction.swift
@@ -0,0 +1,131 @@
+// RUN: %target-run-simple-swift
+// REQUIRES: executable_test
+
+import StdlibUnittest
+defer { runAllTests() }
+
+var StringDeconstructTests = TestSuite("StringDeconstructTests")
+
+enum ExpectedDeconstruction {
+  case scratchIfAvailable
+  case interiorPointer
+  case extraAllocation
+}
+
+func expectDeconstruct(
+  _ str: String,
+  _ expectDeconstruct: ExpectedDeconstruction,
+  stackTrace: SourceLocStack = SourceLocStack(),
+  showFrame: Bool = true,
+  file: String = #file, line: UInt = #line
+) {
+  var stackTrace = stackTrace.pushIf(showFrame, file: file, line: line)
+  let expectBytes = Array(str.utf8)
+
+  _ = Array<UInt8>(unsafeUninitializedCapacity: 16) {
+    buffer, initializedCount in
+    // Deconstruct with a provided scratch space
+
+    // WS == with scratch, N == nil
+    let scratch = UnsafeMutableRawBufferPointer(buffer)
+    let (ownerWS, ptrWS, lengthWS, usedScratchWS, allocatedMemoryWS)
+      : (AnyObject?, UnsafePointer<UInt8>, Int, Bool, Bool)
+      = str._deconstructUTF8(scratch: scratch)
+    let (ownerN, ptrN, lengthN, usedScratchN, allocatedMemoryN)
+      : (AnyObject?, UnsafePointer<UInt8>, Int, Bool, Bool)
+      = str._deconstructUTF8(scratch: nil)
+
+    let rawBytesWS = UnsafeRawBufferPointer(start: ptrWS, count: lengthWS)
+    let rawBytesN = UnsafeRawBufferPointer(start: ptrN, count: lengthN)
+
+    expectEqualSequence(expectBytes, rawBytesWS, stackTrace: stackTrace)
+    expectEqualSequence(rawBytesWS, rawBytesN, stackTrace: stackTrace)
+
+    switch expectDeconstruct {
+      case .scratchIfAvailable:
+        expectNil(ownerWS, stackTrace: stackTrace)
+        expectNotNil(ownerN, stackTrace: stackTrace)
+
+        expectEqual(scratch.baseAddress, rawBytesWS.baseAddress,
+          stackTrace: stackTrace)
+        expectNotEqual(scratch.baseAddress, rawBytesN.baseAddress,
+          stackTrace: stackTrace)
+
+        expectTrue(lengthWS < scratch.count, stackTrace: stackTrace)
+        expectTrue(lengthN < scratch.count, stackTrace: stackTrace)
+
+        expectTrue(usedScratchWS, stackTrace: stackTrace)
+        expectFalse(usedScratchN, stackTrace: stackTrace)
+
+        expectFalse(allocatedMemoryWS, stackTrace: stackTrace)
+        expectTrue(allocatedMemoryN, stackTrace: stackTrace)
+
+      case .interiorPointer:
+        // TODO: owner == (immortal ? nil : StringObject.largeAddress)
+        expectTrue(str.isContiguousUTF8, stackTrace: stackTrace)
+        var copy = str
+        copy.withUTF8 {
+          expectEqual($0.baseAddress, ptrWS, stackTrace: stackTrace)
+          expectEqual($0.baseAddress, ptrN, stackTrace: stackTrace)
+          expectEqual($0.count, lengthWS, stackTrace: stackTrace)
+          expectEqual($0.count, lengthN, stackTrace: stackTrace)
+        }
+
+        expectFalse(usedScratchWS, stackTrace: stackTrace)
+        expectFalse(usedScratchN, stackTrace: stackTrace)
+        expectFalse(allocatedMemoryWS, stackTrace: stackTrace)
+        expectFalse(allocatedMemoryN, stackTrace: stackTrace)
+      case .extraAllocation:
+        expectFalse(str.isContiguousUTF8, stackTrace: stackTrace)
+        expectNotNil(ownerWS, stackTrace: stackTrace)
+        expectNotNil(ownerN, stackTrace: stackTrace)
+        expectFalse(usedScratchWS, stackTrace: stackTrace)
+        expectFalse(usedScratchN, stackTrace: stackTrace)
+        expectTrue(allocatedMemoryWS, stackTrace: stackTrace)
+        expectTrue(allocatedMemoryN, stackTrace: stackTrace)
+    }
+  }
+}
+
+@inline(never)
+func id<T>(_ a: T) -> T { a }
+
+StringDeconstructTests.test("deconstruct") {
+  let smallASCII = "abcd"
+
+#if arch(i386) || arch(arm) || arch(wasm32)
+  let smallUTF8 = "ジッパ"
+#else
+  let smallUTF8 = "ジッパー"
+#endif
+
+  let large = "the quick fox jumped over the lazy brown dog"
+
+  var largeMortal = large
+  largeMortal.append(id("🧟‍♀️"))
+  largeMortal.append(id(largeMortal.last!))
+
+  expectDeconstruct(smallASCII, .scratchIfAvailable)
+  expectDeconstruct(smallUTF8, .scratchIfAvailable)
+  expectDeconstruct(large, .interiorPointer)
+  expectDeconstruct(largeMortal, .interiorPointer)
+}
+
+#if _runtime(_ObjC)
+import Foundation
+StringDeconstructTests.test("deconstruct cocoa") {
+  let smallCocoa: NSString = "aaa"
+  let largeASCIICocoa: NSString = "the quick fox jumped over the lazy brown dog"
+  let largeCocoa: NSString = "the quick 🧟‍♀️ ate the slow 🧠"
+
+#if arch(i386) || arch(arm) || arch(wasm32)
+  expectDeconstruct(smallCocoa as String, .interiorPointer)
+#else
+  expectDeconstruct(smallCocoa as String, .scratchIfAvailable)
+#endif
+
+  expectDeconstruct(largeASCIICocoa as String, .interiorPointer)
+  expectDeconstruct(largeCocoa as String, .extraAllocation)
+}
+#endif
+