From 43b8a999a2c46a5d602d53240382771d044a97a4 Mon Sep 17 00:00:00 2001 From: Simon Evans Date: Sun, 27 May 2018 17:41:20 +0100 Subject: [PATCH] SR-7620: Use charset encoding if available for NSString(contentsOf:usedEncoding:) --- .../project.pbxproj | 4 +++ Foundation.xcodeproj/project.pbxproj | 4 +++ Foundation/NSData.swift | 20 +++++++---- Foundation/NSString.swift | 14 +++++--- Foundation/StringEncodings.swift | 34 +++++++++++++++++++ TestFoundation/HTTPServer.swift | 11 ++++++ .../Resources/NSString-ISO-8859-1-data.txt | 3 ++ TestFoundation/TestNSString.swift | 12 ++++++- build.py | 3 +- 9 files changed, 91 insertions(+), 14 deletions(-) create mode 100644 TestFoundation/Resources/NSString-ISO-8859-1-data.txt diff --git a/DarwinCompatibilityTests.xcodeproj/project.pbxproj b/DarwinCompatibilityTests.xcodeproj/project.pbxproj index 689ba61fea..eaa4138ab1 100644 --- a/DarwinCompatibilityTests.xcodeproj/project.pbxproj +++ b/DarwinCompatibilityTests.xcodeproj/project.pbxproj @@ -7,6 +7,7 @@ objects = { /* Begin PBXBuildFile section */ + B907F36F20BB188800013CBE /* NSString-ISO-8859-1-data.txt in Resources */ = {isa = PBXBuildFile; fileRef = B907F36E20BB188800013CBE /* NSString-ISO-8859-1-data.txt */; }; B917D32420B0DB9700728EE0 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B917D32320B0DB9700728EE0 /* Foundation.framework */; }; B917D32620B0DE2000728EE0 /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = B917D32520B0DE2000728EE0 /* main.swift */; }; B95788861F6FB9470003EB01 /* TestNSNumberBridging.swift in Sources */ = {isa = PBXBuildFile; fileRef = B95788851F6FB9470003EB01 /* TestNSNumberBridging.swift */; }; @@ -135,6 +136,7 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + B907F36E20BB188800013CBE /* NSString-ISO-8859-1-data.txt */ = {isa = PBXFileReference; lastKnownFileType = text; name = "NSString-ISO-8859-1-data.txt"; path = "TestFoundation/Resources/NSString-ISO-8859-1-data.txt"; sourceTree = ""; }; B917D31C20B0DB8B00728EE0 /* xdgTestHelper */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = xdgTestHelper; sourceTree = BUILT_PRODUCTS_DIR; }; B917D32320B0DB9700728EE0 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; B917D32520B0DE2000728EE0 /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = main.swift; path = TestFoundation/xdgTestHelper/main.swift; sourceTree = ""; }; @@ -301,6 +303,7 @@ B9C89FAA1F6DCAE700087AF4 /* NSString-UTF16-LE-data.txt */, B9C89FB01F6DCAE900087AF4 /* NSString-UTF32-BE-data.txt */, B9C89FA51F6DCAE500087AF4 /* NSString-UTF32-LE-data.txt */, + B907F36E20BB188800013CBE /* NSString-ISO-8859-1-data.txt */, B9C89FAE1F6DCAE800087AF4 /* NSStringTestData.txt */, B9C89FB21F6DCAE900087AF4 /* NSURLTestData.plist */, B9C89FB61F6DCAEA00087AF4 /* NSXMLDocumentTestData.xml */, @@ -523,6 +526,7 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( + B907F36F20BB188800013CBE /* NSString-ISO-8859-1-data.txt in Resources */, B9C89FBA1F6DCAEB00087AF4 /* NSString-UTF32-LE-data.txt in Resources */, B9C89FBB1F6DCAEB00087AF4 /* NSKeyedUnarchiver-EdgeInsetsTest.plist in Resources */, B9C89FBC1F6DCAEB00087AF4 /* NSKeyedUnarchiver-ConcreteValueTest.plist in Resources */, diff --git a/Foundation.xcodeproj/project.pbxproj b/Foundation.xcodeproj/project.pbxproj index 0cb903d404..fd4ba30f66 100644 --- a/Foundation.xcodeproj/project.pbxproj +++ b/Foundation.xcodeproj/project.pbxproj @@ -329,6 +329,7 @@ 9F0DD3571ECD783500F68030 /* SwiftFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 5B5D885D1BBC938800234F36 /* SwiftFoundation.framework */; }; A058C2021E529CF100B07AA1 /* TestMassFormatter.swift in Sources */ = {isa = PBXBuildFile; fileRef = A058C2011E529CF100B07AA1 /* TestMassFormatter.swift */; }; AE35A1861CBAC85E0042DB84 /* SwiftFoundation.h in Headers */ = {isa = PBXBuildFile; fileRef = AE35A1851CBAC85E0042DB84 /* SwiftFoundation.h */; settings = {ATTRIBUTES = (Public, ); }; }; + B907F36B20BB07A700013CBE /* NSString-ISO-8859-1-data.txt in Resources */ = {isa = PBXBuildFile; fileRef = B907F36A20BB07A700013CBE /* NSString-ISO-8859-1-data.txt */; }; B90C57BB1EEEEA5A005208AE /* TestFileManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 525AECEB1BF2C96400D15BB0 /* TestFileManager.swift */; }; B90C57BC1EEEEA5A005208AE /* TestThread.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5E5835F31C20C9B500C81317 /* TestThread.swift */; }; B910957A1EEF237800A71930 /* NSString-UTF16-LE-data.txt in Resources */ = {isa = PBXBuildFile; fileRef = B91095781EEF237800A71930 /* NSString-UTF16-LE-data.txt */; }; @@ -812,6 +813,7 @@ A5A34B551C18C85D00FD972B /* TestByteCountFormatter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestByteCountFormatter.swift; sourceTree = ""; }; AE35A1851CBAC85E0042DB84 /* SwiftFoundation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SwiftFoundation.h; sourceTree = ""; }; B167A6641ED7303F0040B09A /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; + B907F36A20BB07A700013CBE /* NSString-ISO-8859-1-data.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "NSString-ISO-8859-1-data.txt"; sourceTree = ""; }; B91095781EEF237800A71930 /* NSString-UTF16-LE-data.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "NSString-UTF16-LE-data.txt"; sourceTree = ""; }; B91095791EEF237800A71930 /* NSString-UTF16-BE-data.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "NSString-UTF16-BE-data.txt"; sourceTree = ""; }; B933A79C1F3055F600FE6846 /* NSString-UTF32-BE-data.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = "NSString-UTF32-BE-data.txt"; sourceTree = ""; }; @@ -1457,6 +1459,7 @@ B91095791EEF237800A71930 /* NSString-UTF16-BE-data.txt */, B933A79C1F3055F600FE6846 /* NSString-UTF32-BE-data.txt */, B933A79D1F3055F600FE6846 /* NSString-UTF32-LE-data.txt */, + B907F36A20BB07A700013CBE /* NSString-ISO-8859-1-data.txt */, 528776181BF27D9500CB0090 /* Test.plist */, EA66F63B1BF1619600136161 /* NSURLTestData.plist */, E1A3726E1C31EBFB0023AF4D /* NSXMLDocumentTestData.xml */, @@ -2150,6 +2153,7 @@ D3E8D6D51C36AC0C00295652 /* NSKeyedUnarchiver-RectTest.plist in Resources */, D3A597F81C3415CC00295652 /* NSKeyedUnarchiver-URLTest.plist in Resources */, D3E8D6D31C36982700295652 /* NSKeyedUnarchiver-EdgeInsetsTest.plist in Resources */, + B907F36B20BB07A700013CBE /* NSString-ISO-8859-1-data.txt in Resources */, D370696E1C394FBF00295652 /* NSKeyedUnarchiver-RangeTest.plist in Resources */, D3A597F71C3415CC00295652 /* NSKeyedUnarchiver-ArrayTest.plist in Resources */, CE19A88C1C23AA2300B4CB6A /* NSStringTestData.txt in Resources */, diff --git a/Foundation/NSData.swift b/Foundation/NSData.swift index dba803e49d..d4eea5ac7d 100644 --- a/Foundation/NSData.swift +++ b/Foundation/NSData.swift @@ -185,24 +185,28 @@ open class NSData : NSObject, NSCopying, NSMutableCopying, NSSecureCoding { /// Initializes a data object with the data from the location specified by a given URL. public init(contentsOf url: URL, options readOptionsMask: ReadingOptions = []) throws { super.init() - try _contentsOf(url: url, options: readOptionsMask) + let (data, _) = try NSData.contentsOf(url: url, options: readOptionsMask) + _init(bytes: UnsafeMutableRawPointer(mutating: data.bytes), length: data.length, copy: true) } /// Initializes a data object with the data from the location specified by a given URL. public init?(contentsOf url: URL) { super.init() do { - try _contentsOf(url: url) + let (data, _) = try NSData.contentsOf(url: url) + _init(bytes: UnsafeMutableRawPointer(mutating: data.bytes), length: data.length, copy: true) } catch { return nil } } - /// Initializes a data object with the data from the location specified by a given URL. - private func _contentsOf(url: URL, options readOptionsMask: ReadingOptions = []) throws { + internal static func contentsOf(url: URL, options readOptionsMask: ReadingOptions = []) throws -> (NSData, URLResponse?) { + let readResult: NSData + var urlResponse: URLResponse? + if url.isFileURL { - let readResult = try NSData.readBytesFromFileWithExtendedAttributes(url.path, options: readOptionsMask) - _init(bytes: readResult.bytes, length: readResult.length, copy: false, deallocator: readResult.deallocator) + let data = try NSData.readBytesFromFileWithExtendedAttributes(url.path, options: readOptionsMask) + readResult = NSData(bytesNoCopy: data.bytes, length: data.length, deallocator: data.deallocator) } else { let session = URLSession(configuration: URLSessionConfiguration.default) let cond = NSCondition() @@ -210,6 +214,7 @@ open class NSData : NSObject, NSCopying, NSMutableCopying, NSSecureCoding { var resData: Data? let task = session.dataTask(with: url, completionHandler: { data, response, error in resData = data + urlResponse = response resError = error cond.broadcast() }) @@ -218,8 +223,9 @@ open class NSData : NSObject, NSCopying, NSMutableCopying, NSSecureCoding { guard let data = resData else { throw resError! } - _init(bytes: UnsafeMutableRawPointer(mutating: data._nsObject.bytes), length: data.count, copy: true) + readResult = NSData(bytes: UnsafeMutableRawPointer(mutating: data._nsObject.bytes), length: data.count) } + return (readResult, urlResponse) } /// Initializes a data object with the given Base64 encoded string. diff --git a/Foundation/NSString.swift b/Foundation/NSString.swift index 92ba2db624..9be1c4b688 100644 --- a/Foundation/NSString.swift +++ b/Foundation/NSString.swift @@ -1254,12 +1254,14 @@ extension NSString { public convenience init(contentsOfFile path: String, encoding enc: UInt) throws { try self.init(contentsOf: URL(fileURLWithPath: path), encoding: enc) } - + public convenience init(contentsOf url: URL, usedEncoding enc: UnsafeMutablePointer?) throws { - let readResult = try NSData(contentsOf: url, options:[]) + let (readResult, urlResponse) = try NSData.contentsOf(url: url) let encoding: UInt let offset: Int + // Look for a BOM (Byte Order Marker) to try and determine the text Encoding, this also skips + // over the bytes. This takes precedence over the textEncoding in the http header let bytePtr = readResult.bytes.bindMemory(to: UInt8.self, capacity:readResult.length) if readResult.length >= 4 && bytePtr[0] == 0xFF && bytePtr[1] == 0xFE && bytePtr[2] == 0x00 && bytePtr[3] == 0x00 { encoding = String.Encoding.utf32LittleEndian.rawValue @@ -1277,14 +1279,15 @@ extension NSString { encoding = String.Encoding.utf32BigEndian.rawValue offset = 4 } - else { + else if let charSet = urlResponse?.textEncodingName, let textEncoding = String.Encoding(charSet: charSet) { + encoding = textEncoding.rawValue + offset = 0 + } else { //Need to work on more conditions. This should be the default encoding = String.Encoding.utf8.rawValue offset = 0 } - enc?.pointee = encoding - // Since the encoding being passed includes the byte order the BOM wont be checked or skipped, so pass offset to // manually skip the BOM header. guard let cf = CFStringCreateWithBytes(kCFAllocatorDefault, bytePtr + offset, readResult.length - offset, @@ -1301,6 +1304,7 @@ extension NSString { "NSDebugDescription" : "Unable to bridge CFString to String." ]) } + enc?.pointee = encoding } public convenience init(contentsOfFile path: String, usedEncoding enc: UnsafeMutablePointer?) throws { diff --git a/Foundation/StringEncodings.swift b/Foundation/StringEncodings.swift index 83d5a6c0c7..2d098d09b4 100644 --- a/Foundation/StringEncodings.swift +++ b/Foundation/StringEncodings.swift @@ -27,6 +27,39 @@ extension String { public static let utf32 = Encoding(rawValue: 0x8c000100) public static let utf32BigEndian = Encoding(rawValue: 0x98000100) public static let utf32LittleEndian = Encoding(rawValue: 0x9c000100) + + // Map selected IANA character set names to encodings, see + // https://www.iana.org/assignments/character-sets/character-sets.xhtml + internal init?(charSet: String) { + let encoding: Encoding? + + switch charSet.lowercased() { + case "us-ascii": encoding = .ascii + case "utf-8": encoding = .utf8 + case "utf-16": encoding = .utf16 + case "utf-16be": encoding = .utf16BigEndian + case "utf-16le": encoding = .utf16LittleEndian + case "utf-32": encoding = .utf32 + case "utf-32be": encoding = .utf32BigEndian + case "utf-32le": encoding = .utf32LittleEndian + case "iso-8859-1": encoding = .isoLatin1 + case "iso-8859-2": encoding = .isoLatin2 + case "iso-2022-jp": encoding = .iso2022JP + case "windows-1250": encoding = .windowsCP1250 + case "windows-1251": encoding = .windowsCP1251 + case "windows-1252": encoding = .windowsCP1252 + case "windows-1253": encoding = .windowsCP1253 + case "windows-1254": encoding = .windowsCP1254 + case "shift_jis": encoding = .shiftJIS + case "euc-jp": encoding = .japaneseEUC + case "macintosh": encoding = .macOSRoman + default: encoding = nil + } + guard let value = encoding?.rawValue else { + return nil + } + rawValue = value + } } public typealias EncodingConversionOptions = NSString.EncodingConversionOptions @@ -50,6 +83,7 @@ extension String.Encoding : CustomStringConvertible { } } + @available(*, unavailable, renamed: "String.Encoding") public typealias NSStringEncoding = UInt diff --git a/TestFoundation/HTTPServer.swift b/TestFoundation/HTTPServer.swift index 00a22304c9..c240370670 100644 --- a/TestFoundation/HTTPServer.swift +++ b/TestFoundation/HTTPServer.swift @@ -306,6 +306,7 @@ struct _HTTPResponse { enum Response : Int { case OK = 200 case REDIRECT = 302 + case NOTFOUND = 404 } private let responseCode: Response private let headers: String @@ -358,6 +359,16 @@ public class TestURLSessionServer { if req.uri.hasPrefix("/LandOfTheLostCities/") { /* these are all misbehaving servers */ try httpServer.respondWithBrokenResponses(uri: req.uri) + } else if req.uri == "/NSString-ISO-8859-1-data.txt" { + // Serve this directly as binary data to avoid any String encoding conversions. + if let url = testBundle().url(forResource: "NSString-ISO-8859-1-data", withExtension: "txt"), + let content = try? Data(contentsOf: url) { + var responseData = "HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=ISO-8859-1\r\nContent-Length: \(content.count)\r\n\r\n".data(using: .ascii)! + responseData.append(content) + try httpServer.socket.writeRawData(responseData) + } else { + try httpServer.respond(with: _HTTPResponse(response: .NOTFOUND, body: "Not Found")) + } } else { try httpServer.respond(with: process(request: req), startDelay: self.startDelay, sendDelay: self.sendDelay, bodyChunks: self.bodyChunks) } diff --git a/TestFoundation/Resources/NSString-ISO-8859-1-data.txt b/TestFoundation/Resources/NSString-ISO-8859-1-data.txt new file mode 100644 index 0000000000..3d959b1320 --- /dev/null +++ b/TestFoundation/Resources/NSString-ISO-8859-1-data.txt @@ -0,0 +1,3 @@ +This file is encoded as ISO-8859-1 + + diff --git a/TestFoundation/TestNSString.swift b/TestFoundation/TestNSString.swift index eb9184319e..56095b3474 100755 --- a/TestFoundation/TestNSString.swift +++ b/TestFoundation/TestNSString.swift @@ -27,7 +27,7 @@ internal let kCFStringEncodingUTF32LE = CFStringBuiltInEncodings.UTF32LE.rawVal #endif -class TestNSString : XCTestCase { +class TestNSString: LoopbackServerTest { static var allTests: [(String, (TestNSString) -> () throws -> Void)] { return [ @@ -292,6 +292,16 @@ class TestNSString : XCTestCase { } catch { XCTFail("Unable to init NSString from contentsOf:encoding:") } + + let url = URL(string: "http://127.0.0.1:\(TestURLSession.serverPort)/NSString-ISO-8859-1-data.txt")! + var enc: UInt = 0 + let contents = try? NSString(contentsOf: url, usedEncoding: &enc) + + XCTAssertNotNil(contents) + XCTAssertEqual(enc, String.Encoding.isoLatin1.rawValue) + if let contents = contents { + XCTAssertEqual(contents, "This file is encoded as ISO-8859-1\nÀÁÂÃÄÅÿ\n±\n") + } } func test_FromContentOfFileUsedEncodingIgnored() { diff --git a/build.py b/build.py index 2fbfd82189..9679d64422 100755 --- a/build.py +++ b/build.py @@ -444,7 +444,7 @@ 'Foundation/URLSession/NativeProtocol.swift', 'Foundation/URLSession/TransferState.swift', 'Foundation/URLSession/libcurl/libcurlHelpers.swift', - 'Foundation/URLSession/http/HTTPURLProtocol.swift', + 'Foundation/URLSession/http/HTTPURLProtocol.swift', 'Foundation/UserDefaults.swift', 'Foundation/NSUUID.swift', 'Foundation/NSValue.swift', @@ -501,6 +501,7 @@ 'TestFoundation/Resources/NSString-UTF16-LE-data.txt', 'TestFoundation/Resources/NSString-UTF32-BE-data.txt', 'TestFoundation/Resources/NSString-UTF32-LE-data.txt', + 'TestFoundation/Resources/NSString-ISO-8859-1-data.txt', 'TestFoundation/Resources/NSXMLDocumentTestData.xml', 'TestFoundation/Resources/PropertyList-1.0.dtd', 'TestFoundation/Resources/NSXMLDTDTestData.xml',