From 7f238104a463666d98adb8e6c8f9ef3a397d6ad5 Mon Sep 17 00:00:00 2001 From: Honza Dvorsky Date: Tue, 14 Nov 2023 14:01:03 +0100 Subject: [PATCH 1/4] [Multipart] Introduce a bytes -> frames parser --- NOTICE.txt | 9 + .../Multipart/ByteUtilities.swift | 121 +++++++ .../MultipartBytesToFramesSequence.swift | 67 ++++ .../Multipart/MultipartInternalTypes.swift | 26 ++ .../Multipart/MultipartParser.swift | 342 ++++++++++++++++++ .../Test_MultipartBytesToFramesSequence.swift | 46 +++ .../Multipart/Test_MultipartParser.swift | 159 ++++++++ Tests/OpenAPIRuntimeTests/Test_Runtime.swift | 21 ++ 8 files changed, 791 insertions(+) create mode 100644 Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift create mode 100644 Sources/OpenAPIRuntime/Multipart/MultipartBytesToFramesSequence.swift create mode 100644 Sources/OpenAPIRuntime/Multipart/MultipartInternalTypes.swift create mode 100644 Sources/OpenAPIRuntime/Multipart/MultipartParser.swift create mode 100644 Tests/OpenAPIRuntimeTests/Multipart/Test_MultipartBytesToFramesSequence.swift create mode 100644 Tests/OpenAPIRuntimeTests/Multipart/Test_MultipartParser.swift diff --git a/NOTICE.txt b/NOTICE.txt index 7b160cf4..cd34ef6d 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -41,3 +41,12 @@ This product contains coder implementations inspired by swift-http-structured-he * https://www.apache.org/licenses/LICENSE-2.0 * HOMEPAGE: * https://github.com/apple/swift-http-structured-headers + +--- + +This product contains header character set validation logic inspired by swift-http-types. 
+ + * LICENSE (Apache License 2.0): + * https://www.apache.org/licenses/LICENSE-2.0 + * HOMEPAGE: + * https://github.com/apple/swift-http-types diff --git a/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift b/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift new file mode 100644 index 00000000..004a7100 --- /dev/null +++ b/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift @@ -0,0 +1,121 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the SwiftOpenAPIGenerator open source project +// +// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors +// Licensed under Apache License v2.0 +// +// See LICENSE.txt for license information +// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors +// +// SPDX-License-Identifier: Apache-2.0 +// +//===----------------------------------------------------------------------===// + +/// A namespace of utilities for byte parsers and serializers. +enum ASCII { + + /// The dash `-` character. + static let dash: UInt8 = 0x2d + + /// The carriage return `<CR>` character. + static let cr: UInt8 = 0x0d + + /// The line feed `<LF>` character. + static let lf: UInt8 = 0x0a + + /// The colon `:` character. + static let colon: UInt8 = 0x3a + + /// The space ` ` character. + static let space: UInt8 = 0x20 + + /// The horizontal tab `` character. + static let tab: UInt8 = 0x20 + + /// Two dash characters. + static let dashes: [UInt8] = [dash, dash] + + /// The `<CR>` character followed by the `<LF>` character. + static let crlf: [UInt8] = [cr, lf] + + /// The characters that represent optional whitespace (OWS). + static let optionalWhitespace: Set = [space, tab] + + /// Checks whether the provided byte can appear in a header field name. + /// - Parameter byte: The byte to check. + /// - Returns: A Boolean value; `true` if the byte is valid in a header field + /// name, `false` otherwise. 
+ static func isValidHeaderFieldNameByte(_ byte: UInt8) -> Bool { + // Copied from swift-http-types, because we create HTTPField.Name from these anyway later. + switch byte { + case 0x21, 0x23, 0x24, 0x25, 0x26, 0x27, 0x2A, 0x2B, 0x2D, 0x2E, 0x5E, 0x5F, 0x60, 0x7C, 0x7E: return true + case 0x30...0x39, 0x41...0x5A, 0x61...0x7A: // DIGIT, ALPHA + return true + default: return false + } + } +} + +/// A value returned by the `firstIndexAfterElements` method. +enum FirstIndexAfterElementsResult { + + /// The index after the end of the first match. + case index(C.Index) + + /// Matched all characters so far, but reached the end of self before matching all. + /// When more data is fetched, it's possible this will fully match. + case reachedEndOfSelf + + /// The character at the provided index does not match the expected character. + case mismatchedCharacter(C.Index) +} + +extension RandomAccessCollection where Element: Equatable { + + /// Verifies that the elements match the provided sequence and returns the first index past the match. + /// - Parameter expectedElements: The elements to match against. + /// - Returns: The result. + func firstIndexAfterElements(_ expectedElements: some Sequence) -> FirstIndexAfterElementsResult { + var index = startIndex + for expectedElement in expectedElements { + guard index < endIndex else { return .reachedEndOfSelf } + guard self[index] == expectedElement else { return .mismatchedCharacter(index) } + formIndex(after: &index) + } + return .index(index) + } +} + +/// A value returned by the `longestMatch` method. +enum LongestMatchResult { + + /// No match found at any position in self. + case noMatch + + /// Found a prefix match but reached the end of self. + /// Provides the index of the first matching character. + /// When more data is fetched, this might become a full match. + case prefixMatch(fromIndex: C.Index) + + /// Found a full match within self at the provided range. 
+ case fullMatch(Range) +} + +extension RandomAccessCollection where Element: Equatable { + + /// Returns the longest match found within the sequence. + /// - Parameter expectedElements: The elements to match in the sequence. + /// - Returns: The result. + func longestMatch(_ expectedElements: some Sequence) -> LongestMatchResult { + var index = startIndex + while index < endIndex { + switch self[index...].firstIndexAfterElements(expectedElements) { + case .index(let end): return .fullMatch(index..: Sendable +where Upstream.Element == ArraySlice { + + /// The source of byte chunks. + var upstream: Upstream + + /// The boundary string used to separate multipart parts. + var boundary: String +} + +extension MultipartBytesToFramesSequence: AsyncSequence { + + /// The type of element produced by this asynchronous sequence. + typealias Element = MultipartFrame + + /// Creates the asynchronous iterator that produces elements of this + /// asynchronous sequence. + /// + /// - Returns: An instance of the `AsyncIterator` type used to produce + /// elements of the asynchronous sequence. + func makeAsyncIterator() -> Iterator { + Iterator(upstream: upstream.makeAsyncIterator(), boundary: boundary) + } + + /// An iterator that pulls byte chunks from the upstream iterator and provides + /// parsed multipart frames. + struct Iterator: AsyncIteratorProtocol + where UpstreamIterator.Element == ArraySlice { + /// The iterator that provides the byte chunks. + private var upstream: UpstreamIterator + + /// The multipart frame parser. + private var parser: MultipartParser + /// Creates a new iterator from the provided source of byte chunks and a boundary string. + /// - Parameters: + /// - upstream: The iterator that provides the byte chunks. + /// - boundary: The boundary separating the multipart parts. 
+ init(upstream: UpstreamIterator, boundary: String) { + self.upstream = upstream + self.parser = .init(boundary: boundary) + } + + /// Asynchronously advances to the next element and returns it, or ends the + /// sequence if there is no next element. + /// + /// - Returns: The next element, if it exists, or `nil` to signal the end of + /// the sequence. + mutating func next() async throws -> MultipartFrame? { try await parser.next { try await upstream.next() } } + } +} diff --git a/Sources/OpenAPIRuntime/Multipart/MultipartInternalTypes.swift b/Sources/OpenAPIRuntime/Multipart/MultipartInternalTypes.swift new file mode 100644 index 00000000..49e57b9f --- /dev/null +++ b/Sources/OpenAPIRuntime/Multipart/MultipartInternalTypes.swift @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the SwiftOpenAPIGenerator open source project +// +// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors +// Licensed under Apache License v2.0 +// +// See LICENSE.txt for license information +// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors +// +// SPDX-License-Identifier: Apache-2.0 +// +//===----------------------------------------------------------------------===// + +import HTTPTypes + +/// A frame of a multipart message, either the whole header fields +/// section or a chunk of the body bytes. +enum MultipartFrame: Sendable, Hashable { + + /// The header fields section. + case headerFields(HTTPFields) + + /// One byte chunk of the part's body. 
+ case bodyChunk(ArraySlice) +} diff --git a/Sources/OpenAPIRuntime/Multipart/MultipartParser.swift b/Sources/OpenAPIRuntime/Multipart/MultipartParser.swift new file mode 100644 index 00000000..97c4b2fb --- /dev/null +++ b/Sources/OpenAPIRuntime/Multipart/MultipartParser.swift @@ -0,0 +1,342 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the SwiftOpenAPIGenerator open source project +// +// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors +// Licensed under Apache License v2.0 +// +// See LICENSE.txt for license information +// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors +// +// SPDX-License-Identifier: Apache-2.0 +// +//===----------------------------------------------------------------------===// + +import Foundation +import HTTPTypes + +/// A parser of multipart frames from bytes. +struct MultipartParser { + + /// The underlying state machine. + private var stateMachine: StateMachine + /// Creates a new parser. + /// - Parameter boundary: The boundary that separates parts. + init(boundary: String) { self.stateMachine = .init(boundary: boundary) } + /// Parses the next frame. + /// - Parameter fetchChunk: A closure that is called when the parser + /// needs more bytes to parse the next frame. + /// - Returns: A parsed frame, or nil at the end of the message. + /// - Throws: When a parsing error is encountered. + mutating func next(_ fetchChunk: () async throws -> ArraySlice?) async throws -> MultipartFrame? 
{ + while true { + switch stateMachine.readNextPart() { + case .none: continue + case .emitError(let actionError): throw ParserError(error: actionError) + case .returnNil: return nil + case .emitHeaderFields(let httpFields): return .headerFields(httpFields) + case .emitBodyChunk(let bodyChunk): return .bodyChunk(bodyChunk) + case .needsMore: + let chunk = try await fetchChunk() + switch stateMachine.receivedChunk(chunk) { + case .none: continue + case .returnNil: return nil + case .emitError(let actionError): throw ParserError(error: actionError) + } + } + } + } +} +extension MultipartParser { + /// An error thrown by the parser. + struct ParserError: Swift.Error, CustomStringConvertible, LocalizedError { + + /// The underlying error emitted by the state machine. + let error: MultipartParser.StateMachine.ActionError + + var description: String { + switch error { + case .invalidInitialBoundary: return "Invalid initial boundary." + case .invalidCRLFAtStartOfHeaderField: return "Invalid CRLF at the start of a header field." + case .missingColonAfterHeaderName: return "Missing colon after header field name." + case .invalidCharactersInHeaderFieldName: return "Invalid characters in a header field name." + case .incompleteMultipartMessage: return "Incomplete multipart message." + case .receivedChunkWhenFinished: return "Received a chunk after being finished." + } + } + + var errorDescription: String? { description } + } +} + +extension MultipartParser { + + /// A state machine representing the byte to multipart frame parser. + struct StateMachine { + /// The possible states of the state machine. + enum State: Hashable { + + /// Has not yet fully parsed the initial boundary. + case parsingInitialBoundary([UInt8]) + + /// A substate when parsing a part. + enum PartState: Hashable { + + /// Accumulating part headers. + case parsingHeaderFields(HTTPFields) + + /// Forwarding body chunks. + case parsingBody + } + + /// Is parsing a part. 
+ case parsingPart([UInt8], PartState) + + /// Finished, the terminal state. + case finished + + /// Helper state to avoid copy-on-write copies. + case mutating + } + + /// The current state of the state machine. + private(set) var state: State + + /// The bytes of the boundary. + private let boundary: ArraySlice + + /// The bytes of the boundary with the double dash prepended. + private let dashDashBoundary: ArraySlice + + /// The bytes of the boundary prepended by CRLF + double dash. + private let crlfDashDashBoundary: ArraySlice + /// Creates a new state machine. + /// - Parameter boundary: The boundary used to separate parts. + init(boundary: String) { + self.state = .parsingInitialBoundary([]) + self.boundary = ArraySlice(boundary.utf8) + self.dashDashBoundary = ASCII.dashes + self.boundary + self.crlfDashDashBoundary = ASCII.crlf + dashDashBoundary + } + /// An error returned by the state machine. + enum ActionError: Hashable { + + /// The initial boundary is malformed. + case invalidInitialBoundary + + /// The expected CRLF at the start of a header is missing. + case invalidCRLFAtStartOfHeaderField + + /// A header field name contains an invalid character. + case invalidCharactersInHeaderFieldName + + /// The header field name is not followed by a colon. + case missingColonAfterHeaderName + + /// More bytes were received after completion. + case receivedChunkWhenFinished + + /// Ran out of bytes without the message being complete. + case incompleteMultipartMessage + } + /// An action returned by the `readNextPart` method. + enum ReadNextPartAction: Hashable { + + /// No action, call `readNextPart` again. + case none + + /// Throw the provided error. + case emitError(ActionError) + + /// Return nil to the caller, no more frames. + case returnNil + + /// Emit a frame with the provided header fields. + case emitHeaderFields(HTTPFields) + + /// Emit a frame with the provided part body chunk. 
+ case emitBodyChunk(ArraySlice) + + /// Needs more bytes to parse the next frame. + case needsMore + } + /// Read the next part from the accumulated bytes. + /// - Returns: An action to perform. + mutating func readNextPart() -> ReadNextPartAction { + switch state { + case .mutating: preconditionFailure("Invalid state: \(state)") + case .finished: return .returnNil + case .parsingInitialBoundary(var buffer): + state = .mutating + // These first bytes must be the boundary already, otherwise this is a malformed multipart body. + switch buffer.firstIndexAfterElements(dashDashBoundary) { + case .index(let index): + buffer.removeSubrange(buffer.startIndex..?) -> ReceivedChunkAction { + switch state { + case .parsingInitialBoundary(var buffer): + guard let chunk else { return .emitError(.incompleteMultipartMessage) } + state = .mutating + buffer.append(contentsOf: chunk) + state = .parsingInitialBoundary(buffer) + return .none + case .parsingPart(var buffer, let part): + guard let chunk else { return .emitError(.incompleteMultipartMessage) } + state = .mutating + buffer.append(contentsOf: chunk) + state = .parsingPart(buffer, part) + return .none + case .finished: + guard chunk == nil else { return .emitError(.receivedChunkWhenFinished) } + return .returnNil + case .mutating: preconditionFailure("Invalid state: \(state)") + } + } + } +} diff --git a/Tests/OpenAPIRuntimeTests/Multipart/Test_MultipartBytesToFramesSequence.swift b/Tests/OpenAPIRuntimeTests/Multipart/Test_MultipartBytesToFramesSequence.swift new file mode 100644 index 00000000..7229e45b --- /dev/null +++ b/Tests/OpenAPIRuntimeTests/Multipart/Test_MultipartBytesToFramesSequence.swift @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the SwiftOpenAPIGenerator open source project +// +// Copyright (c) 2023 Apple Inc. 
and the SwiftOpenAPIGenerator project authors +// Licensed under Apache License v2.0 +// +// See LICENSE.txt for license information +// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors +// +// SPDX-License-Identifier: Apache-2.0 +// +//===----------------------------------------------------------------------===// +import XCTest +@_spi(Generated) @testable import OpenAPIRuntime +import Foundation + +final class Test_MultipartBytesToFramesSequence: Test_Runtime { + func test() async throws { + var chunk = chunkFromStringLines([ + "--__abcd__", #"Content-Disposition: form-data; name="name""#, "", "24", "--__abcd__", + #"Content-Disposition: form-data; name="info""#, "", "{}", "--__abcd__--", + ]) + let next: () async throws -> ArraySlice? = { + if let first = chunk.first { + let out: ArraySlice = [first] + chunk = chunk.dropFirst() + return out + } else { + return nil + } + } + let upstream = HTTPBody(AsyncThrowingStream(unfolding: next), length: .unknown, iterationBehavior: .single) + let sequence = MultipartBytesToFramesSequence(upstream: upstream, boundary: "__abcd__") + var frames: [MultipartFrame] = [] + for try await frame in sequence { frames.append(frame) } + XCTAssertEqual( + frames, + [ + .headerFields([.contentDisposition: #"form-data; name="name""#]), .bodyChunk(chunkFromString("2")), + .bodyChunk(chunkFromString("4")), .headerFields([.contentDisposition: #"form-data; name="info""#]), + .bodyChunk(chunkFromString("{")), .bodyChunk(chunkFromString("}")), + ] + ) + } +} diff --git a/Tests/OpenAPIRuntimeTests/Multipart/Test_MultipartParser.swift b/Tests/OpenAPIRuntimeTests/Multipart/Test_MultipartParser.swift new file mode 100644 index 00000000..5587868b --- /dev/null +++ b/Tests/OpenAPIRuntimeTests/Multipart/Test_MultipartParser.swift @@ -0,0 +1,159 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the SwiftOpenAPIGenerator open source project +// +// Copyright 
(c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors +// Licensed under Apache License v2.0 +// +// See LICENSE.txt for license information +// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors +// +// SPDX-License-Identifier: Apache-2.0 +// +//===----------------------------------------------------------------------===// +import XCTest +@_spi(Generated) @testable import OpenAPIRuntime +import Foundation + +final class Test_MultipartParser: Test_Runtime { + func test() async throws { + var chunk = chunkFromStringLines([ + "--__abcd__", #"Content-Disposition: form-data; name="name""#, "", "24", "--__abcd__", + #"Content-Disposition: form-data; name="info""#, "", "{}", "--__abcd__--", + ]) + var parser = MultipartParser(boundary: "__abcd__") + let next: () async throws -> ArraySlice? = { + if let first = chunk.first { + let out: ArraySlice = [first] + chunk = chunk.dropFirst() + return out + } else { + return nil + } + } + var frames: [MultipartFrame] = [] + while let frame = try await parser.next(next) { frames.append(frame) } + XCTAssertEqual( + frames, + [ + .headerFields([.contentDisposition: #"form-data; name="name""#]), .bodyChunk(chunkFromString("2")), + .bodyChunk(chunkFromString("4")), .headerFields([.contentDisposition: #"form-data; name="info""#]), + .bodyChunk(chunkFromString("{")), .bodyChunk(chunkFromString("}")), + ] + ) + } +} + +private func newStateMachine() -> MultipartParser.StateMachine { .init(boundary: "__abcd__") } + +final class Test_MultipartParserStateMachine: Test_Runtime { + + func testInvalidInitialBoundary() throws { + var stateMachine = newStateMachine() + XCTAssertEqual(stateMachine.receivedChunk(chunkFromString("invalid")), .none) + XCTAssertEqual(stateMachine.readNextPart(), .emitError(.invalidInitialBoundary)) + } + + func testHeaderFields() throws { + var stateMachine = newStateMachine() + XCTAssertEqual(stateMachine.receivedChunk(chunkFromString("--__ab")), .none) + 
XCTAssertEqual(stateMachine.readNextPart(), .needsMore) + XCTAssertEqual(stateMachine.state, .parsingInitialBoundary(bufferFromString("--__ab"))) + XCTAssertEqual(stateMachine.receivedChunk(chunkFromString("cd__", addCRLFs: 1)), .none) + XCTAssertEqual(stateMachine.readNextPart(), .none) + XCTAssertEqual(stateMachine.state, .parsingPart([0x0d, 0x0a], .parsingHeaderFields(.init()))) + XCTAssertEqual(stateMachine.receivedChunk(chunkFromString(#"Content-Disposi"#)), .none) + XCTAssertEqual( + stateMachine.state, + .parsingPart([0x0d, 0x0a] + bufferFromString(#"Content-Disposi"#), .parsingHeaderFields(.init())) + ) + XCTAssertEqual(stateMachine.readNextPart(), .needsMore) + XCTAssertEqual( + stateMachine.receivedChunk(chunkFromString(#"tion: form-data; name="name""#, addCRLFs: 2)), + .none + ) + XCTAssertEqual( + stateMachine.state, + .parsingPart( + [0x0d, 0x0a] + bufferFromString(#"Content-Disposition: form-data; name="name""#) + [ + 0x0d, 0x0a, 0x0d, 0x0a, + ], + .parsingHeaderFields(.init()) + ) + ) + // Reads the first header field. + XCTAssertEqual(stateMachine.readNextPart(), .none) + // Reads the end of the header fields section. + XCTAssertEqual( + stateMachine.readNextPart(), + .emitHeaderFields([.contentDisposition: #"form-data; name="name""#]) + ) + XCTAssertEqual(stateMachine.state, .parsingPart([], .parsingBody)) + } + + func testPartBody() throws { + var stateMachine = newStateMachine() + let chunk = chunkFromStringLines(["--__abcd__", #"Content-Disposition: form-data; name="name""#, "", "24"]) + XCTAssertEqual(stateMachine.receivedChunk(chunk), .none) + XCTAssertEqual(stateMachine.state, .parsingInitialBoundary(Array(chunk))) + // Parse the initial boundary and first header field. + for _ in 0..<2 { XCTAssertEqual(stateMachine.readNextPart(), .none) } + // Parse the end of header fields. 
+ XCTAssertEqual( + stateMachine.readNextPart(), + .emitHeaderFields([.contentDisposition: #"form-data; name="name""#]) + ) + XCTAssertEqual(stateMachine.state, .parsingPart(bufferFromString(#"24"#) + [0x0d, 0x0a], .parsingBody)) + XCTAssertEqual(stateMachine.receivedChunk(chunkFromString(".42")), .none) + XCTAssertEqual( + stateMachine.state, + .parsingPart(bufferFromString("24") + [0x0d, 0x0a] + bufferFromString(".42"), .parsingBody) + ) + XCTAssertEqual( + stateMachine.readNextPart(), + .emitBodyChunk(bufferFromString("24") + [0x0d, 0x0a] + bufferFromString(".42")) + ) + XCTAssertEqual(stateMachine.state, .parsingPart([], .parsingBody)) + XCTAssertEqual(stateMachine.receivedChunk([0x0d, 0x0a] + chunkFromString("--__ab")), .none) + XCTAssertEqual(stateMachine.state, .parsingPart([0x0d, 0x0a] + chunkFromString("--__ab"), .parsingBody)) + XCTAssertEqual(stateMachine.readNextPart(), .needsMore) + XCTAssertEqual(stateMachine.receivedChunk(chunkFromString("cd__--", addCRLFs: 1)), .none) + XCTAssertEqual( + stateMachine.state, + .parsingPart([0x0d, 0x0a] + chunkFromString("--__abcd__--", addCRLFs: 1), .parsingBody) + ) + // Parse the final boundary. + XCTAssertEqual(stateMachine.readNextPart(), .none) + // Parse the trailing two dashes. + XCTAssertEqual(stateMachine.readNextPart(), .returnNil) + } + + func testTwoParts() throws { + var stateMachine = newStateMachine() + let chunk = chunkFromStringLines([ + "--__abcd__", #"Content-Disposition: form-data; name="name""#, "", "24", "--__abcd__", + #"Content-Disposition: form-data; name="info""#, "", "{}", "--__abcd__--", + ]) + XCTAssertEqual(stateMachine.receivedChunk(chunk), .none) + // Parse the initial boundary and first header field. + for _ in 0..<2 { XCTAssertEqual(stateMachine.readNextPart(), .none) } + // Parse the end of header fields. + XCTAssertEqual( + stateMachine.readNextPart(), + .emitHeaderFields([.contentDisposition: #"form-data; name="name""#]) + ) + // Parse the first part's body. 
+ XCTAssertEqual(stateMachine.readNextPart(), .emitBodyChunk(chunkFromString("24"))) + // Parse the boundary. + XCTAssertEqual(stateMachine.readNextPart(), .none) + // Parse the end of header fields. + XCTAssertEqual( + stateMachine.readNextPart(), + .emitHeaderFields([.contentDisposition: #"form-data; name="info""#]) + ) + // Parse the second part's body. + XCTAssertEqual(stateMachine.readNextPart(), .emitBodyChunk(chunkFromString("{}"))) + // Parse the trailing two dashes. + XCTAssertEqual(stateMachine.readNextPart(), .returnNil) + } +} diff --git a/Tests/OpenAPIRuntimeTests/Test_Runtime.swift b/Tests/OpenAPIRuntimeTests/Test_Runtime.swift index 29666cc1..e2fe87c0 100644 --- a/Tests/OpenAPIRuntimeTests/Test_Runtime.swift +++ b/Tests/OpenAPIRuntimeTests/Test_Runtime.swift @@ -109,6 +109,27 @@ class Test_Runtime: XCTestCase { } } +/// Each line gets a CRLF added. Extra CRLFs are added after the last line's CRLF. +func chunkFromStringLines(_ strings: [String], addExtraCRLFs: Int = 0) -> ArraySlice { + var slice: ArraySlice = [] + for string in strings { slice.append(contentsOf: chunkFromString(string, addCRLFs: 1)) } + slice.append(contentsOf: chunkFromString("", addCRLFs: addExtraCRLFs)) + return slice +} + +func chunkFromString(_ string: String, addCRLFs: Int = 0) -> ArraySlice { + var slice = ArraySlice(string.utf8) + for _ in 0.. 
[UInt8] { Array(string.utf8) } + +extension ArraySlice { + mutating func append(_ string: String) { append(contentsOf: chunkFromString(string)) } + mutating func appendCRLF() { append(contentsOf: [0x0d, 0x0a]) } +} + struct TestError: Error, Equatable {} struct MockMiddleware: ClientMiddleware, ServerMiddleware { From 9352d13a886a9de00023eb474128b055847b0660 Mon Sep 17 00:00:00 2001 From: Honza Dvorsky Date: Tue, 14 Nov 2023 16:46:47 +0100 Subject: [PATCH 2/4] Update ByteUtilities.swift Co-authored-by: Si Beaumont --- Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift b/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift index 004a7100..e989a5c1 100644 --- a/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift +++ b/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift @@ -31,7 +31,7 @@ enum ASCII { static let space: UInt8 = 0x20 /// The horizontal tab `` character. - static let tab: UInt8 = 0x20 + static let tab: UInt8 = 0x09 /// Two dash characters. static let dashes: [UInt8] = [dash, dash] From 115734250be5359adc7e617e471e28f6a3cf1de8 Mon Sep 17 00:00:00 2001 From: Honza Dvorsky Date: Wed, 15 Nov 2023 08:51:29 +0100 Subject: [PATCH 3/4] PR feedback --- .../Multipart/ByteUtilities.swift | 16 +++++------ .../Multipart/MultipartParser.swift | 28 ++++++++++++------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift b/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift index 004a7100..0d22c760 100644 --- a/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift +++ b/Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift @@ -57,10 +57,10 @@ enum ASCII { } } -/// A value returned by the `firstIndexAfterElements` method. -enum FirstIndexAfterElementsResult { +/// A value returned by the `firstIndexAfterPrefix` method. 
+enum FirstIndexAfterPrefixResult { - /// The index after the end of the first match. + /// The index after the end of the prefix match. case index(C.Index) /// Matched all characters so far, but reached the end of self before matching all. @@ -68,7 +68,7 @@ enum FirstIndexAfterElementsResult { case reachedEndOfSelf /// The character at the provided index does not match the expected character. - case mismatchedCharacter(C.Index) + case unexpectedPrefix(C.Index) } extension RandomAccessCollection where Element: Equatable { @@ -76,11 +76,11 @@ extension RandomAccessCollection where Element: Equatable { /// Verifies that the elements match the provided sequence and returns the first index past the match. /// - Parameter expectedElements: The elements to match against. /// - Returns: The result. - func firstIndexAfterElements(_ expectedElements: some Sequence) -> FirstIndexAfterElementsResult { + func firstIndexAfterPrefix(_ expectedElements: some Sequence) -> FirstIndexAfterPrefixResult { var index = startIndex for expectedElement in expectedElements { guard index < endIndex else { return .reachedEndOfSelf } - guard self[index] == expectedElement else { return .mismatchedCharacter(index) } + guard self[index] == expectedElement else { return .unexpectedPrefix(index) } formIndex(after: &index) } return .index(index) @@ -110,10 +110,10 @@ extension RandomAccessCollection where Element: Equatable { func longestMatch(_ expectedElements: some Sequence) -> LongestMatchResult { var index = startIndex while index < endIndex { - switch self[index...].firstIndexAfterElements(expectedElements) { + switch self[index...].firstIndexAfterPrefix(expectedElements) { case .index(let end): return .fullMatch(index.. + /// Creates a new state machine. /// - Parameter boundary: The boundary used to separate parts. 
init(boundary: String) { @@ -118,6 +123,7 @@ extension MultipartParser { self.dashDashBoundary = ASCII.dashes + self.boundary self.crlfDashDashBoundary = ASCII.crlf + dashDashBoundary } + /// An error returned by the state machine. enum ActionError: Hashable { @@ -139,6 +145,7 @@ extension MultipartParser { /// Ran out of bytes without the message being complete. case incompleteMultipartMessage } + /// An action returned by the `readNextPart` method. enum ReadNextPartAction: Hashable { @@ -160,6 +167,7 @@ extension MultipartParser { /// Needs more bytes to parse the next frame. case needsMore } + /// Read the next part from the accumulated bytes. /// - Returns: An action to perform. mutating func readNextPart() -> ReadNextPartAction { @@ -169,7 +177,7 @@ extension MultipartParser { case .parsingInitialBoundary(var buffer): state = .mutating // These first bytes must be the boundary already, otherwise this is a malformed multipart body. - switch buffer.firstIndexAfterElements(dashDashBoundary) { + switch buffer.firstIndexAfterPrefix(dashDashBoundary) { case .index(let index): buffer.removeSubrange(buffer.startIndex.. 
Date: Wed, 15 Nov 2023 08:54:47 +0100 Subject: [PATCH 4/4] Fixes for 5.8 on Linux --- Sources/OpenAPIRuntime/Multipart/MultipartParser.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/OpenAPIRuntime/Multipart/MultipartParser.swift b/Sources/OpenAPIRuntime/Multipart/MultipartParser.swift index 7ed78750..87267a6c 100644 --- a/Sources/OpenAPIRuntime/Multipart/MultipartParser.swift +++ b/Sources/OpenAPIRuntime/Multipart/MultipartParser.swift @@ -206,7 +206,7 @@ extension MultipartParser { case .unexpectedPrefix: break } // Consume CRLF - let indexAfterFirstCRLF: [UInt8].Index + let indexAfterFirstCRLF: Array.Index switch buffer.firstIndexAfterPrefix(ASCII.crlf) { case .index(let index): indexAfterFirstCRLF = index case .reachedEndOfSelf: @@ -236,7 +236,7 @@ extension MultipartParser { state = .parsingPart(buffer, .parsingHeaderFields(headerFields)) return .needsMore } - let startHeaderValueWithWhitespaceIndex: [UInt8].Index + let startHeaderValueWithWhitespaceIndex: Array.Index // Check that what follows is a colon, otherwise this is a malformed header field line. // Source: RFC 7230, section 3.2.4. switch buffer[endHeaderNameIndex...].firstIndexAfterPrefix([ASCII.colon]) {