From 1547a1eee7c2bc0e59dc9af196c82759bd25b06a Mon Sep 17 00:00:00 2001 From: NikitaBumblebee Date: Wed, 14 Aug 2024 17:16:36 +0700 Subject: [PATCH 1/6] Add translation and transcription endpoints --- Package.resolved | 40 +---- .../Endpoint Configurations/Audio.swift | 142 ++++++++++++++++++ .../Models/Audio/Transcription.swift | 49 ++++++ .../Models/Audio/TranscriptionSegment.swift | 40 +++++ .../Models/Audio/Translation.swift | 35 +++++ .../Models/Enums/AudioResponseFormat.swift | 36 +++++ .../ExyteOpenAI/Models/Enums/STTModel.swift | 29 ++++ .../Models/Enums/SpeechVoice.swift | 36 +++++ .../ExyteOpenAI/Models/Enums/TTSModel.swift | 32 ++++ .../Models/Enums/TextResponseFormat.swift | 35 +++++ .../Models/Enums/TimestampGranularity.swift | 30 ++++ .../ExyteOpenAI/Networking/FormBodyPart.swift | 2 + Sources/ExyteOpenAI/Networking/Provider.swift | 20 +++ Sources/ExyteOpenAI/OpenAI+Audio.swift | 69 +++++++++ Sources/ExyteOpenAI/OpenAI.swift | 2 + .../Payloads/CreateSpeechPayload.swift | 49 ++++++ .../Payloads/CreateTranscriptionPayload.swift | 55 +++++++ .../Payloads/CreateTranslationPayload.swift | 49 ++++++ 18 files changed, 712 insertions(+), 38 deletions(-) create mode 100644 Sources/ExyteOpenAI/Endpoint Configurations/Audio.swift create mode 100644 Sources/ExyteOpenAI/Models/Audio/Transcription.swift create mode 100644 Sources/ExyteOpenAI/Models/Audio/TranscriptionSegment.swift create mode 100644 Sources/ExyteOpenAI/Models/Audio/Translation.swift create mode 100644 Sources/ExyteOpenAI/Models/Enums/AudioResponseFormat.swift create mode 100644 Sources/ExyteOpenAI/Models/Enums/STTModel.swift create mode 100644 Sources/ExyteOpenAI/Models/Enums/SpeechVoice.swift create mode 100644 Sources/ExyteOpenAI/Models/Enums/TTSModel.swift create mode 100644 Sources/ExyteOpenAI/Models/Enums/TextResponseFormat.swift create mode 100644 Sources/ExyteOpenAI/Models/Enums/TimestampGranularity.swift create mode 100644 Sources/ExyteOpenAI/OpenAI+Audio.swift create mode 100644 Sources/ExyteOpenAI/Payloads/CreateSpeechPayload.swift create mode 100644 Sources/ExyteOpenAI/Payloads/CreateTranscriptionPayload.swift create mode 100644 Sources/ExyteOpenAI/Payloads/CreateTranslationPayload.swift diff --git a/Package.resolved b/Package.resolved index aa1387f..3c989e4 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,48 +1,12 @@ { "pins" : [ - { - "identity" : "alamofire", - "kind" : "remoteSourceControl", - "location" : "https://github.com/Alamofire/Alamofire.git", - "state" : { - "revision" : "f455c2975872ccd2d9c81594c658af65716e9b9a", - "version" : "5.9.1" - } - }, { "identity" : "eventsourcehttpbody", "kind" : "remoteSourceControl", "location" : "https://github.com/exyte/EventSourceHttpBody.git", "state" : { - "revision" : "9b68240460bae09faa0c5a9afbbccf5e18890e0c", - "version" : "0.1.3" - } - }, - { - "identity" : "moya", - "kind" : "remoteSourceControl", - "location" : "https://github.com/Moya/Moya.git", - "state" : { - "revision" : "c263811c1f3dbf002be9bd83107f7cdc38992b26", - "version" : "15.0.3" - } - }, - { - "identity" : "reactiveswift", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ReactiveCocoa/ReactiveSwift.git", - "state" : { - "revision" : "c43bae3dac73fdd3cb906bd5a1914686ca71ed3c", - "version" : "6.7.0" - } - }, - { - "identity" : "rxswift", - "kind" : "remoteSourceControl", - "location" : "https://github.com/ReactiveX/RxSwift.git", - "state" : { - "revision" : "b06a8c8596e4c3e8e7788e08e720e3248563ce6a", - "version" : "6.7.1" + "revision" : "b000e62b83206dd6e00f2066cf08c96f232a4168", + "version" : "0.1.5" } } ], diff --git a/Sources/ExyteOpenAI/Endpoint Configurations/Audio.swift b/Sources/ExyteOpenAI/Endpoint Configurations/Audio.swift new file mode 100644 index 0000000..a429567 --- /dev/null +++ b/Sources/ExyteOpenAI/Endpoint Configurations/Audio.swift @@ -0,0 +1,142 @@ +// +// Audio.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation + +enum Audio { + case createTranscriptionPayload(payload: CreateTranscriptionPayload) + case createTranslationPayload(payload: CreateTranslationPayload) + case createSpeechPayload(payload: CreateSpeechPayload, destination: URL) +} + +extension Audio: EndpointConfiguration { + + var method: HTTPRequestMethod { + return .post + } + + var path: String { + switch self { + case .createTranscriptionPayload: + return "/audio/transcriptions" + case .createTranslationPayload: + return "/audio/translations" + case .createSpeechPayload: + return "/audio/speech" + } + } + + var task: RequestTask { + switch self { + case .createTranscriptionPayload(let payload): + var data: [FormBodyPart] = [ + FormBodyPart( + name: "file", + value: .fileURL(payload.file), + fileName: payload.file.lastPathComponent, + mimeType: payload.file.pathExtension + ), + FormBodyPart( + name: "model", + value: .plainText(payload.model.rawValue) + ), + FormBodyPart( + name: "response_format", + value: .plainText(payload.responseFormat?.rawValue ?? TextResponseFormat.json.rawValue) + ) + ] + if let temperature = payload.temperature { + data.append( + FormBodyPart( + name: "temperature", + value: .floatingPoint(Float(temperature)) + ) + ) + } + if let prompt = payload.prompt { + data.append( + FormBodyPart( + name: "prompt", + value: .plainText(prompt) + ) + ) + } + if let language = payload.language { + data.append( + FormBodyPart( + name: "language", + value: .plainText(language)) + ) + } + if let timestampGranularities = payload.timestampGranularities, + payload.responseFormat == .verboseJson { + let timestampGranularitiesData = withUnsafeBytes(of: timestampGranularities) { Data($0) } + data.append( + FormBodyPart( + name: "timestamp_granularities", + value: .data(timestampGranularitiesData) + ) + ) + } + return .uploadMultipart(data) + case .createTranslationPayload(let payload): + var data: [FormBodyPart] = [ + FormBodyPart( + name: "file", + value: .fileURL(payload.file), + fileName: payload.file.lastPathComponent, + mimeType: payload.file.pathExtension + ), + FormBodyPart( + name: "model", + value: .plainText(payload.model.rawValue) + ), + FormBodyPart( + name: "response_format", + value: .plainText(payload.responseFormat?.rawValue ?? TextResponseFormat.json.rawValue) + ) + ] + if let prompt = payload.prompt { + data.append( + FormBodyPart( + name: "prompt", + value: .plainText(prompt) + ) + ) + } + if let temperature = payload.temperature { + data.append( + FormBodyPart( + name: "temperature", + value: .floatingPoint(Float(temperature)) + ) + ) + } + return .uploadMultipart(data) + case .createSpeechPayload(let payload, let destination): + return .download(destination) + } + } + +} diff --git a/Sources/ExyteOpenAI/Models/Audio/Transcription.swift b/Sources/ExyteOpenAI/Models/Audio/Transcription.swift new file mode 100644 index 0000000..5d85efb --- /dev/null +++ b/Sources/ExyteOpenAI/Models/Audio/Transcription.swift @@ -0,0 +1,49 @@ +// +// Transcription.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation + +public struct Transcription: Codable { + + let text: String + let language: String? + let duration: Double? + let words: String? + let segments: [TranscriptionSegment]? + + public init( + text: String, + language: String? = nil, + duration: Double? = nil, + words: String? = nil, + segments: [TranscriptionSegment]? = nil + ) { + self.text = text + self.language = language + self.duration = duration + self.words = words + self.segments = segments + } + +} diff --git a/Sources/ExyteOpenAI/Models/Audio/TranscriptionSegment.swift b/Sources/ExyteOpenAI/Models/Audio/TranscriptionSegment.swift new file mode 100644 index 0000000..cb789cf --- /dev/null +++ b/Sources/ExyteOpenAI/Models/Audio/TranscriptionSegment.swift @@ -0,0 +1,40 @@ +// +// TranscriptionSegment.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation + +public struct TranscriptionSegment: Codable { + + let id: Int + let seek: Int + let start: Double + let end: Double + let text: String + let tokens: [Int] + let temperature: Double + let avgLogprob: Double? + let compressionRatio: Double? + let noSpeechProb: Double? + +} diff --git a/Sources/ExyteOpenAI/Models/Audio/Translation.swift b/Sources/ExyteOpenAI/Models/Audio/Translation.swift new file mode 100644 index 0000000..911fdf3 --- /dev/null +++ b/Sources/ExyteOpenAI/Models/Audio/Translation.swift @@ -0,0 +1,35 @@ +// +// Translation.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation + +public struct Translation: Codable { + + let text: String + + public init(text: String) { + self.text = text + } + +} diff --git a/Sources/ExyteOpenAI/Models/Enums/AudioResponseFormat.swift b/Sources/ExyteOpenAI/Models/Enums/AudioResponseFormat.swift new file mode 100644 index 0000000..a717d8b --- /dev/null +++ b/Sources/ExyteOpenAI/Models/Enums/AudioResponseFormat.swift @@ -0,0 +1,36 @@ +// +// AudioResponseFormat.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation + +public enum AudioResponseFormat: String, Codable { + + case mp3 = "mp3" + case opus = "opus" + case aac = "aac" + case flac = "flac" + case wav = "wav" + case pcm = "pcm" + +} diff --git a/Sources/ExyteOpenAI/Models/Enums/STTModel.swift b/Sources/ExyteOpenAI/Models/Enums/STTModel.swift new file mode 100644 index 0000000..7fe3491 --- /dev/null +++ b/Sources/ExyteOpenAI/Models/Enums/STTModel.swift @@ -0,0 +1,29 @@ +// +// STTModel.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation + +public enum STTModel: String, Codable { + case whisper1 = "whisper-1" +} diff --git a/Sources/ExyteOpenAI/Models/Enums/SpeechVoice.swift b/Sources/ExyteOpenAI/Models/Enums/SpeechVoice.swift new file mode 100644 index 0000000..185795c --- /dev/null +++ b/Sources/ExyteOpenAI/Models/Enums/SpeechVoice.swift @@ -0,0 +1,36 @@ +// +// SpeechVoice.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation + +public enum SpeechVoice: String, Codable { + + case alloy = "alloy" + case echo = "echo" + case fable = "fable" + case onyx = "onyx" + case nova = "nova" + case shimmer = "shimmer" + +} diff --git a/Sources/ExyteOpenAI/Models/Enums/TTSModel.swift b/Sources/ExyteOpenAI/Models/Enums/TTSModel.swift new file mode 100644 index 0000000..e557628 --- /dev/null +++ b/Sources/ExyteOpenAI/Models/Enums/TTSModel.swift @@ -0,0 +1,32 @@ +// +// TTSModel.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation + +public enum TTSModel: String, Codable { + + case tts1 = "tts-1" + case tts1Hd = "tts-1-hd" + +} diff --git a/Sources/ExyteOpenAI/Models/Enums/TextResponseFormat.swift b/Sources/ExyteOpenAI/Models/Enums/TextResponseFormat.swift new file mode 100644 index 0000000..84f8560 --- /dev/null +++ b/Sources/ExyteOpenAI/Models/Enums/TextResponseFormat.swift @@ -0,0 +1,35 @@ +// +// TextResponseFormat.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation + +public enum TextResponseFormat: String, Codable { + + case json = "json" + case text = "text" + case srt = "srt" + case verboseJson = "verbose_json" + case vtt = "vtt" + +} diff --git a/Sources/ExyteOpenAI/Models/Enums/TimestampGranularity.swift b/Sources/ExyteOpenAI/Models/Enums/TimestampGranularity.swift new file mode 100644 index 0000000..cfcab72 --- /dev/null +++ b/Sources/ExyteOpenAI/Models/Enums/TimestampGranularity.swift @@ -0,0 +1,30 @@ +// +// TimestampGranularity.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation + +public enum TimestampGranularity: String, Codable { + case word = "word" + case segment = "segment" +} diff --git a/Sources/ExyteOpenAI/Networking/FormBodyPart.swift b/Sources/ExyteOpenAI/Networking/FormBodyPart.swift index 75f630d..c86b928 100644 --- a/Sources/ExyteOpenAI/Networking/FormBodyPart.swift +++ b/Sources/ExyteOpenAI/Networking/FormBodyPart.swift @@ -30,6 +30,8 @@ public struct FormBodyPart { case fileURL(URL) case data(Data) case plainText(String) + case integer(Int) + case floatingPoint(Float) } public let name: String diff --git a/Sources/ExyteOpenAI/Networking/Provider.swift b/Sources/ExyteOpenAI/Networking/Provider.swift index 49401b9..28508c7 100644 --- a/Sources/ExyteOpenAI/Networking/Provider.swift +++ b/Sources/ExyteOpenAI/Networking/Provider.swift @@ -234,6 +234,26 @@ open class Provider { throw OpenAIError.multipartEncoding(encodingError: .dataEncodingFailed) } bodyData = textData + case .floatingPoint(let float): + mimeType = $0.mimeType ?? MimeType.unknownBinary + let encoder = JSONEncoder() + encoder.keyEncodingStrategy = .convertToSnakeCase + do { + let numberData = try encoder.encode(float) + bodyData = numberData + } catch { + throw OpenAIError.multipartEncoding(encodingError: .dataEncodingFailed) + } + case .integer(let integer): + mimeType = $0.mimeType ?? MimeType.unknownBinary + let encoder = JSONEncoder() + encoder.keyEncodingStrategy = .convertToSnakeCase + do { + let numberData = try encoder.encode(integer) + bodyData = numberData + } catch { + throw OpenAIError.multipartEncoding(encodingError: .dataEncodingFailed) + } } body.append("Content-Type: \(mimeType)\(crlf)\(crlf)".data(using: String.Encoding.utf8)!) body.append(bodyData) diff --git a/Sources/ExyteOpenAI/OpenAI+Audio.swift b/Sources/ExyteOpenAI/OpenAI+Audio.swift new file mode 100644 index 0000000..bc7c9e4 --- /dev/null +++ b/Sources/ExyteOpenAI/OpenAI+Audio.swift @@ -0,0 +1,69 @@ +// +// OpenAI+Audio.swift +// +// Copyright (c) 2024 Exyte +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// + +import Foundation +import Combine + +// MARK: - Combine + +public extension OpenAI { + + func createSpeechPayload(from payload: CreateSpeechPayload, destination: URL) -> AnyPublisher { + audioProvider.downloadTaskPublisher(for: .createSpeechPayload(payload: payload, destination: destination)) + .eraseToAnyPublisher() + } + + func createTranscriptionPayload(from payload: CreateTranscriptionPayload) -> AnyPublisher { + audioProvider.requestPublisher(for: .createTranscriptionPayload(payload: payload)) + .map { $0.data } + .map(to: Transcription.self, decoder: OpenAI.defaultDecoder) + .eraseToAnyPublisher() + } + + func createTranslationPayload(from payload: CreateTranslationPayload) -> AnyPublisher { + audioProvider.requestPublisher(for: .createTranslationPayload(payload: payload)) + .map { $0.data } + .map(to: Translation.self, decoder: OpenAI.defaultDecoder) + .eraseToAnyPublisher() + } + +} + +// MARK: - Concurrency + +public extension OpenAI { + + func createSpeechPayload(from payload: CreateSpeechPayload, destination: URL) async throws -> URL { + try await createSpeechPayload(from: payload, destination: destination).async() + } + + func createTranscriptionPayload(from payload: CreateTranscriptionPayload) async throws -> Transcription { + try await createTranscriptionPayload(from: payload).async() + } + + func createTranslationPayload(from payload: CreateTranslationPayload) async throws -> Translation { + try await createTranslationPayload(from: payload).async() + } + +} diff --git a/Sources/ExyteOpenAI/OpenAI.swift b/Sources/ExyteOpenAI/OpenAI.swift index 21814d6..a583995 100644 --- a/Sources/ExyteOpenAI/OpenAI.swift +++ b/Sources/ExyteOpenAI/OpenAI.swift @@ -47,6 +47,7 @@ public final class OpenAI { let runsProvider: Provider let filesProvider: Provider let chatsProvider: Provider + let audioProvider: Provider