From 4a74cf4f5cccb045e265906aef5b7126693b4a2d Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:31:51 +0900 Subject: [PATCH] Raw Int16 data for `toAVAudioPCMBuffer` (#497) --- Package.swift | 2 +- Package@swift-5.9.swift | 2 +- .../LiveKit/Convenience/AudioProcessing.swift | 28 ++++----- .../LiveKit/Extensions/AVAudioPCMBuffer.swift | 62 +++++++++++++++++++ 4 files changed, 77 insertions(+), 17 deletions(-) diff --git a/Package.swift b/Package.swift index bde2de4cc..c669b4f7e 100644 --- a/Package.swift +++ b/Package.swift @@ -18,7 +18,7 @@ let package = Package( ], dependencies: [ // LK-Prefixed Dynamic WebRTC XCFramework - .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.08"), + .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.09"), .package(url: "https://github.com/apple/swift-protobuf.git", from: "1.26.0"), .package(url: "https://github.com/apple/swift-log.git", from: "1.5.4"), // Only used for DocC generation diff --git a/Package@swift-5.9.swift b/Package@swift-5.9.swift index 3ab57def2..f2bc1c430 100644 --- a/Package@swift-5.9.swift +++ b/Package@swift-5.9.swift @@ -20,7 +20,7 @@ let package = Package( ], dependencies: [ // LK-Prefixed Dynamic WebRTC XCFramework - .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.08"), + .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.09"), .package(url: "https://github.com/apple/swift-protobuf.git", from: "1.26.0"), .package(url: "https://github.com/apple/swift-log.git", from: "1.5.4"), // Only used for DocC generation diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift index 3d33c3c94..283e205ae 100644 --- a/Sources/LiveKit/Convenience/AudioProcessing.swift +++ b/Sources/LiveKit/Convenience/AudioProcessing.swift @@ -25,33 +25,31 @@ public struct AudioLevel { } 
public extension LKAudioBuffer {
-    /// Convert to AVAudioPCMBuffer float buffer will be normalized to 32 bit.
+    /// Creates a deinterleaved Int16 `AVAudioPCMBuffer` holding this buffer's samples.
     @objc
     func toAVAudioPCMBuffer() -> AVAudioPCMBuffer? {
-        guard let audioFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32,
+        guard let audioFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                               sampleRate: Double(frames * 100),
                                               channels: AVAudioChannelCount(channels),
                                               interleaved: false),
             let pcmBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat,
                                              frameCapacity: AVAudioFrameCount(frames))
-        else {
-            return nil
-        }
+        else { return nil }
 
         pcmBuffer.frameLength = AVAudioFrameCount(frames)
 
-        guard let targetBufferPointer = pcmBuffer.floatChannelData else { return nil }
-
-        // Optimized version
-        var normalizationFactor: Float = 1.0 / 32768.0
+        guard let int16Channels = pcmBuffer.int16ChannelData else { return nil }
 
         for i in 0 ..< channels {
-            vDSP_vsmul(rawBuffer(forChannel: i),
-                       1,
-                       &normalizationFactor,
-                       targetBufferPointer[i],
-                       1,
-                       vDSP_Length(frames))
+            let channelSource = rawBuffer(forChannel: i)
+            let channelTarget = int16Channels[i]
+            // Source samples are Int16-range values held in 32-bit floats; repack them as true Int16.
+
+            for index in 0 ..< frames {
+                // Saturate to the Int16 range first, because `Int16(_:)` traps on out-of-range input.
+                let sample = min(Float(Int16.max), channelSource[index])
+                channelTarget[index] = Int16(max(Float(Int16.min), sample))
+            }
         }
 
         return pcmBuffer
diff --git a/Sources/LiveKit/Extensions/AVAudioPCMBuffer.swift b/Sources/LiveKit/Extensions/AVAudioPCMBuffer.swift
index 96eeab168..ce4ccab49 100644
--- a/Sources/LiveKit/Extensions/AVAudioPCMBuffer.swift
+++ b/Sources/LiveKit/Extensions/AVAudioPCMBuffer.swift
@@ -14,6 +14,7 @@
  * limitations under the License.
*/
 
+import Accelerate
 import AVFoundation
 
 public extension AVAudioPCMBuffer {
@@ -71,4 +72,65 @@ public extension AVAudioPCMBuffer {
 
         return convertedBuffer
     }
+
+    /// Converts this buffer to the given common sample format.
+    ///
+    /// Only Int16 to Float32 is currently supported; samples are scaled by 1/32768.
+    /// - Parameter commonFormat: The sample format to convert to.
+    /// - Returns: `self` when the buffer already uses `commonFormat`; otherwise a new deinterleaved
+    ///   Float32 buffer, or `nil` when the conversion is unsupported or a buffer cannot be created.
+    func convert(toCommonFormat commonFormat: AVAudioCommonFormat) -> AVAudioPCMBuffer? {
+        guard format.commonFormat != commonFormat else {
+            return self
+        }
+
+        // Only Int16 -> Float32 is implemented; reject every other pair
+        guard format.commonFormat == .pcmFormatInt16, commonFormat == .pcmFormatFloat32 else {
+            print("Unsupported conversion: only Int16 to Float32 is supported")
+            return nil
+        }
+
+        guard let outputFormat = AVAudioFormat(commonFormat: commonFormat,
+                                               sampleRate: format.sampleRate,
+                                               channels: format.channelCount,
+                                               interleaved: false)
+        else {
+            print("Failed to create output audio format")
+            return nil
+        }
+
+        guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat,
+                                                  frameCapacity: frameCapacity)
+        else {
+            print("Failed to create output PCM buffer")
+            return nil
+        }
+
+        outputBuffer.frameLength = frameLength
+
+        let channelCount = Int(format.channelCount)
+        let frameCount = Int(frameLength)
+
+        guard let int16Data = int16ChannelData else {
+            print("Source buffer doesn't contain Int16 data")
+            return nil
+        }
+
+        guard let floatData = outputBuffer.floatChannelData else {
+            print("Failed to get float channel data from output buffer")
+            return nil
+        }
+
+        // Int16 spans -32768...32767: dividing by 32768 keeps Int16.min at exactly -1.0
+        let scale = -Float(Int16.min)
+        var scalar = 1.0 / scale
+
+        for channel in 0 ..< channelCount {
+            // The source may be interleaved: a channel's samples are then `stride` elements apart
+            vDSP_vflt16(int16Data[channel], self.stride, floatData[channel], 1, vDSP_Length(frameCount))
+            vDSP_vsmul(floatData[channel], 1, &scalar, floatData[channel], 1, vDSP_Length(frameCount))
+        }
+
+        return outputBuffer
+    }
 }