From b7eef422ca44973c1b6bac5d248230d8c89fdcfc Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:16:54 +0900 Subject: [PATCH 1/5] exp1 --- Package.swift | 2 +- Package@swift-5.9.swift | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Package.swift b/Package.swift index c669b4f7e..022154941 100644 --- a/Package.swift +++ b/Package.swift @@ -18,7 +18,7 @@ let package = Package( ], dependencies: [ // LK-Prefixed Dynamic WebRTC XCFramework - .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.09"), + .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.11-exp.1"), .package(url: "https://github.com/apple/swift-protobuf.git", from: "1.26.0"), .package(url: "https://github.com/apple/swift-log.git", from: "1.5.4"), // Only used for DocC generation diff --git a/Package@swift-5.9.swift b/Package@swift-5.9.swift index f2bc1c430..41f0182f3 100644 --- a/Package@swift-5.9.swift +++ b/Package@swift-5.9.swift @@ -20,7 +20,7 @@ let package = Package( ], dependencies: [ // LK-Prefixed Dynamic WebRTC XCFramework - .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.09"), + .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.11-exp.1"), .package(url: "https://github.com/apple/swift-protobuf.git", from: "1.26.0"), .package(url: "https://github.com/apple/swift-log.git", from: "1.5.4"), // Only used for DocC generation From 2c0b833d8fc1b8fe1dd0f17413388d4212489e48 Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Wed, 23 Oct 2024 23:26:47 +0900 Subject: [PATCH 2/5] exp2 --- Package.swift | 2 +- Package@swift-5.9.swift | 2 +- Sources/LiveKit/Track/AudioManager.swift | 114 +++++++---------------- 3 files changed, 36 insertions(+), 82 deletions(-) diff --git a/Package.swift b/Package.swift index 022154941..973b42f3a 100644 --- 
a/Package.swift +++ b/Package.swift @@ -18,7 +18,7 @@ let package = Package( ], dependencies: [ // LK-Prefixed Dynamic WebRTC XCFramework - .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.11-exp.1"), + .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.11-exp.2"), .package(url: "https://github.com/apple/swift-protobuf.git", from: "1.26.0"), .package(url: "https://github.com/apple/swift-log.git", from: "1.5.4"), // Only used for DocC generation diff --git a/Package@swift-5.9.swift b/Package@swift-5.9.swift index 41f0182f3..d3249c5c3 100644 --- a/Package@swift-5.9.swift +++ b/Package@swift-5.9.swift @@ -20,7 +20,7 @@ let package = Package( ], dependencies: [ // LK-Prefixed Dynamic WebRTC XCFramework - .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.11-exp.1"), + .package(url: "https://github.com/livekit/webrtc-xcframework.git", exact: "125.6422.11-exp.2"), .package(url: "https://github.com/apple/swift-protobuf.git", from: "1.26.0"), .package(url: "https://github.com/apple/swift-log.git", from: "1.5.4"), // Only used for DocC generation diff --git a/Sources/LiveKit/Track/AudioManager.swift b/Sources/LiveKit/Track/AudioManager.swift index 29d1b2815..957767fb5 100644 --- a/Sources/LiveKit/Track/AudioManager.swift +++ b/Sources/LiveKit/Track/AudioManager.swift @@ -59,6 +59,21 @@ public class LKAudioBuffer: NSObject { // Audio Session Configuration related public class AudioManager: Loggable { + class AudioSessionDelegateObserver: NSObject, Loggable, LKRTCAudioSessionDelegate { + func audioSessionDidStartPlayOrRecord(_: LKRTCAudioSession) { + log() + } + + func audioSession(_: LKRTCAudioSession, audioUnitWillInitialize isRecord: Bool) { + log("isRecord: \(isRecord)") + LKRTCAudioSessionConfiguration.webRTC().category = AVAudioSession.Category.playAndRecord.rawValue + } + + func audioSessionDidStopPlayOrRecord(_: LKRTCAudioSession) { + log() + } + } + // MARK: - Public 
#if compiler(>=6.0) @@ -68,6 +83,7 @@ public class AudioManager: Loggable { #endif public typealias DeviceUpdateFunc = (_ audioManager: AudioManager) -> Void + public typealias OnSpeechUpdate = (_ audioManager: AudioManager, _ event: Int) -> Void #if os(iOS) || os(visionOS) || os(tvOS) @@ -208,13 +224,22 @@ public class AudioManager: Loggable { public var onDeviceUpdate: DeviceUpdateFunc? { didSet { - RTC.audioDeviceModule.setDevicesUpdatedHandler { [weak self] in + RTC.audioDeviceModule.setDevicesDidUpdateCallback { [weak self] in guard let self else { return } self.onDeviceUpdate?(self) } } } + public var onSpeechEvent: OnSpeechUpdate? { + didSet { + RTC.audioDeviceModule.setSpeechActivityCallback { [weak self] event in + guard let self else { return } + self.onSpeechEvent?(self, event.rawValue) + } + } + } + // MARK: - Internal enum `Type` { @@ -226,100 +251,29 @@ public class AudioManager: Loggable { // MARK: - Private - private let _configureRunner = SerialRunnerActor() - - #if os(iOS) || os(visionOS) || os(tvOS) - private func _asyncConfigure(newState: State, oldState: State) async throws { - try await _configureRunner.run { - self.log("\(oldState) -> \(newState)") - let configureFunc = newState.customConfigureFunc ?? 
self.defaultConfigureAudioSessionFunc - configureFunc(newState, oldState) - } - } - #endif - func trackDidStart(_ type: Type) async throws { - let (newState, oldState) = state.mutate { state in - let oldState = state + state.mutate { state in if type == .local { state.localTracksCount += 1 } if type == .remote { state.remoteTracksCount += 1 } - return (state, oldState) } - #if os(iOS) || os(visionOS) || os(tvOS) - try await _asyncConfigure(newState: newState, oldState: oldState) - #endif } func trackDidStop(_ type: Type) async throws { - let (newState, oldState) = state.mutate { state in - let oldState = state + state.mutate { state in if type == .local { state.localTracksCount = max(state.localTracksCount - 1, 0) } if type == .remote { state.remoteTracksCount = max(state.remoteTracksCount - 1, 0) } - return (state, oldState) } - #if os(iOS) || os(visionOS) || os(tvOS) - try await _asyncConfigure(newState: newState, oldState: oldState) - #endif } - #if os(iOS) || os(visionOS) || os(tvOS) - /// The default implementation when audio session configuration is requested by the SDK. - /// Configure the `RTCAudioSession` of `WebRTC` framework. - /// - /// > Note: It is recommended to use `RTCAudioSessionConfiguration.webRTC()` to obtain an instance of `RTCAudioSessionConfiguration` instead of instantiating directly. 
- /// - /// - Parameters: - /// - configuration: A configured RTCAudioSessionConfiguration - /// - setActive: passing true/false will call `AVAudioSession.setActive` internally - public func defaultConfigureAudioSessionFunc(newState: State, oldState: State) { - // Lazily computed config - let computeConfiguration: (() -> AudioSessionConfiguration) = { - switch newState.trackState { - case .none: - // Use .soloAmbient configuration - return .soloAmbient - case .remoteOnly where newState.isSpeakerOutputPreferred: - // Use .playback configuration with spoken audio - return .playback - default: - // Use .playAndRecord configuration - return newState.isSpeakerOutputPreferred ? .playAndRecordSpeaker : .playAndRecordReceiver - } - } - - let configuration = newState.sessionConfiguration ?? computeConfiguration() - - var setActive: Bool? - if newState.trackState != .none, oldState.trackState == .none { - // activate audio session when there is any local/remote audio track - setActive = true - } else if newState.trackState == .none, oldState.trackState != .none { - // deactivate audio session when there are no more local/remote audio tracks - setActive = false - } - - let session = LKRTCAudioSession.sharedInstance() - // Check if needs setConfiguration - guard configuration != session.toAudioSessionConfiguration() else { - log("Skipping configure audio session, no changes") - return - } + let _audioSessionDelegateObserver = AudioSessionDelegateObserver() - session.lockForConfiguration() - defer { session.unlockForConfiguration() } + init() { + LKRTCAudioSession.sharedInstance().add(_audioSessionDelegateObserver) + } - do { - log("Configuring audio session: \(String(describing: configuration))") - if let setActive { - try session.setConfiguration(configuration.toRTCType(), active: setActive) - } else { - try session.setConfiguration(configuration.toRTCType()) - } - } catch { - log("Failed to configure audio session with error: \(error)", .error) - } + deinit { + 
LKRTCAudioSession.sharedInstance().remove(_audioSessionDelegateObserver) } - #endif } public extension AudioManager { From 6c474f9eda7b82c0ff983b154aa36d7a66d19b59 Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Thu, 7 Nov 2024 05:18:50 +0900 Subject: [PATCH 3/5] Squashed commit of the following: commit 2e05b21bd1265dd88bcef6030cbb6e3755653c58 Author: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Mon Oct 28 22:21:56 2024 +0900 v2.0.17 commit 86a86340ea4f95169359bdedc603a3cdf1a46209 Author: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Tue Oct 29 15:02:17 2024 +0900 Move parts of visualization code into components commit 027fea7b7f1b2ac962f931b933c60118bfedee46 Author: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Tue Oct 29 02:25:54 2024 +0900 rtc 125.6422.11 commit 26d7725d33667e9fee6a46bc30f03027c1c1ce8a Author: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Mon Oct 28 00:43:37 2024 +0900 Audio visualization helpers (#474) commit 4a73d3924508b01993df9f6dc7e879d222cd30a6 Author: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Sun Oct 27 22:59:11 2024 +0900 swiftformat commit 04bf8276e1db773e49c03b986a2416c7777032b6 Author: Bogdan Vatamanu Date: Sun Oct 27 15:57:21 2024 +0200 Remove leaked renderers appeared on camera switch (#507) Hello @hiroshihorie! I found that inside **VideoView** on the camera switch the previous **primaryRenderer** gets leaked because it's not removed from the view's hierarchy when the transition ends. See: ![image](https://github.com/user-attachments/assets/b57769be-57c6-4a66-b7b6-d55aeb6bbedd) Co-authored-by: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> commit 89a0fa2e583bd369589a43ec53d0781a3aa79474 Author: Bogdan Vatamanu Date: Sun Oct 27 13:14:05 2024 +0200 Fix boundaries setting for "SampleBufferVideoRenderer" re-layouting (#506) Hello @hiroshihorie! 
I'm experiencing issues with the camera's front-to-back switch feature, which occasionally causes the video frame to be incorrectly oriented in landscape mode when it should be in portrait mode. To resolve this issue, I've found that it's essential to update the frame of the **AVSampleBufferDisplayLayer** after applying the transformation matrix. This is because the transformation matrix also rotates the frame, which can result in incorrect orientation if the frame is not updated accordingly. commit 5869cea34a061a63c508c6a1e0da6ae3dab48220 Author: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Thu Oct 24 18:38:51 2024 +0900 Agent state property (#495) commit 62a70ea80e4e98bb1cf1379d6c7322d66a4884dc Author: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Sun Oct 20 00:22:27 2024 +0900 Temporarily remove strict concurrency warnings commit 1c9a6813a5ed72003d1ee7b4f563507fd471e360 Author: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Sun Oct 20 00:06:47 2024 +0900 WebRTC 125.6422.10 (#502) --- LiveKitClient.podspec | 6 +- Package@swift-5.9.swift | 1 - README.md | 2 +- .../LiveKit/Convenience/AudioProcessing.swift | 64 ++++++++ Sources/LiveKit/LiveKit.swift | 2 +- Sources/LiveKit/Participant/Participant.swift | 8 + .../AudioCustomProcessingDelegate.swift | 34 ++-- Sources/LiveKit/Protocols/AudioRenderer.swift | 25 +-- Sources/LiveKit/Support/FFTProcessor.swift | 147 ++++++++++++++++++ Sources/LiveKit/Support/RingBuffer.swift | 51 ++++++ Sources/LiveKit/Track/AudioManager.swift | 12 +- .../Track/Remote/RemoteAudioTrack.swift | 26 +++- Sources/LiveKit/Types/AgentState.swift | 54 +++++++ .../Views/SampleBufferVideoRenderer.swift | 2 +- Sources/LiveKit/Views/VideoView.swift | 9 +- 15 files changed, 384 insertions(+), 59 deletions(-) create mode 100755 Sources/LiveKit/Support/FFTProcessor.swift create mode 100644 Sources/LiveKit/Support/RingBuffer.swift create mode 100644 Sources/LiveKit/Types/AgentState.swift diff 
--git a/LiveKitClient.podspec b/LiveKitClient.podspec index 26207fbf5..2491a2f46 100644 --- a/LiveKitClient.podspec +++ b/LiveKitClient.podspec @@ -1,6 +1,6 @@ Pod::Spec.new do |spec| spec.name = "LiveKitClient" - spec.version = "2.0.16" + spec.version = "2.0.17" spec.summary = "LiveKit Swift Client SDK. Easily build live audio or video experiences into your mobile app, game or website." spec.homepage = "https://github.com/livekit/client-sdk-swift" spec.license = {:type => "Apache 2.0", :file => "LICENSE"} @@ -10,11 +10,11 @@ Pod::Spec.new do |spec| spec.osx.deployment_target = "10.15" spec.swift_versions = ["5.7"] - spec.source = {:git => "https://github.com/livekit/client-sdk-swift.git", :tag => "2.0.16"} + spec.source = {:git => "https://github.com/livekit/client-sdk-swift.git", :tag => "2.0.17"} spec.source_files = "Sources/**/*" - spec.dependency("LiveKitWebRTC", "= 125.6422.07") + spec.dependency("LiveKitWebRTC", "= 125.6422.11") spec.dependency("SwiftProtobuf") spec.dependency("Logging") diff --git a/Package@swift-5.9.swift b/Package@swift-5.9.swift index d3249c5c3..52dd657ca 100644 --- a/Package@swift-5.9.swift +++ b/Package@swift-5.9.swift @@ -41,7 +41,6 @@ let package = Package( ], swiftSettings: [ .enableExperimentalFeature("AccessLevelOnImport"), - .enableExperimentalFeature("StrictConcurrency"), ] ), .testTarget( diff --git a/README.md b/README.md index 141160eec..a2832b8ee 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ Add the dependency and also to your target let package = Package( ... 
dependencies: [ - .package(name: "LiveKit", url: "https://github.com/livekit/client-sdk-swift.git", .upToNextMajor("2.0.16")), + .package(name: "LiveKit", url: "https://github.com/livekit/client-sdk-swift.git", .upToNextMajor("2.0.17")), ], targets: [ .target( diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift index 283e205ae..e658bebf2 100644 --- a/Sources/LiveKit/Convenience/AudioProcessing.swift +++ b/Sources/LiveKit/Convenience/AudioProcessing.swift @@ -96,3 +96,67 @@ public extension Sequence where Iterator.Element == AudioLevel { peak: totalSums.peakSum / Float(count)) } } + +public class AudioVisualizeProcessor { + static let bufferSize = 1024 + + // MARK: - Public + + public let minFrequency: Float + public let maxFrequency: Float + public let minDB: Float + public let maxDB: Float + public let bandsCount: Int + + private var bands: [Float]? + + // MARK: - Private + + private let ringBuffer = RingBuffer(size: AudioVisualizeProcessor.bufferSize) + private let processor: FFTProcessor + + public init(minFrequency: Float = 10, + maxFrequency: Float = 8000, + minDB: Float = -32.0, + maxDB: Float = 32.0, + bandsCount: Int = 100) + { + self.minFrequency = minFrequency + self.maxFrequency = maxFrequency + self.minDB = minDB + self.maxDB = maxDB + self.bandsCount = bandsCount + + processor = FFTProcessor(bufferSize: Self.bufferSize) + bands = [Float](repeating: 0.0, count: bandsCount) + } + + public func process(pcmBuffer: AVAudioPCMBuffer) -> [Float]? { + guard let pcmBuffer = pcmBuffer.convert(toCommonFormat: .pcmFormatFloat32) else { return nil } + guard let floatChannelData = pcmBuffer.floatChannelData else { return nil } + + // Get the float array. 
+ let floats = Array(UnsafeBufferPointer(start: floatChannelData[0], count: Int(pcmBuffer.frameLength))) + ringBuffer.write(floats) + + // Get full-size buffer if available, otherwise return + guard let buffer = ringBuffer.read() else { return nil } + + // Process FFT and compute frequency bands + let fftRes = processor.process(buffer: buffer) + let bands = fftRes.computeBands( + minFrequency: minFrequency, + maxFrequency: maxFrequency, + bandsCount: bandsCount, + sampleRate: Float(pcmBuffer.format.sampleRate) + ) + + let headroom = maxDB - minDB + + // Normalize magnitudes (already in decibels) + return bands.magnitudes.map { magnitude in + let adjustedMagnitude = max(0, magnitude + abs(minDB)) + return min(1.0, adjustedMagnitude / headroom) + } + } +} diff --git a/Sources/LiveKit/LiveKit.swift b/Sources/LiveKit/LiveKit.swift index 1c19f4f84..f34116779 100644 --- a/Sources/LiveKit/LiveKit.swift +++ b/Sources/LiveKit/LiveKit.swift @@ -39,7 +39,7 @@ let logger = Logger(label: "LiveKitSDK") @objc public class LiveKitSDK: NSObject { @objc(sdkVersion) - public static let version = "2.0.16" + public static let version = "2.0.17" @objc public static func setLoggerStandardOutput() { diff --git a/Sources/LiveKit/Participant/Participant.swift b/Sources/LiveKit/Participant/Participant.swift index 58e1a52a2..de2da3e00 100644 --- a/Sources/LiveKit/Participant/Participant.swift +++ b/Sources/LiveKit/Participant/Participant.swift @@ -75,6 +75,14 @@ public class Participant: NSObject, ObservableObject, Loggable { _state.trackPublications.values.filter { $0.kind == .video } } + @objc + public var agentState: AgentState { + guard case .agent = kind else { return .unknown } + guard let attrString = _state.attributes[agentStateAttributeKey] else { return .connecting } + guard let state = AgentState.fromString(attrString) else { return .connecting } + return state + } + var info: Livekit_ParticipantInfo? 
// Reference to the Room this Participant belongs to diff --git a/Sources/LiveKit/Protocols/AudioCustomProcessingDelegate.swift b/Sources/LiveKit/Protocols/AudioCustomProcessingDelegate.swift index d085b3f83..0b84aa0f0 100644 --- a/Sources/LiveKit/Protocols/AudioCustomProcessingDelegate.swift +++ b/Sources/LiveKit/Protocols/AudioCustomProcessingDelegate.swift @@ -39,31 +39,29 @@ public protocol AudioCustomProcessingDelegate { func audioProcessingRelease() } -class AudioCustomProcessingDelegateAdapter: NSObject, LKRTCAudioCustomProcessingDelegate { +class AudioCustomProcessingDelegateAdapter: MulticastDelegate, LKRTCAudioCustomProcessingDelegate { // MARK: - Public public var target: AudioCustomProcessingDelegate? { _state.target } - // MARK: - Internal - - let audioRenderers = MulticastDelegate(label: "AudioRenderer") - // MARK: - Private private struct State { weak var target: AudioCustomProcessingDelegate? } - private var _state: StateSync - - init(target: AudioCustomProcessingDelegate? = nil) { - _state = StateSync(State(target: target)) - } + private var _state = StateSync(State()) public func set(target: AudioCustomProcessingDelegate?) { _state.mutate { $0.target = target } } + init() { + super.init(label: "AudioCustomProcessingDelegateAdapter") + } + + // MARK: - AudioCustomProcessingDelegate + func audioProcessingInitialize(sampleRate sampleRateHz: Int, channels: Int) { target?.audioProcessingInitialize(sampleRate: sampleRateHz, channels: channels) } @@ -73,24 +71,12 @@ class AudioCustomProcessingDelegateAdapter: NSObject, LKRTCAudioCustomProcessing target?.audioProcessingProcess(audioBuffer: lkAudioBuffer) // Convert to pcmBuffer and notify only if an audioRenderer is added. 
- if audioRenderers.isDelegatesNotEmpty, let pcmBuffer = lkAudioBuffer.toAVAudioPCMBuffer() { - audioRenderers.notify { $0.render(pcmBuffer: pcmBuffer) } + if isDelegatesNotEmpty, let pcmBuffer = lkAudioBuffer.toAVAudioPCMBuffer() { + notify { $0.render(pcmBuffer: pcmBuffer) } } } func audioProcessingRelease() { target?.audioProcessingRelease() } - - // Proxy the equality operators - - override func isEqual(_ object: Any?) -> Bool { - guard let other = object as? AudioCustomProcessingDelegateAdapter else { return false } - return target === other.target - } - - override var hash: Int { - guard let target else { return 0 } - return ObjectIdentifier(target).hashValue - } } diff --git a/Sources/LiveKit/Protocols/AudioRenderer.swift b/Sources/LiveKit/Protocols/AudioRenderer.swift index 535e606aa..7675a00da 100644 --- a/Sources/LiveKit/Protocols/AudioRenderer.swift +++ b/Sources/LiveKit/Protocols/AudioRenderer.swift @@ -29,26 +29,17 @@ public protocol AudioRenderer { func render(pcmBuffer: AVAudioPCMBuffer) } -class AudioRendererAdapter: NSObject, LKRTCAudioRenderer { - private weak var target: AudioRenderer? - private let targetHashValue: Int +class AudioRendererAdapter: MulticastDelegate, LKRTCAudioRenderer { + // + typealias Delegate = AudioRenderer - init(target: AudioRenderer) { - self.target = target - targetHashValue = ObjectIdentifier(target).hashValue + init() { + super.init(label: "AudioRendererAdapter") } - func render(pcmBuffer: AVAudioPCMBuffer) { - target?.render(pcmBuffer: pcmBuffer) - } + // MARK: - LKRTCAudioRenderer - // Proxy the equality operators - override func isEqual(_ object: Any?) -> Bool { - guard let other = object as? 
AudioRendererAdapter else { return false } - return targetHashValue == other.targetHashValue - } - - override var hash: Int { - targetHashValue + func render(pcmBuffer: AVAudioPCMBuffer) { + notify { $0.render(pcmBuffer: pcmBuffer) } } } diff --git a/Sources/LiveKit/Support/FFTProcessor.swift b/Sources/LiveKit/Support/FFTProcessor.swift new file mode 100755 index 000000000..83ab75cc7 --- /dev/null +++ b/Sources/LiveKit/Support/FFTProcessor.swift @@ -0,0 +1,147 @@ +/* + * Copyright 2024 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import Accelerate +import AVFoundation + +extension Float { + var nyquistFrequency: Float { self / 2.0 } +} + +public struct FFTComputeBandsResult { + let count: Int + let magnitudes: [Float] + let frequencies: [Float] +} + +public class FFTResult { + public let magnitudes: [Float] + + init(magnitudes: [Float]) { + self.magnitudes = magnitudes + } + + func computeBands(minFrequency: Float, maxFrequency: Float, bandsCount: Int, sampleRate: Float) -> FFTComputeBandsResult { + let actualMaxFrequency = min(sampleRate.nyquistFrequency, maxFrequency) + var bandMagnitudes = [Float](repeating: 0.0, count: bandsCount) + var bandFrequencies = [Float](repeating: 0.0, count: bandsCount) + + let magLowerRange = _magnitudeIndex(for: minFrequency, sampleRate: sampleRate) + let magUpperRange = _magnitudeIndex(for: actualMaxFrequency, sampleRate: sampleRate) + let ratio = Float(magUpperRange - magLowerRange) / Float(bandsCount) + + return magnitudes.withUnsafeBufferPointer { magnitudesPtr in + for i in 0 ..< bandsCount { + let magsStartIdx = vDSP_Length(floorf(Float(i) * ratio)) + magLowerRange + let magsEndIdx = vDSP_Length(floorf(Float(i + 1) * ratio)) + magLowerRange + + let count = magsEndIdx - magsStartIdx + if count > 0 { + var sum: Float = 0 + vDSP_sve(magnitudesPtr.baseAddress! 
+ Int(magsStartIdx), 1, &sum, count) + bandMagnitudes[i] = sum / Float(count) + } else { + bandMagnitudes[i] = magnitudes[Int(magsStartIdx)] + } + + // Compute average frequency + let bandwidth = sampleRate.nyquistFrequency / Float(magnitudes.count) + bandFrequencies[i] = (bandwidth * Float(magsStartIdx) + bandwidth * Float(magsEndIdx)) / 2 + } + + return FFTComputeBandsResult(count: bandsCount, magnitudes: bandMagnitudes, frequencies: bandFrequencies) + } + } + + @inline(__always) private func _magnitudeIndex(for frequency: Float, sampleRate: Float) -> vDSP_Length { + vDSP_Length(Float(magnitudes.count) * frequency / sampleRate.nyquistFrequency) + } +} + +class FFTProcessor { + public enum WindowType { + case none + case hanning + case hamming + } + + public let bufferSize: vDSP_Length + public let windowType: WindowType + + private let bufferHalfSize: vDSP_Length + private let bufferLog2Size: vDSP_Length + private var window: [Float] = [] + private var fftSetup: FFTSetup + private var realBuffer: [Float] + private var imaginaryBuffer: [Float] + private var zeroDBReference: Float = 1.0 + + init(bufferSize: Int, windowType: WindowType = .hanning) { + self.bufferSize = vDSP_Length(bufferSize) + self.windowType = windowType + + bufferHalfSize = vDSP_Length(bufferSize / 2) + bufferLog2Size = vDSP_Length(log2f(Float(bufferSize))) + + realBuffer = [Float](repeating: 0.0, count: Int(bufferHalfSize)) + imaginaryBuffer = [Float](repeating: 0.0, count: Int(bufferHalfSize)) + window = [Float](repeating: 1.0, count: Int(bufferSize)) + + fftSetup = vDSP_create_fftsetup(UInt(bufferLog2Size), FFTRadix(FFT_RADIX2))! 
+ + switch windowType { + case .none: + break + case .hanning: + vDSP_hann_window(&window, vDSP_Length(bufferSize), Int32(vDSP_HANN_NORM)) + case .hamming: + vDSP_hamm_window(&window, vDSP_Length(bufferSize), 0) + } + } + + deinit { + vDSP_destroy_fftsetup(fftSetup) + } + + func process(buffer: [Float]) -> FFTResult { + precondition(buffer.count == Int(bufferSize), "Input buffer size mismatch.") + + var windowedBuffer = [Float](repeating: 0.0, count: Int(bufferSize)) + + vDSP_vmul(buffer, 1, window, 1, &windowedBuffer, 1, bufferSize) + + return realBuffer.withUnsafeMutableBufferPointer { realPtr in + imaginaryBuffer.withUnsafeMutableBufferPointer { imagPtr in + var complexBuffer = DSPSplitComplex(realp: realPtr.baseAddress!, imagp: imagPtr.baseAddress!) + + windowedBuffer.withUnsafeBufferPointer { bufferPtr in + let complexPtr = UnsafeRawPointer(bufferPtr.baseAddress!).bindMemory(to: DSPComplex.self, capacity: Int(bufferHalfSize)) + vDSP_ctoz(complexPtr, 2, &complexBuffer, 1, bufferHalfSize) + } + + vDSP_fft_zrip(fftSetup, &complexBuffer, 1, bufferLog2Size, FFTDirection(FFT_FORWARD)) + + var magnitudes = [Float](repeating: 0.0, count: Int(bufferHalfSize)) + vDSP_zvabs(&complexBuffer, 1, &magnitudes, 1, bufferHalfSize) + + // Convert magnitudes to decibels + vDSP_vdbcon(magnitudes, 1, &zeroDBReference, &magnitudes, 1, vDSP_Length(magnitudes.count), 1) + + return FFTResult(magnitudes: magnitudes) + } + } + } +} diff --git a/Sources/LiveKit/Support/RingBuffer.swift b/Sources/LiveKit/Support/RingBuffer.swift new file mode 100644 index 000000000..ddbc1e682 --- /dev/null +++ b/Sources/LiveKit/Support/RingBuffer.swift @@ -0,0 +1,51 @@ +/* + * Copyright 2024 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import Foundation + +// Simple ring-buffer used for internal audio processing. Not thread-safe. +class RingBuffer { + private var _isFull = false + private var _buffer: [T] + private var _head: Int = 0 + + init(size: Int) { + _buffer = [T](repeating: 0, count: size) + } + + func write(_ value: T) { + _buffer[_head] = value + _head = (_head + 1) % _buffer.count + if _head == 0 { _isFull = true } + } + + func write(_ sequence: [T]) { + for value in sequence { + write(value) + } + } + + func read() -> [T]? { + guard _isFull else { return nil } + + if _head == 0 { + return _buffer // Return the entire buffer if _head is at the start + } else { + // Return the buffer in the correct order + return Array(_buffer[_head ..< _buffer.count] + _buffer[0 ..< _head]) + } + } +} diff --git a/Sources/LiveKit/Track/AudioManager.swift b/Sources/LiveKit/Track/AudioManager.swift index 957767fb5..37b49ecd6 100644 --- a/Sources/LiveKit/Track/AudioManager.swift +++ b/Sources/LiveKit/Track/AudioManager.swift @@ -172,13 +172,13 @@ public class AudioManager: Loggable { // MARK: - AudioProcessingModule private lazy var capturePostProcessingDelegateAdapter: AudioCustomProcessingDelegateAdapter = { - let adapter = AudioCustomProcessingDelegateAdapter(target: nil) + let adapter = AudioCustomProcessingDelegateAdapter() RTC.audioProcessingModule.capturePostProcessingDelegate = adapter return adapter }() private lazy var renderPreProcessingDelegateAdapter: AudioCustomProcessingDelegateAdapter = { - let adapter = AudioCustomProcessingDelegateAdapter(target: nil) + let 
adapter = AudioCustomProcessingDelegateAdapter() RTC.audioProcessingModule.renderPreProcessingDelegate = adapter return adapter }() @@ -281,11 +281,11 @@ public extension AudioManager { /// Only ``AudioRenderer/render(pcmBuffer:)`` will be called. /// Usage: `AudioManager.shared.add(localAudioRenderer: localRenderer)` func add(localAudioRenderer delegate: AudioRenderer) { - capturePostProcessingDelegateAdapter.audioRenderers.add(delegate: delegate) + capturePostProcessingDelegateAdapter.add(delegate: delegate) } func remove(localAudioRenderer delegate: AudioRenderer) { - capturePostProcessingDelegateAdapter.audioRenderers.remove(delegate: delegate) + capturePostProcessingDelegateAdapter.remove(delegate: delegate) } } @@ -295,10 +295,10 @@ public extension AudioManager { /// To receive buffer for individual tracks, use ``RemoteAudioTrack/add(audioRenderer:)`` instead. /// Usage: `AudioManager.shared.add(remoteAudioRenderer: localRenderer)` func add(remoteAudioRenderer delegate: AudioRenderer) { - renderPreProcessingDelegateAdapter.audioRenderers.add(delegate: delegate) + renderPreProcessingDelegateAdapter.add(delegate: delegate) } func remove(remoteAudioRenderer delegate: AudioRenderer) { - renderPreProcessingDelegateAdapter.audioRenderers.remove(delegate: delegate) + renderPreProcessingDelegateAdapter.remove(delegate: delegate) } } diff --git a/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift b/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift index 6348e3eb4..94821a347 100644 --- a/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift +++ b/Sources/LiveKit/Track/Remote/RemoteAudioTrack.swift @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +import AVFoundation import CoreMedia #if swift(>=5.9) @@ -36,6 +37,8 @@ public class RemoteAudioTrack: Track, RemoteTrack, AudioTrack { } } + private lazy var _adapter = AudioRendererAdapter() + init(name: String, source: Track.Source, track: LKRTCMediaStreamTrack, @@ -48,14 +51,29 @@ public class RemoteAudioTrack: Track, RemoteTrack, AudioTrack { reportStatistics: reportStatistics) } - public func add(audioRenderer: AudioRenderer) { + deinit { + // Directly remove the adapter without unnecessary checks guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } - audioTrack.add(AudioRendererAdapter(target: audioRenderer)) + audioTrack.remove(_adapter) + } + + public func add(audioRenderer: AudioRenderer) { + let wasEmpty = _adapter.countDelegates == 0 + _adapter.add(delegate: audioRenderer) + // Attach adapter only if it wasn't attached before + if wasEmpty { + guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } + audioTrack.add(_adapter) + } } public func remove(audioRenderer: AudioRenderer) { - guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } - audioTrack.remove(AudioRendererAdapter(target: audioRenderer)) + _adapter.remove(delegate: audioRenderer) + // Remove adapter only if there are no more delegates + if _adapter.countDelegates == 0 { + guard let audioTrack = mediaTrack as? LKRTCAudioTrack else { return } + audioTrack.remove(_adapter) + } } // MARK: - Internal diff --git a/Sources/LiveKit/Types/AgentState.swift b/Sources/LiveKit/Types/AgentState.swift new file mode 100644 index 000000000..de37b8767 --- /dev/null +++ b/Sources/LiveKit/Types/AgentState.swift @@ -0,0 +1,54 @@ +/* + * Copyright 2024 LiveKit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +let agentStateAttributeKey = "lk.agent.state" + +@objc +public enum AgentState: Int { + case unknown + case disconnected + case connecting + case initializing + case listening + case thinking + case speaking +} + +extension AgentState { + static func fromString(_ rawString: String?) -> AgentState? { + switch rawString { + case "initializing": return .initializing + case "listening": return .listening + case "thinking": return .thinking + case "speaking": return .speaking + default: return .unknown + } + } +} + +extension AgentState: CustomStringConvertible { + public var description: String { + switch self { + case .unknown: return "Unknown" + case .disconnected: return "Disconnected" + case .connecting: return "Connecting" + case .initializing: return "Initializing" + case .listening: return "Listening" + case .thinking: return "Thinking" + case .speaking: return "Speaking" + } + } +} diff --git a/Sources/LiveKit/Views/SampleBufferVideoRenderer.swift b/Sources/LiveKit/Views/SampleBufferVideoRenderer.swift index 4d00959a0..a6f3d0d78 100644 --- a/Sources/LiveKit/Views/SampleBufferVideoRenderer.swift +++ b/Sources/LiveKit/Views/SampleBufferVideoRenderer.swift @@ -52,10 +52,10 @@ class SampleBufferVideoRenderer: NativeView, Loggable { override func performLayout() { super.performLayout() - sampleBufferDisplayLayer.frame = bounds let (rotation, isMirrored) = _state.read { ($0.videoRotation, $0.isMirrored) } sampleBufferDisplayLayer.transform = CATransform3D.from(rotation: rotation, isMirrored: isMirrored) + sampleBufferDisplayLayer.frame = 
bounds sampleBufferDisplayLayer.removeAllAnimations() } diff --git a/Sources/LiveKit/Views/VideoView.swift b/Sources/LiveKit/Views/VideoView.swift index 247a38802..b3ce2ffe0 100644 --- a/Sources/LiveKit/Views/VideoView.swift +++ b/Sources/LiveKit/Views/VideoView.swift @@ -706,16 +706,23 @@ extension VideoView: VideoRenderer { self._secondaryRenderer = nil } + let previousPrimaryRenderer = _primaryRenderer + let completion: (Bool) -> Void = { _ in + previousPrimaryRenderer?.removeFromSuperview() + } + // Currently only for iOS #if os(iOS) let (mode, duration, position) = _state.read { ($0.transitionMode, $0.transitionDuration, $0.captureDevice?.facingPosition) } if let transitionOption = mode.toAnimationOption(fromPosition: position) { - UIView.transition(with: self, duration: duration, options: transitionOption, animations: block, completion: nil) + UIView.transition(with: self, duration: duration, options: transitionOption, animations: block, completion: completion) } else { block() + completion(true) } #else block() + completion(true) #endif } } From 9529857e38d5f160c9297b5429d73eb504c5a69e Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Thu, 7 Nov 2024 06:14:46 +0900 Subject: [PATCH 4/5] mode: videoChat --- Sources/LiveKit/Track/AudioManager.swift | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Sources/LiveKit/Track/AudioManager.swift b/Sources/LiveKit/Track/AudioManager.swift index 37b49ecd6..8bff5e32a 100644 --- a/Sources/LiveKit/Track/AudioManager.swift +++ b/Sources/LiveKit/Track/AudioManager.swift @@ -66,7 +66,11 @@ public class AudioManager: Loggable { func audioSession(_: LKRTCAudioSession, audioUnitWillInitialize isRecord: Bool) { log("isRecord: \(isRecord)") - LKRTCAudioSessionConfiguration.webRTC().category = AVAudioSession.Category.playAndRecord.rawValue + let config = LKRTCAudioSessionConfiguration.webRTC() + config.category = AVAudioSession.Category.playAndRecord.rawValue + 
config.categoryOptions = [.allowBluetooth, .allowBluetoothA2DP, .allowAirPlay] + config.mode = AVAudioSession.Mode.videoChat.rawValue + LKRTCAudioSessionConfiguration.setWebRTC(config) } func audioSessionDidStopPlayOrRecord(_: LKRTCAudioSession) { From 66db8576c1b6ac103de52e951985bdfeab4a7550 Mon Sep 17 00:00:00 2001 From: hiroshihorie <548776+hiroshihorie@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:00:54 +0900 Subject: [PATCH 5/5] fix mac compile --- Sources/LiveKit/Track/AudioManager.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/LiveKit/Track/AudioManager.swift b/Sources/LiveKit/Track/AudioManager.swift index 8bff5e32a..f59ebb578 100644 --- a/Sources/LiveKit/Track/AudioManager.swift +++ b/Sources/LiveKit/Track/AudioManager.swift @@ -59,6 +59,7 @@ public class LKAudioBuffer: NSObject { // Audio Session Configuration related public class AudioManager: Loggable { + #if os(iOS) class AudioSessionDelegateObserver: NSObject, Loggable, LKRTCAudioSessionDelegate { func audioSessionDidStartPlayOrRecord(_: LKRTCAudioSession) { log() @@ -77,6 +78,7 @@ public class AudioManager: Loggable { log() } } + #endif // MARK: - Public @@ -269,6 +271,7 @@ public class AudioManager: Loggable { } } + #if os(iOS) let _audioSessionDelegateObserver = AudioSessionDelegateObserver() init() { @@ -278,6 +281,7 @@ public class AudioManager: Loggable { deinit { LKRTCAudioSession.sharedInstance().remove(_audioSessionDelegateObserver) } + #endif } public extension AudioManager {