diff --git a/vrs/ContentBlockReader.cpp b/vrs/ContentBlockReader.cpp index 74405ebb..6d83a988 100644 --- a/vrs/ContentBlockReader.cpp +++ b/vrs/ContentBlockReader.cpp @@ -283,8 +283,18 @@ bool AudioBlockReader::audioContentFromAudioSpec( if (!audioSpec_.sampleCount.get(sampleCount) || sampleCount == 0) { sampleCount = findAudioSampleCount(record, player); } + + uint8_t stereoPairCount = 0; + audioSpec_.stereoPairCount.get(stereoPairCount); + outAudioContentBlock = ContentBlock( - audioFormat, sampleFormat, numChannels, sampleFrameStride, sampleRate, sampleCount); + audioFormat, + sampleFormat, + numChannels, + sampleFrameStride, + sampleRate, + sampleCount, + stereoPairCount); return true; } return false; diff --git a/vrs/DataLayoutConventions.h b/vrs/DataLayoutConventions.h index 5fe0a766..02d4c0c1 100644 --- a/vrs/DataLayoutConventions.h +++ b/vrs/DataLayoutConventions.h @@ -120,6 +120,8 @@ constexpr const char* kAudioChannelCount = "audio_channel_count"; constexpr const char* kAudioSampleRate = "audio_sample_rate"; /// DataLayout convention name for a count of audio sample frames. constexpr const char* kAudioSampleCount = "audio_sample_count"; +/// DataLayout convention name for a count of audio coupled stream count. +constexpr const char* kAudioStereoPairCount = "audio_stereo_pair_count"; /// \brief DataLayout definitions use to describe what's in an audio content block. /// @@ -139,6 +141,7 @@ class AudioSpec : public AutoDataLayout { DataPieceValue channelCount{kAudioChannelCount}; DataPieceValue sampleRate{kAudioSampleRate}; DataPieceValue sampleCount{kAudioSampleCount}; + DataPieceValue stereoPairCount{kAudioStereoPairCount}; AutoDataLayoutEnd end; }; diff --git a/vrs/RecordFormat.cpp b/vrs/RecordFormat.cpp index bb59650f..f9acc4fd 100644 --- a/vrs/RecordFormat.cpp +++ b/vrs/RecordFormat.cpp @@ -671,15 +671,18 @@ AudioContentBlockSpec::AudioContentBlockSpec( uint8_t channelCount, uint8_t sampleFrameStride, uint32_t sampleFrameRate, - uint32_t sampleFrameCount) + uint32_t sampleFrameCount, + uint8_t stereoPairCount) : audioFormat_{audioFormat}, sampleFormat_{sampleFormat}, sampleFrameStride_{sampleFrameStride}, channelCount_{channelCount}, sampleFrameRate_{sampleFrameRate}, - sampleFrameCount_{sampleFrameCount} { + sampleFrameCount_{sampleFrameCount}, + stereoPairCount_{stereoPairCount} { XR_VERIFY(audioFormat != AudioFormat::UNDEFINED); XR_VERIFY(sampleFrameStride_ == 0 || sampleFrameStride_ >= getBytesPerSample() * channelCount); + XR_VERIFY(channelCount >= stereoPairCount * 2); } AudioContentBlockSpec::AudioContentBlockSpec(const string& formatStr) { @@ -923,7 +926,8 @@ ContentBlock::ContentBlock( uint8_t numChannels, uint8_t sampleFrameStride, uint32_t sampleRate, - uint32_t sampleCount) + uint32_t sampleCount, + uint8_t stereoPairCount) : contentType_(ContentType::AUDIO), audioSpec_( audioFormat, @@ -931,7 +935,8 @@ ContentBlock::ContentBlock( numChannels, sampleFrameStride, sampleRate, - sampleCount) {} + sampleCount, + stereoPairCount) {} ContentBlock::ContentBlock(ContentType type, size_t size) : contentType_(type), size_(size) { switch (contentType_) { diff --git a/vrs/RecordFormat.h b/vrs/RecordFormat.h index addde774..ef98e721 100644 --- a/vrs/RecordFormat.h +++ b/vrs/RecordFormat.h @@ -348,7 +348,8 @@ class AudioContentBlockSpec { uint8_t channelCount = 0, uint8_t sampleFrameStride = 0, uint32_t sampleFrameRate = 0, - uint32_t sampleFrameCount = 0); + uint32_t sampleFrameCount = 0, + uint8_t stereoPairCount = 0); /// Constructor used for factory construction. /// @internal @@ -433,6 +434,11 @@ class AudioContentBlockSpec { void setSampleCount(uint32_t sampleCount) { sampleFrameCount_ = sampleCount; } + + uint8_t getStereoPairCount() const { + return stereoPairCount_; + } + /// Tell if the audio sample format is fully defined. /// For instance, PCM audio data when we have enough details: sample format & channel count. bool isSampleBlockFormatDefined() const { @@ -466,6 +472,7 @@ class AudioContentBlockSpec { uint8_t channelCount_{}; uint32_t sampleFrameRate_{}; uint32_t sampleFrameCount_{}; + uint8_t stereoPairCount_{}; }; /// \brief Specification of a VRS record content block. @@ -533,7 +540,8 @@ class ContentBlock { uint8_t numChannels = 0, uint8_t sampleFrameStride = 0, uint32_t sampleRate = 0, - uint32_t sampleCount = 0); + uint32_t sampleCount = 0, + uint8_t stereoPairCount = 0); /// Default copy constructor ContentBlock(const ContentBlock&) = default; diff --git a/vrs/test/AudioContentBlockReaderTest.cpp b/vrs/test/AudioContentBlockReaderTest.cpp index f88c69b7..c27080d6 100644 --- a/vrs/test/AudioContentBlockReaderTest.cpp +++ b/vrs/test/AudioContentBlockReaderTest.cpp @@ -47,6 +47,8 @@ const uint32_t kWavHeaderSize = 44; const uint32_t kSampleRate = 48000; const uint8_t kChannels = 2; +const uint8_t kStereoPairCount = 1; + uint32_t kSampleCount = 0; const vector& getAudioSamples() { @@ -133,10 +135,12 @@ class AudioStream : public Recordable { break; case LayoutStyle::OpusStereo: config_.audioFormat.set(AudioFormat::OPUS); + config_.stereoPairCount.set(kStereoPairCount); return createRecord(getTimestampSec(), Record::Type::CONFIGURATION, 1, DataSource(config_)); break; case LayoutStyle::OpusStereoNoSampleCount: config_.audioFormat.set(AudioFormat::OPUS); + config_.stereoPairCount.set(kStereoPairCount); return createRecord(getTimestampSec(), Record::Type::CONFIGURATION, 1, DataSource(config_)); break; } @@ -174,7 +178,13 @@ class AudioStream : public Recordable { case LayoutStyle::OpusStereoNoSampleCount: { if (compressionHandler_.encoder == nullptr) { compressionHandler_.create( - {AudioFormat::OPUS, AudioSampleFormat::S16_LE, kChannels, 0, kSampleRate}); + {AudioFormat::OPUS, + AudioSampleFormat::S16_LE, + kChannels, + 0, + kSampleRate, + 0, + kStereoPairCount}); opusData_.resize(4096 * kChannels); } // Opus isn't very flexible: it can only process specific sizes, so we might need to padd! diff --git a/vrs/utils/AudioBlock.h b/vrs/utils/AudioBlock.h index 30414c02..dbb02dc6 100644 --- a/vrs/utils/AudioBlock.h +++ b/vrs/utils/AudioBlock.h @@ -21,15 +21,15 @@ #include #include -using OpusEncoder = struct OpusEncoder; -using OpusDecoder = struct OpusDecoder; +using OpusMSEncoder = struct OpusMSEncoder; +using OpusMSDecoder = struct OpusMSDecoder; namespace vrs::utils { using std::vector; struct AudioCompressionHandler { - OpusEncoder* encoder{}; + OpusMSEncoder* encoder{}; AudioContentBlockSpec encoderSpec; bool create(const AudioContentBlockSpec& spec); @@ -39,7 +39,7 @@ struct AudioCompressionHandler { }; struct AudioDecompressionHandler { - OpusDecoder* decoder{}; + OpusMSDecoder* decoder{}; AudioContentBlockSpec decoderSpec; ~AudioDecompressionHandler(); @@ -98,6 +98,11 @@ class AudioBlock { uint32_t getSampleRate() const { return audioSpec_.getSampleRate(); } + + uint8_t getStereoPairCount() const { + return audioSpec_.getStereoPairCount(); + } + uint8_t getSampleFrameStride() const { return audioSpec_.getSampleFrameStride(); } diff --git a/vrs/utils/AudioBlockOpus.cpp b/vrs/utils/AudioBlockOpus.cpp index 53d6e8b3..a686c978 100644 --- a/vrs/utils/AudioBlockOpus.cpp +++ b/vrs/utils/AudioBlockOpus.cpp @@ -18,6 +18,7 @@ #ifdef OPUS_IS_AVAILABLE #include +#include #endif #define DEFAULT_LOG_CHANNEL "AudioBlockOpus" @@ -49,12 +50,44 @@ bool AudioBlock::opusDecompress(AudioDecompressionHandler& handler, AudioBlock& return false; } if (handler.decoder != nullptr && !handler.decoderSpec.isCompatibleWith(audioSpec_)) { - opus_decoder_destroy(handler.decoder); + opus_multistream_decoder_destroy(handler.decoder); handler.decoder = nullptr; } + if (handler.decoder == nullptr) { int error = 0; - handler.decoder = opus_decoder_create(getSampleRate(), getChannelCount(), &error); + + uint32_t totalAudioChannel = getChannelCount(); + if (totalAudioChannel > 255 || totalAudioChannel == 0) { + XR_LOGW("Invalid channel count of {}", totalAudioChannel); + return false; + } + + uint32_t totalCoupledAudioChannel = 2 * getStereoPairCount(); + if (totalAudioChannel < totalCoupledAudioChannel) { + XR_LOGW( + "Invalid channel count of {} and stereo channel count of {}", + totalAudioChannel, + totalCoupledAudioChannel); + return false; + } + + uint32_t totalMonoChannel = totalAudioChannel - totalCoupledAudioChannel; + uint32_t totalAudioStreamCount = totalMonoChannel + getStereoPairCount(); + + vector mapping(getChannelCount()); + for (uint32_t i = 0; i < totalCoupledAudioChannel + totalMonoChannel; ++i) { + mapping[i] = i; + } + + handler.decoder = opus_multistream_decoder_create( + getSampleRate(), + getChannelCount(), + totalAudioStreamCount, + getStereoPairCount(), + mapping.data(), + &error); + if (error != OPUS_OK || handler.decoder == nullptr) { XR_LOGW("Couldn't create Opus decoder. Error {}: {}", error, opus_strerror(error)); return false; @@ -75,7 +108,7 @@ bool AudioBlock::opusDecompress(AudioDecompressionHandler& handler, AudioBlock& 0, getSampleRate(), sampleCount); - opus_int32 result = opus_decode( + opus_int32 result = opus_multistream_decode( handler.decoder, data(), audioBytes_.size(), @@ -93,29 +126,61 @@ bool AudioBlock::opusDecompress(AudioDecompressionHandler& handler, AudioBlock& AudioDecompressionHandler::~AudioDecompressionHandler() { if (decoder != nullptr) { - opus_decoder_destroy(decoder); + opus_multistream_decoder_destroy(decoder); } } bool AudioCompressionHandler::create(const AudioContentBlockSpec& spec) { if (encoder != nullptr) { - opus_encoder_destroy(encoder); + opus_multistream_encoder_destroy(encoder); encoder = nullptr; } if (!XR_VERIFY(supportedSampleRate(spec.getSampleRate()))) { return false; } int error = 0; - encoder = opus_encoder_create( - spec.getSampleRate(), spec.getChannelCount(), OPUS_APPLICATION_AUDIO, &error); + + uint32_t totalAudioChannel = spec.getChannelCount(); + if (totalAudioChannel > 255 || totalAudioChannel == 0) { + XR_LOGW("Invalid channel count of {}", totalAudioChannel); + return false; + } + + uint32_t totalCoupledAudioChannel = 2 * spec.getStereoPairCount(); + if (totalAudioChannel < totalCoupledAudioChannel) { + XR_LOGW( + "Invalid channel count of {} and stereo channel count of {}", + totalAudioChannel, + totalCoupledAudioChannel); + return false; + } + + uint32_t totalMonoChannel = totalAudioChannel - totalCoupledAudioChannel; + uint32_t totalAudioStreamCount = totalMonoChannel + spec.getStereoPairCount(); + + vector mapping(spec.getChannelCount()); + + for (uint32_t i = 0; i < totalCoupledAudioChannel + totalMonoChannel; ++i) { + mapping[i] = i; + } + + encoder = opus_multistream_encoder_create( + spec.getSampleRate(), + spec.getChannelCount(), + totalAudioStreamCount, + spec.getStereoPairCount(), + mapping.data(), + OPUS_APPLICATION_AUDIO, + &error); + if (error != OPUS_OK || encoder == nullptr) { XR_LOGW("Couldn't create Opus encoder. Error {}: {}", error, opus_strerror(error)); return false; } encoderSpec = spec; - XR_VERIFY(opus_encoder_ctl(encoder, OPUS_SET_BITRATE(96000)) == OPUS_OK); - XR_VERIFY(opus_encoder_ctl(encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)) == OPUS_OK); - XR_VERIFY(opus_encoder_ctl(encoder, OPUS_SET_VBR(1)) == OPUS_OK); + XR_VERIFY(opus_multistream_encoder_ctl(encoder, OPUS_SET_BITRATE(96000)) == OPUS_OK); + XR_VERIFY(opus_multistream_encoder_ctl(encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)) == OPUS_OK); + XR_VERIFY(opus_multistream_encoder_ctl(encoder, OPUS_SET_VBR(1)) == OPUS_OK); return true; } @@ -124,13 +189,13 @@ int AudioCompressionHandler::compress( uint32_t sampleCount, void* outOpusBytes, size_t maxBytes) { - return opus_encode( + return opus_multistream_encode( encoder, (opus_int16*)samples, sampleCount, (unsigned char*)outOpusBytes, maxBytes); } AudioCompressionHandler::~AudioCompressionHandler() { if (encoder != nullptr) { - opus_encoder_destroy(encoder); + opus_multistream_encoder_destroy(encoder); } }