Skip to content

Commit

Permalink
Multichannel OPUS API
Browse files Browse the repository at this point in the history
Summary:
This diff adds support for the OPUS multichannel (multistream) API by introducing a new field, stereoPairCount, in the audio data layout. From the total channel count and the stereo pair count, the number of mono channels within an OPUS compression frame can be calculated (total channel count minus two times the stereo pair count). The implementation requires that all stereo pairs are positioned at the start of the frame, followed by the mono channels, and that no streams or channels lacking audio information are included in the frame. Due to these restrictions, the channel mapping is limited to values less than two times the stereo pair count for stereo channels, and to values greater than or equal to two times the stereo pair count but below the total channel count for mono channels.

# Mapping
 1. for stereo channel => mapping  < 2*stereoPairCount
 2. for mono channel => 2*stereoPairCount <= mapping < total channel count

# Background and logic behind Channel mapping needed for the OPUS multichannel API
1. **Mapping Table**: This table associates each input/output (I/O) channel with a decoded channel. It's usually represented as an unsigned char array.
2. **Mapping Logic**:
Let i = mapping[j] be the mapping table entry for I/O channel j. If i < 2*coupled_streams, then the I/O channel j is encoded as either the left or right channel of a stereo stream, depending on whether i is even or odd respectively. The stereo stream index is i/2.
If i >= 2*coupled_streams, the I/O channel j is encoded as mono in a stream. If i equals 255, it's omitted from encoding (treated as silence). Each i must be either 255 or less than streams + coupled_streams.
3. **Output Channels**: The encoder should use the Vorbis channel ordering. A decoder might reorder the channels if needed.
4. **Multistream Packet Structure**: Each multistream packet contains an Opus packet for each stream. All Opus packets within a single multistream packet must have the same duration.
5. **TOC Sequence**: The duration of a multistream packet can be determined from the table-of-contents (TOC) sequence of the first stream, located at the packet's beginning, in the same way as for an elementary Opus stream.

Reviewed By: georges-berenger

Differential Revision: D56542097

fbshipit-source-id: 1f93d6848952cca8d0f6bac21b9cc07df38d3ebb
  • Loading branch information
Vishal Suvarna authored and facebook-github-bot committed May 1, 2024
1 parent 917dfec commit 7a137c2
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 24 deletions.
12 changes: 11 additions & 1 deletion vrs/ContentBlockReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,18 @@ bool AudioBlockReader::audioContentFromAudioSpec(
if (!audioSpec_.sampleCount.get(sampleCount) || sampleCount == 0) {
sampleCount = findAudioSampleCount(record, player);
}

uint8_t stereoPairCount = 0;
audioSpec_.stereoPairCount.get(stereoPairCount);

outAudioContentBlock = ContentBlock(
audioFormat, sampleFormat, numChannels, sampleFrameStride, sampleRate, sampleCount);
audioFormat,
sampleFormat,
numChannels,
sampleFrameStride,
sampleRate,
sampleCount,
stereoPairCount);
return true;
}
return false;
Expand Down
3 changes: 3 additions & 0 deletions vrs/DataLayoutConventions.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ constexpr const char* kAudioChannelCount = "audio_channel_count";
constexpr const char* kAudioSampleRate = "audio_sample_rate";
/// DataLayout convention name for a count of audio sample frames.
constexpr const char* kAudioSampleCount = "audio_sample_count";
/// DataLayout convention name for the number of audio stereo pairs (coupled streams).
constexpr const char* kAudioStereoPairCount = "audio_stereo_pair_count";

/// \brief DataLayout definitions used to describe what's in an audio content block.
///
Expand All @@ -139,6 +141,7 @@ class AudioSpec : public AutoDataLayout {
DataPieceValue<uint8_t> channelCount{kAudioChannelCount};
DataPieceValue<uint32_t> sampleRate{kAudioSampleRate};
DataPieceValue<uint32_t> sampleCount{kAudioSampleCount};
DataPieceValue<uint8_t> stereoPairCount{kAudioStereoPairCount};

AutoDataLayoutEnd end;
};
Expand Down
13 changes: 9 additions & 4 deletions vrs/RecordFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -671,15 +671,18 @@ AudioContentBlockSpec::AudioContentBlockSpec(
uint8_t channelCount,
uint8_t sampleFrameStride,
uint32_t sampleFrameRate,
uint32_t sampleFrameCount)
uint32_t sampleFrameCount,
uint8_t stereoPairCount)
: audioFormat_{audioFormat},
sampleFormat_{sampleFormat},
sampleFrameStride_{sampleFrameStride},
channelCount_{channelCount},
sampleFrameRate_{sampleFrameRate},
sampleFrameCount_{sampleFrameCount} {
sampleFrameCount_{sampleFrameCount},
stereoPairCount_{stereoPairCount} {
XR_VERIFY(audioFormat != AudioFormat::UNDEFINED);
XR_VERIFY(sampleFrameStride_ == 0 || sampleFrameStride_ >= getBytesPerSample() * channelCount);
XR_VERIFY(channelCount >= stereoPairCount * 2);
}

AudioContentBlockSpec::AudioContentBlockSpec(const string& formatStr) {
Expand Down Expand Up @@ -923,15 +926,17 @@ ContentBlock::ContentBlock(
uint8_t numChannels,
uint8_t sampleFrameStride,
uint32_t sampleRate,
uint32_t sampleCount)
uint32_t sampleCount,
uint8_t stereoPairCount)
: contentType_(ContentType::AUDIO),
audioSpec_(
audioFormat,
sampleFormat,
numChannels,
sampleFrameStride,
sampleRate,
sampleCount) {}
sampleCount,
stereoPairCount) {}

ContentBlock::ContentBlock(ContentType type, size_t size) : contentType_(type), size_(size) {
switch (contentType_) {
Expand Down
12 changes: 10 additions & 2 deletions vrs/RecordFormat.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,8 @@ class AudioContentBlockSpec {
uint8_t channelCount = 0,
uint8_t sampleFrameStride = 0,
uint32_t sampleFrameRate = 0,
uint32_t sampleFrameCount = 0);
uint32_t sampleFrameCount = 0,
uint8_t stereoPairCount = 0);

/// Constructor used for factory construction.
/// @internal
Expand Down Expand Up @@ -433,6 +434,11 @@ class AudioContentBlockSpec {
void setSampleCount(uint32_t sampleCount) {
sampleFrameCount_ = sampleCount;
}

/// Get the number of stereo pairs stored in this audio spec.
/// @return The value of stereoPairCount_.
uint8_t getStereoPairCount() const {
return stereoPairCount_;
}

/// Tell if the audio sample format is fully defined.
/// For instance, PCM audio data when we have enough details: sample format & channel count.
bool isSampleBlockFormatDefined() const {
Expand Down Expand Up @@ -466,6 +472,7 @@ class AudioContentBlockSpec {
uint8_t channelCount_{};
uint32_t sampleFrameRate_{};
uint32_t sampleFrameCount_{};
uint8_t stereoPairCount_{};
};

/// \brief Specification of a VRS record content block.
Expand Down Expand Up @@ -533,7 +540,8 @@ class ContentBlock {
uint8_t numChannels = 0,
uint8_t sampleFrameStride = 0,
uint32_t sampleRate = 0,
uint32_t sampleCount = 0);
uint32_t sampleCount = 0,
uint8_t stereoPairCount = 0);

/// Default copy constructor
ContentBlock(const ContentBlock&) = default;
Expand Down
12 changes: 11 additions & 1 deletion vrs/test/AudioContentBlockReaderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ const uint32_t kWavHeaderSize = 44;

const uint32_t kSampleRate = 48000;
const uint8_t kChannels = 2;
const uint8_t kStereoPairCount = 1;

uint32_t kSampleCount = 0;

const vector<uint16_t>& getAudioSamples() {
Expand Down Expand Up @@ -133,10 +135,12 @@ class AudioStream : public Recordable {
break;
case LayoutStyle::OpusStereo:
config_.audioFormat.set(AudioFormat::OPUS);
config_.stereoPairCount.set(kStereoPairCount);
return createRecord(getTimestampSec(), Record::Type::CONFIGURATION, 1, DataSource(config_));
break;
case LayoutStyle::OpusStereoNoSampleCount:
config_.audioFormat.set(AudioFormat::OPUS);
config_.stereoPairCount.set(kStereoPairCount);
return createRecord(getTimestampSec(), Record::Type::CONFIGURATION, 1, DataSource(config_));
break;
}
Expand Down Expand Up @@ -174,7 +178,13 @@ class AudioStream : public Recordable {
case LayoutStyle::OpusStereoNoSampleCount: {
if (compressionHandler_.encoder == nullptr) {
compressionHandler_.create(
{AudioFormat::OPUS, AudioSampleFormat::S16_LE, kChannels, 0, kSampleRate});
{AudioFormat::OPUS,
AudioSampleFormat::S16_LE,
kChannels,
0,
kSampleRate,
0,
kStereoPairCount});
opusData_.resize(4096 * kChannels);
}
// Opus isn't very flexible: it can only process specific sizes, so we might need to pad!
Expand Down
13 changes: 9 additions & 4 deletions vrs/utils/AudioBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
#include <vrs/RecordFormat.h>
#include <vrs/RecordReaders.h>

using OpusEncoder = struct OpusEncoder;
using OpusDecoder = struct OpusDecoder;
using OpusMSEncoder = struct OpusMSEncoder;
using OpusMSDecoder = struct OpusMSDecoder;

namespace vrs::utils {

using std::vector;

struct AudioCompressionHandler {
OpusEncoder* encoder{};
OpusMSEncoder* encoder{};
AudioContentBlockSpec encoderSpec;

bool create(const AudioContentBlockSpec& spec);
Expand All @@ -39,7 +39,7 @@ struct AudioCompressionHandler {
};

struct AudioDecompressionHandler {
OpusDecoder* decoder{};
OpusMSDecoder* decoder{};
AudioContentBlockSpec decoderSpec;

~AudioDecompressionHandler();
Expand Down Expand Up @@ -98,6 +98,11 @@ class AudioBlock {
uint32_t getSampleRate() const {
return audioSpec_.getSampleRate();
}

/// Get the stereo pair count of this block's audio spec.
/// @return The value reported by audioSpec_.getStereoPairCount().
uint8_t getStereoPairCount() const {
return audioSpec_.getStereoPairCount();
}

uint8_t getSampleFrameStride() const {
return audioSpec_.getSampleFrameStride();
}
Expand Down
89 changes: 77 additions & 12 deletions vrs/utils/AudioBlockOpus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#ifdef OPUS_IS_AVAILABLE
#include <opus.h>
#include <opus_multistream.h>
#endif

#define DEFAULT_LOG_CHANNEL "AudioBlockOpus"
Expand Down Expand Up @@ -49,12 +50,44 @@ bool AudioBlock::opusDecompress(AudioDecompressionHandler& handler, AudioBlock&
return false;
}
if (handler.decoder != nullptr && !handler.decoderSpec.isCompatibleWith(audioSpec_)) {
opus_decoder_destroy(handler.decoder);
opus_multistream_decoder_destroy(handler.decoder);
handler.decoder = nullptr;
}

if (handler.decoder == nullptr) {
int error = 0;
handler.decoder = opus_decoder_create(getSampleRate(), getChannelCount(), &error);

uint32_t totalAudioChannel = getChannelCount();
if (totalAudioChannel > 255 || totalAudioChannel == 0) {
XR_LOGW("Invalid channel count of {}", totalAudioChannel);
return false;
}

uint32_t totalCoupledAudioChannel = 2 * getStereoPairCount();
if (totalAudioChannel < totalCoupledAudioChannel) {
XR_LOGW(
"Invalid channel count of {} and stereo channel count of {}",
totalAudioChannel,
totalCoupledAudioChannel);
return false;
}

uint32_t totalMonoChannel = totalAudioChannel - totalCoupledAudioChannel;
uint32_t totalAudioStreamCount = totalMonoChannel + getStereoPairCount();

vector<uint8_t> mapping(getChannelCount());
for (uint32_t i = 0; i < totalCoupledAudioChannel + totalMonoChannel; ++i) {
mapping[i] = i;
}

handler.decoder = opus_multistream_decoder_create(
getSampleRate(),
getChannelCount(),
totalAudioStreamCount,
getStereoPairCount(),
mapping.data(),
&error);

if (error != OPUS_OK || handler.decoder == nullptr) {
XR_LOGW("Couldn't create Opus decoder. Error {}: {}", error, opus_strerror(error));
return false;
Expand All @@ -75,7 +108,7 @@ bool AudioBlock::opusDecompress(AudioDecompressionHandler& handler, AudioBlock&
0,
getSampleRate(),
sampleCount);
opus_int32 result = opus_decode(
opus_int32 result = opus_multistream_decode(
handler.decoder,
data<unsigned char>(),
audioBytes_.size(),
Expand All @@ -93,29 +126,61 @@ bool AudioBlock::opusDecompress(AudioDecompressionHandler& handler, AudioBlock&

AudioDecompressionHandler::~AudioDecompressionHandler() {
if (decoder != nullptr) {
opus_decoder_destroy(decoder);
opus_multistream_decoder_destroy(decoder);
}
}

bool AudioCompressionHandler::create(const AudioContentBlockSpec& spec) {
if (encoder != nullptr) {
opus_encoder_destroy(encoder);
opus_multistream_encoder_destroy(encoder);
encoder = nullptr;
}
if (!XR_VERIFY(supportedSampleRate(spec.getSampleRate()))) {
return false;
}
int error = 0;
encoder = opus_encoder_create(
spec.getSampleRate(), spec.getChannelCount(), OPUS_APPLICATION_AUDIO, &error);

uint32_t totalAudioChannel = spec.getChannelCount();
if (totalAudioChannel > 255 || totalAudioChannel == 0) {
XR_LOGW("Invalid channel count of {}", totalAudioChannel);
return false;
}

uint32_t totalCoupledAudioChannel = 2 * spec.getStereoPairCount();
if (totalAudioChannel < totalCoupledAudioChannel) {
XR_LOGW(
"Invalid channel count of {} and stereo channel count of {}",
totalAudioChannel,
totalCoupledAudioChannel);
return false;
}

uint32_t totalMonoChannel = totalAudioChannel - totalCoupledAudioChannel;
uint32_t totalAudioStreamCount = totalMonoChannel + spec.getStereoPairCount();

vector<uint8_t> mapping(spec.getChannelCount());

for (uint32_t i = 0; i < totalCoupledAudioChannel + totalMonoChannel; ++i) {
mapping[i] = i;
}

encoder = opus_multistream_encoder_create(
spec.getSampleRate(),
spec.getChannelCount(),
totalAudioStreamCount,
spec.getStereoPairCount(),
mapping.data(),
OPUS_APPLICATION_AUDIO,
&error);

if (error != OPUS_OK || encoder == nullptr) {
XR_LOGW("Couldn't create Opus encoder. Error {}: {}", error, opus_strerror(error));
return false;
}
encoderSpec = spec;
XR_VERIFY(opus_encoder_ctl(encoder, OPUS_SET_BITRATE(96000)) == OPUS_OK);
XR_VERIFY(opus_encoder_ctl(encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)) == OPUS_OK);
XR_VERIFY(opus_encoder_ctl(encoder, OPUS_SET_VBR(1)) == OPUS_OK);
XR_VERIFY(opus_multistream_encoder_ctl(encoder, OPUS_SET_BITRATE(96000)) == OPUS_OK);
XR_VERIFY(opus_multistream_encoder_ctl(encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_MUSIC)) == OPUS_OK);
XR_VERIFY(opus_multistream_encoder_ctl(encoder, OPUS_SET_VBR(1)) == OPUS_OK);
return true;
}

Expand All @@ -124,13 +189,13 @@ int AudioCompressionHandler::compress(
uint32_t sampleCount,
void* outOpusBytes,
size_t maxBytes) {
return opus_encode(
return opus_multistream_encode(
encoder, (opus_int16*)samples, sampleCount, (unsigned char*)outOpusBytes, maxBytes);
}

AudioCompressionHandler::~AudioCompressionHandler() {
if (encoder != nullptr) {
opus_encoder_destroy(encoder);
opus_multistream_encoder_destroy(encoder);
}
}

Expand Down

0 comments on commit 7a137c2

Please sign in to comment.