From 2502db909c7abee8c12273c91319e720f9d3b50f Mon Sep 17 00:00:00 2001 From: aler9 <46489434+aler9@users.noreply.github.com> Date: Sat, 27 Apr 2024 20:37:37 +0200 Subject: [PATCH] fix VP9 decoding on iOS The current implementation of the VP9 payloader produces payloads that are not compatible with iOS. This is because the payloader provides only the muxing strategy called "flexible mode". According to the VP9 RFC draft, there are two ways to wrap VP9 frames into RTP packets: the "flexible mode" and the "non-flexible mode", with the latter being the preferred one for live-streaming applications. In particular, all browsers encodes VP9 RTP packets in the "non-flexible mode", while iOS supports decoding RTP packets in this mode only, and this is probably a problem shared by other implementations. This patch improves the VP9 payloader by adding support for the "non-flexible mode". The "flexible mode" is retained and a flag is provided to perform the switch between the two modes. --- codecs/vp9/bits.go | 62 +++++++++++ codecs/vp9/header.go | 216 ++++++++++++++++++++++++++++++++++++++ codecs/vp9_packet.go | 153 ++++++++++++++++++++++----- codecs/vp9_packet_test.go | 154 +++++++++++++++++++++------ 4 files changed, 521 insertions(+), 64 deletions(-) create mode 100644 codecs/vp9/bits.go create mode 100644 codecs/vp9/header.go diff --git a/codecs/vp9/bits.go b/codecs/vp9/bits.go new file mode 100644 index 00000000..ab0178b7 --- /dev/null +++ b/codecs/vp9/bits.go @@ -0,0 +1,62 @@ +package vp9 + +import "fmt" + +func hasSpace(buf []byte, pos int, n int) error { + if n > ((len(buf) * 8) - pos) { + return fmt.Errorf("not enough bits") + } + return nil +} + +func readFlag(buf []byte, pos *int) (bool, error) { + err := hasSpace(buf, *pos, 1) + if err != nil { + return false, err + } + + return readFlagUnsafe(buf, pos), nil +} + +func readFlagUnsafe(buf []byte, pos *int) bool { + b := (buf[*pos>>0x03] >> (7 - (*pos & 0x07))) & 0x01 + *pos++ + return b == 1 +} + +func readBits(buf []byte, pos *int, n int) (uint64, error) { + err := hasSpace(buf, *pos, n) + if err != nil { + return 0, err + } + + return readBitsUnsafe(buf, pos, n), nil +} + +func readBitsUnsafe(buf []byte, pos *int, n int) uint64 { + v := uint64(0) + + res := 8 - (*pos & 0x07) + if n < res { + v := uint64((buf[*pos>>0x03] >> (res - n)) & (1<>0x03]&(1<= 8 { + v = (v << 8) | uint64(buf[*pos>>0x03]) + *pos += 8 + n -= 8 + } + + if n > 0 { + v = (v << n) | uint64(buf[*pos>>0x03]>>(8-n)) + *pos += n + } + + return v +} diff --git a/codecs/vp9/header.go b/codecs/vp9/header.go new file mode 100644 index 00000000..e5db774e --- /dev/null +++ b/codecs/vp9/header.go @@ -0,0 +1,216 @@ +package vp9 + +import ( + "fmt" +) + +// Header_ColorConfig is the color_config member of an header. +type Header_ColorConfig struct { //nolint:revive + TenOrTwelveBit bool + BitDepth uint8 + ColorSpace uint8 + ColorRange bool + SubsamplingX bool + SubsamplingY bool +} + +func (c *Header_ColorConfig) unmarshal(profile uint8, buf []byte, pos *int) error { + if profile >= 2 { + var err error + c.TenOrTwelveBit, err = readFlag(buf, pos) + if err != nil { + return err + } + + if c.TenOrTwelveBit { + c.BitDepth = 12 + } else { + c.BitDepth = 10 + } + } else { + c.BitDepth = 8 + } + + tmp, err := readBits(buf, pos, 3) + if err != nil { + return err + } + c.ColorSpace = uint8(tmp) + + if c.ColorSpace != 7 { + var err error + c.ColorRange, err = readFlag(buf, pos) + if err != nil { + return err + } + + if profile == 1 || profile == 3 { + err := hasSpace(buf, *pos, 3) + if err != nil { + return err + } + + c.SubsamplingX = readFlagUnsafe(buf, pos) + c.SubsamplingY = readFlagUnsafe(buf, pos) + *pos++ + } else { + c.SubsamplingX = true + c.SubsamplingY = true + } + } else { + c.ColorRange = true + + if profile == 1 || profile == 3 { + c.SubsamplingX = false + c.SubsamplingY = false + + err := hasSpace(buf, *pos, 1) + if err != nil { + return err + } + *pos++ + } + } + + return nil +} + +// Header_FrameSize is the frame_size member of an header. +type Header_FrameSize struct { //nolint:revive + FrameWidthMinus1 uint16 + FrameHeightMinus1 uint16 +} + +func (s *Header_FrameSize) unmarshal(buf []byte, pos *int) error { + err := hasSpace(buf, *pos, 32) + if err != nil { + return err + } + + s.FrameWidthMinus1 = uint16(readBitsUnsafe(buf, pos, 16)) + s.FrameHeightMinus1 = uint16(readBitsUnsafe(buf, pos, 16)) + return nil +} + +// Header is a VP9 Frame header. +// Specification: +// https://storage.googleapis.com/downloads.webmproject.org/docs/vp9/vp9-bitstream-specification-v0.6-20160331-draft.pdf +type Header struct { + Profile uint8 + ShowExistingFrame bool + FrameToShowMapIdx uint8 + IsKeyFrame bool + ShowFrame bool + ErrorResilientMode bool + ColorConfig *Header_ColorConfig + FrameSize *Header_FrameSize +} + +// Unmarshal decodes a Header. +func (h *Header) Unmarshal(buf []byte) error { + pos := 0 + + err := hasSpace(buf, pos, 4) + if err != nil { + return err + } + + frameMarker := readBitsUnsafe(buf, &pos, 2) + if frameMarker != 2 { + return fmt.Errorf("invalid frame marker") + } + + profileLowBit := uint8(readBitsUnsafe(buf, &pos, 1)) + profileHighBit := uint8(readBitsUnsafe(buf, &pos, 1)) + h.Profile = profileHighBit<<1 + profileLowBit + + if h.Profile == 3 { + err := hasSpace(buf, pos, 1) + if err != nil { + return err + } + pos++ + } + + h.ShowExistingFrame, err = readFlag(buf, &pos) + if err != nil { + return err + } + + if h.ShowExistingFrame { + tmp, err := readBits(buf, &pos, 3) + if err != nil { + return err + } + h.FrameToShowMapIdx = uint8(tmp) + + return nil + } + + err = hasSpace(buf, pos, 3) + if err != nil { + return err + } + + h.IsKeyFrame = !readFlagUnsafe(buf, &pos) + h.ShowFrame = readFlagUnsafe(buf, &pos) + h.ErrorResilientMode = readFlagUnsafe(buf, &pos) + + if h.IsKeyFrame { + err := hasSpace(buf, pos, 24) + if err != nil { + return err + } + + frameSyncByte0 := uint8(readBitsUnsafe(buf, &pos, 8)) + if frameSyncByte0 != 0x49 { + return fmt.Errorf("wrong frame_sync_byte_0") + } + + frameSyncByte1 := uint8(readBitsUnsafe(buf, &pos, 8)) + if frameSyncByte1 != 0x83 { + return fmt.Errorf("wrong frame_sync_byte_1") + } + + frameSyncByte2 := uint8(readBitsUnsafe(buf, &pos, 8)) + if frameSyncByte2 != 0x42 { + return fmt.Errorf("wrong frame_sync_byte_2") + } + + h.ColorConfig = &Header_ColorConfig{} + err = h.ColorConfig.unmarshal(h.Profile, buf, &pos) + if err != nil { + return err + } + + h.FrameSize = &Header_FrameSize{} + err = h.FrameSize.unmarshal(buf, &pos) + if err != nil { + return err + } + } + + return nil +} + +// Width returns the video width. +func (h Header) Width() int { + return int(h.FrameSize.FrameWidthMinus1) + 1 +} + +// Height returns the video height. +func (h Header) Height() int { + return int(h.FrameSize.FrameHeightMinus1) + 1 +} + +// ChromaSubsampling returns the chroma subsampling format, in ISO-BMFF/vpcC format. +func (h Header) ChromaSubsampling() uint8 { + switch { + case !h.ColorConfig.SubsamplingX && !h.ColorConfig.SubsamplingY: + return 3 // 4:4:4 + case h.ColorConfig.SubsamplingX && !h.ColorConfig.SubsamplingY: + return 2 // 4:2:2 + default: + return 1 // 4:2:0 colocated with luma + } +} diff --git a/codecs/vp9_packet.go b/codecs/vp9_packet.go index 6a6e0b0d..c3c061e0 100644 --- a/codecs/vp9_packet.go +++ b/codecs/vp9_packet.go @@ -5,6 +5,7 @@ package codecs import ( "github.com/pion/randutil" + "github.com/pion/rtp/codecs/vp9" ) // Use global random generator to properly seed by crypto grade random. @@ -12,24 +13,50 @@ var globalMathRandomGenerator = randutil.NewMathRandomGenerator() // nolint:goch // VP9Payloader payloads VP9 packets type VP9Payloader struct { - pictureID uint16 - initialized bool + // whether to use flexible mode or non-flexible mode. + FlexibleMode bool // InitialPictureIDFn is a function that returns random initial picture ID. InitialPictureIDFn func() uint16 + + pictureID uint16 + initialized bool } const ( - vp9HeaderSize = 3 // Flexible mode 15 bit picture ID maxSpatialLayers = 5 maxVP9RefPics = 3 ) // Payload fragments an VP9 packet across one or more byte arrays func (p *VP9Payloader) Payload(mtu uint16, payload []byte) [][]byte { + if !p.initialized { + if p.InitialPictureIDFn == nil { + p.InitialPictureIDFn = func() uint16 { + return uint16(globalMathRandomGenerator.Intn(0x7FFF)) + } + } + p.pictureID = p.InitialPictureIDFn() & 0x7FFF + p.initialized = true + } + + var payloads [][]byte + if p.FlexibleMode { + payloads = p.payloadFlexible(mtu, payload) + } else { + payloads = p.payloadNonFlexible(mtu, payload) + } + + p.pictureID++ + if p.pictureID >= 0x8000 { + p.pictureID = 0 + } + + return payloads +} + +func (p *VP9Payloader) payloadFlexible(mtu uint16, payload []byte) [][]byte { /* - * https://www.ietf.org/id/draft-ietf-payload-vp9-13.txt - * * Flexible mode (F=1) * 0 1 2 3 4 5 6 7 * +-+-+-+-+-+-+-+-+ @@ -46,7 +73,45 @@ func (p *VP9Payloader) Payload(mtu uint16, payload []byte) [][]byte { * V: | SS | * | .. | * +-+-+-+-+-+-+-+-+ - * + */ + + headerSize := 3 + maxFragmentSize := int(mtu) - headerSize + payloadDataRemaining := len(payload) + payloadDataIndex := 0 + var payloads [][]byte + + if min(maxFragmentSize, payloadDataRemaining) <= 0 { + return [][]byte{} + } + + for payloadDataRemaining > 0 { + currentFragmentSize := min(maxFragmentSize, payloadDataRemaining) + out := make([]byte, headerSize+currentFragmentSize) + + out[0] = 0x90 // F=1, I=1 + if payloadDataIndex == 0 { + out[0] |= 0x08 // B=1 + } + if payloadDataRemaining == currentFragmentSize { + out[0] |= 0x04 // E=1 + } + + out[1] = byte(p.pictureID>>8) | 0x80 + out[2] = byte(p.pictureID) + + copy(out[headerSize:], payload[payloadDataIndex:payloadDataIndex+currentFragmentSize]) + payloads = append(payloads, out) + + payloadDataRemaining -= currentFragmentSize + payloadDataIndex += currentFragmentSize + } + + return payloads +} + +func (p *VP9Payloader) payloadNonFlexible(mtu uint16, payload []byte) [][]byte { + /* * Non-flexible mode (F=0) * 0 1 2 3 4 5 6 7 * +-+-+-+-+-+-+-+-+ @@ -65,51 +130,81 @@ func (p *VP9Payloader) Payload(mtu uint16, payload []byte) [][]byte { * +-+-+-+-+-+-+-+-+ */ - if !p.initialized { - if p.InitialPictureIDFn == nil { - p.InitialPictureIDFn = func() uint16 { - return uint16(globalMathRandomGenerator.Intn(0x7FFF)) - } - } - p.pictureID = p.InitialPictureIDFn() & 0x7FFF - p.initialized = true - } - if payload == nil { + var h vp9.Header + err := h.Unmarshal(payload) + if err != nil { return [][]byte{} } - maxFragmentSize := int(mtu) - vp9HeaderSize payloadDataRemaining := len(payload) payloadDataIndex := 0 - - if min(maxFragmentSize, payloadDataRemaining) <= 0 { - return [][]byte{} - } - var payloads [][]byte + for payloadDataRemaining > 0 { + var headerSize int + if h.IsKeyFrame && payloadDataIndex == 0 { + headerSize = 3 + 8 + } else { + headerSize = 3 + } + + maxFragmentSize := int(mtu) - headerSize currentFragmentSize := min(maxFragmentSize, payloadDataRemaining) - out := make([]byte, vp9HeaderSize+currentFragmentSize) + if currentFragmentSize <= 0 { + return [][]byte{} + } + + out := make([]byte, headerSize+currentFragmentSize) + + out[0] = 0x80 | 0x01 // I=1, Z=1 - out[0] = 0x90 // F=1 I=1 + if !h.IsKeyFrame { + out[0] |= 0x40 // P=1 + } if payloadDataIndex == 0 { out[0] |= 0x08 // B=1 } if payloadDataRemaining == currentFragmentSize { out[0] |= 0x04 // E=1 } + out[1] = byte(p.pictureID>>8) | 0x80 out[2] = byte(p.pictureID) - copy(out[vp9HeaderSize:], payload[payloadDataIndex:payloadDataIndex+currentFragmentSize]) + off := 3 + + if h.IsKeyFrame && payloadDataIndex == 0 { + out[0] |= 0x02 // V=1 + out[off] = 0x10 | 0x08 // N_S=0, Y=1, G=1 + off++ + + width := h.Width() + out[off] = byte(width >> 8) + off++ + out[off] = byte(width & 0xFF) + off++ + + height := h.Height() + out[off] = byte(height >> 8) + off++ + out[off] = byte(height & 0xFF) + off++ + + out[off] = 0x01 // N_G=1 + off++ + + out[off] = 1<<4 | 1<<2 // TID=0, U=1, R=1 + off++ + + out[off] = 0x01 // P_DIFF=1 + off++ + } + + copy(out[headerSize:], payload[payloadDataIndex:payloadDataIndex+currentFragmentSize]) payloads = append(payloads, out) payloadDataRemaining -= currentFragmentSize payloadDataIndex += currentFragmentSize } - p.pictureID++ - if p.pictureID >= 0x8000 { - p.pictureID = 0 - } return payloads } diff --git a/codecs/vp9_packet_test.go b/codecs/vp9_packet_test.go index 97e176b3..5129e0f2 100644 --- a/codecs/vp9_packet_test.go +++ b/codecs/vp9_packet_test.go @@ -5,7 +5,6 @@ package codecs import ( "errors" - "fmt" "math/rand" "reflect" "testing" @@ -222,63 +221,146 @@ func TestVP9Payloader_Payload(t *testing.T) { r0++ } - cases := map[string]struct { - b [][]byte - mtu uint16 - res [][]byte + for _, c := range []struct { + name string + b [][]byte + flexible bool + mtu uint16 + res [][]byte }{ - "NilPayload": { - b: [][]byte{nil}, - mtu: 100, - res: [][]byte{}, - }, - "SmallMTU": { - b: [][]byte{{0x00, 0x00}}, - mtu: 1, - res: [][]byte{}, - }, - "OnePacket": { - b: [][]byte{{0x01, 0x02}}, - mtu: 10, + { + name: "flexible NilPayload", + b: [][]byte{nil}, + flexible: true, + mtu: 100, + res: [][]byte{}, + }, + { + name: "flexible SmallMTU", + b: [][]byte{{0x00, 0x00}}, + flexible: true, + mtu: 1, + res: [][]byte{}, + }, + { + name: "flexible OnePacket", + b: [][]byte{{0x01, 0x02}}, + flexible: true, + mtu: 10, res: [][]byte{ {0x9C, rands[0][0], rands[0][1], 0x01, 0x02}, }, }, - "TwoPackets": { - b: [][]byte{{0x01, 0x02}}, - mtu: 4, + { + name: "flexible TwoPackets", + b: [][]byte{{0x01, 0x02}}, + flexible: true, + mtu: 4, res: [][]byte{ {0x98, rands[0][0], rands[0][1], 0x01}, {0x94, rands[0][0], rands[0][1], 0x02}, }, }, - "ThreePackets": { - b: [][]byte{{0x01, 0x02, 0x03}}, - mtu: 4, + { + name: "flexible ThreePackets", + b: [][]byte{{0x01, 0x02, 0x03}}, + flexible: true, + mtu: 4, res: [][]byte{ {0x98, rands[0][0], rands[0][1], 0x01}, {0x90, rands[0][0], rands[0][1], 0x02}, {0x94, rands[0][0], rands[0][1], 0x03}, }, }, - "TwoFramesFourPackets": { - b: [][]byte{{0x01, 0x02, 0x03}, {0x04}}, - mtu: 5, + { + name: "flexible TwoFramesFourPackets", + b: [][]byte{{0x01, 0x02, 0x03}, {0x04}}, + flexible: true, + mtu: 5, res: [][]byte{ {0x98, rands[0][0], rands[0][1], 0x01, 0x02}, {0x94, rands[0][0], rands[0][1], 0x03}, {0x9C, rands[1][0], rands[1][1], 0x04}, }, }, - } - for name, c := range cases { - pck := VP9Payloader{ - InitialPictureIDFn: func() uint16 { - return uint16(rand.New(rand.NewSource(0)).Int31n(0x7FFF)) //nolint:gosec + { + name: "non-flexible NilPayload", + b: [][]byte{nil}, + mtu: 100, + res: [][]byte{}, + }, + { + name: "non-flexible SmallMTU", + b: [][]byte{{0x82, 0x49, 0x83, 0x42, 0x0, 0x77, 0xf0, 0x32, 0x34}}, + mtu: 1, + res: [][]byte{}, + }, + { + name: "non-flexible OnePacket key frame", + b: [][]byte{{0x82, 0x49, 0x83, 0x42, 0x0, 0x77, 0xf0, 0x32, 0x34}}, + mtu: 20, + res: [][]byte{{ + 0x8f, 0xa1, 0xf4, 0x18, 0x07, 0x80, 0x03, 0x24, + 0x01, 0x14, 0x01, 0x82, 0x49, 0x83, 0x42, 0x00, + 0x77, 0xf0, 0x32, 0x34, + }}, + }, + { + name: "non-flexible TwoPackets key frame", + b: [][]byte{{0x82, 0x49, 0x83, 0x42, 0x0, 0x77, 0xf0, 0x32, 0x34}}, + mtu: 12, + res: [][]byte{ + { + 0x8b, 0xa1, 0xf4, 0x18, 0x07, 0x80, 0x03, 0x24, + 0x01, 0x14, 0x01, 0x82, + }, + { + 0x85, 0xa1, 0xf4, 0x49, 0x83, 0x42, 0x00, 0x77, + 0xf0, 0x32, 0x34, + }, }, - } - c := c - t.Run(fmt.Sprintf("%s_MTU%d", name, c.mtu), func(t *testing.T) { + }, + { + name: "non-flexible ThreePackets key frame", + b: [][]byte{{ + 0x82, 0x49, 0x83, 0x42, 0x00, 0x77, 0xf0, 0x32, + 0x34, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, + }}, + mtu: 12, + res: [][]byte{ + { + 0x8b, 0xa1, 0xf4, 0x18, 0x07, 0x80, 0x03, 0x24, + 0x01, 0x14, 0x01, 0x82, + }, + { + 0x81, 0xa1, 0xf4, 0x49, 0x83, 0x42, 0x00, 0x77, + 0xf0, 0x32, 0x34, 0x01, + }, + { + 0x85, 0xa1, 0xf4, 0x02, 0x03, 0x04, 0x05, 0x06, + 0x07, 0x08, + }, + }, + }, + { + name: "non-flexible OnePacket non key frame", + b: [][]byte{{0x86, 0x0, 0x40, 0x92, 0xe1, 0x31, 0x42, 0x8c, 0xc0, 0x40}}, + mtu: 20, + res: [][]byte{{ + 0xcd, 0xa1, 0xf4, 0x86, 0x00, 0x40, 0x92, 0xe1, + 0x31, 0x42, 0x8c, 0xc0, 0x40, + }}, + }, + } { + t.Run(c.name, func(t *testing.T) { + pck := VP9Payloader{ + FlexibleMode: c.flexible, + InitialPictureIDFn: func() uint16 { + return uint16(rand.New(rand.NewSource(0)).Int31n(0x7FFF)) //nolint:gosec + }, + } + res := [][]byte{} for _, b := range c.b { res = append(res, pck.Payload(c.mtu, b)...) @@ -288,8 +370,10 @@ func TestVP9Payloader_Payload(t *testing.T) { } }) } + t.Run("PictureIDOverflow", func(t *testing.T) { pck := VP9Payloader{ + FlexibleMode: true, InitialPictureIDFn: func() uint16 { return uint16(rand.New(rand.NewSource(0)).Int31n(0x7FFF)) //nolint:gosec },