diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 09c7414..20cd3b7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,8 +29,11 @@ jobs: - uses: actions/checkout@v4 - run: | sudo apt-get update - sudo apt-get install -y libpulse-dev + sudo apt-get install -y libpulse-dev yasm clang-12 if: matrix.m.linux == true + - run: | + brew install yasm + if: matrix.m.linux == false - run: python3 run.py --sumomo --package ${{ matrix.m.name }} - name: Get package name run: | diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index 3af780e..71deb3e 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -5,14 +5,20 @@ "includePath": [ "${workspaceFolder}/include", "${workspaceFolder}/_source/libdatachannel/include", + "${workspaceFolder}/_source/libdatachannel/include/rtc", + "${workspaceFolder}/_source/libdatachannel/src", "${workspaceFolder}/_source/libdatachannel/deps/json/include", "${workspaceFolder}/_source/libdatachannel/deps/plog/include", + "${workspaceFolder}/_source/libdatachannel/deps/libjuice/include", + "${workspaceFolder}/_source/libdatachannel/deps/usrsctp/usrsctplib", + "${workspaceFolder}/_source/libdatachannel/deps/libsrtp/include", "${workspaceFolder}/_source/opus/include", "${workspaceFolder}/_build/ubuntu-20.04_x86_64/release/sorac", "${workspaceFolder}/_build/ubuntu-20.04_x86_64/release/sorac/proto/sorac", "${workspaceFolder}/_install/ubuntu-20.04_x86_64/release/mbedtls/include", "${workspaceFolder}/_install/ubuntu-20.04_x86_64/release/openh264/include", + "${workspaceFolder}/_install/ubuntu-20.04_x86_64/release/aom/include", "${workspaceFolder}/_install/ubuntu-20.04_x86_64/release/libjpeg-turbo/include", "${workspaceFolder}/_install/ubuntu-20.04_x86_64/release/libyuv/include", @@ -20,6 +26,7 @@ "${workspaceFolder}/_build/macos_arm64/release/sorac/proto/sorac", "${workspaceFolder}/_install/macos_arm64/release/mbedtls/include", 
"${workspaceFolder}/_install/macos_arm64/release/openh264/include", + "${workspaceFolder}/_install/macos_arm64/release/aom/include", "${workspaceFolder}/_install/macos_arm64/release/libjpeg-turbo/include", "${workspaceFolder}/_install/macos_arm64/release/libyuv/include" ], diff --git a/.vscode/settings.json b/.vscode/settings.json index 627d8a5..f1161fd 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -103,6 +103,8 @@ "recorder.h": "c", "__mutex_base": "cpp", "sorac.h": "c", - "soracp.json.c.h": "c" + "soracp.json.c.h": "c", + "complex": "cpp", + "cfenv": "cpp" } } \ No newline at end of file diff --git a/CHANGES.md b/CHANGES.md index 22c5e1e..57b202f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,13 @@ ## develop +- [ADD] マルチコーデックサイマルキャストの送信に対応する + - @melpon +- [ADD] libaom を追加して AV1 エンコードできるようにする + - @melpon +- [UPDATE] protoc-gen-jsonif をアップデートして optional 対応する + - @melpon + ## 2024.1.0 **祝いリリース** diff --git a/CMakeLists.txt b/CMakeLists.txt index 06abae2..6acc468 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,8 +92,11 @@ add_custom_command( target_sources(sorac PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/proto/sorac/soracp.json.c.cpp" + src/aom_av1_video_encoder.cpp src/current_time.cpp src/data_channel.cpp + src/default_encoder_adapter.cpp + src/h264_profile_level_id.cpp src/open_h264_video_encoder.cpp src/opus_audio_encoder.cpp src/signaling.cpp @@ -112,9 +115,12 @@ target_sources(sorac "${CMAKE_CURRENT_BINARY_DIR}/proto/sorac/soracp.json.h" "${CMAKE_CURRENT_BINARY_DIR}/proto/sorac/soracp.json.c.h" "${CMAKE_CURRENT_BINARY_DIR}/proto/sorac/soracp.json.c.hpp" + include/sorac/aom_av1_video_encoder.hpp include/sorac/bitrate.hpp include/sorac/current_time.hpp include/sorac/data_channel.hpp + include/sorac/default_encoder_adapter.hpp + include/sorac/h264_profile_level_id.hpp include/sorac/open_h264_video_encoder.hpp include/sorac/opus_audio_encoder.hpp include/sorac/signaling.hpp @@ -132,6 +138,9 @@ target_compile_definitions(sorac PRIVATE 
JSONIF_USE_NLOHMANN_JSON) # OpenH264 target_include_directories(sorac PRIVATE ${OPENH264_ROOT_DIR}/include) +# AOM +target_include_directories(sorac PRIVATE ${AOM_ROOT_DIR}/include) + set_target_properties(sorac PROPERTIES CXX_STANDARD 20 C_STANDARD 20) set(_LIBS diff --git a/NOTICE.md b/NOTICE.md index 13e48f1..7fe0396 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -333,3 +333,68 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ``` + +## AOM + +https://aomedia.googlesource.com/aom/ + +``` +Copyright (c) 2016, Alliance for Open Media. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+``` + +## WebRTC + +https://webrtc.googlesource.com/src/ + +``` +Copyright (c) 2011, The WebRTC project authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of Google nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+``` diff --git a/VERSION b/VERSION index c453195..d07d839 100644 --- a/VERSION +++ b/VERSION @@ -1,8 +1,10 @@ SORA_C_SDK_VERSION=2024.1.0 -LIBDATACHANNEL_VERSION=v0.20.1 +LIBDATACHANNEL_URL=https://github.com/melpon/libdatachannel.git +LIBDATACHANNEL_VERSION=005820eb54585b57fe5567a7ad51b801598c159f OPUS_VERSION=v1.4 CMAKE_VERSION=3.28.1 OPENH264_VERSION=v2.4.0 +AOM_VERSION=v3.8.1 MBEDTLS_VERSION=v3.5.1 PROTOBUF_VERSION=21.1 PROTOC_GEN_JSONIF_VERSION=0.12.1 diff --git a/doc/FAQ.md b/doc/FAQ.md index 801420f..24b44b2 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -80,6 +80,25 @@ libwebrtc ベースの [Sora C++ SDK](https://github.com/shiguredo/sora-cpp-sdk) --cacert cacert.pem ``` +### マルチコーデックサイマルキャスト + +マルチコーデックサイマルキャストを利用した配信を macOS arm64 で利用する例です。 + +```bash +./sumomo --signaling-url wss://sora.example.com/signaling \ + --channel-id sora \ + --simulcast true \ + --simulcast-multicodec true \ + --audio=true \ + --audio-type=macos \ + --capture-type mac \ + --capture-device-name=OBS \ + --video-codec-type=AV1 \ + --video-bit-rate 5000 \ + --h264-encoder-type videotoolbox \ + --aom `pwd`/_install/macos_arm64/release/aom/lib/libaom.dylib +``` + ## Sumomo のヘルプ ```bash diff --git a/examples/sumomo/fake_capturer.cpp b/examples/sumomo/fake_capturer.cpp index c63c956..99cd8b0 100644 --- a/examples/sumomo/fake_capturer.cpp +++ b/examples/sumomo/fake_capturer.cpp @@ -13,7 +13,8 @@ namespace sumomo { class FakeCapturer : public SumomoCapturer { public: - FakeCapturer() { + FakeCapturer(int width, int height, int fps, SumomoFakeCapturerFormat format) + : width_(width), height_(height), fps_(fps), format_(format) { this->destroy = [](SumomoCapturer* p) { delete (sumomo::FakeCapturer*)p; }; this->set_frame_callback = [](SumomoCapturer* p, sumomo_capturer_on_frame_func on_frame, @@ -42,19 +43,27 @@ class FakeCapturer : public SumomoCapturer { engine_ = std::make_unique(seed_gen()); return nullptr; }); - th_.Start(30, [this](std::chrono::microseconds timestamp, 
std::chrono::microseconds prev) { - std::uniform_int_distribution dist(0, 640 * 480 - 1); + th_.Start(fps_, [this](std::chrono::microseconds timestamp, + std::chrono::microseconds prev) { + std::uniform_int_distribution dist(0, width_ * height_ - 1); sorac::VideoFrame frame; frame.timestamp = timestamp; - frame.i420_buffer = sorac::VideoFrameBufferI420::Create(640, 480); - frame.i420_buffer->y[dist(*engine_)] = 0xff; - frame.i420_buffer->y[dist(*engine_)] = 0xff; - frame.i420_buffer->y[dist(*engine_)] = 0xff; - frame.i420_buffer->y[dist(*engine_)] = 0xff; - frame.i420_buffer->y[dist(*engine_)] = 0xff; - frame.base_width = 640; - frame.base_height = 480; + frame.frame_number = ++frame_number_; + if (format_ == SUMOMO_FAKE_CAPTURER_FORMAT_I420) { + frame.i420_buffer = + sorac::VideoFrameBufferI420::Create(width_, height_); + for (int i = 0; i < width_ / 10; i++) { + frame.i420_buffer->y[dist(*engine_)] = 0xff; + } + } else if (format_ == SUMOMO_FAKE_CAPTURER_FORMAT_NV12) { + frame.nv12_buffer = + sorac::VideoFrameBufferNV12::Create(width_, height_); + for (int i = 0; i < width_ / 10; i++) { + frame.nv12_buffer->y[dist(*engine_)] = 0xff; + } + } + frame.base_width = width_; + frame.base_height = height_; callback_(frame); }); return 0; @@ -62,6 +71,11 @@ class FakeCapturer : public SumomoCapturer { void Stop() { th_.Stop(); } private: + int width_; + int height_; + int fps_; + int frame_number_ = 0; + SumomoFakeCapturerFormat format_; std::function callback_; SteadyFrameThread th_; std::unique_ptr engine_; @@ -71,7 +85,10 @@ class FakeCapturer : public SumomoCapturer { extern "C" { -SumomoCapturer* sumomo_fake_capturer_create() { - return new sumomo::FakeCapturer(); +SumomoCapturer* sumomo_fake_capturer_create(int width, + int height, + int fps, + SumomoFakeCapturerFormat format) { + return new sumomo::FakeCapturer(width, height, fps, format); } } diff --git a/examples/sumomo/fake_capturer.h b/examples/sumomo/fake_capturer.h index 31e6421..e95dcce 100644 --- 
a/examples/sumomo/fake_capturer.h +++ b/examples/sumomo/fake_capturer.h @@ -9,7 +9,16 @@ extern "C" { #endif -extern SumomoCapturer* sumomo_fake_capturer_create(); +typedef enum { + SUMOMO_FAKE_CAPTURER_FORMAT_I420 = 0, + SUMOMO_FAKE_CAPTURER_FORMAT_NV12 = 1, +} SumomoFakeCapturerFormat; + +extern SumomoCapturer* sumomo_fake_capturer_create( + int width, + int height, + int fps, + SumomoFakeCapturerFormat format); #ifdef __cplusplus } diff --git a/examples/sumomo/mac_capturer.h b/examples/sumomo/mac_capturer.h index f9e6da6..335e828 100644 --- a/examples/sumomo/mac_capturer.h +++ b/examples/sumomo/mac_capturer.h @@ -11,7 +11,8 @@ extern "C" { extern SumomoCapturer* sumomo_mac_capturer_create(const char* device, int width, - int height); + int height, + int fps); #ifdef __cplusplus } diff --git a/examples/sumomo/mac_capturer.mm b/examples/sumomo/mac_capturer.mm index f820d4f..d931845 100644 --- a/examples/sumomo/mac_capturer.mm +++ b/examples/sumomo/mac_capturer.mm @@ -48,6 +48,7 @@ @implementation SumomoMacCapturer { std::function _callback; BOOL _willBeRunning; dispatch_queue_t _frameQueue; + int _frameNumber; } - (instancetype)initWithCallback: @@ -64,6 +65,7 @@ - (instancetype)initWithCallback: _videoDataOutput = [[AVCaptureVideoDataOutput alloc] init]; _willBeRunning = NO; _frameQueue = nil; + _frameNumber = 0; NSSet* supportedPixelFormats = [NSSet setWithObjects:@(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange), @@ -283,6 +285,7 @@ - (void)captureOutput:(AVCaptureOutput*)captureOutput (int64_t)(CMTimeGetSeconds( CMSampleBufferGetPresentationTimeStamp(sampleBuffer)) * kMicrosecondsPerSecond)); + frame.frame_number = ++_frameNumber; frame.nv12_buffer = sorac::VideoFrameBufferNV12::Create(width, height); frame.base_width = width; frame.base_height = height; @@ -368,10 +371,11 @@ - (dispatch_queue_t)frameQueue { class MacCapturer : public SumomoCapturer { public: - MacCapturer(const char* device, int width, int height) { + MacCapturer(const char* device, int 
width, int height, int fps) { this->device_ = device; this->width_ = width; this->height_ = height; + this->fps_ = fps; this->destroy = [](SumomoCapturer* p) { delete (sumomo::MacCapturer*)p; }; this->set_frame_callback = [](SumomoCapturer* p, sumomo_capturer_on_frame_func on_frame, @@ -385,7 +389,7 @@ - (dispatch_queue_t)frameQueue { }; this->start = [](SumomoCapturer* p) { auto q = (sumomo::MacCapturer*)p; - return q->Start(q->device_.c_str(), q->width_, q->height_); + return q->Start(q->device_.c_str(), q->width_, q->height_, q->fps_); }; this->stop = [](SumomoCapturer* p) { ((sumomo::MacCapturer*)p)->Stop(); }; } @@ -396,14 +400,14 @@ void SetFrameCallback( callback_ = callback; } - int Start(const char* device, int width, int height) { + int Start(const char* device, int width, int height, int fps) { Stop(); capturer_ = [[SumomoMacCapturer alloc] initWithCallback:callback_]; [capturer_ startCaptureWithDeviceName:device width:width height:height - fps:30 + fps:fps completionHandler:[](NSError* _Nullable error) { if (error) { fprintf(stderr, "Failed to start capture: %s\n", @@ -422,6 +426,7 @@ void Stop() { std::function callback_; int width_; int height_; + int fps_; SumomoMacCapturer* capturer_; }; @@ -432,7 +437,8 @@ void Stop() { SumomoCapturer* sumomo_mac_capturer_create(const char* device, int width, - int height) { - return new sumomo::MacCapturer(device, width, height); + int height, + int fps) { + return new sumomo::MacCapturer(device, width, height, fps); } } diff --git a/examples/sumomo/option.c b/examples/sumomo/option.c index 38a4746..10eca3b 100644 --- a/examples/sumomo/option.c +++ b/examples/sumomo/option.c @@ -11,17 +11,24 @@ static struct option long_opts[] = { {"signaling-url", required_argument, 0, 0}, {"channel-id", required_argument, 0, 0}, {"simulcast", required_argument, 0, 0}, + {"simulcast-multicodec", required_argument, 0, 0}, {"video-codec-type", required_argument, 0, 0}, {"video-bit-rate", required_argument, 0, 0}, {"metadata", 
required_argument, 0, 0}, + {"video", required_argument, 0, 0}, + {"audio", required_argument, 0, 0}, + {"capture-type", required_argument, 0, 0}, {"capture-device-name", required_argument, 0, 0}, {"capture-device-width", required_argument, 0, 0}, {"capture-device-height", required_argument, 0, 0}, + {"capture-device-fps", required_argument, 0, 0}, {"audio-type", required_argument, 0, 0}, {"h264-encoder-type", required_argument, 0, 0}, {"h265-encoder-type", required_argument, 0, 0}, + {"av1-encoder-type", required_argument, 0, 0}, {"openh264", required_argument, 0, 0}, + {"aom", required_argument, 0, 0}, {"cacert", required_argument, 0, 0}, {"help", no_argument, 0, 0}, {0, 0, 0, 0}, @@ -37,7 +44,7 @@ int sumomo_option_parse(SumomoOption* option, } *error = 0; memset(option, 0, sizeof(SumomoOption)); - option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE; + option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE_I420; #if defined(__linux__) option->capture_device_name = "/dev/video0"; #elif defined(__APPLE__) @@ -45,8 +52,8 @@ int sumomo_option_parse(SumomoOption* option, #endif option->capture_device_width = 640; option->capture_device_height = 480; + option->capture_device_fps = 30; option->audio_type = SUMOMO_OPTION_AUDIO_TYPE_FAKE; - option->video_codec_type = "H264"; option->cacert = "/etc/ssl/certs/ca-certificates.crt"; int index; @@ -57,6 +64,20 @@ int sumomo_option_parse(SumomoOption* option, switch (c) { case 0: #define OPT_IS(optname) strcmp(long_opts[index].name, optname) == 0 +#define SET_OPTBOOL(name) \ + do { \ + if (strcmp(optarg, "true") == 0) { \ + name = SUMOMO_OPTIONAL_BOOL_TRUE; \ + } else if (strcmp(optarg, "false") == 0) { \ + name = SUMOMO_OPTIONAL_BOOL_FALSE; \ + } else if (strcmp(optarg, "none") == 0) { \ + name = SUMOMO_OPTIONAL_BOOL_NONE; \ + } else { \ + fprintf(stderr, "Failed to set to " #name ": %s\n", optarg); \ + *error = 1; \ + } \ + } while (false) + if (OPT_IS("signaling-url")) { if (option->signaling_url_len >= 
sizeof(option->signaling_url) / @@ -70,21 +91,16 @@ int sumomo_option_parse(SumomoOption* option, } else if (OPT_IS("channel-id")) { option->channel_id = optarg; } else if (OPT_IS("simulcast")) { - if (strcmp(optarg, "true") == 0) { - option->simulcast = SUMOMO_OPTIONAL_BOOL_TRUE; - } else if (strcmp(optarg, "false") == 0) { - option->simulcast = SUMOMO_OPTIONAL_BOOL_FALSE; - } else if (strcmp(optarg, "none") == 0) { - option->simulcast = SUMOMO_OPTIONAL_BOOL_NONE; - } else { - fprintf(stderr, "Invalid simulcast: %s\n", optarg); - *error = 1; - } + SET_OPTBOOL(option->simulcast); + } else if (OPT_IS("simulcast-multicodec")) { + SET_OPTBOOL(option->simulcast_multicodec); } else if (OPT_IS("video-codec-type")) { if (strcmp(optarg, "H264") == 0) { option->video_codec_type = optarg; } else if (strcmp(optarg, "H265") == 0) { option->video_codec_type = optarg; + } else if (strcmp(optarg, "AV1") == 0) { + option->video_codec_type = optarg; } else { fprintf(stderr, "Invalid video encoder type: %s\n", optarg); *error = 1; @@ -98,9 +114,15 @@ int sumomo_option_parse(SumomoOption* option, } } else if (OPT_IS("metadata")) { option->metadata = optarg; + } else if (OPT_IS("video")) { + SET_OPTBOOL(option->video); + } else if (OPT_IS("audio")) { + SET_OPTBOOL(option->audio); } else if (OPT_IS("capture-type")) { - if (strcmp(optarg, "fake") == 0) { - option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE; + if (strcmp(optarg, "fake-i420") == 0) { + option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE_I420; + } else if (strcmp(optarg, "fake-nv12") == 0) { + option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_FAKE_NV12; } else if (strcmp(optarg, "v4l2") == 0) { option->capture_type = SUMOMO_OPTION_CAPTURE_TYPE_V4L2; } else if (strcmp(optarg, "mac") == 0) { @@ -115,6 +137,8 @@ int sumomo_option_parse(SumomoOption* option, option->capture_device_width = atoi(optarg); } else if (OPT_IS("capture-device-height")) { option->capture_device_height = atoi(optarg); + } else if 
(OPT_IS("capture-device-fps")) { + option->capture_device_fps = atoi(optarg); } else if (OPT_IS("audio-type")) { if (strcmp(optarg, "fake") == 0) { option->audio_type = SUMOMO_OPTION_AUDIO_TYPE_FAKE; @@ -142,8 +166,17 @@ int sumomo_option_parse(SumomoOption* option, fprintf(stderr, "Invalid h265 encoder type: %s\n", optarg); *error = 1; } + } else if (OPT_IS("av1-encoder-type")) { + if (strcmp(optarg, "aom") == 0) { + option->av1_encoder_type = soracp_AV1_ENCODER_TYPE_AOM; + } else { + fprintf(stderr, "Invalid AV1 encoder type: %s\n", optarg); + *error = 1; + } } else if (OPT_IS("openh264")) { option->openh264 = optarg; + } else if (OPT_IS("aom")) { + option->aom = optarg; } else if (OPT_IS("cacert")) { option->cacert = optarg; } else if (OPT_IS("help")) { @@ -162,17 +195,23 @@ int sumomo_option_parse(SumomoOption* option, fprintf(stdout, " --signaling-url=URL [required]\n"); fprintf(stdout, " --channel-id=ID [required]\n"); fprintf(stdout, " --simulcast=true,false,none\n"); + fprintf(stdout, " --simulcast-multicodec=true,false,none\n"); fprintf(stdout, " --video-codec-type=H264,H265\n"); - fprintf(stdout, " --video-bit-rate=0-5000 [kbps]\n"); + fprintf(stdout, " --video-bit-rate=0-15000 [kbps]\n"); fprintf(stdout, " --metadata=JSON\n"); - fprintf(stdout, " --capture-type=fake,v4l2,mac\n"); + fprintf(stdout, " --video=true,false,none\n"); + fprintf(stdout, " --audio=true,false,none\n"); + fprintf(stdout, " --capture-type=fake-i420,fake-nv12,v4l2,mac\n"); fprintf(stdout, " --capture-device-name=NAME\n"); fprintf(stdout, " --capture-device-width=WIDTH\n"); fprintf(stdout, " --capture-device-height=HEIGHT\n"); + fprintf(stdout, " --capture-device-fps=FPS\n"); fprintf(stdout, " --audio-type=fake,pulse,macos\n"); fprintf(stdout, " --h264-encoder-type=openh264,videotoolbox\n"); fprintf(stdout, " --h265-encoder-type=videotoolbox\n"); + fprintf(stdout, " --av1-encoder-type=aom\n"); fprintf(stdout, " --openh264=PATH\n"); + fprintf(stdout, " --aom=PATH\n"); fprintf(stdout, " 
--cacert=PATH\n"); fprintf(stdout, " --help\n"); return -1; diff --git a/examples/sumomo/option.h b/examples/sumomo/option.h index 658c950..ab628fc 100644 --- a/examples/sumomo/option.h +++ b/examples/sumomo/option.h @@ -10,7 +10,8 @@ typedef enum SumomoOptionalBool { } SumomoOptionalBool; typedef enum SumomoOptionCaptureType { - SUMOMO_OPTION_CAPTURE_TYPE_FAKE, + SUMOMO_OPTION_CAPTURE_TYPE_FAKE_I420, + SUMOMO_OPTION_CAPTURE_TYPE_FAKE_NV12, SUMOMO_OPTION_CAPTURE_TYPE_V4L2, SUMOMO_OPTION_CAPTURE_TYPE_MAC, } SumomoOptionCaptureType; @@ -25,18 +26,24 @@ typedef struct SumomoOption { int signaling_url_len; const char* channel_id; SumomoOptionalBool simulcast; + SumomoOptionalBool simulcast_multicodec; const char* video_codec_type; int video_bit_rate; const char* metadata; + SumomoOptionalBool video; + SumomoOptionalBool audio; SumomoOptionCaptureType capture_type; const char* capture_device_name; int capture_device_width; int capture_device_height; + int capture_device_fps; SumomoOptionAudioType audio_type; soracp_H264EncoderType h264_encoder_type; soracp_H265EncoderType h265_encoder_type; + soracp_Av1EncoderType av1_encoder_type; const char* openh264; + const char* aom; const char* cacert; } SumomoOption; diff --git a/examples/sumomo/sumomo.c b/examples/sumomo/sumomo.c index df2e6e5..f4921a3 100644 --- a/examples/sumomo/sumomo.c +++ b/examples/sumomo/sumomo.c @@ -27,7 +27,7 @@ typedef struct State { SumomoRecorder* recorder; SumomoCapturer* capturer; SoracDataChannel* data_channel; - soracp_RtpEncodingParameters rtp_encoding_parameters; + soracp_RtpParameters rtp_parameters; } State; void on_capture_frame_scaled(SoracVideoFrameRef* frame, void* userdata) { @@ -37,22 +37,29 @@ void on_capture_frame_scaled(SoracVideoFrameRef* frame, void* userdata) { void on_capture_frame(SoracVideoFrameRef* frame, void* userdata) { State* state = (State*)userdata; - sorac_signaling_get_rtp_encoding_parameters(state->signaling, - &state->rtp_encoding_parameters); - if 
(!state->rtp_encoding_parameters.enable_parameters) { + sorac_signaling_get_rtp_parameters(state->signaling, &state->rtp_parameters); + if (state->rtp_parameters.encodings_len == 0) { sorac_signaling_send_video_frame(state->signaling, frame); } else { // 動的な確保が面倒なので適当に固定で持っておく const char* rids[10]; - int rids_len = state->rtp_encoding_parameters.parameters_len; - if (rids_len > sizeof(rids) / sizeof(rids[0])) { - rids_len = sizeof(rids) / sizeof(rids[0]); + float scales[10]; + int len = state->rtp_parameters.encodings_len; + if (len > sizeof(rids) / sizeof(rids[0])) { + len = sizeof(rids) / sizeof(rids[0]); } - for (int i = 0; i < rids_len; i++) { - rids[i] = state->rtp_encoding_parameters.parameters[i].rid; + for (int i = 0; i < len; i++) { + soracp_RtpEncodingParameters* encoding = + &state->rtp_parameters.encodings[i]; + rids[i] = encoding->rid; + if (soracp_RtpEncodingParameters_has_scale_resolution_down_by(encoding)) { + scales[i] = encoding->scale_resolution_down_by; + } else { + scales[i] = 1.0f; + } } - sumomo_util_scale_simulcast(rids, rids_len, frame, on_capture_frame_scaled, - state); + sumomo_util_scale_simulcast(rids, scales, len, frame, + on_capture_frame_scaled, state); } } @@ -74,7 +81,7 @@ void on_track(SoracTrack* track, void* userdata) { #if defined(__linux__) state->capturer = sumomo_v4l2_capturer_create( state->opt->capture_device_name, state->opt->capture_device_width, - state->opt->capture_device_height); + state->opt->capture_device_height, state->opt->capture_device_fps); #else fprintf(stderr, "V4L2 capturer cannot be used on environments other than Linux"); @@ -84,14 +91,19 @@ void on_track(SoracTrack* track, void* userdata) { #if defined(__APPLE__) state->capturer = sumomo_mac_capturer_create( state->opt->capture_device_name, state->opt->capture_device_width, - state->opt->capture_device_height); + state->opt->capture_device_height, state->opt->capture_device_fps); #else fprintf(stderr, "V4L2 capturer cannot be used on environments other 
than Linux"); exit(1); #endif } else { - state->capturer = sumomo_fake_capturer_create(); + state->capturer = sumomo_fake_capturer_create( + state->opt->capture_device_width, state->opt->capture_device_height, + state->opt->capture_device_fps, + state->opt->capture_type == SUMOMO_OPTION_CAPTURE_TYPE_FAKE_I420 + ? SUMOMO_FAKE_CAPTURER_FORMAT_I420 + : SUMOMO_FAKE_CAPTURER_FORMAT_NV12); } sumomo_capturer_set_frame_callback(state->capturer, on_capture_frame, state); @@ -164,7 +176,7 @@ int main(int argc, char* argv[]) { sorac_plog_init(); State state = {0}; - soracp_RtpEncodingParameters_init(&state.rtp_encoding_parameters); + soracp_RtpParameters_init(&state.rtp_parameters); soracp_SignalingConfig config; soracp_SoraConnectConfig sora_config; soracp_DataChannel dc; @@ -183,13 +195,15 @@ int main(int argc, char* argv[]) { if (opt.openh264 != NULL) { soracp_SignalingConfig_set_openh264(&config, opt.openh264); } + if (opt.aom != NULL) { + soracp_SignalingConfig_set_aom(&config, opt.aom); + } if (opt.cacert != NULL) { soracp_SignalingConfig_set_ca_certificate(&config, opt.cacert); } soracp_SignalingConfig_set_h264_encoder_type(&config, opt.h264_encoder_type); soracp_SignalingConfig_set_h265_encoder_type(&config, opt.h265_encoder_type); - soracp_SignalingConfig_set_video_encoder_initial_bitrate_kbps( - &config, opt.video_bit_rate == 0 ? 
500 : opt.video_bit_rate); + soracp_SignalingConfig_set_av1_encoder_type(&config, opt.av1_encoder_type); SoracSignaling* signaling = sorac_signaling_create(&config); state.signaling = signaling; @@ -200,7 +214,6 @@ int main(int argc, char* argv[]) { soracp_SoraConnectConfig_set_role(&sora_config, "sendonly"); soracp_SoraConnectConfig_set_channel_id(&sora_config, opt.channel_id); - sora_config.video = true; if (opt.video_codec_type != NULL) { soracp_SoraConnectConfig_set_video_codec_type(&sora_config, opt.video_codec_type); @@ -212,17 +225,23 @@ int main(int argc, char* argv[]) { if (opt.metadata != NULL) { soracp_SoraConnectConfig_set_metadata(&sora_config, opt.metadata); } - soracp_SoraConnectConfig_set_audio(&sora_config, true); - soracp_SoraConnectConfig_set_multistream(&sora_config, - soracp_OPTIONAL_BOOL_TRUE); - soracp_SoraConnectConfig_set_data_channel_signaling( - &sora_config, soracp_OPTIONAL_BOOL_TRUE); - soracp_SoraConnectConfig_set_simulcast( - &sora_config, opt.simulcast == SUMOMO_OPTIONAL_BOOL_NONE - ? soracp_OPTIONAL_BOOL_NONE - : opt.simulcast == SUMOMO_OPTIONAL_BOOL_FALSE - ? 
soracp_OPTIONAL_BOOL_FALSE - : soracp_OPTIONAL_BOOL_TRUE); + + // none, true の場合は true, false の場合は false + soracp_SoraConnectConfig_set_video(&sora_config, + opt.video != SUMOMO_OPTIONAL_BOOL_FALSE); + soracp_SoraConnectConfig_set_audio(&sora_config, + opt.audio != SUMOMO_OPTIONAL_BOOL_FALSE); + + soracp_SoraConnectConfig_set_multistream(&sora_config, true); + soracp_SoraConnectConfig_set_data_channel_signaling(&sora_config, true); + if (opt.simulcast != SUMOMO_OPTIONAL_BOOL_NONE) { + soracp_SoraConnectConfig_set_simulcast( + &sora_config, opt.simulcast == SUMOMO_OPTIONAL_BOOL_TRUE); + } + if (opt.simulcast_multicodec != SUMOMO_OPTIONAL_BOOL_NONE) { + soracp_SoraConnectConfig_set_simulcast_multicodec( + &sora_config, opt.simulcast_multicodec == SUMOMO_OPTIONAL_BOOL_TRUE); + } soracp_SoraConnectConfig_alloc_data_channels(&sora_config, 1); soracp_DataChannel_set_label(&dc, "#test"); diff --git a/examples/sumomo/util.cpp b/examples/sumomo/util.cpp index 1c00167..c5346ec 100644 --- a/examples/sumomo/util.cpp +++ b/examples/sumomo/util.cpp @@ -8,26 +8,17 @@ extern "C" { void sumomo_util_scale_simulcast(const char* rids[], - int num_rids, + const float scales[], + int len, SoracVideoFrameRef* frame, void (*scaled)(SoracVideoFrameRef* frame, void* userdata), void* userdata) { - for (int i = 0; i < num_rids; i++) { + for (int i = 0; i < len; i++) { sorac::VideoFrame f = *(sorac::VideoFrame*)frame; f.rid = rids[i]; - int width; - int height; - if (*f.rid == "r0") { - width = f.width() / 4; - height = f.height() / 4; - } else if (*f.rid == "r1") { - width = f.width() / 2; - height = f.height() / 2; - } else { - width = f.width(); - height = f.height(); - } + int width = (int)(f.width() / scales[i]); + int height = (int)(f.height() / scales[i]); if (f.width() != width || f.height() != height) { if (f.i420_buffer) { auto fb = sorac::VideoFrameBufferI420::Create(width, height); diff --git a/examples/sumomo/util.h b/examples/sumomo/util.h index 9ecd227..95b4c7b 100644 --- 
a/examples/sumomo/util.h +++ b/examples/sumomo/util.h @@ -9,7 +9,8 @@ extern "C" { extern void sumomo_util_scale_simulcast( const char* rids[], - int num_rids, + const float scales[], + int len, SoracVideoFrameRef* frame, void (*scaled)(SoracVideoFrameRef* frame, void* userdata), void* userdata); diff --git a/examples/sumomo/v4l2_capturer.cpp b/examples/sumomo/v4l2_capturer.cpp index 06a12dc..2308e4c 100644 --- a/examples/sumomo/v4l2_capturer.cpp +++ b/examples/sumomo/v4l2_capturer.cpp @@ -32,10 +32,11 @@ namespace sumomo { class V4L2Capturer : public SumomoCapturer { public: - V4L2Capturer(const char* device, int width, int height) { + V4L2Capturer(const char* device, int width, int height, int fps) { this->device_ = device; this->width_ = width; this->height_ = height; + this->fps_ = fps; this->destroy = [](SumomoCapturer* p) { delete (sumomo::V4L2Capturer*)p; }; this->set_frame_callback = [](SumomoCapturer* p, sumomo_capturer_on_frame_func on_frame, @@ -49,7 +50,7 @@ class V4L2Capturer : public SumomoCapturer { }; this->start = [](SumomoCapturer* p) { auto q = (sumomo::V4L2Capturer*)p; - return q->Start(q->device_.c_str(), q->width_, q->height_); + return q->Start(q->device_.c_str(), q->width_, q->height_, q->fps_); }; this->stop = [](SumomoCapturer* p) { ((sumomo::V4L2Capturer*)p)->Stop(); }; } @@ -59,7 +60,7 @@ class V4L2Capturer : public SumomoCapturer { callback_ = callback; } - int Start(const char* device, int width, int height) { + int Start(const char* device, int width, int height, int fps) { Stop(); device_fd_ = open(device, O_RDWR | O_NONBLOCK, 0); @@ -102,6 +103,24 @@ class V4L2Capturer : public SumomoCapturer { width_ = fmt.fmt.pix.width; height_ = fmt.fmt.pix.height; + // フレームレートの設定 + struct v4l2_streamparm sp; + memset(&sp, 0, sizeof(sp)); + sp.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + if (ioctl(device_fd_, VIDIOC_G_PARM, &sp) < 0) { + fprintf(stderr, "Failed to VIDIOC_G_PARM: %s\n", strerror(errno)); + } else { + if ((sp.parm.capture.capability & 
V4L2_CAP_TIMEPERFRAME) != 0) { + memset(&sp, 0, sizeof(sp)); + sp.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + sp.parm.capture.timeperframe.numerator = 1; + sp.parm.capture.timeperframe.denominator = fps; + if (ioctl(device_fd_, VIDIOC_S_PARM, &sp) < 0) { + fprintf(stderr, "Failed to set the framerate: %s\n", strerror(errno)); + } + } + } + // ビデオバッファの設定 const int V4L2_BUFFER_COUNT = 4; { @@ -210,6 +229,7 @@ class V4L2Capturer : public SumomoCapturer { frame.timestamp = sorac::get_current_time(); frame.base_width = width_; frame.base_height = height_; + frame.frame_number = ++frame_number_; callback_(frame); if (ioctl(device_fd_, VIDIOC_QBUF, &buf) < 0) { @@ -247,6 +267,8 @@ class V4L2Capturer : public SumomoCapturer { std::function callback_; int width_; int height_; + int fps_; + int frame_number_ = 0; int device_fd_ = -1; std::atomic quit_; @@ -265,7 +287,8 @@ extern "C" { SumomoCapturer* sumomo_v4l2_capturer_create(const char* device, int width, - int height) { - return new sumomo::V4L2Capturer(device, width, height); + int height, + int fps) { + return new sumomo::V4L2Capturer(device, width, height, fps); } } diff --git a/examples/sumomo/v4l2_capturer.h b/examples/sumomo/v4l2_capturer.h index f991721..85a4ae1 100644 --- a/examples/sumomo/v4l2_capturer.h +++ b/examples/sumomo/v4l2_capturer.h @@ -11,7 +11,8 @@ extern "C" { extern SumomoCapturer* sumomo_v4l2_capturer_create(const char* device, int width, - int height); + int height, + int fps); #ifdef __cplusplus } diff --git a/include/sorac/aom_av1_video_encoder.hpp b/include/sorac/aom_av1_video_encoder.hpp new file mode 100644 index 0000000..5594a33 --- /dev/null +++ b/include/sorac/aom_av1_video_encoder.hpp @@ -0,0 +1,15 @@ +#ifndef SORAC_AOM_AV1_VIDEO_ENCODER_HPP_ +#define SORAC_AOM_AV1_VIDEO_ENCODER_HPP_ + +#include +#include + +#include "video_encoder.hpp" + +namespace sorac { + +std::shared_ptr CreateAomAv1VideoEncoder(const std::string& aom); + +} + +#endif diff --git a/include/sorac/default_encoder_adapter.hpp 
// https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/video_codecs/h264_profile_level_id.h
// Only the parts needed by sorac were extracted from the file above and
// modified.

/*
 *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// The guard deliberately uses the SORAC_ prefix (like the other sorac
// headers) instead of the upstream WebRTC guard name, so that including both
// this header and WebRTC's own h264_profile_level_id.h in one translation
// unit does not silently drop one of them.
#ifndef SORAC_H264_PROFILE_LEVEL_ID_HPP_
#define SORAC_H264_PROFILE_LEVEL_ID_HPP_

#include <optional>
#include <string>

namespace sorac {

// H.264 profiles that ParseH264ProfileLevelId() can report.
enum class H264Profile {
  kProfileConstrainedBaseline,
  kProfileBaseline,
  kProfileMain,
  kProfileConstrainedHigh,
  kProfileHigh,
  kProfilePredictiveHigh444,
};

// All values are equal to ten times the level number, except level 1b which is
// special.
enum class H264Level {
  kLevel1_b = 0,
  kLevel1 = 10,
  kLevel1_1 = 11,
  kLevel1_2 = 12,
  kLevel1_3 = 13,
  kLevel2 = 20,
  kLevel2_1 = 21,
  kLevel2_2 = 22,
  kLevel3 = 30,
  kLevel3_1 = 31,
  kLevel3_2 = 32,
  kLevel4 = 40,
  kLevel4_1 = 41,
  kLevel4_2 = 42,
  kLevel5 = 50,
  kLevel5_1 = 51,
  kLevel5_2 = 52
};

// A parsed (profile, level) pair.
struct H264ProfileLevelId {
  H264ProfileLevelId(H264Profile profile, H264Level level)
      : profile(profile), level(level) {}
  H264Profile profile;
  H264Level level;
};

// Parse profile level id that is represented as a string of 3 hex bytes.
// Nothing will be returned if the string is not a recognized H264
// profile level id.
std::optional<H264ProfileLevelId> ParseH264ProfileLevelId(const char* str);

}  // namespace sorac

#endif  // SORAC_H264_PROFILE_LEVEL_ID_HPP_
sorac_signaling_set_on_push(SoracSignaling* p, sorac_signaling_on_push_func on_push, void* userdata); -extern void sorac_signaling_get_rtp_encoding_parameters( - SoracSignaling* p, - soracp_RtpEncodingParameters* params); +extern void sorac_signaling_get_rtp_parameters(SoracSignaling* p, + soracp_RtpParameters* params); #ifdef __cplusplus } diff --git a/include/sorac/types.hpp b/include/sorac/types.hpp index b1a02c4..87074e7 100644 --- a/include/sorac/types.hpp +++ b/include/sorac/types.hpp @@ -45,6 +45,9 @@ struct VideoFrame { int height() const { return i420_buffer != nullptr ? i420_buffer->height : nv12_buffer->height; } + // サイマルキャストで DD を利用する時にフレーム番号を全体で同じにする必要があるため + // ここにフレーム番号を持たせる + int frame_number; }; struct EncodedImage { @@ -52,6 +55,9 @@ struct EncodedImage { int size; std::chrono::microseconds timestamp; std::optional rid; + // rtc::RtpPacketizationConfig::DependencyDescriptorContext 型なんだけど、ここで + // libdatachannel のヘッダーを include してはいけないので shared_ptr を利用する + std::shared_ptr dependency_descriptor_context; }; struct AudioFrame { diff --git a/include/sorac/video_encoder.hpp b/include/sorac/video_encoder.hpp index a55feab..f987d38 100644 --- a/include/sorac/video_encoder.hpp +++ b/include/sorac/video_encoder.hpp @@ -15,6 +15,7 @@ class VideoEncoder { int width; int height; Bps bitrate; + int fps; }; virtual ~VideoEncoder() {} diff --git a/include/sorac/vt_h26x_video_encoder.hpp b/include/sorac/vt_h26x_video_encoder.hpp index 09d211a..72df488 100644 --- a/include/sorac/vt_h26x_video_encoder.hpp +++ b/include/sorac/vt_h26x_video_encoder.hpp @@ -2,8 +2,10 @@ #define SORAC_VT_H26X_VIDEO_ENCODER_HPP_ #include +#include #include +#include "h264_profile_level_id.hpp" #include "video_encoder.hpp" namespace sorac { @@ -14,7 +16,8 @@ enum class VTH26xVideoEncoderType { }; std::shared_ptr CreateVTH26xVideoEncoder( - VTH26xVideoEncoderType type); + VTH26xVideoEncoderType type, + std::optional profile); } // namespace sorac diff --git a/proto/soracp.proto 
b/proto/soracp.proto index 5c2d16f..f9db82b 100644 --- a/proto/soracp.proto +++ b/proto/soracp.proto @@ -2,12 +2,6 @@ syntax = "proto3"; package soracp; -enum OptionalBool { - OPTIONAL_BOOL_NONE = 0; - OPTIONAL_BOOL_TRUE = 1; - OPTIONAL_BOOL_FALSE = 2; -} - enum H264EncoderType { H264_ENCODER_TYPE_OPEN_H264 = 0; H264_ENCODER_TYPE_VIDEO_TOOLBOX = 1; @@ -18,18 +12,21 @@ enum H265EncoderType { H265_ENCODER_TYPE_VIDEO_TOOLBOX = 1; } +enum Av1EncoderType { + AV1_ENCODER_TYPE_AOM = 0; +} + message DataChannel { // required string label = 1; string direction = 2; // optional - OptionalBool ordered = 4; - + optional bool ordered = 4; optional int32 max_packet_life_time = 6; optional int32 max_retransmits = 8; optional string protocol = 10; - OptionalBool compress = 12; + optional bool compress = 12; } message ForwardingFilter { @@ -49,13 +46,14 @@ message SignalingConfig { repeated string signaling_url_candidates = 1; H264EncoderType h264_encoder_type = 11; H265EncoderType h265_encoder_type = 12; + Av1EncoderType av1_encoder_type = 120; string openh264 = 2; + string aom = 21; string ca_certificate = 3; string proxy_url = 44; string proxy_username = 45; string proxy_password = 46; string proxy_agent = 47; - int32 video_encoder_initial_bitrate_kbps = 4; } message SoraConnectConfig { @@ -63,12 +61,13 @@ message SoraConnectConfig { string client_id = 4; string metadata = 5; string role = 6; - OptionalBool multistream = 8; - OptionalBool spotlight = 10; + optional bool multistream = 8; + optional bool spotlight = 10; int32 spotlight_number = 11; string spotlight_focus_rid = 12; string spotlight_unfocus_rid = 13; - OptionalBool simulcast = 15; + optional bool simulcast = 15; + optional bool simulcast_multicodec = 150; string simulcast_rid = 16; bool video = 20; bool audio = 21; @@ -79,8 +78,8 @@ message SoraConnectConfig { int32 video_bit_rate = 26; string audio_codec_type = 31; int32 audio_bit_rate = 34; - OptionalBool data_channel_signaling = 36; - OptionalBool 
ignore_disconnect_websocket = 39; + optional bool data_channel_signaling = 36; + optional bool ignore_disconnect_websocket = 39; repeated DataChannel data_channels = 41; string bundle_id = 43; string audio_streaming_language_code = 48; @@ -88,17 +87,34 @@ message SoraConnectConfig { optional ForwardingFilter forwarding_filter = 51; } -message RtpEncodingParameter { - string rid = 10; - bool active = 20; - optional double scale_resolution_down_by = 31; - optional int32 max_bitrate_bps = 41; - optional double max_framerate = 51; - bool adaptive_ptime = 60; - optional string scalability_mode = 71; +message RtpEncodingParameters { + string rid = 10; + bool active = 20; + optional double scale_resolution_down_by = 31; + optional int32 max_bitrate_bps = 41; + optional double max_framerate = 51; + bool adaptive_ptime = 60; + optional string scalability_mode = 71; +} + +message RtpCodecParameters { + string kind = 10; + string name = 20; + int32 payload_type = 40; } -message RtpEncodingParameters { - bool enable_parameters = 10; - repeated RtpEncodingParameter parameters = 11; +message RidDescription { + string rid = 10; + string direction = 20; + optional int32 payload_type = 30; +} + +message RtpParameters { + string mid = 10; + repeated RtpEncodingParameters encodings = 20; + repeated RtpCodecParameters codecs = 30; + + // rids は本来 RtpParameters には含まれないんだけど、 + // ここにあった方が便利なのでここに含める + repeated RidDescription rids = 40; } diff --git a/run.py b/run.py index 0fd64f8..e39a994 100644 --- a/run.py +++ b/run.py @@ -400,6 +400,40 @@ def install_openh264(version, source_dir, install_dir): ) +@versioned +def install_aom(version, source_dir, build_dir, install_dir, cmake_args): + rm_rf(os.path.join(source_dir, "aom")) + rm_rf(os.path.join(build_dir, "aom")) + rm_rf(os.path.join(install_dir, "aom")) + git_clone_shallow( + "https://aomedia.googlesource.com/aom", + version, + os.path.join(source_dir, "aom"), + ) + with cd(os.path.join(source_dir, "aom")): + cmd( + [ + "cmake", + 
"-B", + os.path.join(build_dir, "aom"), + f'-DCMAKE_INSTALL_PREFIX={os.path.join(install_dir, "aom")}', + "-DBUILD_SHARED_LIBS=ON", + *cmake_args, + ] + ) + cmd( + [ + "cmake", + "--build", + os.path.join(build_dir, "aom"), + f"-j{multiprocessing.cpu_count()}", + "--config", + "Release", + ] + ) + cmd(["cmake", "--install", os.path.join(build_dir, "aom")]) + + @versioned def install_mbedtls(version, source_dir, build_dir, install_dir, cmake_args): rm_rf(os.path.join(source_dir, "mbedtls")) @@ -455,7 +489,7 @@ def install_protobuf(version, source_dir, install_dir, platform: str): extract(path, install_dir, "protobuf") # なぜか実行属性が消えてるので入れてやる for file in os.scandir(os.path.join(install_dir, "protobuf", "bin")): - if file.is_file: + if file.is_file(): os.chmod(file.path, file.stat().st_mode | stat.S_IXUSR) @@ -479,7 +513,7 @@ def install_protoc_gen_jsonif(version, source_dir, install_dir, platform: str): ) # なぜか実行属性が消えてるので入れてやる for file in os.scandir(os.path.join(jsonif_install_dir, "bin")): - if file.is_file: + if file.is_file(): os.chmod(file.path, file.stat().st_mode | stat.S_IXUSR) @@ -564,9 +598,36 @@ def install_deps( with cd(BASE_DIR): version = read_version_file("VERSION") + # CMake + install_cmake_args = { + "version": version["CMAKE_VERSION"], + "version_file": os.path.join(install_dir, "cmake.version"), + "source_dir": source_dir, + "install_dir": install_dir, + "platform": "", + "ext": "tar.gz", + } + if build_platform in ("windows_x86_64",): + install_cmake_args["platform"] = "windows-x86_64" + install_cmake_args["ext"] = "zip" + elif build_platform in ("macos_x86_64", "macos_arm64"): + install_cmake_args["platform"] = "macos-universal" + elif build_platform in ("ubuntu-20.04_x86_64", "ubuntu-22.04_x86_64"): + install_cmake_args["platform"] = "linux-x86_64" + elif build_platform in ("ubuntu-20.04_arm64", "ubuntu-22.04_arm64"): + install_cmake_args["platform"] = "linux-aarch64" + else: + raise Exception("Failed to install CMake") + 
install_cmake(**install_cmake_args) + + if build_platform == "macos_arm64": + add_path(os.path.join(install_dir, "cmake", "CMake.app", "Contents", "bin")) + else: + add_path(os.path.join(install_dir, "cmake", "bin")) + # libdatachannel dir = os.path.join(shared_source_dir, "libdatachannel") - url = "https://github.com/paullouisageneau/libdatachannel.git" + url = version["LIBDATACHANNEL_URL"] if not os.path.exists(os.path.join(dir, ".git")): cmd(["git", "clone", url, dir]) with cd(dir): @@ -596,32 +657,21 @@ def install_deps( } install_openh264(**install_openh264_args) - # CMake - install_cmake_args = { - "version": version["CMAKE_VERSION"], - "version_file": os.path.join(install_dir, "cmake.version"), + # AOM + install_aom_args = { + "version": version["AOM_VERSION"], + "version_file": os.path.join(install_dir, "aom.version"), "source_dir": source_dir, + "build_dir": build_dir, "install_dir": install_dir, - "platform": "", - "ext": "tar.gz", + "cmake_args": [], } - if build_platform in ("windows_x86_64",): - install_cmake_args["platform"] = "windows-x86_64" - install_cmake_args["ext"] = "zip" - elif build_platform in ("macos_x86_64", "macos_arm64"): - install_cmake_args["platform"] = "macos-universal" - elif build_platform in ("ubuntu-20.04_x86_64", "ubuntu-22.04_x86_64"): - install_cmake_args["platform"] = "linux-x86_64" - elif build_platform in ("ubuntu-20.04_arm64", "ubuntu-22.04_arm64"): - install_cmake_args["platform"] = "linux-aarch64" - else: - raise Exception("Failed to install CMake") - install_cmake(**install_cmake_args) - - if build_platform == "macos_arm64": - add_path(os.path.join(install_dir, "cmake", "CMake.app", "Contents", "bin")) - else: - add_path(os.path.join(install_dir, "cmake", "bin")) + if build_platform in ("ubuntu-20.04_x86_64", "ubuntu-22.04_x86_64"): + install_aom_args["cmake_args"] = [ + "-DCMAKE_C_COMPILER=clang-12", + "-DCMAKE_CXX_COMPILER=clang++-12", + ] + install_aom(**install_aom_args) macos_cmake_args = [] if build_platform in 
("macos_x86_64", "macos_arm64"): @@ -922,6 +972,11 @@ def main(): f"-DOPENH264_ROOT_DIR={cmake_path(os.path.join(install_dir, 'openh264'))}" ) + # AOM + cmake_args.append( + f"-DAOM_ROOT_DIR={cmake_path(os.path.join(install_dir, 'aom'))}" + ) + # libdatachannel cmake_args.append("-DUSE_MBEDTLS=ON") cmake_args.append( diff --git a/src/aom_av1_video_encoder.cpp b/src/aom_av1_video_encoder.cpp new file mode 100644 index 0000000..60f9bc9 --- /dev/null +++ b/src/aom_av1_video_encoder.cpp @@ -0,0 +1,375 @@ +#include "sorac/aom_av1_video_encoder.hpp" + +#include +#include +#include + +// Linux +#include + +// plog +#include + +// libdatachannel +#include + +// AOM +#include +#include +#include + +// text の定義を全て展開した上で文字列化する。 +// 単純に #text とした場合、全て展開する前に文字列化されてしまう +#if defined(_WIN32) +#define SORAC_STRINGIZE(text) SORAC_STRINGIZE_((text)) +#define SORAC_STRINGIZE_(x) SORAC_STRINGIZE_I x +#else +#define SORAC_STRINGIZE(x) SORAC_STRINGIZE_I(x) +#endif + +#define SORAC_STRINGIZE_I(text) #text + +// a と b の定義を全て展開した上で結合する +// 単純に a ## b とした場合、全て展開する前に結合されてしまう +#define SORAC_CAT(a, b) SORAC_CAT_I(a, b) + +#if defined(_WIN32) +#define SORAC_CAT_I(a, b) a##b +#else +#define SORAC_CAT_I(a, b) SORAC_CAT_II(a##b) +#define SORAC_CAT_II(res) res +#endif + +namespace sorac { + +class AomAv1VideoEncoder : public VideoEncoder { + public: + AomAv1VideoEncoder(const std::string& aom) { + bool result = InitAom(aom); + if (!result) { + throw std::runtime_error("Failed to load AOM"); + } + } + ~AomAv1VideoEncoder() override { + Release(); + ReleaseAom(); + } + + void ForceIntraNextFrame() override { next_iframe_ = true; } + + bool InitEncode(const Settings& settings) override { + Release(); + + PLOG_INFO << "AOM InitEncode"; + + settings_ = settings; + + // https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/video_coding/codecs/av1/libaom_av1_encoder.cc + // を参考に初期化やエンコードを行う + + aom_codec_err_t ret = aom_codec_enc_config_default_( + aom_codec_av1_cx_(), &cfg_, 
AOM_USAGE_REALTIME); + if (ret != AOM_CODEC_OK) { + PLOG_ERROR << "Failed to aom_codec_enc_config_default: ret=" << ret; + return false; + } + + // Overwrite default config with input encoder settings & RTC-relevant values. + cfg_.g_w = settings.width; + cfg_.g_h = settings.height; + cfg_.g_threads = 8; + cfg_.g_timebase.num = 1; + cfg_.g_timebase.den = 90000; + cfg_.rc_target_bitrate = bitrate_cast(settings.bitrate).count(); + cfg_.rc_dropframe_thresh = 0; + cfg_.g_input_bit_depth = 8; + cfg_.kf_mode = AOM_KF_DISABLED; + cfg_.rc_min_quantizer = 10; + cfg_.rc_max_quantizer = 63; + cfg_.rc_undershoot_pct = 50; + cfg_.rc_overshoot_pct = 50; + cfg_.rc_buf_initial_sz = 600; + cfg_.rc_buf_optimal_sz = 600; + cfg_.rc_buf_sz = 1000; + cfg_.g_usage = AOM_USAGE_REALTIME; + cfg_.g_error_resilient = 0; + // Low-latency settings. + cfg_.rc_end_usage = AOM_CBR; // Constant Bit Rate (CBR) mode + cfg_.g_pass = AOM_RC_ONE_PASS; // One-pass rate control + cfg_.g_lag_in_frames = 0; // No look ahead when lag equals 0. + + if (frame_for_encode_ != nullptr) { + aom_img_free_(frame_for_encode_); + frame_for_encode_ = nullptr; + } + + // Flag options: AOM_CODEC_USE_PSNR and AOM_CODEC_USE_HIGHBITDEPTH + aom_codec_flags_t flags = 0; + + // Initialize an encoder instance. 
+ ret = aom_codec_enc_init_ver_(&ctx_, aom_codec_av1_cx_(), &cfg_, flags, + AOM_ENCODER_ABI_VERSION); + if (ret != AOM_CODEC_OK) { + PLOG_ERROR << "Failed to aom_codec_enc_init_ver: ret=" << ret; + return false; + } + init_ctx_ = true; + +#define SET_PARAM(param_id, param_value) \ + do { \ + ret = aom_codec_control_(&ctx_, param_id, param_value); \ + if (ret != AOM_CODEC_OK) { \ + PLOG_ERROR << "Failed to aom_codec_control: ret=" << ret \ + << ", param_id=" << SORAC_STRINGIZE(param_id) \ + << ", param_value=" << param_value; \ + return false; \ + } \ + } while (0) + + // Set control parameters + SET_PARAM(AOME_SET_CPUUSED, 10); + SET_PARAM(AV1E_SET_ENABLE_CDEF, 1); + SET_PARAM(AV1E_SET_ENABLE_TPL_MODEL, 0); + SET_PARAM(AV1E_SET_DELTAQ_MODE, 0); + SET_PARAM(AV1E_SET_ENABLE_ORDER_HINT, 0); + SET_PARAM(AV1E_SET_AQ_MODE, 3); + SET_PARAM(AOME_SET_MAX_INTRA_BITRATE_PCT, 300); + SET_PARAM(AV1E_SET_COEFF_COST_UPD_FREQ, 3); + SET_PARAM(AV1E_SET_MODE_COST_UPD_FREQ, 3); + SET_PARAM(AV1E_SET_MV_COST_UPD_FREQ, 3); + + SET_PARAM(AV1E_SET_ENABLE_PALETTE, 0); + + SET_PARAM(AV1E_SET_TILE_ROWS, 1); + SET_PARAM(AV1E_SET_TILE_COLUMNS, 2); + + SET_PARAM(AV1E_SET_ROW_MT, 1); + SET_PARAM(AV1E_SET_ENABLE_OBMC, 0); + SET_PARAM(AV1E_SET_NOISE_SENSITIVITY, 0); + SET_PARAM(AV1E_SET_ENABLE_WARPED_MOTION, 0); + SET_PARAM(AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + SET_PARAM(AV1E_SET_ENABLE_REF_FRAME_MVS, 0); + SET_PARAM(AV1E_SET_SUPERBLOCK_SIZE, AOM_SUPERBLOCK_SIZE_DYNAMIC); + SET_PARAM(AV1E_SET_ENABLE_CFL_INTRA, 0); + SET_PARAM(AV1E_SET_ENABLE_SMOOTH_INTRA, 0); + SET_PARAM(AV1E_SET_ENABLE_ANGLE_DELTA, 0); + SET_PARAM(AV1E_SET_ENABLE_FILTER_INTRA, 0); + SET_PARAM(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1); + SET_PARAM(AV1E_SET_DISABLE_TRELLIS_QUANT, 1); + SET_PARAM(AV1E_SET_ENABLE_DIST_WTD_COMP, 0); + SET_PARAM(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0); + SET_PARAM(AV1E_SET_ENABLE_DUAL_FILTER, 0); + SET_PARAM(AV1E_SET_ENABLE_INTERINTRA_COMP, 0); + SET_PARAM(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0); + 
SET_PARAM(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0); + SET_PARAM(AV1E_SET_ENABLE_INTRABC, 0); + SET_PARAM(AV1E_SET_ENABLE_MASKED_COMP, 0); + SET_PARAM(AV1E_SET_ENABLE_PAETH_INTRA, 0); + SET_PARAM(AV1E_SET_ENABLE_QM, 0); + SET_PARAM(AV1E_SET_ENABLE_RECT_PARTITIONS, 0); + SET_PARAM(AV1E_SET_ENABLE_RESTORATION, 0); + SET_PARAM(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0); + SET_PARAM(AV1E_SET_ENABLE_TX64, 0); + SET_PARAM(AV1E_SET_MAX_REFERENCE_FRAMES, 3); + + return true; + } + + void SetEncodeCallback( + std::function callback) override { + callback_ = callback; + } + + void Encode(const VideoFrame& frame) override { + if (frame.i420_buffer == nullptr && frame.nv12_buffer == nullptr) { + PLOG_ERROR << "Unknown video frame format"; + return; + } + aom_img_fmt_t fmt = + frame.i420_buffer != nullptr ? AOM_IMG_FMT_I420 : AOM_IMG_FMT_NV12; + + if (frame_for_encode_ == nullptr || frame_for_encode_->fmt != fmt) { + if (frame_for_encode_ != nullptr) { + aom_img_free_(frame_for_encode_); + } + frame_for_encode_ = + aom_img_wrap_(nullptr, fmt, cfg_.g_w, cfg_.g_h, 1, nullptr); + } + + if (frame.i420_buffer != nullptr) { + // I420 + frame_for_encode_->planes[AOM_PLANE_Y] = frame.i420_buffer->y.get(); + frame_for_encode_->planes[AOM_PLANE_U] = frame.i420_buffer->u.get(); + frame_for_encode_->planes[AOM_PLANE_V] = frame.i420_buffer->v.get(); + frame_for_encode_->stride[AOM_PLANE_Y] = frame.i420_buffer->stride_y; + frame_for_encode_->stride[AOM_PLANE_U] = frame.i420_buffer->stride_u; + frame_for_encode_->stride[AOM_PLANE_V] = frame.i420_buffer->stride_v; + } else { + // NV12 + frame_for_encode_->planes[AOM_PLANE_Y] = frame.nv12_buffer->y.get(); + frame_for_encode_->planes[AOM_PLANE_U] = frame.nv12_buffer->uv.get(); + frame_for_encode_->planes[AOM_PLANE_V] = nullptr; + frame_for_encode_->stride[AOM_PLANE_Y] = frame.nv12_buffer->stride_y; + frame_for_encode_->stride[AOM_PLANE_U] = frame.nv12_buffer->stride_uv; + frame_for_encode_->stride[AOM_PLANE_V] = 0; + } + + const uint32_t duration = 90000 
/ settings_.fps; + timestamp_ += duration; + + aom_enc_frame_flags_t flags = 0; + + bool send_key_frame = next_iframe_.exchange(false); + if (send_key_frame) { + PLOG_DEBUG << "KeyFrame generated"; + flags = AOM_EFLAG_FORCE_KF; + } + + aom_codec_err_t ret = aom_codec_encode_(&ctx_, frame_for_encode_, + timestamp_, duration, flags); + + EncodedImage encoded; + const aom_codec_cx_pkt_t* pkt = nullptr; + aom_codec_iter_t iter = nullptr; + while (true) { + const aom_codec_cx_pkt_t* p = aom_codec_get_cx_data_(&ctx_, &iter); + if (p == nullptr) { + break; + } + if (p->kind == AOM_CODEC_CX_FRAME_PKT && p->data.frame.sz > 0) { + pkt = p; + } + } + + encoded.buf.reset(new uint8_t[pkt->data.frame.sz]); + encoded.size = pkt->data.frame.sz; + memcpy(encoded.buf.get(), pkt->data.frame.buf, encoded.size); + encoded.timestamp = frame.timestamp; + + bool is_key_frame = (pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0; + + // DD の設定を行う + rtc::RtpPacketizationConfig::DependencyDescriptorContext ctx; + ctx.structure.templateIdOffset = 0; + ctx.structure.decodeTargetCount = 1; + ctx.structure.chainCount = 1; + ctx.structure.decodeTargetProtectedBy = {0}; + ctx.structure.resolutions.push_back({frame.width(), frame.height()}); + rtc::FrameDependencyTemplate key_frame_template; + key_frame_template.spatialId = 0; + key_frame_template.temporalId = 0; + key_frame_template.decodeTargetIndications = { + rtc::DecodeTargetIndication::Switch}; + key_frame_template.chainDiffs = {0}; + rtc::FrameDependencyTemplate delta_frame_template; + delta_frame_template.spatialId = 0; + delta_frame_template.temporalId = 0; + delta_frame_template.decodeTargetIndications = { + rtc::DecodeTargetIndication::Switch}; + delta_frame_template.chainDiffs = {1}; + delta_frame_template.frameDiffs = {1}; + ctx.structure.templates = {key_frame_template, delta_frame_template}; + ctx.activeChains[0] = true; + ctx.descriptor.frameNumber = frame.frame_number; + if (is_key_frame) { + ctx.descriptor.dependencyTemplate = 
key_frame_template; + } else { + ctx.descriptor.dependencyTemplate = delta_frame_template; + ctx.descriptor.dependencyTemplate.frameDiffs = {frame.frame_number - + prev_frame_number_}; + } + ctx.descriptor.structureAttached = is_key_frame; + + encoded.dependency_descriptor_context = std::make_shared< + rtc::RtpPacketizationConfig::DependencyDescriptorContext>(ctx); + + prev_frame_number_ = frame.frame_number; + + callback_(encoded); + } + + void Release() override { + if (frame_for_encode_ != nullptr) { + aom_img_free_(frame_for_encode_); + frame_for_encode_ = nullptr; + } + if (init_ctx_) { + aom_codec_destroy_(&ctx_); + init_ctx_ = false; + } + } + + private: + bool InitAom(const std::string& aom) { + void* handle = ::dlopen(aom.c_str(), RTLD_LAZY); + if (handle == nullptr) { + PLOG_ERROR << "Failed to dlopen: error=" << dlerror(); + return false; + } + +#define LOAD_AOM(name) \ + SORAC_CAT(name, _) = \ + (SORAC_CAT(name, _func))::dlsym(handle, SORAC_STRINGIZE(name)); \ + if (SORAC_CAT(name, _) == nullptr) { \ + PLOG_ERROR << "Failed to dlsym: name=" << SORAC_STRINGIZE(name); \ + ::dlclose(handle); \ + return false; \ + } + + LOAD_AOM(aom_codec_av1_cx); + LOAD_AOM(aom_codec_enc_config_default); + LOAD_AOM(aom_codec_enc_init_ver); + LOAD_AOM(aom_codec_destroy); + LOAD_AOM(aom_codec_encode); + LOAD_AOM(aom_codec_get_cx_data); + LOAD_AOM(aom_codec_control); + LOAD_AOM(aom_codec_enc_config_set); + LOAD_AOM(aom_img_wrap); + LOAD_AOM(aom_img_free); + aom_handle_ = handle; + return true; + } + void ReleaseAom() { + if (aom_handle_ != nullptr) { + ::dlclose(aom_handle_); + aom_handle_ = nullptr; + } + } + + private: + Settings settings_; + bool init_ctx_ = false; + aom_codec_ctx_t ctx_; + aom_codec_enc_cfg_t cfg_; + aom_image_t* frame_for_encode_ = nullptr; + int64_t timestamp_ = 0; + int prev_frame_number_ = 0; + + std::function callback_; + + std::atomic next_iframe_; + + void* aom_handle_ = nullptr; + +#define DECLARE_AOM(name, result, ...) 
\ + using SORAC_CAT(name, _func) = result (*)(__VA_ARGS__); \ + SORAC_CAT(name, _func) SORAC_CAT(name, _); + // clang-format off + DECLARE_AOM(aom_codec_av1_cx, aom_codec_iface_t*, void); + DECLARE_AOM(aom_codec_enc_config_default, aom_codec_err_t, aom_codec_iface_t* iface, aom_codec_enc_cfg_t* cfg, unsigned int usage); + DECLARE_AOM(aom_codec_enc_init_ver, aom_codec_err_t, aom_codec_ctx_t* ctx, aom_codec_iface_t* iface, const aom_codec_enc_cfg_t* cfg, aom_codec_flags_t flags, int ver); + DECLARE_AOM(aom_codec_destroy, aom_codec_err_t, aom_codec_ctx_t* ctx); + DECLARE_AOM(aom_codec_encode, aom_codec_err_t, aom_codec_ctx_t* ctx, const aom_image_t* img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags); + DECLARE_AOM(aom_codec_get_cx_data, const aom_codec_cx_pkt_t*, aom_codec_ctx_t* ctx, aom_codec_iter_t* iter); + DECLARE_AOM(aom_codec_control, aom_codec_err_t, aom_codec_ctx_t* ctx, int ctrl_id, ...); + DECLARE_AOM(aom_codec_enc_config_set, aom_codec_err_t, aom_codec_ctx_t* ctx, const aom_codec_enc_cfg_t* cfg); + DECLARE_AOM(aom_img_wrap, aom_image_t*, aom_image_t* img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int stride_align, unsigned char* img_data); + DECLARE_AOM(aom_img_free, void, aom_image_t* img); + // clang-format on +}; + +std::shared_ptr CreateAomAv1VideoEncoder(const std::string& aom) { + return std::make_shared(aom); +} + +} // namespace sorac diff --git a/src/default_encoder_adapter.cpp b/src/default_encoder_adapter.cpp new file mode 100644 index 0000000..41ba9f2 --- /dev/null +++ b/src/default_encoder_adapter.cpp @@ -0,0 +1,99 @@ +#include "sorac/default_encoder_adapter.hpp" + +#include +#include +#include + +// plog +#include + +#include "sorac/bitrate.hpp" + +namespace sorac { + +// FPS の計測区間 +static const std::chrono::seconds kFpsCalcInterval(2); + +// 全てのエンコーダに適用するアダプタ。 +// +// 機能ごとにアダプタを分けるのが面倒なので一緒にしてしまう。 +// 今のところ、以下の機能がある。 +// - エンコードする映像を 16 の倍数にアライメントする +// - FPS を計測して、指定した FPS 
を超えた場合はエンコードをスキップする +class DefaultEncoderAdapter : public VideoEncoder { + public: + DefaultEncoderAdapter(std::shared_ptr encoder) + : encoder_(encoder) {} + ~DefaultEncoderAdapter() override { Release(); } + + void ForceIntraNextFrame() override { encoder_->ForceIntraNextFrame(); } + + bool InitEncode(const Settings& settings) override { + Release(); + + // 16の倍数にアライメントする + settings_ = settings; + settings_.width = settings.width / 16 * 16; + settings_.height = settings.height / 16 * 16; + if (settings.width != settings_.width || + settings.height != settings_.height) { + PLOG_INFO << "InitEncode adjusted: width=" << settings_.width + << " height=" << settings_.height; + } + return encoder_->InitEncode(settings_); + } + + void SetEncodeCallback( + std::function callback) override { + encoder_->SetEncodeCallback(callback); + } + + void Encode(const VideoFrame& frame) override { + // フレームレートによってはエンコードをスキップする + auto now = std::chrono::steady_clock::now(); + if (!start_timestamp_) { + start_timestamp_ = now; + } else { + auto from = std::max(now - kFpsCalcInterval, *start_timestamp_); + // from 未満のフレームを削除する + encode_timestamps_.erase( + std::remove_if(encode_timestamps_.begin(), encode_timestamps_.end(), + [from](const auto& t) { return t < from; }), + encode_timestamps_.end()); + auto fps = + ((double)encode_timestamps_.size() * 1000000 / + std::chrono::duration_cast(now - from) + .count()); + if (fps > settings_.fps) { + return; + } + } + encode_timestamps_.push_back(now); + + VideoFrame frame2 = frame; + if (frame2.i420_buffer != nullptr) { + frame2.i420_buffer->width = settings_.width; + frame2.i420_buffer->height = settings_.height; + } + if (frame2.nv12_buffer != nullptr) { + frame2.nv12_buffer->width = settings_.width; + frame2.nv12_buffer->height = settings_.height; + } + encoder_->Encode(frame2); + } + + void Release() override { encoder_->Release(); } + + private: + std::shared_ptr encoder_; + Settings settings_; + std::vector encode_timestamps_; + 
std::optional start_timestamp_; +}; + +std::shared_ptr CreateDefaultEncoderAdapter( + std::shared_ptr encoder) { + return std::make_shared(encoder); +} + +} // namespace sorac diff --git a/src/h264_profile_level_id.cpp b/src/h264_profile_level_id.cpp new file mode 100644 index 0000000..a3c860f --- /dev/null +++ b/src/h264_profile_level_id.cpp @@ -0,0 +1,134 @@ +// https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/api/video_codecs/h264_profile_level_id.cpp +// から必要な部分だけ抜き出して修正したもの。 + +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "sorac/h264_profile_level_id.hpp" + +#include +#include +#include +#include + +namespace sorac { + +namespace { + +// For level_idc=11 and profile_idc=0x42, 0x4D, or 0x58, the constraint set3 +// flag specifies if level 1b or level 1.1 is used. +const uint8_t kConstraintSet3Flag = 0x10; + +// Convert a string of 8 characters into a byte where the positions containing +// character c will have their bit set. For example, c = 'x', str = "x1xx0000" +// will return 0b10110000. constexpr is used so that the pattern table in +// kProfilePatterns is statically initialized. +constexpr uint8_t ByteMaskString(char c, const char (&str)[9]) { + return (str[0] == c) << 7 | (str[1] == c) << 6 | (str[2] == c) << 5 | + (str[3] == c) << 4 | (str[4] == c) << 3 | (str[5] == c) << 2 | + (str[6] == c) << 1 | (str[7] == c) << 0; +} + +// Class for matching bit patterns such as "x1xx0000" where 'x' is allowed to be +// either 0 or 1. 
+class BitPattern { + public: + explicit constexpr BitPattern(const char (&str)[9]) + : mask_(~ByteMaskString('x', str)), + masked_value_(ByteMaskString('1', str)) {} + + bool IsMatch(uint8_t value) const { return masked_value_ == (value & mask_); } + + private: + const uint8_t mask_; + const uint8_t masked_value_; +}; + +// Table for converting between profile_idc/profile_iop to H264Profile. +struct ProfilePattern { + const uint8_t profile_idc; + const BitPattern profile_iop; + const H264Profile profile; +}; + +// This is from https://tools.ietf.org/html/rfc6184#section-8.1. +constexpr ProfilePattern kProfilePatterns[] = { + {0x42, BitPattern("x1xx0000"), H264Profile::kProfileConstrainedBaseline}, + {0x4D, BitPattern("1xxx0000"), H264Profile::kProfileConstrainedBaseline}, + {0x58, BitPattern("11xx0000"), H264Profile::kProfileConstrainedBaseline}, + {0x42, BitPattern("x0xx0000"), H264Profile::kProfileBaseline}, + {0x58, BitPattern("10xx0000"), H264Profile::kProfileBaseline}, + {0x4D, BitPattern("0x0x0000"), H264Profile::kProfileMain}, + {0x64, BitPattern("00000000"), H264Profile::kProfileHigh}, + {0x64, BitPattern("00001100"), H264Profile::kProfileConstrainedHigh}, + {0xF4, BitPattern("00000000"), H264Profile::kProfilePredictiveHigh444}}; + +} // anonymous namespace + +std::optional ParseH264ProfileLevelId(const char* str) { + // The string should consist of 3 bytes in hexadecimal format. + if (std::strlen(str) != 6u) + return std::nullopt; + const uint32_t profile_level_id_numeric = strtol(str, nullptr, 16); + if (profile_level_id_numeric == 0) + return std::nullopt; + + // Separate into three bytes. + const uint8_t level_idc = + static_cast(profile_level_id_numeric & 0xFF); + const uint8_t profile_iop = + static_cast((profile_level_id_numeric >> 8) & 0xFF); + const uint8_t profile_idc = + static_cast((profile_level_id_numeric >> 16) & 0xFF); + + // Parse level based on level_idc and constraint set 3 flag. 
+ H264Level level_casted = static_cast(level_idc); + H264Level level; + + switch (level_casted) { + case H264Level::kLevel1_1: + level = (profile_iop & kConstraintSet3Flag) != 0 ? H264Level::kLevel1_b + : H264Level::kLevel1_1; + break; + case H264Level::kLevel1: + case H264Level::kLevel1_2: + case H264Level::kLevel1_3: + case H264Level::kLevel2: + case H264Level::kLevel2_1: + case H264Level::kLevel2_2: + case H264Level::kLevel3: + case H264Level::kLevel3_1: + case H264Level::kLevel3_2: + case H264Level::kLevel4: + case H264Level::kLevel4_1: + case H264Level::kLevel4_2: + case H264Level::kLevel5: + case H264Level::kLevel5_1: + case H264Level::kLevel5_2: + level = level_casted; + break; + default: + // Unrecognized level_idc. + return std::nullopt; + } + + // Parse profile_idc/profile_iop into a Profile enum. + for (const ProfilePattern& pattern : kProfilePatterns) { + if (profile_idc == pattern.profile_idc && + pattern.profile_iop.IsMatch(profile_iop)) { + return H264ProfileLevelId(pattern.profile, level); + } + } + + // Unrecognized profile_idc/profile_iop combination. + return std::nullopt; +} + +} // namespace sorac \ No newline at end of file diff --git a/src/open_h264_video_encoder.cpp b/src/open_h264_video_encoder.cpp index 748cd7b..e112d79 100644 --- a/src/open_h264_video_encoder.cpp +++ b/src/open_h264_video_encoder.cpp @@ -53,7 +53,7 @@ class OpenH264VideoEncoder : public VideoEncoder { encoder_params.iMaxBitrate = UNSPECIFIED_BIT_RATE; // Rate Control mode encoder_params.iRCMode = RC_BITRATE_MODE; - encoder_params.fMaxFrameRate = 30; + encoder_params.fMaxFrameRate = settings.fps; // The following parameters are extension parameters (they're in SEncParamExt, // not in SEncParamBase). 
diff --git a/src/signaling.cpp b/src/signaling.cpp index 9770579..257f5e0 100644 --- a/src/signaling.cpp +++ b/src/signaling.cpp @@ -13,7 +13,10 @@ // plog #include +#include "sorac/aom_av1_video_encoder.hpp" #include "sorac/current_time.hpp" +#include "sorac/default_encoder_adapter.hpp" +#include "sorac/h264_profile_level_id.hpp" #include "sorac/open_h264_video_encoder.hpp" #include "sorac/opus_audio_encoder.hpp" #include "sorac/simulcast_encoder_adapter.hpp" @@ -133,6 +136,10 @@ class SignalingImpl : public Signaling { } void SendVideoFrame(const VideoFrame& frame) override { + if (rtp_params_.mid.empty()) { + return; + } + if (!client_.video_encoder_settings || frame.base_width != client_.video_encoder_settings->width || frame.base_height != client_.video_encoder_settings->height) { @@ -140,7 +147,8 @@ class SignalingImpl : public Signaling { VideoEncoder::Settings settings; settings.width = frame.base_width; settings.height = frame.base_height; - settings.bitrate = Kbps(config_.video_encoder_initial_bitrate_kbps); + settings.bitrate = default_bitrate_; + settings.fps = 30; if (!client_.video_encoder->InitEncode(settings)) { PLOG_ERROR << "Failed to InitEncode()"; return; @@ -161,6 +169,13 @@ class SignalingImpl : public Signaling { if (rtp_config->timestampToSeconds(report_elapsed_timestamp) > 0.2) { sender->setNeedsToReport(); } + if (image.dependency_descriptor_context != nullptr && + dependency_descriptor_id_ != 0) { + rtp_config->dependencyDescriptorId = dependency_descriptor_id_; + rtp_config->dependencyDescriptorContext = *std::static_pointer_cast< + rtc::RtpPacketizationConfig::DependencyDescriptorContext>( + image.dependency_descriptor_context); + } std::vector buf((std::byte*)image.buf.get(), (std::byte*)image.buf.get() + image.size); client_.video->simulcast_handler->config()->rid = image.rid; @@ -171,6 +186,9 @@ class SignalingImpl : public Signaling { } void SendAudioFrame(const AudioFrame& frame) override { + if (client_.opus_encoder == nullptr) { 
+ return; + } client_.opus_encoder->Encode(frame); } @@ -194,8 +212,8 @@ class SignalingImpl : public Signaling { on_push_ = on_push; } - soracp::RtpEncodingParameters GetRtpEncodingParameters() const override { - return rtp_encoding_params_; + soracp::RtpParameters GetRtpParameters() const override { + return rtp_params_; } private: @@ -242,9 +260,8 @@ class SignalingImpl : public Signaling { } if (js["simulcast"].get()) { - rtp_encoding_params_.enable_parameters = true; for (auto& enc : js["encodings"]) { - soracp::RtpEncodingParameter p; + soracp::RtpEncodingParameters p; p.rid = enc["rid"].get(); p.active = true; if (enc.contains("active")) { @@ -266,7 +283,7 @@ class SignalingImpl : public Signaling { if (enc.contains("scalabilityMode")) { p.set_scalability_mode(enc["scalabilityMode"].get()); } - rtp_encoding_params_.parameters.push_back(p); + rtp_params_.encodings.push_back(p); } } @@ -274,10 +291,16 @@ class SignalingImpl : public Signaling { client_.pc = std::make_shared(config); client_.pc->onLocalDescription([this](rtc::Description desc) { auto sdp = desc.generateSdp(); - sdp += "a=rid:r0 send\r\n"; - sdp += "a=rid:r1 send\r\n"; - sdp += "a=rid:r2 send\r\n"; - sdp += "a=simulcast:send r0;r1;r2\r\n"; + if (IsSimulcast()) { + for (const auto& rd : rtp_params_.rids) { + sdp += "a=rid:" + rd.rid + " send"; + if (rd.has_payload_type()) { + sdp += " pt=" + std::to_string(rd.payload_type); + } + sdp += "\r\n"; + } + sdp += "a=simulcast:send r0;r1;r2\r\n"; + } PLOG_DEBUG << "answer sdp:" << sdp; nlohmann::json js = { {"type", desc.typeString()}, @@ -390,8 +413,21 @@ class SignalingImpl : public Signaling { auto cname = "cname-" + generate_random_string(24); auto msid = "msid-" + generate_random_string(24); auto track_id = "trackid-" + generate_random_string(24); + // ビットレート + default_bitrate_ = std::invoke([&]() { + auto it = std::find_if( + lines.begin(), lines.end(), + [](const std::string& s) { return starts_with(s, "b=TIAS:"); }); + if (it == lines.end()) { + 
throw std::runtime_error("b=TIAS: not found"); + } + auto ys = split_with(*it, ":"); + auto bitrate = Bps(std::stoi(ys[1])); + return bitrate; + }); + // video - { + std::invoke([&]() { // m=video から他の m= が出てくるまでの間のデータを取得する std::vector video_lines; { @@ -405,31 +441,61 @@ class SignalingImpl : public Signaling { video_lines.assign(it, it2); } } + + std::optional h264_profile; + std::optional h264_profile_string; + // mid, payload_type, codec - std::string mid; - int payload_type; - std::string codec; - { - auto get_value = - [&video_lines](const std::string& search) -> std::string { - auto it = std::find_if(video_lines.begin(), video_lines.end(), - [&search](const std::string& s) { - return starts_with(s, search); - }); - if (it == video_lines.end()) { - return ""; + for (const auto& line : video_lines) { + if (auto s = std::string("a=mid:"); starts_with(line, s)) { + auto mid = line.substr(s.size()); + PLOG_DEBUG << "mid=" << mid; + rtp_params_.mid = mid; + } else if (auto s = std::string("a=rtpmap:"); starts_with(line, s)) { + auto rtpmap = line.substr(s.size()); + auto ys = split_with(rtpmap, " "); + auto payload_type = std::stoi(ys[0]); + auto codec = split_with(ys[1], "/")[0]; + if (codec == "H264" || codec == "H265" || codec == "AV1") { + PLOG_DEBUG << "payload_type=" << payload_type + << ", codec=" << codec; + soracp::RtpCodecParameters cp; + cp.payload_type = payload_type; + cp.kind = "video"; + cp.name = codec; + rtp_params_.codecs.push_back(cp); } - return it->substr(search.size()); - }; - mid = get_value("a=mid:"); - PLOG_DEBUG << "mid=" << mid; - auto xs = split_with(get_value("a=msid:"), " "); - auto rtpmap = get_value("a=rtpmap:"); - auto ys = split_with(rtpmap, " "); - payload_type = std::stoi(ys[0]); - codec = split_with(ys[1], "/")[0]; - PLOG_DEBUG << "payload_type=" << payload_type << ", codec=" << codec; + } else if (auto s = std::string("a=fmtp:"); starts_with(line, s)) { + // 直前の a=rtpmap が H264 だった場合、a=fmtp 行の profile-level-id を取得する + if 
(rtp_params_.codecs.empty() || + rtp_params_.codecs.back().name != "H264") { + continue; + } + auto fmtp = line.substr(s.size()); + auto ys = split_with(fmtp, " "); + auto params = split_with(ys[1], ";"); + for (const auto& param : params) { + auto zs = split_with(param, "="); + if (zs.size() != 2) { + continue; + } + if (zs[0] == "profile-level-id") { + h264_profile_string = zs[1]; + h264_profile = ParseH264ProfileLevelId(zs[1].c_str()); + PLOG_DEBUG << "profile-level-id=" << zs[1]; + if (h264_profile != std::nullopt) { + PLOG_DEBUG << "profile=" << (int)h264_profile->profile + << ", level=" << (int)h264_profile->level; + } + } + } + } } + // mid が空ということは vido=false なので何もしない + if (rtp_params_.mid.empty()) { + return; + } + // サイマルキャストの場合、拡張ヘッダーのどの ID を使えば良いか調べる if (IsSimulcast()) { auto it = std::find_if( @@ -444,17 +510,72 @@ class SignalingImpl : public Signaling { rtp_stream_id_ = std::stoi(ys[1]); PLOG_DEBUG << "rtp_stream_id=" << rtp_stream_id_; } + // rid が参照するべき payload_type の対応を作る + if (IsSimulcast()) { + for (const auto& line : video_lines) { + // 以下のような感じの行を探して値を設定する + // a=rid:r0 send + // a=rid:r0 recv pt=37 + + auto s = std::string("a=rid:"); + if (!starts_with(line, s)) { + continue; + } + auto xs = split_with(line, " "); + if (xs.size() < 2) { + continue; + } + soracp::RidDescription rd; + rd.rid = xs[0].substr(s.size()); + rd.direction = xs[1]; + s = "pt="; + if (xs.size() >= 3 && starts_with(xs[2], s)) { + rd.set_payload_type(std::stoi(xs[2].substr(s.size()))); + } + rtp_params_.rids.push_back(rd); + PLOG_DEBUG << "rid=" << rd.rid << ", direction=" << rd.direction + << ", payload_type=" + << (rd.has_payload_type() + ? 
std::to_string(rd.payload_type) + : "(none)"); + } + } + { + auto it = std::find_if( + video_lines.begin(), video_lines.end(), [](const std::string& s) { + return starts_with(s, "a=extmap:") && + s.find( + "https://aomediacodec.github.io/av1-rtp-spec/" + "#dependency-descriptor-rtp-header-extension") != + std::string::npos; + }); + if (it != video_lines.end()) { + auto xs = split_with(*it, " "); + auto ys = split_with(xs[0], ":"); + dependency_descriptor_id_ = std::stoi(ys[1]); + PLOG_DEBUG << "dependency_descriptor_id=" + << dependency_descriptor_id_; + } + } std::shared_ptr track; std::map, std::shared_ptr> sr_reporters; - auto video = rtc::Description::Video(mid); - if (codec == "H264") { - video.addH264Codec(payload_type); - } else { - video.addH265Codec(payload_type); + auto video = rtc::Description::Video(rtp_params_.mid); + for (const auto& codec : rtp_params_.codecs) { + if (codec.name == "H264") { + std::optional profile; + if (h264_profile_string != std::nullopt) { + profile = "level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=" + *h264_profile_string; + } + video.addH264Codec(codec.payload_type, profile); + } else if (codec.name == "H265") { + video.addH265Codec(codec.payload_type); + } else if (codec.name == "AV1") { + video.addAV1Codec(codec.payload_type); + } } std::map, uint32_t> ssrcs; if (!IsSimulcast()) { @@ -462,7 +583,7 @@ class SignalingImpl : public Signaling { video.addSSRC(ssrc, cname, msid, track_id); ssrcs.insert(std::make_pair(std::nullopt, ssrc)); } else { - for (const auto& p : rtp_encoding_params_.parameters) { + for (const auto& p : rtp_params_.encodings) { uint32_t ssrc = generate_random_number(); video.addSSRC(ssrc, cname, msid, track_id); ssrcs.insert(std::make_pair(p.rid, ssrc)); @@ -473,26 +594,56 @@ class SignalingImpl : public Signaling { auto simulcast_config = std::make_shared(); auto simulcast_handler = std::make_shared(simulcast_config); - for (int i = 0; - i < (!IsSimulcast() ? 
1 : rtp_encoding_params_.parameters.size()); + for (int i = 0; i < (!IsSimulcast() ? 1 : rtp_params_.encodings.size()); i++) { std::optional rid; if (IsSimulcast()) { - rid = rtp_encoding_params_.parameters[i].rid; + rid = rtp_params_.encodings[i].rid; } uint32_t ssrc = ssrcs[rid]; + int payload_type; + std::string codec; + if (IsSimulcast()) { + // この rid が参照するべき payload_type と codec を探す + auto it = + std::find_if(rtp_params_.rids.begin(), rtp_params_.rids.end(), + [rid](const soracp::RidDescription& rd) { + return rd.rid == *rid; + }); + if (it == rtp_params_.rids.end() || !it->has_payload_type()) { + payload_type = rtp_params_.codecs[0].payload_type; + codec = rtp_params_.codecs[0].name; + } else { + payload_type = it->payload_type; + codec = + std::find_if( + rtp_params_.codecs.begin(), rtp_params_.codecs.end(), + [payload_type](const soracp::RtpCodecParameters& codec) { + return codec.payload_type == payload_type; + }) + ->name; + } + } else { + payload_type = rtp_params_.codecs[0].payload_type; + codec = rtp_params_.codecs[0].name; + } + auto rtp_config = std::make_shared( ssrc, cname, payload_type, - codec == "H264" ? rtc::H264RtpPacketizer::defaultClockRate - : rtc::H265RtpPacketizer::defaultClockRate); + codec == "H264" ? rtc::H264RtpPacketizer::defaultClockRate + : codec == "H265" ? 
rtc::H265RtpPacketizer::defaultClockRate + : rtc::AV1RtpPacketizer::defaultClockRate); std::shared_ptr packetizer; if (codec == "H264") { packetizer = std::make_shared( rtc::NalUnit::Separator::LongStartSequence, rtp_config); - } else { + } else if (codec == "H265") { packetizer = std::make_shared( rtc::NalUnit::Separator::LongStartSequence, rtp_config); + } else { + packetizer = std::make_shared( + rtc::AV1RtpPacketizer::Packetization::TemporalUnit, rtp_config); } auto sr_reporter = std::make_shared(rtp_config); packetizer->addToChain(sr_reporter); @@ -520,55 +671,62 @@ class SignalingImpl : public Signaling { track->setMediaHandler(simulcast_handler); track->onOpen([this, wtrack = std::weak_ptr(track), - codec]() { + h264_profile]() { PLOG_DEBUG << "Video Track Opened"; auto track = wtrack.lock(); if (track == nullptr) { return; } - std::function()> create_encoder; - - if (codec == "H264") { - if (config_.h264_encoder_type == - soracp::H264_ENCODER_TYPE_OPEN_H264) { - create_encoder = [openh264 = config_.openh264]() { - return CreateOpenH264VideoEncoder(openh264); - }; - } else if (config_.h264_encoder_type == - soracp::H264_ENCODER_TYPE_VIDEO_TOOLBOX) { + std::function(std::string)> + create_encoder = + [this, h264_profile]( + std::string codec) -> std::shared_ptr { + if (codec == "H264") { + if (config_.h264_encoder_type == + soracp::H264_ENCODER_TYPE_OPEN_H264) { + return CreateOpenH264VideoEncoder(config_.openh264); + } else if (config_.h264_encoder_type == + soracp::H264_ENCODER_TYPE_VIDEO_TOOLBOX) { #if defined(__APPLE__) - create_encoder = []() { - return CreateVTH26xVideoEncoder(VTH26xVideoEncoderType::kH264); - }; + return CreateVTH26xVideoEncoder(VTH26xVideoEncoderType::kH264, + h264_profile); #else - PLOG_ERROR << "VideoToolbox is only supported on macOS/iOS"; - return; + PLOG_ERROR << "VideoToolbox is only supported on macOS/iOS"; #endif - } else { - PLOG_ERROR << "Unknown H264EncoderType"; - return; - } - } else if (codec == "H265") { - if 
(config_.h265_encoder_type == - soracp::H265_ENCODER_TYPE_VIDEO_TOOLBOX) { + } else { + PLOG_ERROR << "Unknown H264EncoderType"; + } + } else if (codec == "H265") { + if (config_.h265_encoder_type == + soracp::H265_ENCODER_TYPE_VIDEO_TOOLBOX) { #if defined(__APPLE__) - create_encoder = []() { - return CreateVTH26xVideoEncoder(VTH26xVideoEncoderType::kH265); - }; + return CreateVTH26xVideoEncoder(VTH26xVideoEncoderType::kH265, + std::nullopt); #else - PLOG_ERROR << "VideoToolbox is only supported on macOS/iOS"; - return; + PLOG_ERROR << "VideoToolbox is only supported on macOS/iOS"; #endif - } else { - PLOG_ERROR << "Unknown H265EncoderType"; - return; + } else { + PLOG_ERROR << "Unknown H265EncoderType"; + } + } else if (codec == "AV1") { + if (config_.av1_encoder_type == soracp::AV1_ENCODER_TYPE_AOM) { + return CreateAomAv1VideoEncoder(config_.aom); + } else { + PLOG_ERROR << "Unknown Av1EncoderType"; + } } - } - if (create_encoder) { - client_.video_encoder = CreateSimulcastEncoderAdapter( - rtp_encoding_params_, create_encoder); - } + return nullptr; + }; + std::function(std::string)> + create_encoder2 = + [create_encoder]( + std::string codec) -> std::shared_ptr { + return CreateDefaultEncoderAdapter(create_encoder(codec)); + }; + + client_.video_encoder = + CreateSimulcastEncoderAdapter(rtp_params_, create_encoder2); on_track_(track); }); @@ -576,9 +734,10 @@ class SignalingImpl : public Signaling { client_.video->track = track; client_.video->senders = sr_reporters; client_.video->simulcast_handler = simulcast_handler; - } + }); + // audio - { + std::invoke([&]() { uint32_t ssrc = generate_random_number(); // m=audio から他の m= が出てくるまでの間のデータを取得する std::vector audio_lines; @@ -610,6 +769,11 @@ class SignalingImpl : public Signaling { }; mid = get_value("a=mid:"); PLOG_DEBUG << "mid=" << mid; + // mid が空ということは audio=false なので何もしない + if (mid.empty()) { + return; + } + auto xs = split_with(get_value("a=msid:"), " "); auto rtpmap = get_value("a=rtpmap:"); payload_type 
= std::stoi(split_with(rtpmap, " ")[0]); @@ -674,7 +838,7 @@ class SignalingImpl : public Signaling { sr_reporters; sr_reporters[std::nullopt] = sr_reporter; client_.audio->senders = sr_reporters; - } + }); client_.pc->setRemoteDescription(rtc::Description(sdp, "offer")); } else if (js["type"] == "switched") { @@ -726,9 +890,9 @@ class SignalingImpl : public Signaling { } }; auto set_optional_bool = [](nlohmann::json& js, const std::string& key, - soracp::OptionalBool value) { - if (value != soracp::OPTIONAL_BOOL_NONE) { - js[key] = value == soracp::OPTIONAL_BOOL_TRUE ? true : false; + bool has_value, bool value) { + if (has_value) { + js[key] = value; } }; auto set_json = [](nlohmann::json& js, const std::string& key, @@ -741,10 +905,12 @@ class SignalingImpl : public Signaling { set_if(js, "redirect", true, redirect); set_string(js, "client_id", sc.client_id); set_string(js, "bundle_id", sc.bundle_id); - set_optional_bool(js, "multistream", sc.multistream); - set_optional_bool(js, "simulcast", sc.simulcast); + set_optional_bool(js, "multistream", sc.has_multistream(), sc.multistream); + set_optional_bool(js, "simulcast", sc.has_simulcast(), sc.simulcast); + set_optional_bool(js, "simulcast_multicodec", sc.has_simulcast_multicodec(), + sc.simulcast_multicodec); set_string(js, "simulcast_rid", sc.simulcast_rid); - set_optional_bool(js, "spotlight", sc.spotlight); + set_optional_bool(js, "spotlight", sc.has_spotlight(), sc.spotlight); set_if(js, "spotlight_number", sc.spotlight_number, sc.spotlight_number > 0); set_string(js, "spotlight_focus_rid", sc.spotlight_focus_rid); @@ -785,8 +951,11 @@ class SignalingImpl : public Signaling { set_string(js, "audio_streaming_language_code", sc.audio_streaming_language_code); - set_optional_bool(js, "data_channel_signaling", sc.data_channel_signaling); + set_optional_bool(js, "data_channel_signaling", + sc.has_data_channel_signaling(), + sc.data_channel_signaling); set_optional_bool(js, "ignore_disconnect_websocket", + 
sc.has_ignore_disconnect_websocket(), sc.ignore_disconnect_websocket); for (const auto& d : sc.data_channels) { @@ -797,7 +966,7 @@ class SignalingImpl : public Signaling { d.has_max_packet_life_time()); set_if(dc, "max_retransmits", d.max_retransmits, d.has_max_retransmits()); set_if(dc, "protocol", d.protocol, d.has_protocol()); - set_optional_bool(dc, "compress", d.compress); + set_optional_bool(dc, "compress", d.has_compress(), d.compress); js["data_channels"].push_back(dc); } @@ -834,7 +1003,7 @@ class SignalingImpl : public Signaling { // ws_ = nullptr; } - bool IsSimulcast() const { return rtp_encoding_params_.enable_parameters; } + bool IsSimulcast() const { return !rtp_params_.encodings.empty(); } std::shared_ptr GetWebSocket() const { std::lock_guard lock(ws_mutex_); @@ -848,8 +1017,10 @@ class SignalingImpl : public Signaling { Client client_; soracp::SignalingConfig config_; soracp::SoraConnectConfig sora_config_; - soracp::RtpEncodingParameters rtp_encoding_params_; + soracp::RtpParameters rtp_params_; int rtp_stream_id_ = 0; + int dependency_descriptor_id_ = 0; + Bps default_bitrate_; int video_ssrc_ = 0; std::function)> on_track_; std::function)> on_data_channel_; diff --git a/src/simulcast_encoder_adapter.cpp b/src/simulcast_encoder_adapter.cpp index d2f89e0..f4ed721 100644 --- a/src/simulcast_encoder_adapter.cpp +++ b/src/simulcast_encoder_adapter.cpp @@ -53,16 +53,38 @@ static Bps GetMaxBitrate(int width, int height) { class SimulcastEncoderAdapter : public VideoEncoder { public: SimulcastEncoderAdapter( - const soracp::RtpEncodingParameters& params, - std::function()> create_encoder) + const soracp::RtpParameters& params, + std::function(std::string)> create_encoder) : create_encoder_(create_encoder) { - if (!params.enable_parameters || params.parameters.empty()) { + if (params.encodings.empty()) { encoders_.resize(1); - encoders_[0].param.active = true; + encoders_[0].encoding.active = true; + encoders_[0].codec = params.codecs[0]; simulcast_ = 
false; } else { - for (auto& param : params.parameters) { - encoders_.push_back({nullptr, param}); + for (const auto& encoding : params.encodings) { + auto it = std::find_if(params.rids.begin(), params.rids.end(), + [&encoding](const soracp::RidDescription& rd) { + return rd.rid == encoding.rid; + }); + if (it == params.rids.end()) { + PLOG_ERROR << "Rid not found: rid=" << encoding.rid; + continue; + } + if (!it->has_payload_type()) { + encoders_.push_back({nullptr, encoding, params.codecs[0]}); + } else { + auto it2 = + std::find_if(params.codecs.begin(), params.codecs.end(), + [it](const soracp::RtpCodecParameters& codec) { + return codec.payload_type == it->payload_type; + }); + if (it2 == params.codecs.end()) { + PLOG_ERROR << "Codec not found: payload_type=" << it->payload_type; + continue; + } + encoders_.push_back({nullptr, encoding, *it2}); + } } simulcast_ = true; } @@ -82,11 +104,12 @@ class SimulcastEncoderAdapter : public VideoEncoder { PLOG_INFO << "InitEncode: width=" << settings.width << " height=" << settings.height - << " bitrate=" << settings.bitrate.count(); + << " bitrate=" << settings.bitrate.count() + << " fps=" << settings.fps; // 各サイズの最大ビットレートを計算して、その割合でビットレートを分配する Bps sum_bitrate; for (const auto& e : encoders_) { - const auto& p = e.param; + const auto& p = e.encoding; if (!p.active) { continue; } @@ -100,20 +123,31 @@ class SimulcastEncoderAdapter : public VideoEncoder { } for (auto& e : encoders_) { - if (!e.param.active) { + if (!e.encoding.active) { continue; } Settings s = settings; - if (e.param.has_scale_resolution_down_by()) { - s.width = (int)(settings.width / e.param.scale_resolution_down_by); - s.height = (int)(settings.height / e.param.scale_resolution_down_by); + if (e.encoding.has_scale_resolution_down_by()) { + s.width = (int)(settings.width / e.encoding.scale_resolution_down_by); + s.height = (int)(settings.height / e.encoding.scale_resolution_down_by); } double rate = (double)GetMaxBitrate(s.width, s.height).count() / 
sum_bitrate.count(); s.bitrate = Bps((int64_t)(settings.bitrate.count() * rate)); - e.encoder = create_encoder_(); - PLOG_INFO << "InitEncode(Layerd): width=" << s.width - << " height=" << s.height << " bitrate=" << s.bitrate.count(); + s.fps = settings.fps; + // 個別にビットレートやフレームレートが指定されていたら、その通りにする + if (e.encoding.has_max_bitrate_bps()) { + s.bitrate = Bps(e.encoding.max_bitrate_bps); + } + if (e.encoding.has_max_framerate()) { + s.fps = e.encoding.max_framerate; + } + + e.encoder = create_encoder_(e.codec.name); + PLOG_INFO << "InitEncode(Layerd): rid=" << e.encoding.rid + << ", codec=" << e.codec.name << ", width=" << s.width + << ", height=" << s.height << ", bitrate=" << s.bitrate.count() + << ", fps=" << s.fps; if (!e.encoder->InitEncode(s)) { return false; } @@ -129,7 +163,7 @@ class SimulcastEncoderAdapter : public VideoEncoder { if (e.encoder != nullptr) { std::optional rid; if (simulcast_) { - rid = e.param.rid; + rid = e.encoding.rid; } e.encoder->SetEncodeCallback( [rid, callback](const sorac::EncodedImage& image) { @@ -156,7 +190,7 @@ class SimulcastEncoderAdapter : public VideoEncoder { } for (auto& e : encoders_) { if (e.encoder != nullptr) { - if (e.param.rid == *frame.rid) { + if (e.encoding.rid == *frame.rid) { e.encoder->Encode(frame); break; } @@ -177,17 +211,18 @@ class SimulcastEncoderAdapter : public VideoEncoder { private: struct Encoder { std::shared_ptr encoder; - soracp::RtpEncodingParameter param; + soracp::RtpEncodingParameters encoding; + soracp::RtpCodecParameters codec; Settings settings; }; std::vector encoders_; bool simulcast_; - std::function()> create_encoder_; + std::function(std::string)> create_encoder_; }; std::shared_ptr CreateSimulcastEncoderAdapter( - const soracp::RtpEncodingParameters& params, - std::function()> create_encoder) { + const soracp::RtpParameters& params, + std::function(std::string)> create_encoder) { return std::make_shared(params, create_encoder); } diff --git a/src/sorac.cpp b/src/sorac.cpp index 
7f563b0..a26061a 100644 --- a/src/sorac.cpp +++ b/src/sorac.cpp @@ -509,11 +509,10 @@ void sorac_signaling_set_on_push(SoracSignaling* p, on_push(message.c_str(), (int)message.size(), userdata); }); } -void sorac_signaling_get_rtp_encoding_parameters( - SoracSignaling* p, - soracp_RtpEncodingParameters* params) { +void sorac_signaling_get_rtp_parameters(SoracSignaling* p, + soracp_RtpParameters* params) { auto signaling = g_cptr.Get(p, g_signaling_map); - auto u = signaling->GetRtpEncodingParameters(); - soracp_RtpEncodingParameters_from_cpp(u, params); + auto u = signaling->GetRtpParameters(); + soracp_RtpParameters_from_cpp(u, params); } } diff --git a/src/vt_h26x_video_encoder.cpp b/src/vt_h26x_video_encoder.cpp index 79e5b9d..900756f 100644 --- a/src/vt_h26x_video_encoder.cpp +++ b/src/vt_h26x_video_encoder.cpp @@ -12,6 +12,112 @@ namespace sorac { +// https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/sdk/objc/components/video_codec/RTCVideoEncoderH264.mm +// より +// Extract VideoToolbox profile out of the webrtc::SdpVideoFormat. If there is +// no specific VideoToolbox profile for the specified level, AutoLevel will be +// returned. The user must initialize the encoder with a resolution and +// framerate conforming to the selected H264 level regardless. 
+CFStringRef ExtractProfile(const sorac::H264ProfileLevelId& profile_level_id) { + switch (profile_level_id.profile) { + case sorac::H264Profile::kProfileConstrainedBaseline: + case sorac::H264Profile::kProfileBaseline: + switch (profile_level_id.level) { + case sorac::H264Level::kLevel3: + return kVTProfileLevel_H264_Baseline_3_0; + case sorac::H264Level::kLevel3_1: + return kVTProfileLevel_H264_Baseline_3_1; + case sorac::H264Level::kLevel3_2: + return kVTProfileLevel_H264_Baseline_3_2; + case sorac::H264Level::kLevel4: + return kVTProfileLevel_H264_Baseline_4_0; + case sorac::H264Level::kLevel4_1: + return kVTProfileLevel_H264_Baseline_4_1; + case sorac::H264Level::kLevel4_2: + return kVTProfileLevel_H264_Baseline_4_2; + case sorac::H264Level::kLevel5: + return kVTProfileLevel_H264_Baseline_5_0; + case sorac::H264Level::kLevel5_1: + return kVTProfileLevel_H264_Baseline_5_1; + case sorac::H264Level::kLevel5_2: + return kVTProfileLevel_H264_Baseline_5_2; + case sorac::H264Level::kLevel1: + case sorac::H264Level::kLevel1_b: + case sorac::H264Level::kLevel1_1: + case sorac::H264Level::kLevel1_2: + case sorac::H264Level::kLevel1_3: + case sorac::H264Level::kLevel2: + case sorac::H264Level::kLevel2_1: + case sorac::H264Level::kLevel2_2: + return kVTProfileLevel_H264_Baseline_AutoLevel; + } + + case sorac::H264Profile::kProfileMain: + switch (profile_level_id.level) { + case sorac::H264Level::kLevel3: + return kVTProfileLevel_H264_Main_3_0; + case sorac::H264Level::kLevel3_1: + return kVTProfileLevel_H264_Main_3_1; + case sorac::H264Level::kLevel3_2: + return kVTProfileLevel_H264_Main_3_2; + case sorac::H264Level::kLevel4: + return kVTProfileLevel_H264_Main_4_0; + case sorac::H264Level::kLevel4_1: + return kVTProfileLevel_H264_Main_4_1; + case sorac::H264Level::kLevel4_2: + return kVTProfileLevel_H264_Main_4_2; + case sorac::H264Level::kLevel5: + return kVTProfileLevel_H264_Main_5_0; + case sorac::H264Level::kLevel5_1: + return kVTProfileLevel_H264_Main_5_1; + case 
sorac::H264Level::kLevel5_2: + return kVTProfileLevel_H264_Main_5_2; + case sorac::H264Level::kLevel1: + case sorac::H264Level::kLevel1_b: + case sorac::H264Level::kLevel1_1: + case sorac::H264Level::kLevel1_2: + case sorac::H264Level::kLevel1_3: + case sorac::H264Level::kLevel2: + case sorac::H264Level::kLevel2_1: + case sorac::H264Level::kLevel2_2: + return kVTProfileLevel_H264_Main_AutoLevel; + } + + case sorac::H264Profile::kProfileConstrainedHigh: + case sorac::H264Profile::kProfileHigh: + case sorac::H264Profile::kProfilePredictiveHigh444: + switch (profile_level_id.level) { + case sorac::H264Level::kLevel3: + return kVTProfileLevel_H264_High_3_0; + case sorac::H264Level::kLevel3_1: + return kVTProfileLevel_H264_High_3_1; + case sorac::H264Level::kLevel3_2: + return kVTProfileLevel_H264_High_3_2; + case sorac::H264Level::kLevel4: + return kVTProfileLevel_H264_High_4_0; + case sorac::H264Level::kLevel4_1: + return kVTProfileLevel_H264_High_4_1; + case sorac::H264Level::kLevel4_2: + return kVTProfileLevel_H264_High_4_2; + case sorac::H264Level::kLevel5: + return kVTProfileLevel_H264_High_5_0; + case sorac::H264Level::kLevel5_1: + return kVTProfileLevel_H264_High_5_1; + case sorac::H264Level::kLevel5_2: + return kVTProfileLevel_H264_High_5_2; + case sorac::H264Level::kLevel1: + case sorac::H264Level::kLevel1_b: + case sorac::H264Level::kLevel1_1: + case sorac::H264Level::kLevel1_2: + case sorac::H264Level::kLevel1_3: + case sorac::H264Level::kLevel2: + case sorac::H264Level::kLevel2_1: + case sorac::H264Level::kLevel2_2: + return kVTProfileLevel_H264_High_AutoLevel; + } + } +} + // デストラクタで指定した関数を呼ぶだけのクラス class Resource { public: @@ -24,7 +130,9 @@ class Resource { class VTH26xVideoEncoder : public VideoEncoder { public: - VTH26xVideoEncoder(VTH26xVideoEncoderType type) : type_(type) {} + VTH26xVideoEncoder(VTH26xVideoEncoderType type, + std::optional profile) + : type_(type), profile_(profile) {} ~VTH26xVideoEncoder() override { Release(); } void 
ForceIntraNextFrame() override { next_iframe_ = true; } @@ -51,8 +159,7 @@ class VTH26xVideoEncoder : public VideoEncoder { CFDictionaryRef encoder_specs = CFDictionaryCreate( nullptr, - (const void**) - &kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder, + (const void**)&kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder, (const void**)&kCFBooleanTrue, 1, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); Resource encoder_specs_resource( @@ -81,7 +188,8 @@ class VTH26xVideoEncoder : public VideoEncoder { if (type_ == VTH26xVideoEncoderType::kH264) { if (OSStatus err = VTSessionSetProperty( vtref_, kVTCompressionPropertyKey_ProfileLevel, - kVTProfileLevel_H264_Baseline_3_1); + ExtractProfile(profile_.value_or(H264ProfileLevelId( + H264Profile::kProfileBaseline, H264Level::kLevel3_1)))); err != noErr) { PLOG_ERROR << "Failed to set profile-level property: err=" << err; return false; @@ -110,6 +218,20 @@ class VTH26xVideoEncoder : public VideoEncoder { } } + // フレームレート + { + int value = settings.fps; + CFNumberRef cfnum = + CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &value); + Resource cfnum_resource([cfnum]() { CFRelease(cfnum); }); + OSStatus err = VTSessionSetProperty( + vtref_, kVTCompressionPropertyKey_ExpectedFrameRate, cfnum); + if (err != noErr) { + PLOG_ERROR << "Failed to set expected-frame-rate property: err=" << err; + return false; + } + } + // キーフレーム間隔 (7200 フレームまたは 4 分間) { int value = 7200; @@ -412,6 +534,7 @@ class VTH26xVideoEncoder : public VideoEncoder { }; VTH26xVideoEncoderType type_; + std::optional profile_; VTCompressionSessionRef vtref_ = nullptr; std::function callback_; @@ -420,8 +543,9 @@ class VTH26xVideoEncoder : public VideoEncoder { }; std::shared_ptr CreateVTH26xVideoEncoder( - VTH26xVideoEncoderType type) { - return std::make_shared(type); + VTH26xVideoEncoderType type, + std::optional profile) { + return std::make_shared(type, profile); } } // namespace sorac