Skip to content

Commit

Permalink
Project import generated by Copybara.
Browse files Browse the repository at this point in the history
GitOrigin-RevId: 6e5aa035cd1f6a9333962df5d3ab97a05bd5744e
  • Loading branch information
MediaPipe Team authored and Sebastian Schmidt committed Jun 28, 2022
1 parent 4a20e99 commit c688862
Show file tree
Hide file tree
Showing 144 changed files with 5,757 additions and 2,103 deletions.
2 changes: 1 addition & 1 deletion .bazelversion
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5.0.0
5.2.0
8 changes: 5 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM ubuntu:18.04
FROM ubuntu:20.04

MAINTAINER <[email protected]>

Expand Down Expand Up @@ -42,6 +42,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common && \
add-apt-repository -y ppa:openjdk-r/ppa && \
apt-get update && apt-get install -y openjdk-8-jdk && \
apt-get install -y mesa-common-dev libegl1-mesa-dev libgles2-mesa-dev && \
apt-get install -y mesa-utils && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

Expand All @@ -50,13 +52,13 @@ RUN pip3 install --upgrade setuptools
RUN pip3 install wheel
RUN pip3 install future
RUN pip3 install six==1.14.0
RUN pip3 install tensorflow==1.14.0
RUN pip3 install tensorflow==2.2.0
RUN pip3 install tf_slim

RUN ln -s /usr/bin/python3 /usr/bin/python

# Install bazel
ARG BAZEL_VERSION=5.0.0
ARG BAZEL_VERSION=5.2.0
RUN mkdir /bazel && \
wget --no-check-certificate -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/b\
azel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
Expand Down
5 changes: 3 additions & 2 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ http_archive(

http_archive(
name = "rules_cc",
strip_prefix = "rules_cc-main",
urls = ["https://github.com/bazelbuild/rules_cc/archive/main.zip"],
strip_prefix = "rules_cc-2f8c04c04462ab83c545ab14c0da68c3b4c96191",
# The commit can be updated if the build passes. Last updated 6/23/22.
urls = ["https://github.com/bazelbuild/rules_cc/archive/2f8c04c04462ab83c545ab14c0da68c3b4c96191.zip"],
)

http_archive(
Expand Down
1 change: 1 addition & 0 deletions mediapipe/calculators/audio/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ cc_test(
"//mediapipe/framework/formats:time_series_header_cc_proto",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/tool:test_util",
"@com_google_absl//absl/flags:flag",
],
)
Expand Down
25 changes: 13 additions & 12 deletions mediapipe/calculators/audio/audio_decoder_calculator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@
#include "mediapipe/framework/port/gtest.h"
#include "mediapipe/framework/port/parse_text_proto.h"
#include "mediapipe/framework/port/status_matchers.h"
#include "mediapipe/framework/tool/test_util.h"

namespace mediapipe {
namespace {

constexpr char kTestPackageRoot[] = "mediapipe/calculators/audio";

TEST(AudioDecoderCalculatorTest, TestWAV) {
CalculatorGraphConfig::Node node_config =
Expand All @@ -37,9 +41,8 @@ TEST(AudioDecoderCalculatorTest, TestWAV) {
})pb");
CalculatorRunner runner(node_config);
runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
file::JoinPath("./",
"/mediapipe/calculators/audio/"
"testdata/sine_wave_1k_44100_mono_2_sec_wav.audio"));
file::JoinPath(GetTestDataDir(kTestPackageRoot),
"sine_wave_1k_44100_mono_2_sec_wav.audio"));
MP_ASSERT_OK(runner.Run());
MP_EXPECT_OK(runner.Outputs()
.Tag("AUDIO_HEADER")
Expand Down Expand Up @@ -68,9 +71,8 @@ TEST(AudioDecoderCalculatorTest, Test48KWAV) {
})pb");
CalculatorRunner runner(node_config);
runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
file::JoinPath("./",
"/mediapipe/calculators/audio/"
"testdata/sine_wave_1k_48000_stereo_2_sec_wav.audio"));
file::JoinPath(GetTestDataDir(kTestPackageRoot),
"sine_wave_1k_48000_stereo_2_sec_wav.audio"));
MP_ASSERT_OK(runner.Run());
MP_EXPECT_OK(runner.Outputs()
.Tag("AUDIO_HEADER")
Expand Down Expand Up @@ -99,9 +101,8 @@ TEST(AudioDecoderCalculatorTest, TestMP3) {
})pb");
CalculatorRunner runner(node_config);
runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
file::JoinPath("./",
"/mediapipe/calculators/audio/"
"testdata/sine_wave_1k_44100_stereo_2_sec_mp3.audio"));
file::JoinPath(GetTestDataDir(kTestPackageRoot),
"sine_wave_1k_44100_stereo_2_sec_mp3.audio"));
MP_ASSERT_OK(runner.Run());
MP_EXPECT_OK(runner.Outputs()
.Tag("AUDIO_HEADER")
Expand Down Expand Up @@ -130,9 +131,8 @@ TEST(AudioDecoderCalculatorTest, TestAAC) {
})pb");
CalculatorRunner runner(node_config);
runner.MutableSidePackets()->Tag("INPUT_FILE_PATH") = MakePacket<std::string>(
file::JoinPath("./",
"/mediapipe/calculators/audio/"
"testdata/sine_wave_1k_44100_stereo_2_sec_aac.audio"));
file::JoinPath(GetTestDataDir(kTestPackageRoot),
"sine_wave_1k_44100_stereo_2_sec_aac.audio"));
MP_ASSERT_OK(runner.Run());
MP_EXPECT_OK(runner.Outputs()
.Tag("AUDIO_HEADER")
Expand All @@ -147,4 +147,5 @@ TEST(AudioDecoderCalculatorTest, TestAAC) {
std::ceil(44100.0 * 2 / 1024));
}

} // namespace
} // namespace mediapipe
81 changes: 58 additions & 23 deletions mediapipe/calculators/audio/spectrogram_calculator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,22 @@
#include <memory>
#include <string>

#include "Eigen/Core"
#include "absl/strings/string_view.h"
#include "audio/dsp/spectrogram/spectrogram.h"
#include "audio/dsp/window_functions.h"
#include "mediapipe/calculators/audio/spectrogram_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/matrix.h"
#include "mediapipe/framework/formats/time_series_header.pb.h"
#include "mediapipe/framework/port/core_proto_inc.h"
#include "mediapipe/framework/port/integral_types.h"
#include "mediapipe/framework/port/logging.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/source_location.h"
#include "mediapipe/framework/port/status_builder.h"
#include "mediapipe/util/time_series_util.h"

namespace mediapipe {

namespace {
constexpr char kFrameDurationTag[] = "FRAME_DURATION";
constexpr char kFrameOverlapTag[] = "FRAME_OVERLAP";
} // namespace
// MediaPipe Calculator for computing the "spectrogram" (short-time Fourier
// transform squared-magnitude, by default) of a multichannel input
// time series, including optionally overlapping frames. Options are
Expand All @@ -46,11 +44,14 @@ namespace mediapipe {
//
// Result is a MatrixData record (for single channel input and when the
// allow_multichannel_input flag is false), or a vector of MatrixData records,
// one for each channel (when the allow_multichannel_input flag is set). The
// rows of each spectrogram matrix correspond to the n_fft/2+1 unique complex
// values, or squared/linear/dB magnitudes, depending on the output_type option.
// Each input packet will result in zero or one output packets, each containing
// one Matrix for each channel of the input, where each Matrix has one or more
// one for each channel (when the allow_multichannel_input flag is set). Each
// waveform frame is converted to the frequency domain by a fast Fourier
// transform whose size, n_fft, is the smallest power of two large enough to
// enclose the frame length of round(frame_duration_seconds * sample_rate). The
// rows of each spectrogram matrix (the result) correspond to the n_fft/2+1
// unique complex values, or squared/linear/dB magnitudes, depending on the
// output_type option. Each input packet will result in zero or one output
// packets, each containing one Matrix for each channel of the input, where
// each Matrix has one or more
// columns of spectral values, one for each complete frame of input samples. If
// the input packet contains too few samples to trigger a new output frame, no
// output packet is generated (since zero-length packets are not legal since
Expand All @@ -71,6 +72,22 @@ class SpectrogramCalculator : public CalculatorBase {
// Input stream with TimeSeriesHeader.
);

if (cc->InputSidePackets().HasTag(kFrameDurationTag)) {
cc->InputSidePackets()
.Tag(kFrameDurationTag)
.Set<double>(
// Optional side packet for frame_duration_seconds if provided.
);
}

if (cc->InputSidePackets().HasTag(kFrameOverlapTag)) {
cc->InputSidePackets()
.Tag(kFrameOverlapTag)
.Set<double>(
// Optional side packet for frame_overlap_seconds if provided.
);
}

SpectrogramCalculatorOptions spectrogram_options =
cc->Options<SpectrogramCalculatorOptions>();
if (!spectrogram_options.allow_multichannel_input()) {
Expand Down Expand Up @@ -184,27 +201,47 @@ class SpectrogramCalculator : public CalculatorBase {
// Fixed scale factor applied to output values (regardless of type).
double output_scale_;

static const float kLnPowerToDb;
static const float kLnSquaredMagnitudeToDb;
};
REGISTER_CALCULATOR(SpectrogramCalculator);

// Factor to convert ln(magnitude_squared) to deciBels = 10.0/ln(10.0).
const float SpectrogramCalculator::kLnPowerToDb = 4.342944819032518;
// DECIBELS = 20*log10(LINEAR_MAGNITUDE) = 10*log10(SQUARED_MAGNITUDE)
//          = 10/ln(10)*ln(SQUARED_MAGNITUDE).
// Factor to convert ln(SQUARED_MAGNITUDE) to deciBels = 10.0/ln(10.0).
const float SpectrogramCalculator::kLnSquaredMagnitudeToDb = 4.342944819032518;

absl::Status SpectrogramCalculator::Open(CalculatorContext* cc) {
SpectrogramCalculatorOptions spectrogram_options =
cc->Options<SpectrogramCalculatorOptions>();
// frame_duration_seconds and frame_overlap_seconds come either from the static
// options or, dynamically, from a side packet. When a side packet is provided,
// its value overrides the corresponding options value.

double frame_duration_seconds = 0;
double frame_overlap_seconds = 0;
if (cc->InputSidePackets().HasTag(kFrameDurationTag)) {
frame_duration_seconds =
cc->InputSidePackets().Tag(kFrameDurationTag).Get<double>();
} else {
frame_duration_seconds = spectrogram_options.frame_duration_seconds();
}

if (cc->InputSidePackets().HasTag(kFrameOverlapTag)) {
frame_overlap_seconds =
cc->InputSidePackets().Tag(kFrameOverlapTag).Get<double>();
} else {
frame_overlap_seconds = spectrogram_options.frame_overlap_seconds();
}

use_local_timestamp_ = spectrogram_options.use_local_timestamp();

if (spectrogram_options.frame_duration_seconds() <= 0.0) {
if (frame_duration_seconds <= 0.0) {
// TODO: return an error.
}
if (spectrogram_options.frame_overlap_seconds() >=
spectrogram_options.frame_duration_seconds()) {
if (frame_overlap_seconds >= frame_duration_seconds) {
// TODO: return an error.
}
if (spectrogram_options.frame_overlap_seconds() < 0.0) {
if (frame_overlap_seconds < 0.0) {
// TODO: return an error.
}

Expand All @@ -220,10 +257,8 @@ absl::Status SpectrogramCalculator::Open(CalculatorContext* cc) {
// TODO: return an error.
}

frame_duration_samples_ =
round(spectrogram_options.frame_duration_seconds() * input_sample_rate_);
frame_overlap_samples_ =
round(spectrogram_options.frame_overlap_seconds() * input_sample_rate_);
frame_duration_samples_ = round(frame_duration_seconds * input_sample_rate_);
frame_overlap_samples_ = round(frame_overlap_seconds * input_sample_rate_);

pad_final_packet_ = spectrogram_options.pad_final_packet();
output_type_ = spectrogram_options.output_type();
Expand Down Expand Up @@ -419,7 +454,7 @@ absl::Status SpectrogramCalculator::ProcessVector(const Matrix& input_stream,
return ProcessVectorToOutput(
input_stream,
+[](const Matrix& col) -> const Matrix {
return kLnPowerToDb * col.array().log().matrix();
return kLnSquaredMagnitudeToDb * col.array().log().matrix();
}, cc);
}
// clang-format on
Expand Down
11 changes: 10 additions & 1 deletion mediapipe/calculators/audio/spectrogram_calculator.proto
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ message SpectrogramCalculatorOptions {

// Duration of overlap between adjacent windows.
// Hence, frame_rate = 1/(frame_duration_seconds - frame_overlap_seconds).
// Required that 0 <= frame_overlap_seconds < frame_duration_seconds.
// Note that the frame_rate here is not the MediaPipe packet rate; a frame
// here means each Fourier transform analysis waveform frame. The output
// MediaPipe packet rate will be the same as the input packet rate. If the
// frame rate is lower than the input packet rate, this will result in
// intermittent empty output packets.
// Required that 0 <= frame_overlap_seconds < frame_duration_seconds.
optional double frame_overlap_seconds = 2 [default = 0.0];

// Whether to pad the final packet with zeros. If true, guarantees that
Expand All @@ -42,6 +46,11 @@ message SpectrogramCalculatorOptions {

// Output value type can be squared-magnitude, linear-magnitude,
// deciBels (dB, = 20*log10(linear_magnitude)), or std::complex.
// Their relationship:
// COMPLEX c = Re + Im*i;
// SQUARED_MAGNITUDE = Re^2 + Im^2;
// LINEAR_MAGNITUDE = sqrt(SQUARED_MAGNITUDE);
// DECIBELS = 20*log10(LINEAR_MAGNITUDE) = 10*log10(SQUARED_MAGNITUDE);
enum OutputType {
SQUARED_MAGNITUDE = 0;
LINEAR_MAGNITUDE = 1;
Expand Down
16 changes: 16 additions & 0 deletions mediapipe/calculators/core/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,22 @@ cc_library(
alwayslink = 1,
)

# Test target for the packet cloner calculator: builds
# packet_cloner_calculator_test.cc and links it against the
# :packet_cloner_calculator library declared in this package.
cc_test(
name = "packet_cloner_calculator_test",
srcs = ["packet_cloner_calculator_test.cc"],
deps = [
":packet_cloner_calculator",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework:timestamp",
"//mediapipe/framework/port:gtest_main",
"//mediapipe/framework/port:parse_text_proto",
"//mediapipe/framework/port:status",
"//mediapipe/framework/stream_handler:immediate_input_stream_handler",
"//mediapipe/framework/tool:sink",
"@com_google_absl//absl/strings",
],
)

cc_library(
name = "packet_inner_join_calculator",
srcs = ["packet_inner_join_calculator.cc"],
Expand Down
11 changes: 10 additions & 1 deletion mediapipe/calculators/core/concatenate_vector_calculator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,17 @@ typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmark>
ConcatenateLandmarkVectorCalculator;
MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarkVectorCalculator);

typedef ConcatenateVectorCalculator<::mediapipe::LandmarkList>
ConcatenateLandmarkListVectorCalculator;
MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarkListVectorCalculator);

typedef ConcatenateVectorCalculator<::mediapipe::NormalizedLandmarkList>
ConcatenateLandmarListVectorCalculator;
ConcatenateNormalizedLandmarkListVectorCalculator;
MEDIAPIPE_REGISTER_NODE(ConcatenateNormalizedLandmarkListVectorCalculator);

// For backwards compatibility, keep the version with the typo.
using ConcatenateLandmarListVectorCalculator =
ConcatenateNormalizedLandmarkListVectorCalculator;
MEDIAPIPE_REGISTER_NODE(ConcatenateLandmarListVectorCalculator);

typedef ConcatenateVectorCalculator<mediapipe::ClassificationList>
Expand Down
4 changes: 2 additions & 2 deletions mediapipe/calculators/core/flow_limiter_calculator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ constexpr char kOptionsTag[] = "OPTIONS";
// FlowLimiterCalculator is used to limit the number of frames in flight
// by dropping input frames when necessary.
//
// The input stream "FINISH" is used to signal the FlowLimiterCalculator
// when a frame is finished processing. Either a non-empty "FINISH" packet
// The input stream "FINISHED" is used to signal the FlowLimiterCalculator
// when a frame is finished processing. Either a non-empty "FINISHED" packet
// or a timestamp bound should be received for each processed frame.
//
// The combination of `max_in_flight: 1` and `max_in_queue: 1` generally gives
Expand Down
Loading

0 comments on commit c688862

Please sign in to comment.