Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Keyframe Selection: Add multi-threading and improve overall performances #1512

Merged
merged 5 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 136 additions & 46 deletions src/aliceVision/keyframe/KeyframeSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <cstdlib>
#include <iomanip>
#include <fstream>
#include <thread>


namespace fs = boost::filesystem;
Expand Down Expand Up @@ -320,31 +321,100 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, c
_frameWidth = 0;
_frameHeight = 0;

// Create feeds and count minimum number of frames
// Create single feed and count minimum number of frames
std::size_t nbFrames = std::numeric_limits<std::size_t>::max();
std::vector<std::unique_ptr<dataio::FeedProvider>> feeds;

for (std::size_t mediaIndex = 0; mediaIndex < _mediaPaths.size(); ++mediaIndex) {
const auto& path = _mediaPaths.at(mediaIndex);

// Create a feed provider per mediaPaths
feeds.push_back(std::make_unique<dataio::FeedProvider>(path));
const auto& feed = *feeds.back();
auto feed = std::make_unique<dataio::FeedProvider>(path);

// Check if feed is initialized
if (!feed.isInit()) {
if (!feed->isInit()) {
ALICEVISION_THROW(std::invalid_argument, "Cannot initialize the FeedProvider with " << path);
}

// Update minimum number of frames
nbFrames = std::min(nbFrames, (size_t)feed.nbFrames());
// Number of frames in the rig might slightly differ
nbFrames = std::min(nbFrames, static_cast<std::size_t>(feed->nbFrames()));

if (mediaIndex == 0) {
// Read first image and set _frameWidth and _frameHeight, since the feeds have been initialized
feed->goToFrame(0);
cv::Mat mat = readImage(*feed, rescaledWidthFlow);
// Will be used later on to determine the motion accumulation step
_frameWidth = mat.size().width;
_frameHeight = mat.size().height;
}
}

// Check if minimum number of frame is zero
if (nbFrames == 0) {
ALICEVISION_THROW(std::invalid_argument, "One or multiple medias can't be found or is empty!");
}

// With the number of threads available and the number of frames to process known,
// blocks can be prepared for multi-threading
int nbThreads = omp_get_max_threads();

std::size_t blockSize = (nbFrames / static_cast<std::size_t>(nbThreads)) + 1;

// If a block contains less than _minBlockSize frames (when there are lots of available threads for a small number
// of frames, for example), resize it: less threads will be spawned, but since new FeedProvider objects need to be
// created for each thread, we prevent spawning threads that will need to create FeedProvider objects
// for very few frames.
if (blockSize < _minBlockSize && nbFrames >= _minBlockSize) {
blockSize = _minBlockSize;
nbThreads = static_cast<int>(nbFrames / blockSize) + 1; // +1 to ensure that every frame is processed by a thread
}

std::vector<std::thread> threads;
ALICEVISION_LOG_INFO("Splitting " << nbFrames << " frames into " << nbThreads << " threads of size " << blockSize << ".");

for (std::size_t i = 0; i < nbThreads; i++) {
std::size_t startFrame = static_cast<std::size_t>(std::max(0, static_cast<int>(i * blockSize) - 1));
std::size_t endFrame = std::min(i * blockSize + blockSize, nbFrames);

// If there is an extra thread with no new frames to process, skip it.
// This might occur as a consequence of the "+1" when adjusting the number of threads.
if (startFrame >= nbFrames) {
break;
}

ALICEVISION_LOG_DEBUG("Starting thread to compute scores for frame " << startFrame << " to " << endFrame << ".");

threads.push_back(std::thread(&KeyframeSelector::computeScoresProc, this, startFrame, endFrame, nbFrames,
rescaledWidthSharpness, rescaledWidthFlow, sharpnessWindowSize, flowCellSize,
skipSharpnessComputation));
}

for (auto &th : threads) {
th.join();
}

return true;
}

bool KeyframeSelector::computeScoresProc(const std::size_t startFrame, const std::size_t endFrame,
const std::size_t nbFrames, const std::size_t rescaledWidthSharpness,
const std::size_t rescaledWidthFlow, const std::size_t sharpnessWindowSize,
const std::size_t flowCellSize, const bool skipSharpnessComputation)
{
std::vector<std::unique_ptr<dataio::FeedProvider>> feeds;

for (std::size_t mediaIndex = 0; mediaIndex < _mediaPaths.size(); ++mediaIndex) {
const auto& path = _mediaPaths.at(mediaIndex);

// Create a feed provider per mediaPaths
feeds.push_back(std::make_unique<dataio::FeedProvider>(path));
const auto& feed = *feeds.back();

// Check if feed is initialized
if (!feed.isInit()) {
ALICEVISION_THROW(std::invalid_argument, "Cannot initialize the FeedProvider with " << path);
}
}

// Feed provider variables
image::Image<image::RGBColor> image; // original image
camera::PinholeRadialK3 queryIntrinsics; // image associated camera intrinsics
Expand All @@ -354,26 +424,26 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, c
// Feed and metadata initialization
for (std::size_t mediaIndex = 0; mediaIndex < feeds.size(); ++mediaIndex) {
// First frame with offset
feeds.at(mediaIndex)->goToFrame(0);
feeds.at(mediaIndex)->goToFrame(startFrame);

if (!feeds.at(mediaIndex)->readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) {
ALICEVISION_THROW(std::invalid_argument, "Cannot read media first frame " << _mediaPaths[mediaIndex]);
}
}

std::size_t currentFrame = 0;
std::size_t currentFrame = startFrame;
cv::Mat currentMatSharpness; // OpenCV matrix for the sharpness computation
cv::Mat previousMatFlow, currentMatFlow; // OpenCV matrices for the optical flow computation
auto ptrFlow = cv::optflow::createOptFlow_DeepFlow();

while (currentFrame < nbFrames) {
while (currentFrame < endFrame) {
double minimalSharpness = skipSharpnessComputation ? 1.0f : std::numeric_limits<double>::max();
double minimalFlow = std::numeric_limits<double>::max();

for (std::size_t mediaIndex = 0; mediaIndex < feeds.size(); ++mediaIndex) {
auto& feed = *feeds.at(mediaIndex);

if (currentFrame > 0) { // Get currentFrame - 1 for the optical flow computation
if (currentFrame > startFrame) { // Get currentFrame - 1 for the optical flow computation
previousMatFlow = readImage(feed, rescaledWidthFlow);
feed.goToNextFrame();
}
Expand All @@ -397,14 +467,18 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, c
// currentFrame + 2 = next frame to evaluate with indexing starting at 1, for display reasons
ALICEVISION_LOG_WARNING("Invalid or missing frame " << currentFrame + 1
<< ", attempting to read frame " << currentFrame + 2 << ".");

{
// Push dummy scores for the frame that was skipped
const std::scoped_lock lock(_mutex);
_sharpnessScores[currentFrame] = -1.f;
_flowScores[currentFrame] = -1.f;
}

success = feed.goToFrame(++currentFrame);
if (success) {
currentMatSharpness = readImage(feed, rescaledWidthSharpness);
}

// Push dummy scores for the frame that was skipped
_sharpnessScores.push_back(-1.f);
_flowScores.push_back(-1.f);
}
}
}
Expand All @@ -415,32 +489,30 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, c
currentMatFlow = readImage(feed, rescaledWidthFlow);
}

if (_frameWidth == 0 && _frameHeight == 0) { // Will be used later on to determine the motion accumulation step
_frameWidth = currentMatFlow.size().width;
_frameHeight = currentMatFlow.size().height;
}

// Compute sharpness
if (!skipSharpnessComputation) {
const double sharpness = computeSharpness(currentMatSharpness, sharpnessWindowSize);
minimalSharpness = std::min(minimalSharpness, sharpness);
}

// Compute optical flow
if (currentFrame > 0) {
if (currentFrame > startFrame) {
const double flow = estimateFlow(ptrFlow, currentMatFlow, previousMatFlow, flowCellSize);
minimalFlow = std::min(minimalFlow, flow);
}

ALICEVISION_LOG_INFO("Finished processing frame " << currentFrame + 1 << "/" << nbFrames);
std::string rigInfo = feeds.size() > 1 ? " (media " + std::to_string(mediaIndex + 1) + "/" + std::to_string(feeds.size()) + ")" : "";
ALICEVISION_LOG_INFO("Finished processing frame " << currentFrame + 1 << "/" << nbFrames << rigInfo);
}

// Save scores for the current frame
_sharpnessScores.push_back(minimalSharpness);
_flowScores.push_back(currentFrame > 0 ? minimalFlow : -1.f);
{
// Save scores for the current frame
const std::scoped_lock lock(_mutex);
_sharpnessScores[currentFrame] = minimalSharpness;
_flowScores[currentFrame] = currentFrame > startFrame ? minimalFlow : -1.f;
}
++currentFrame;
}

return true;
}

Expand Down Expand Up @@ -508,8 +580,7 @@ bool KeyframeSelector::writeSelection(const std::vector<std::string>& brands,
metadata.push_back(oiio::ParamValue("Exif:FocalLength", mmFocals[id]));
metadata.push_back(oiio::ParamValue("Exif:ImageUniqueID", std::to_string(getRandomInt())));
metadata.push_back(oiio::ParamValue("Orientation", orientation)); // Will not propagate for PNG outputs
if (outputExtension != "jpg") // TODO: propagate pixelAspectRatio properly for JPG
metadata.push_back(oiio::ParamValue("PixelAspectRatio", pixelAspectRatio));
metadata.push_back(oiio::ParamValue("PixelAspectRatio", pixelAspectRatio));

fs::path folder = _outputFolder;
std::ostringstream filenameSS;
Expand Down Expand Up @@ -757,33 +828,52 @@ double KeyframeSelector::computeSharpness(const cv::Mat& grayscaleImage, const s
cv::Laplacian(grayscaleImage, laplacian, CV_64F);
cv::integral(laplacian, sum, squaredSum);

double totalCount = windowSize * windowSize;
double maxstd = 0.0;
int x, y;

// TODO: do not slide the window pixel by pixel to speed up computations
// Starts at 1 because the integral image is padded with 0s on the top and left borders
for (int y = 1; y < sum.rows - windowSize; ++y) {
for (int x = 1; x < sum.cols - windowSize; ++x) {
double tl = sum.at<double>(y, x);
double tr = sum.at<double>(y, x + windowSize);
double bl = sum.at<double>(y + windowSize, x);
double br = sum.at<double>(y + windowSize, x + windowSize);
const double s1 = br + tl - tr - bl;

tl = squaredSum.at<double>(y, x);
tr = squaredSum.at<double>(y, x + windowSize);
bl = squaredSum.at<double>(y + windowSize, x);
br = squaredSum.at<double>(y + windowSize, x + windowSize);
const double s2 = br + tl - tr - bl;

const double std2 = std::sqrt((s2 - (s1 * s1) / totalCount) / totalCount);
maxstd = std::max(maxstd, std2);
for (y = 1; y < sum.rows - windowSize; y += windowSize / 4) {
for (x = 1; x < sum.cols - windowSize; x += windowSize / 4) {
maxstd = std::max(maxstd, computeSharpnessStd(sum, squaredSum, x, y, windowSize));
}

// Compute sharpness over the last part of the image for windowSize along the x-axis;
// the overlap with the previous window might be greater than the previous ones
if (x >= sum.cols - windowSize) {
x = sum.cols - windowSize - 1;
maxstd = std::max(maxstd, computeSharpnessStd(sum, squaredSum, x, y, windowSize));
}
}

// Compute sharpness over the last part of the image for windowSize along the y-axis;
// the overlap with the previous window might be greater than the previous ones
if (y >= sum.rows - windowSize) {
y = sum.rows - windowSize - 1;
maxstd = std::max(maxstd, computeSharpnessStd(sum, squaredSum, x, y, windowSize));
}

return maxstd;
}

const double KeyframeSelector::computeSharpnessStd(const cv::Mat& sum, const cv::Mat& squaredSum, const int x,
                                                   const int y, const int windowSize)
{
    // Standard deviation of the Laplacian response over a windowSize x windowSize
    // window whose top-left corner is (x, y), computed in O(1) from the integral
    // image `sum` and the squared integral image `squaredSum`.
    const double nbPixels = static_cast<double>(windowSize) * static_cast<double>(windowSize);

    // Box sum over the window using the four-corner integral image identity:
    // S = bottomRight + topLeft - topRight - bottomLeft
    const auto windowSum = [x, y, windowSize](const cv::Mat& integralImage) {
        const double topLeft = integralImage.at<double>(y, x);
        const double topRight = integralImage.at<double>(y, x + windowSize);
        const double bottomLeft = integralImage.at<double>(y + windowSize, x);
        const double bottomRight = integralImage.at<double>(y + windowSize, x + windowSize);
        return bottomRight + topLeft - topRight - bottomLeft;
    };

    const double s1 = windowSum(sum);         // sum of values over the window
    const double s2 = windowSum(squaredSum);  // sum of squared values over the window

    // std = sqrt(E[X^2] - E[X]^2)
    return std::sqrt((s2 - (s1 * s1) / nbPixels) / nbPixels);
}

double KeyframeSelector::estimateFlow(const cv::Ptr<cv::DenseOpticalFlow>& ptrFlow, const cv::Mat& grayscaleImage,
const cv::Mat& previousGrayscaleImage, const std::size_t cellSize)
{
Expand Down
Loading