Skip to content

Commit

Permalink
PERF: Precompile ocl kernels upon object creation
Browse files Browse the repository at this point in the history
  • Loading branch information
NicerNewerCar committed Oct 25, 2023
1 parent d9fa295 commit 00181de
Show file tree
Hide file tree
Showing 15 changed files with 186 additions and 132 deletions.
43 changes: 23 additions & 20 deletions libautoscoper/src/gpu/opencl/BackgroundRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,13 @@ BackgroundRenderer::BackgroundRenderer() : image_(0)
viewport_[1] = -1.0f;
viewport_[2] = 2.0f;
viewport_[3] = 2.0f;
kernel_ = background_renderer_program_.compile(BackgroundRenderer_cl, "background_render_kernel");
}

// Releases the resources this renderer owns: the image and the kernel that
// the constructor compiles.
BackgroundRenderer::~BackgroundRenderer()
{
    // `delete` on a null pointer is a no-op, so no explicit guards are needed.
    delete image_;
    delete kernel_;
}

void
Expand Down Expand Up @@ -108,30 +110,31 @@ BackgroundRenderer::set_viewport(float x, float y, float width, float height)
// Launches the precompiled "background_render_kernel" over a
// width x height output.
//
// buffer    - buffer bound as the kernel's first argument.
// width     - output width; also drives the grid's x extent.
// height    - output height; also drives the grid's y extent.
// threshold - value forwarded to the kernel after the viewport.
//
// The kernel is compiled once in the constructor and reused here instead of
// being rebuilt (and deleted) on every call.
void
BackgroundRenderer::render(const Buffer* buffer, unsigned width, unsigned height, float threshold) const
{
    // Arguments are bound positionally; the order below must stay in sync
    // with the kernel's parameter list.
    kernel_->addBufferArg(buffer);
    kernel_->addArg(width);
    kernel_->addArg(height);
    for (int i = 0; i < 4; ++i) {
        kernel_->addArg(image_plane_[i]);
    }
    for (int i = 0; i < 4; ++i) {
        kernel_->addArg(viewport_[i]);
    }
    kernel_->addArg(threshold);
    kernel_->addImageArg(image_);

    // Calculate the block and grid sizes: round up so partially filled
    // blocks at the right/bottom edges are still covered.
    kernel_->block2d(BX, BY);
    kernel_->grid2d((width + BX - 1) / BX, (height + BY - 1) / BY);

    kernel_->launch();

    // The kernel object outlives this call, so it is reset rather than
    // deleted (presumably clearing the bound arguments for the next render).
    kernel_->reset();
}

} } // namespace xromm::opencl
2 changes: 2 additions & 0 deletions libautoscoper/src/gpu/opencl/BackgroundRenderer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ namespace xromm { namespace gpu
Image* image_;
float image_plane_[4];
float viewport_[4];

mutable Kernel* kernel_;
};

} } // namespace xromm::opencl
Expand Down
27 changes: 15 additions & 12 deletions libautoscoper/src/gpu/opencl/ContrastFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ ContrastFilter::ContrastFilter()
std::stringstream name_stream;
name_stream << "ContrastFilter" << (++num_contrast_filters);
name_ = name_stream.str();
kernel_ = contrast_program_.compile(ContrastFilter_cl, KERNEL_NAME);
}

void
Expand All @@ -73,22 +74,24 @@ ContrastFilter::apply(
int width,
int height)
{
Kernel* kernel = contrast_program_.compile(ContrastFilter_cl, KERNEL_NAME);
// Kernel* kernel = contrast_program_.compile(ContrastFilter_cl, KERNEL_NAME);

kernel->block2d(KERNEL_X, KERNEL_Y);
kernel->grid2d((width-1)/KERNEL_X+1, (height-1)/KERNEL_Y+1);
kernel_->block2d(KERNEL_X, KERNEL_Y);
kernel_->grid2d((width-1)/KERNEL_X+1, (height-1)/KERNEL_Y+1);

kernel->addBufferArg(input);
kernel->addBufferArg(output);
kernel->addArg(width);
kernel->addArg(height);
kernel->addArg(alpha_);
kernel->addArg(beta_);
kernel->addArg(size_);
kernel_->addBufferArg(input);
kernel_->addBufferArg(output);
kernel_->addArg(width);
kernel_->addArg(height);
kernel_->addArg(alpha_);
kernel_->addArg(beta_);
kernel_->addArg(size_);

kernel->launch();
kernel_->launch();

delete kernel;
kernel_->reset();

//delete kernel;
}

} } // namespace xromm::opencl
3 changes: 2 additions & 1 deletion libautoscoper/src/gpu/opencl/ContrastFilter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class ContrastFilter : public Filter
{
public:
ContrastFilter();
virtual ~ContrastFilter() {}
~ContrastFilter() { if (kernel_) delete kernel_; }

// Apply the filter to the input image
virtual void apply(const Buffer* input,
Expand All @@ -72,6 +72,7 @@ class ContrastFilter : public Filter
float alpha_;
float beta_;
int size_;
Kernel* kernel_;
};

} } // namespace xromm::opencl
Expand Down
27 changes: 16 additions & 11 deletions libautoscoper/src/gpu/opencl/GaussianFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,14 @@ GaussianFilter::GaussianFilter()
name_ = name_stream.str();

set_radius(1);

kernel_ = gaussian_program_.compile(GaussianFilter_cl, KERNEL_NAME);
}

// Releases the resources this filter owns: the gaussian buffer and the
// kernel that the constructor compiles.
GaussianFilter::~GaussianFilter()
{
    // `delete` on a null pointer is a no-op, so no explicit guards are needed.
    delete gaussian_;
    delete kernel_;
}

void GaussianFilter::set_radius(float radius)
Expand Down Expand Up @@ -133,21 +136,23 @@ GaussianFilter::apply(const Buffer* input,
}
else
{
Kernel* kernel = gaussian_program_.compile(GaussianFilter_cl, KERNEL_NAME);
//Kernel* kernel = gaussian_program_.compile(GaussianFilter_cl, KERNEL_NAME);

kernel_->block2d(KERNEL_X, KERNEL_Y);
kernel_->grid2d((width-1)/KERNEL_X+1, (height-1)/KERNEL_Y+1);

kernel->block2d(KERNEL_X, KERNEL_Y);
kernel->grid2d((width-1)/KERNEL_X+1, (height-1)/KERNEL_Y+1);
kernel_->addBufferArg(input);
kernel_->addBufferArg(output);
kernel_->addArg(width);
kernel_->addArg(height);
kernel_->addBufferArg(gaussian_);
kernel_->addArg(filterSize_);

kernel->addBufferArg(input);
kernel->addBufferArg(output);
kernel->addArg(width);
kernel->addArg(height);
kernel->addBufferArg(gaussian_);
kernel->addArg(filterSize_);
kernel_->launch();

kernel->launch();
kernel_->reset();

delete kernel;
//delete kernel;
}
}

Expand Down
2 changes: 2 additions & 0 deletions libautoscoper/src/gpu/opencl/GaussianFilter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ class GaussianFilter : public Filter
float radius_;
Buffer* gaussian_;
int filterSize_;

Kernel* kernel_;
};

} } // namespace xromm::opencl
Expand Down
57 changes: 33 additions & 24 deletions libautoscoper/src/gpu/opencl/Ncc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ static Buffer* d_den2s = NULL;
static Program ncc_kernel_;
static Program ncc_sum_kernel_;

// Kernels compiled in ncc_init() and released in ncc_deinit(). Kept
// file-local and explicitly null-initialised, matching the other
// file-scope state declared above.
static Kernel* ncc_kernel = NULL;
static Kernel* ncc_sum_kernel = NULL;

//////// Helper functions ////////

static void get_max_threads()
Expand Down Expand Up @@ -108,7 +111,7 @@ static float ncc_sum(Buffer* f, unsigned n)
size_t numThreads, numBlocks, sizeMem;
get_device_params(n, numThreads, numBlocks, sizeMem);

Kernel* kernel = ncc_sum_kernel_.compile(NccSum_cl, "ncc_sum_kernel");
//Kernel* kernel = ncc_sum_kernel_.compile(NccSum_cl, "ncc_sum_kernel");

while (n > 1)
{
Expand All @@ -118,15 +121,15 @@ static float ncc_sum(Buffer* f, unsigned n)
std::cerr << "ncc_sum[" << n << "] sizeMem = " << sizeMem << std::endl;
#endif

kernel->block2d(numThreads, 1);
kernel->grid2d(1, numBlocks);
ncc_sum_kernel->block2d(numThreads, 1);
ncc_sum_kernel->grid2d(1, numBlocks);

kernel->addBufferArg(f);
kernel->addBufferArg(d_sums);
kernel->addLocalMem(sizeMem);
kernel->addArg(n);
ncc_sum_kernel->addBufferArg(f);
ncc_sum_kernel->addBufferArg(d_sums);
ncc_sum_kernel->addLocalMem(sizeMem);
ncc_sum_kernel->addArg(n);

kernel->launch();
ncc_sum_kernel->launch();

#if DEBUG
float *tmp = new float[numBlocks];
Expand All @@ -142,10 +145,10 @@ static float ncc_sum(Buffer* f, unsigned n)
get_device_params(n, numThreads, numBlocks, sizeMem);
f = d_sums;

kernel->reset();
ncc_sum_kernel->reset();
}

delete kernel;
//delete kernel;

float h_sum;
d_sums->write(&h_sum, sizeof(float));
Expand All @@ -171,6 +174,8 @@ void ncc_init(unsigned max_n)

g_max_n = max_n;
}
ncc_kernel = ncc_kernel_.compile(Ncc_cl, "ncc_kernel");
ncc_sum_kernel = ncc_sum_kernel_.compile(NccSum_cl, "ncc_sum_kernel");
}

void ncc_deinit()
Expand All @@ -179,6 +184,8 @@ void ncc_deinit()
delete d_nums;
delete d_den1s;
delete d_den2s;
delete ncc_kernel;
delete ncc_sum_kernel;

g_max_n = 0;
}
Expand All @@ -197,24 +204,26 @@ float ncc(Buffer* f, Buffer* g, Buffer* mask, unsigned n)
size_t numThreads, numBlocks, sizeMem;
get_device_params(n, numThreads, numBlocks, sizeMem);

Kernel* kernel = ncc_kernel_.compile(Ncc_cl, "ncc_kernel");
//Kernel* kernel = ncc_kernel_.compile(Ncc_cl, "ncc_kernel");

ncc_kernel->block1d(numThreads);
ncc_kernel->grid1d(numBlocks);

kernel->block1d(numThreads);
kernel->grid1d(numBlocks);
ncc_kernel->addBufferArg(f);
ncc_kernel->addArg(meanF);
ncc_kernel->addBufferArg(g);
ncc_kernel->addArg(meanG);
ncc_kernel->addBufferArg(mask);
ncc_kernel->addBufferArg(d_nums);
ncc_kernel->addBufferArg(d_den1s);
ncc_kernel->addBufferArg(d_den2s);
ncc_kernel->addArg(n);

kernel->addBufferArg(f);
kernel->addArg(meanF);
kernel->addBufferArg(g);
kernel->addArg(meanG);
kernel->addBufferArg(mask);
kernel->addBufferArg(d_nums);
kernel->addBufferArg(d_den1s);
kernel->addBufferArg(d_den2s);
kernel->addArg(n);
ncc_kernel->launch();

kernel->launch();
ncc_kernel->reset();

delete kernel;
//delete kernel;

float den = sqrt(ncc_sum(d_den1s,n)*ncc_sum(d_den2s,n));

Expand Down
43 changes: 24 additions & 19 deletions libautoscoper/src/gpu/opencl/RadRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,14 @@ RadRenderer::RadRenderer() : image_(0)
std::stringstream name_stream;
name_stream << "RadRenderer" << (++num_rad_renderers);
name_ = name_stream.str();

kernel_ = rad_renderer_program_.compile( RadRenderer_cl, "rad_render_kernel");
}

// Releases the resources this renderer owns: the image and the kernel that
// the constructor compiles.
RadRenderer::~RadRenderer()
{
    // `delete` on a null pointer is a no-op, so no explicit guards are needed.
    delete image_;
    delete kernel_;
}

void
Expand Down Expand Up @@ -119,29 +122,31 @@ RadRenderer::set_viewport(float x, float y, float width, float height)
// Launches the precompiled "rad_render_kernel" over a width x height output.
//
// buffer - buffer bound as the kernel's first argument.
// width  - output width; also drives the grid's x extent.
// height - output height; also drives the grid's y extent.
//
// The kernel is compiled once in the constructor and reused here instead of
// being rebuilt (and deleted) on every call.
void
RadRenderer::render(const Buffer* buffer, unsigned width, unsigned height) const
{
    // Arguments are bound positionally; the order below must stay in sync
    // with the kernel's parameter list.
    kernel_->addBufferArg(buffer);
    kernel_->addArg(width);
    kernel_->addArg(height);
    for (int i = 0; i < 4; ++i) {
        kernel_->addArg(image_plane_[i]);
    }
    for (int i = 0; i < 4; ++i) {
        kernel_->addArg(viewport_[i]);
    }
    kernel_->addImageArg(image_);

    // Calculate the block and grid sizes: round up so partially filled
    // blocks at the right/bottom edges are still covered.
    kernel_->block2d(BX, BY);
    kernel_->grid2d((width + BX - 1) / BX, (height + BY - 1) / BY);

    kernel_->launch();

    // The kernel object outlives this call, so it is reset rather than
    // deleted (presumably clearing the bound arguments for the next render).
    kernel_->reset();
}

} } // namespace xromm::opencl
2 changes: 2 additions & 0 deletions libautoscoper/src/gpu/opencl/RadRenderer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ class RadRenderer
float image_plane_[4];
float viewport_[4];
std::string name_;

Kernel* kernel_;
};

} } // namespace xromm::opencl
Expand Down
Loading

0 comments on commit 00181de

Please sign in to comment.