Skip to content

Commit

Permalink
cleaner array conversion in compression kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
AdhocMan committed Feb 17, 2022
1 parent 6728a03 commit ceac18c
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/compression/gpu_kernels/compression_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ auto decompress_gpu(const gpu::StreamType stream, const GPUArrayView1D<int>& ind
const dim3 threadGrid(std::min(
static_cast<int>((indices.size() + threadBlock.x - 1) / threadBlock.x), gpu::GridSizeMedium));
// const dim3 threadGrid(indices.size() < 4 ? 1 : indices.size() / 4);
- launch_kernel(decompress_kernel<double>, threadGrid, threadBlock, 0, stream, indices, input,
+ launch_kernel(decompress_kernel<double>, threadGrid, threadBlock, 0, stream,
+ GPUArrayConstView1D<int>(indices), input,
GPUArrayView1D<typename gpu::fft::ComplexType<double>::type>(
output.data(), output.size(), output.device_id()));
}
Expand All @@ -71,7 +72,8 @@ auto decompress_gpu(const gpu::StreamType stream, const GPUArrayView1D<int>& ind
const dim3 threadBlock(gpu::BlockSizeMedium);
const dim3 threadGrid(std::min(
static_cast<int>((indices.size() + threadBlock.x - 1) / threadBlock.x), gpu::GridSizeMedium));
- launch_kernel(decompress_kernel<float>, threadGrid, threadBlock, 0, stream, indices, input,
+ launch_kernel(decompress_kernel<float>, threadGrid, threadBlock, 0, stream,
+ GPUArrayConstView1D<int>(indices), input,
GPUArrayView1D<typename gpu::fft::ComplexType<float>::type>(
output.data(), output.size(), output.device_id()));
}
Expand Down

0 comments on commit ceac18c

Please sign in to comment.