Skip to content

Commit

Permalink
uint32_t support (split across 4x Y)
Browse files Browse the repository at this point in the history
  • Loading branch information
tbiedert committed Jun 25, 2018
1 parent fbb6bf2 commit 4f3e97f
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 38 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ networked interactive server/client application.

Designed for both remote rendering solutions and general compression of arbitrary image data, NvPipe accepts frames in various formats and supports access to host memory, CUDA device memory, OpenGL textures and OpenGL pixel buffer objects.

Supported formats are 32 bit RGBA frames (8 bit per channel; alpha is not supported by the underlying video codecs and is ignored) and unsigned integer grayscale frames with 4 bit, 8 bit or 16 bit per pixel.
Supported formats are 32 bit RGBA frames (8 bit per channel; alpha is not supported by the underlying video codecs and is ignored) and unsigned integer grayscale frames with 4 bit, 8 bit, 16 bit or 32 bit per pixel.

Besides conventional lossy video compression based on target bitrate and framerate, fully lossless compression is also available, enabling exact bit pattern reconstruction.

Expand Down
44 changes: 35 additions & 9 deletions examples/lossless.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ void test(const uint8_t* data, NvPipe_Format format, uint32_t width, uint32_t he
dataSize /= 2;
else if (format == NVPIPE_UINT16)
dataSize *= 2;
else if (format == NVPIPE_UINT32)
dataSize *= 4;


Timer timer;

Expand Down Expand Up @@ -126,6 +129,8 @@ void test(const uint8_t* data, NvPipe_Format format, uint32_t width, uint32_t he
std::cout << " - [as UINT8] ";
else if (format == NVPIPE_UINT16)
std::cout << " - [as UINT16] ";
else if (format == NVPIPE_UINT32)
std::cout << " - [as UINT32] ";

std::cout << std::fixed << std::setprecision(1) << " Size: " << size * 0.001 << " KB, Encode: " << encodeMs << " ms, Decode: " << decodeMs << " ms - ";

Expand All @@ -143,16 +148,37 @@ int main(int argc, char* argv[])
uint32_t width = 1024;
uint32_t height = 1024;

// Dummy input
std::vector<uint8_t> image(width * height);
for (uint32_t y = 0; y < height; ++y)
for (uint32_t x = 0; x < width; ++x)
image[y * width + x] = (255.0f * x * y) / (width * height) * (y % 100 < 50);
// UINT 8 test
{
std::vector<uint8_t> image(width * height);
for (uint32_t y = 0; y < height; ++y)
for (uint32_t x = 0; x < width; ++x)
image[y * width + x] = (255.0f * x * y) / (width * height) * (y % 100 < 50);

std::cout << std::fixed << std::setprecision(1) << "Input: " << width << " x " << height << " UINT8 (Raw size: " << (width * height) * 0.001 << " KB)" << std::endl;
test(image.data(), NVPIPE_UINT4, width * 2, height);
test(image.data(), NVPIPE_UINT8, width, height);
test(image.data(), NVPIPE_UINT16, width / 2, height);
test(image.data(), NVPIPE_UINT32, width / 4, height);
}

std::cout << std::endl;


// UINT32 test
{
std::vector<uint32_t> image(width * height);
for (uint32_t y = 0; y < height; ++y)
for (uint32_t x = 0; x < width; ++x)
image[y * width + x] = (4294967295.0f * x * y) / (width * height) * (y % 100 < 50);

std::cout << std::fixed << std::setprecision(1) << "Input: " << width << " x " << height << " UINT32 (Raw size: " << (width * height * 4) * 0.001 << " KB)" << std::endl;
// test((uint8_t*) image.data(), NVPIPE_UINT4, width * 8, height);
test((uint8_t*) image.data(), NVPIPE_UINT8, width * 4, height);
test((uint8_t*) image.data(), NVPIPE_UINT16, width * 2, height);
test((uint8_t*) image.data(), NVPIPE_UINT32, width, height);
}

std::cout << std::fixed << std::setprecision(1) << "Input: " << width << " x " << height << " UINT8 (Raw size: " << (width * height) * 0.001 << " KB)" << std::endl;
test(image.data(), NVPIPE_UINT4, width * 2, height);
test(image.data(), NVPIPE_UINT8, width, height);
test(image.data(), NVPIPE_UINT16, width / 2, height);

return 0;
}
119 changes: 92 additions & 27 deletions src/NvPipe.cu
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ inline uint64_t getFrameSize(NvPipe_Format format, uint32_t width, uint32_t heig
return width * height;
else if (format == NVPIPE_UINT16)
return width * height * 2;
else if (format == NVPIPE_UINT32)
return width * height * 4;

return 0;
}
Expand All @@ -106,14 +108,12 @@ void uint4_to_nv12(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t
// Even thread: higher 4 bits, odd thread: lower 4 bits
dst[j] = (x & 1 == 1) ? (src[i] & 0xF) : ((src[i] & 0xF0) >> 4);

// Blank UV channel (kill 3 of 4 threads)
if(x & 1 == 1 || y & 1 == 1)
return;

uint8_t* UV = dst + dstPitch * height;
const uint32_t k = y / 2 * (dstPitch / 2) + x / 2;
UV[2 * k + 0] = 0;
UV[2 * k + 1] = 0;
// Blank UV channel
if (y < height / 2)
{
uint8_t* UV = dst + dstPitch * (height + y);
UV[x] = 0;
}
}
}

Expand Down Expand Up @@ -153,14 +153,12 @@ void uint8_to_nv12(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t
// Copy grayscale image to Y channel
dst[j] = src[i];

// Blank UV channel (kill 3 of 4 threads)
if(x & 1 == 1 || y & 1 == 1)
return;

uint8_t* UV = dst + dstPitch * height;
const uint32_t k = y / 2 * (dstPitch / 2) + x / 2;
UV[2 * k + 0] = 0;
UV[2 * k + 1] = 0;
// Blank UV channel
if (y < height / 2)
{
uint8_t* UV = dst + dstPitch * (height + y);
UV[x] = 0;
}
}
}

Expand Down Expand Up @@ -198,17 +196,13 @@ void uint16_to_nv12(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_
// Copy lower byte to right half of Y channel
dst[j + width] = src[i + 1];


// Blank UV channel (kill 3 of 4 threads)
if(x & 1 == 1 || y & 1 == 1)
return;

uint8_t* UV = dst + dstPitch * height;
const uint32_t k = y / 2 * (dstPitch / 2) + x / 2;
UV[4 * k + 0] = 0;
UV[4 * k + 1] = 0;
UV[4 * k + 2] = 0;
UV[4 * k + 3] = 0;
// Blank UV channel
if (y < height / 2)
{
uint8_t* UV = dst + dstPitch * (height + y);
UV[2 * x + 0] = 0;
UV[2 * x + 1] = 0;
}
}
}

Expand All @@ -231,7 +225,58 @@ void nv12_to_uint16(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_
}
}

/**
 * Packs a 4-byte-per-pixel image into the Y channel of an NV12 frame,
 * using one thread per pixel.
 *
 * Byte k (k = 0..3, in memory order) of every source pixel is stored in the
 * k-th of four side-by-side tiles, each `width` bytes wide, so one destination
 * row carries 4 * width bytes of Y data. Threads with y < height / 2 also zero
 * four bytes of the UV channel, which starts `height` rows into dst.
 *
 * @param src       source image, 4 bytes per pixel
 * @param srcPitch  byte stride between consecutive source rows
 * @param dst       destination NV12 buffer
 * @param dstPitch  byte stride between consecutive destination rows
 * @param width     image width in pixels
 * @param height    image height in pixels
 */
__global__
void uint32_to_nv12(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t dstPitch, uint32_t width, uint32_t height)
{
    const uint32_t col = blockIdx.x * blockDim.x + threadIdx.x;
    const uint32_t row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads that fall outside the image have nothing to do
    if (col >= width || row >= height)
        return;

    const uint32_t srcOffset = row * srcPitch + 4 * col;
    const uint32_t dstOffset = row * dstPitch + col;

    // Scatter the four bytes of this pixel across the four Y tiles
    // (first byte in memory -> leftmost tile, last byte -> rightmost tile)
    for (uint32_t k = 0; k < 4; ++k)
        dst[dstOffset + k * width] = src[srcOffset + k];

    // Blank UV channel: it has height / 2 rows, so only the upper half of the
    // threads participate, each clearing four consecutive bytes
    if (row < height / 2)
    {
        uint8_t* uvRow = dst + dstPitch * (height + row);
        for (uint32_t k = 0; k < 4; ++k)
            uvRow[4 * col + k] = 0;
    }
}

/**
 * Rebuilds a 4-byte-per-pixel image from the Y channel of an NV12 frame,
 * using one thread per pixel (inverse of the four-tile split).
 *
 * Byte k (k = 0..3, in memory order) of every destination pixel is read from
 * the k-th of four side-by-side tiles in the source row, each `width` bytes
 * wide. The UV channel is not read.
 *
 * @param src       source NV12 buffer
 * @param srcPitch  byte stride between consecutive source rows
 * @param dst       destination image, 4 bytes per pixel
 * @param dstPitch  byte stride between consecutive destination rows
 * @param width     image width in pixels
 * @param height    image height in pixels
 */
__global__
void nv12_to_uint32(const uint8_t* src, uint32_t srcPitch, uint8_t* dst, uint32_t dstPitch, uint32_t width, uint32_t height)
{
    const uint32_t col = blockIdx.x * blockDim.x + threadIdx.x;
    const uint32_t row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads that fall outside the image have nothing to do
    if (col >= width || row >= height)
        return;

    const uint32_t srcOffset = row * srcPitch + col;
    const uint32_t dstOffset = row * dstPitch + 4 * col;

    // Gather one byte from each of the four Y tiles into this pixel
    // (leftmost tile -> first byte in memory, rightmost tile -> last byte)
    for (uint32_t k = 0; k < 4; ++k)
        dst[dstOffset + k] = src[srcOffset + k * width];
}

#ifdef NVPIPE_WITH_OPENGL
/**
Expand Down Expand Up @@ -350,6 +395,8 @@ public:
// Recreate encoder if size changed
if (this->format == NVPIPE_UINT16)
this->recreate(width * 2, height); // split into two adjecent tiles in Y channel
else if (this->format == NVPIPE_UINT32)
this->recreate(width * 4, height); // split into four adjecent tiles in Y channel
else
this->recreate(width, height);

Expand Down Expand Up @@ -398,6 +445,14 @@ public:

uint16_to_nv12<<<gridSize, blockSize>>>((uint8_t*) (copyToDevice ? this->deviceBuffer : src), width * 2, (uint8_t*) f->inputPtr, f->pitch, width, height);
}
else if (this->format == NVPIPE_UINT32)
{
// one thread per pixel (split 32 bit into 4x 8 bit)
dim3 gridSize(width / 16 + 1, height / 2 + 1);
dim3 blockSize(16, 2);

uint32_to_nv12<<<gridSize, blockSize>>>((uint8_t*) (copyToDevice ? this->deviceBuffer : src), width * 4, (uint8_t*) f->inputPtr, f->pitch, width, height);
}
}

// Encode
Expand Down Expand Up @@ -620,6 +675,8 @@ public:
// Recreate decoder if size changed
if (this->format == NVPIPE_UINT16)
this->recreate(width * 2, height); // split into two adjecent tiles in Y channel
else if (this->format == NVPIPE_UINT32)
this->recreate(width * 4, height); // split into four adjecent tiles in Y channel
else
this->recreate(width, height);

Expand Down Expand Up @@ -664,6 +721,14 @@ public:

nv12_to_uint16<<<gridSize, blockSize>>>(decoded, this->decoder->GetDeviceFramePitch(), dstDevice, width * 2, width, height);
}
else if (this->format == NVPIPE_UINT32)
{
// one thread per pixel (merge 4x8 bit into 32 bit pixels)
dim3 gridSize(width / 16 + 1, height / 2 + 1);
dim3 blockSize(16, 2);

nv12_to_uint32<<<gridSize, blockSize>>>(decoded, this->decoder->GetDeviceFramePitch(), dstDevice, width * 4, width, height);
}

// Copy to host if necessary
if (copyToHost)
Expand Down
3 changes: 2 additions & 1 deletion src/NvPipe.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ typedef enum {
NVPIPE_RGBA32,
NVPIPE_UINT4,
NVPIPE_UINT8,
NVPIPE_UINT16
NVPIPE_UINT16,
NVPIPE_UINT32
} NvPipe_Format;


Expand Down

0 comments on commit 4f3e97f

Please sign in to comment.