Skip to content

Commit

Permalink
Fix for second bounce.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbikker committed Dec 20, 2024
1 parent e711d22 commit 2affe64
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 21 deletions.
32 changes: 15 additions & 17 deletions tiny_bvh_gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ static int triCount = 0, frameIdx = 0, spp = 0;
static Kernel* init, * clear, * generate, * extend, * shade;
static Kernel* updateCounters1, * updateCounters2, * traceShadows, * finalize;
static Buffer* pixels, * accumulator, * raysIn, * raysOut, * connections, * triData;
static Buffer* cwbvhNodes = 0, * cwbvhTris = 0;
static size_t computeUnits;

// View pyramid for a pinhole camera
struct RenderData
Expand All @@ -51,12 +53,9 @@ void AddQuad( const bvhvec3 pos, const float w, const float d, int c )
data[0] = bvhvec3( -w, 0, -d ), data[1] = bvhvec3( w, 0, -d );
data[2] = bvhvec3( w, 0, d ), data[3] = bvhvec3( -w, 0, -d ), tris = data;
data[4] = bvhvec3( w, 0, d ), data[5] = bvhvec3( -w, 0, d ), triCount += 2;
for( int i = 0; i < 6; i++ ) data[i] = 0.5f * data[i] + pos, data[i].w = *(float*)&c;
for (int i = 0; i < 6; i++) data[i] = 0.5f * data[i] + pos, data[i].w = *(float*)&c;
}

Buffer* cwbvhNodes = 0;
Buffer* cwbvhTris = 0;

// Application init
void Init()
{
Expand All @@ -70,29 +69,28 @@ void Init()
updateCounters2 = new Kernel( "wavefront.cl", "UpdateCounters2" );
traceShadows = new Kernel( "wavefront.cl", "Connect" );
finalize = new Kernel( "wavefront.cl", "Finalize" );
// create OpenCL buffers
// we need the 'compute unit' or 'SM' count for wavefront rendering; ask OpenCL for it.
clGetDeviceInfo( init->GetDeviceID(), CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( size_t ), &computeUnits, NULL );
// create OpenCL buffers for wavefront path tracing
int N = SCRWIDTH * SCRHEIGHT;
pixels = new Buffer( N * sizeof( uint32_t ) );
accumulator = new Buffer( N * sizeof( bvhvec4 ) );
raysIn = new Buffer( N * sizeof( bvhvec4 ) * 4 );
raysOut = new Buffer( N * sizeof( bvhvec4 ) * 4 );
connections = new Buffer( N * sizeof( bvhvec4 ) * 3 );
connections = new Buffer( N * 2 * sizeof( bvhvec4 ) * 3 );
accumulator = new Buffer( N * sizeof( bvhvec4 ) );
pixels = new Buffer( N * sizeof( uint32_t ) );
// load raw vertex data
AddMesh( "./testdata/cryteksponza.bin", 1, bvhvec3( 0 ), 0xffffff );
AddMesh( "./testdata/lucy.bin", 1.1f, bvhvec3( -2, 4.1f, -3 ), 0xaaaaff );
AddQuad( bvhvec3( 0, 30, -1 ), 9, 5, 0x1ffffff );
AddQuad( bvhvec3( 0, 30, -1 ), 9, 5, 0x1ffffff ); // hard-coded light source
// build bvh (here: 'compressed wide bvh', for efficient GPU rendering)
bvh.Build( tris, triCount );
// create gpu buffers
// create OpenCL buffers for BVH data
cwbvhNodes = new Buffer( bvh.usedBlocks * sizeof( bvhvec4 ), bvh.bvh8Data );
cwbvhTris = new Buffer( bvh.idxCount * 3 * sizeof( bvhvec4 ), bvh.bvh8Tris );
cwbvhNodes->CopyToDevice();
cwbvhTris->CopyToDevice();
raysIn = new Buffer( N * sizeof( bvhvec4 ) * 4 );
raysOut = new Buffer( N * sizeof( bvhvec4 ) * 4 );
connections = new Buffer( N * sizeof( bvhvec4 ) * 3 );
accumulator = new Buffer( N * sizeof( bvhvec4 ) );
pixels = new Buffer( N * sizeof( uint32_t ) );
triData = new Buffer( triCount * 3 * sizeof( bvhvec4 ), tris );
triData->CopyToDevice();
// load camera position / direction from file
Expand Down Expand Up @@ -135,20 +133,20 @@ void Tick( float delta_time_s, fenster& f, uint32_t* buf )
// wavefront step 0: render on the GPU
init->SetArguments( N, rd.eye, rd.p0, rd.p1, rd.p2, frameIdx, cwbvhNodes, cwbvhTris );
init->Run( 1 ); // init atomic counters, set buffer ptrs etc.
generate->SetArguments( raysOut );
generate->SetArguments( raysOut, frameIdx * 19191 );
generate->Run2D( oclint2( SCRWIDTH, SCRHEIGHT ) );
for (int i = 0; i < 2; i++)
{
swap( raysOut, raysIn );
extend->SetArguments( raysIn );
extend->Run( N /* todo: 64 * SM count */ );
extend->Run( computeUnits * 64 * 16, 64 );
updateCounters1->Run( 1 );
shade->SetArguments( accumulator, raysIn, raysOut, connections, triData );
shade->Run( N /* todo: 64 * SM count */ );
shade->Run( computeUnits * 64 * 16, 64 );
updateCounters2->Run( 1 );
}
traceShadows->SetArguments( accumulator, connections );
traceShadows->Run( 1024 );
traceShadows->Run( computeUnits * 64 * 8, 64 );
finalize->SetArguments( accumulator, 1.0f / (float)spp++, pixels );
finalize->Run2D( oclint2( SCRWIDTH, SCRHEIGHT ) );
pixels->CopyFromDevice();
Expand Down
5 changes: 5 additions & 0 deletions tiny_ocl.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ class Kernel
static bool InitCL();
static void CheckCLStarted();
static void KillCL();
static cl_device_id GetDeviceID() { return device; }
private:
// data members
char* sourceFile = 0;
Expand Down Expand Up @@ -946,6 +947,10 @@ bool Kernel::InitCL()
clGetDeviceInfo( devices[deviceUsed], CL_DEVICE_NAME, 1024, &device_string, NULL );
clGetDeviceInfo( devices[deviceUsed], CL_DEVICE_VERSION, 1024, &device_platform, NULL );
printf( "Device # %u, %s (%s)\n", deviceUsed, device_string, device_platform );
// print compute unit count
size_t computeUnits;
clGetDeviceInfo( devices[deviceUsed], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( size_t ), &computeUnits, NULL );
printf( "Compute units / SM count: %iKB\n", (int)computeUnits );
// print local memory size
size_t localMem;
clGetDeviceInfo( devices[deviceUsed], CL_DEVICE_LOCAL_MEM_SIZE, sizeof( size_t ), &localMem, NULL );
Expand Down
8 changes: 4 additions & 4 deletions wavefront.cl
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,13 @@ void kernel Clear( global float4* accumulator )
}

// primary ray generation
void kernel Generate( global struct PathState* raysOut )
void kernel Generate( global struct PathState* raysOut, uint frameSeed )
{
const uint x = get_global_id( 0 ), y = get_global_id( 1 );
const uint id = x + y * get_global_size( 0 );
const float u = (float)x / (float)get_global_size( 0 );
const float v = (float)y / (float)get_global_size( 1 );
uint seed = WangHash( id * 13131 + frameSeed );
const float u = ((float)x + RandomFloat( &seed )) / (float)get_global_size( 0 );
const float v = ((float)y + RandomFloat( &seed )) / (float)get_global_size( 1 );
const float4 P = rd.p0 + u * (rd.p1 - rd.p0) + v * (rd.p2 - rd.p0);
raysOut[id].T = (float4)( 1, 1, 1, 1 /* pdf */ );
raysOut[id].O = (float4)( rd.eye.xyz, as_float( id << 4 /* low bits: depth */ ) );
Expand Down Expand Up @@ -130,7 +131,6 @@ void kernel UpdateCounters1()
{
if (get_global_id( 0 ) != 0) return;
extendTasks = 0;
connectTasks = 0;
}

// shade: process intersection results; this evaluates the BRDF and creates
Expand Down

0 comments on commit 2affe64

Please sign in to comment.