From 4813839138c1b04ac97588bd303784c33d936161 Mon Sep 17 00:00:00 2001
From: David Rowenhorst
Date: Sat, 11 May 2024 23:42:23 -0400
Subject: [PATCH] Checkpoint

Signed-off by: David Rowenhorst
---
Review note: the normd() hunk below was amended after export (loop bounds now
use sr, per-pixel indexing of d/n, slot counter advanced for every neighbor),
so the post-image blob id on the clnlpar.cl index line is stale.

 pyebsdindex/opencl/clnlpar.cl  | 53 ++++++++++++++++++++++++++++++++++
 pyebsdindex/opencl/nlpar_cl.py | 17 +++++++----
 2 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/pyebsdindex/opencl/clnlpar.cl b/pyebsdindex/opencl/clnlpar.cl
index 938f2f9..9e3e451 100644
--- a/pyebsdindex/opencl/clnlpar.cl
+++ b/pyebsdindex/opencl/clnlpar.cl
@@ -175,6 +175,59 @@ __kernel void calcsigma( __global float *data, __global float16 *mask,
 
 }
 
+// Rescale the neighbor distances of one pixel in place:
+//   d <- (d - n*s2) / (s2 * sqrt(2*n)),  s2 = sigma_center^2 + sigma_neighbor^2.
+// One work-item per pixel on a (ncol, nrow) global grid; sr is the window half-width.
+// NOTE(review): assumes d and n hold (2*sr+1)^2 consecutive entries per pixel,
+// in the same row-major window order as the loops below -- confirm with calcsigma.
+__kernel void normd(
+      const __global float *sigma,
+      const __global float *n,
+      __global float *d,
+      const long sr){
+  const long x = get_global_id(0);
+  const long y = get_global_id(1);
+  const long ncol = get_global_size(0);
+  const long nrow = get_global_size(1);
+  const long indx_xy = x+y*ncol;
+
+  long i, j;
+  long indx_j, indx_ij, count;
+
+  // Entries per pixel; each work-item reads and writes only its own slice of d and n.
+  const long nnn = (2*sr+1) * (2*sr+1);
+  const long offset = nnn * indx_xy;
+
+  float sigma_xy = sigma[indx_xy];
+  sigma_xy *= sigma_xy;
+  float sigma_ij, nn, dd;
+  count = 0;
+  for(j=y-sr; j<=y+sr; ++j){
+      // Fold rows that fall outside [0, nrow) back into the image.
+      indx_j = (j >= 0) ? (j): abs(j);
+      indx_j = (indx_j < nrow) ? (indx_j): nrow - (indx_j -nrow +1);
+      indx_j = ncol * indx_j;
+
+      for(i=x-sr; i<=x+sr; ++i){
+          dd = d[offset+count];
+          nn = n[offset+count];
+          if (nn > 0){
+            indx_ij = (i >= 0) ? (i): abs(i);
+            indx_ij = (indx_ij < ncol) ? (indx_ij): ncol - (indx_ij -ncol +1);
+            indx_ij = (indx_ij + indx_j);
+            sigma_ij = sigma[indx_ij];
+            sigma_ij *= sigma_ij;
+            sigma_ij = sigma_ij + sigma_xy;
+            dd -= nn*sigma_ij;
+            dd /= sigma_ij * sqrt(2.0f*nn);
+            d[offset+count] = dd;
+          }
+          // Advance for every neighbor (entries with n <= 0 stay as-is) to keep slots aligned.
+          count += 1;
+      }
+  }
+}
+
 
 __kernel void calcnlpar(
       const __global float *data,
diff --git a/pyebsdindex/opencl/nlpar_cl.py b/pyebsdindex/opencl/nlpar_cl.py
index a82618c..82ca0f1 100644
--- a/pyebsdindex/opencl/nlpar_cl.py
+++ b/pyebsdindex/opencl/nlpar_cl.py
@@ -71,13 +71,12 @@ def d2normcl(d2, n2, sigmapad):
       dthresh = np.float32(dthresh)
 
     lamopt_values = []
-    sigma, d2, n2 = self.calcsigma_cl(nn=1, saturation_protect=saturation_protect, automask=automask)
+    sigma, d2, n2 = self.calcsigma_cl(nn=1, saturation_protect=saturation_protect, automask=automask, normalize_d=True)
     sigmapad = np.pad(sigma, 1, mode='reflect')
 
 
 
-
-    d2normcl(d2, n2, sigmapad)
+    #print(d2.min(), d2.max(), d2.mean())
 
     lamopt_values_chnk = []
     for tw in target_weights:
@@ -97,7 +96,7 @@ def d2normcl(d2, n2, sigmapad):
 
     return lamopt_values.flatten()
 
-  def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, gpuid = None, **kwargs):
+  def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, normalize_d=False, gpuid = None, **kwargs):
 
     if gpuid is None:
       clparams = openclparam.OpenClParam()
@@ -226,7 +225,14 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, gpuid = None,
                       dist_local, count_local,
                       np.int64(nn), np.int64(npatsteps), np.int64(npat_point),
                       np.float32(mxval) )
-        queue.finish()
+
+
+        #cl.enqueue_barrier(queue)
+        # prg.normd(queue, (np.uint32(ncolchunk), np.uint32(nrowchunk)), None,
+        #                sigmachunk_gpu,
+        #                count_local, dist_local,
+        #                np.int64(nn))
+        queue.flush()
 
         cl.enqueue_copy(queue, distchunk, dist_local, is_blocking=False)
         cl.enqueue_copy(queue, countchunk, count_local, is_blocking=False)
@@ -244,6 +250,7 @@ def calcsigma_cl(self,nn=1,saturation_protect=True,automask=True, gpuid = None,
     datapad_gpu.release()
     queue.flush()
     queue = None
+    self.sigma = sigma
     return sigma, dist, countnn
 
   def calcnlpar_cl(self,chunksize=0, searchradius=None, lam = None, dthresh = None, saturation_protect=True, automask=True,