From 0eafdb996775d0068a137144417b4eca8a00c38a Mon Sep 17 00:00:00 2001
From: David Rowenhorst <david.rowenhorst@nrl.navy.mil>
Date: Mon, 4 Nov 2024 17:07:07 -0500
Subject: [PATCH] Update tutorial for indexing.  Introduced the IQ metric.

Signed-off by: David Rowenhorst <david.rowenhorst@nrl.navy.mil>
---
 doc/tutorials/ebsd_index_demo.ipynb  | 110 +++++++++++++++++++++------
 pyebsdindex/EBSDImage/IPFcolor.py    |   5 +-
 pyebsdindex/_ebsd_index_parallel.py  |   5 +-
 pyebsdindex/_ebsd_index_single.py    |   4 +-
 pyebsdindex/band_detect.py           |  14 +++-
 pyebsdindex/opencl/band_detect_cl.py |  22 ++++--
 pyebsdindex/opencl/clkernels.cl      |   2 +-
 7 files changed, 121 insertions(+), 41 deletions(-)

diff --git a/doc/tutorials/ebsd_index_demo.ipynb b/doc/tutorials/ebsd_index_demo.ipynb
index 41936f1..bbce4a3 100644
--- a/doc/tutorials/ebsd_index_demo.ipynb
+++ b/doc/tutorials/ebsd_index_demo.ipynb
@@ -22,29 +22,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "fuzzy-imaging",
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "ModuleNotFoundError",
-     "evalue": "No module named 'pyebsdindex'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[1], line 5\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mh5py\u001b[39;00m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcopy\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpyebsdindex\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tripletvote, ebsd_pattern, ebsd_index, ebsdfile, pcopt\n\u001b[1;32m      6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpyebsdindex\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mEBSDImage\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mEBSDImage\u001b[39;00m\n",
-      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pyebsdindex'"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
     "import h5py\n",
     "import copy\n",
     "from pyebsdindex import tripletvote, ebsd_pattern, ebsd_index, ebsdfile, pcopt\n",
-    "import pyebsdindex.EBSDImage as EBSDImage\n"
+    "from pyebsdindex.EBSDImage import IPFcolor, scalarimage"
    ]
   },
   {
@@ -71,7 +59,7 @@
     "PC = np.array([0.46, 0.70, 0.64]) # this is pulled from the .ang/ctf/h5 file, but only is a rough guess.  We will refine in a later. \n",
     "cam_elev = 5.3 # The tilt of the camera from horizontal -- positive angles are tilted below the horizontal. See diagrams in PyEBSDIndex paper for full description.    \n",
     "sampleTilt = 70.0 # sample tilt \n",
-    "vendor = 'EDAX' # notes the conventions for pattern center and orientations.  "
+    "vendor = 'EDAX' # notes the conventions for pattern center and orientations.    "
    ]
   },
   {
@@ -224,12 +212,60 @@
     "imshape = (indxer.fID.nRows, indxer.fID.nCols)\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "decdc6a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dat1"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "d1241ddf-06ba-4c89-b3a7-b4ba25c9fd22",
    "metadata": {},
    "source": [
-    "The data output *dat1* here, is a complex numpy array (or array of structured data), that is `[nphases+1, npoints]`.  The data is stored for each phase used in indexing and the dat1\\[-1\\] layer uses the best guess on which is the most likely phase, based on the fit, and number of bands matched for each phase.  Each data entry contains the orientation expressed as a quaternion (quat) (using EDAX convention by default), Pattern Quality (pq), Confidence Metric (cm), Phase ID (phase), Fit (fit) and Number of Bands Matched (nmatch).  There are some other metrics reported, but these are mostly for debugging purposes.  "
+    "### Indexed ebsd data\n",
+    "The data output `dat1` here, is a compound numpy array (or array of structured data), that is `[nphases+1, npoints]`.  The data is stored for each phase used in indexing and the dat1\\[-1\\] layer uses the best guess on which is the most likely phase, based on the fit, and number of bands matched for each phase.  Each data entry contains:\n",
+    "\n",
+    "\n",
+    "`'quat'`: the orientation expressed as a quaternion with [q0, q1\\*i, q2\\*j, q3\\*k ] using EDAX rotation/reference frame convention by default.\n",
+    "\n",
+    "`'iq;`: pattern Image Quality, here expressed as the mean normalized peak intensity (e.g. peak intensity relative to average convolved radon intensity.)\n",
+    "Tends to be a consistent value from scan-to-scan between 1.5 - 2.0.  Values near 1.0 indicate very little contrast between the peaks and the background, and are an indicator that the pattern is not very informative.  \n",
+    "\n",
+    "`'pq'`: Pattern Quality, here defined as the mean peak intensity of the detected bands as measured on the convolved radon.\n",
+    "\n",
+    "`'cm'`: Confidence Metric, a measure between `[0,1.0]` of the confidence of the index solution.  \n",
+    "\n",
+    "`'phase'`: Phase ID index indicating the phase (as it appears in the phase list), with -1 reserved for unindexed patterns assigned to no phase. \n",
+    "\n",
+    "`'fit'`: The Fit, or MAD, with units of degrees. \n",
+    "\n",
+    "`'nmatch'`: Number of bands matched.  \n",
+    "\n",
+    "\n",
+    "\n",
+    "### Band data \n",
+    "The second output, `bnd1` is also a compound numpy array, with dimensions `[npoints, nbands]`.  Each entry these values for each band:\n",
+    "\n",
+    "`'max'`:  convolved radon peak maximum value. \n",
+    "\n",
+    "`'normmax'`: convolved radon normalized (by average convolved radon intensity) peak hights.  Better for making IPF images.  Normally between values 1.0-2.0\n",
+    "\n",
+    "`'maxloc'`: the integer location of that max in the randon space as `(int)[rho_indx, theta_index]`.\n",
+    "\n",
+    "`'avemax'`: the nearest neighbor average max for the peak.  This is what is later used to calculate the pattern quality. \n",
+    "\n",
+    "`'aveloc'`: the sub-pixel location of the max `(float)[rho_indx, theta_index]`.\n",
+    "\n",
+    "`'theta'`and`'rho'`: the equivalent Radon values for theta and rho for the sub-pixel max.\n",
+    "\n",
+    "`'valid'`: a 0,1 which indicates if the band is valid or not.  \n",
+    "\n",
+    "There are some other metrics reported, but these are mostly for debugging purposes.  Also - these fields may be added onto in the future, but those listed here are expected to be stable.   "
    ]
   },
   {
@@ -265,7 +301,7 @@
    "source": [
     "Now use that indexer object to index the whole file.  Setting `npats = -1` will index to the end of the file/array (latter on will be an example of using an array as input).  \n",
     "\n",
-    "The defaults will be to detect all the GPUs on your machine, and use them.  Scheduling is dynamic, so it does not matter if the GPUs are matched. After radon processing/peak finding, the cpus take over for performing the index voting -- thus the number of CPUs needed will depend highly on the number of phases that need to be indexed.  The number of CPUs needed also is dependent on how fast your GPUs are - on a 2019 MacPro with a Radeon 6800 GPU there are diminishing returns of including more than 32 CPUs when using the above conditions.  \n",
+    "The defaults will be to detect all the GPUs on your machine, and use them.  Scheduling is dynamic, so it does not matter if the GPUs are matched. After radon processing/peak finding, the cpus take over for performing the index voting -- thus the number of CPUs needed will depend highly on the number of phases that need to be indexed.  Using the default with `ncpu = -1` will automatically allocate min(10 cpu processes/phase, number of cpu cores on machine).  \n",
     "\n",
     "The first time this executes, it will take longer as the JIT compilers need to do the initial compile. Currently, the program cache is set to the system `/tmp` directory, so after reboots, many of the programs will need to be recompiled (which happens automatically with the first run)"
    ]
@@ -306,6 +342,24 @@
     "ipfim = IPFcolor.makeipf(data, indxer); plt.imshow(ipfim)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "69ec109e",
+   "metadata": {},
+   "source": [
+    "There are some options for using other data metrics for decorating the IPF maps: "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d948199a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ipfim = IPFcolor.makeipf(data, indxer, graychannel='iq', addmicronbar=True, gamma=0.75); plt.imshow(ipfim); plt.axis('off')"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -313,7 +367,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "fit = (data[-1]['fit']).reshape(imshape[0],imshape[1]); plt.imshow(fit.clip(0, 2.0))"
+    "fit = scalarimage.scalarimage(data, indxer, datafield='fit'); plt.imshow(fit);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "81902e97",
+   "metadata": {},
+   "source": [
+    "Or if you would rather do it yourself, something like this would work:"
    ]
   },
   {
@@ -323,7 +385,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pq = (data[-1]['pq']).reshape(imshape[0],imshape[1]); plt.imshow(pq)"
+    "pq = (data[-1]['pq']).reshape(imshape[0],imshape[1]); plt.imshow(pq, cmap='gray')\n",
+    "print(pq.min(), pq.max())"
    ]
   },
   {
@@ -432,7 +495,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ipfim = IPFcolor.makeipf(datasm, indxer, xsize = 200); plt.imshow(ipfim) # xsize needs to be defined for array inputs. "
+    "ipfim = IPFcolor.makeipf(datasm, indxer, xsize = 200, graychannel='nmatch'); plt.imshow(ipfim) # xsize needs to be defined for array inputs. "
    ]
   },
   {
@@ -513,6 +576,7 @@
     "h5file = '/Path/to/hdf5/file.h5'\n",
     "f = h5py.File(h5file, 'r') # this is an HDF5 file type used by EDAX.  \n",
     "h5pats = f['/Scan 1/EBSD/Data/Pattern'] # location of the pattern array within the HDF5 file.  \n",
+    "\n",
     "# index the first 1000\n",
     "h5data, h5bnddata, indxer=ebsd_index.index_pats(patsin = h5pats[0:1000,:,:],\n",
     "                                     patstart = 0, npats = 1000,return_indexer_obj = True,\n",
@@ -522,9 +586,9 @@
     "                                     phaselist = phaselist, \n",
     "                                     PC = PC, camElev=cam_elev, sampleTilt=sampleTilt, \n",
     "                                     vendor = vendor, \n",
-    "                                     verbose = 2)\n",
+    "                                     verbose = 0)\n",
     "#now index them all\n",
-    "h5data, h5banddata = ebsd_index.index_pats_distributed(patsin = h5pats, ebsd_indexer_obj = indxer, ncpu = 28)"
+    "h5data, h5banddata = ebsd_index.index_pats_distributed(patsin = h5pats, ebsd_indexer_obj = indxer, ncpu = -1, verbose=2)"
    ]
   },
   {
diff --git a/pyebsdindex/EBSDImage/IPFcolor.py b/pyebsdindex/EBSDImage/IPFcolor.py
index 7082763..e1ade33 100644
--- a/pyebsdindex/EBSDImage/IPFcolor.py
+++ b/pyebsdindex/EBSDImage/IPFcolor.py
@@ -86,10 +86,13 @@ def makeipf(ebsddata, indexer, vector=np.array([0,0,1.0]), xsize = None, ysize =
     else:
       gchan = graychannel
     gray = scalarimage.scalarimage(ebsddata, indexer,
+                       xsize=xsize,
+                       ysize=ysize,
                        addmicronbar=False,
                        datafield=gchan,
                        cmap='gray',
-                       rescalenice=True)
+                       rescalenice=True, **kwargs
+                       )
     ipf_out *= gray**gamma
 
 
diff --git a/pyebsdindex/_ebsd_index_parallel.py b/pyebsdindex/_ebsd_index_parallel.py
index 39cfa19..069fba5 100644
--- a/pyebsdindex/_ebsd_index_parallel.py
+++ b/pyebsdindex/_ebsd_index_parallel.py
@@ -307,10 +307,11 @@ def index_pats_distributed(
         ngpu = 0
 
     if ncpu == 0:
-        ncpu = os.cpu_count()
+        ncpu = max(1,os.cpu_count()//2)
     if ncpu <= 0:
         if ngpu > 0:
-            ncpu = min(os.cpu_count(), len(indexer.phaseLib)*10) # this is a heuristic, and may be highly dependent on hardware
+            ncpu = max(1,min(os.cpu_count(), int(len(indexer.phaseLib)*10)))
+            # this is a heuristic, and may be highly dependent on hardware
         else:
             ncpu = max(1,os.cpu_count()//2)
     if ncpu != -1:
diff --git a/pyebsdindex/_ebsd_index_single.py b/pyebsdindex/_ebsd_index_single.py
index 9397e08..06c0000 100644
--- a/pyebsdindex/_ebsd_index_single.py
+++ b/pyebsdindex/_ebsd_index_single.py
@@ -709,8 +709,8 @@ def _indexbandsphase(self, banddata, bandnorm, verbose=0):
 
         for j in range(len(self.phaseLib)):
 
-            indxData['pq'][j, :] = np.mean(banddata['avemax'] * banddata['valid'], axis=1) #/ shpBandDat[-1]
-
+            indxData['pq'][j, :] = np.mean(banddata['max'] * banddata['valid'], axis=1) #/ shpBandDat[-1]
+            indxData['iq'][j, :] = np.mean(banddata['normmax'] * banddata['valid'], axis=1)  # / shpBandDat[-1]
 
             p2do = np.ravel(np.nonzero(np.max(indxData["nmatch"], axis=0) < earlyexit)[0])
 
diff --git a/pyebsdindex/band_detect.py b/pyebsdindex/band_detect.py
index 0869b7d..95f3633 100644
--- a/pyebsdindex/band_detect.py
+++ b/pyebsdindex/band_detect.py
@@ -103,8 +103,8 @@ def __init__(
     self.patternmask = None
     self.useCPU = True
 
-    self.dataType = np.dtype([('id', np.int32), ('max', np.float32), \
-                    ('maxloc', np.float32, (2)), ('avemax', np.float32), ('aveloc', np.float32, (2)),\
+    self.dataType = np.dtype([('id', np.int32), ('max', np.float32), ('normmax', np.float32),
+                    ('maxloc', np.float32, (2)), ('avemax', np.float32), ('aveloc', np.float32, (2)),
                     ('pqmax', np.float32), ('width', np.float32), ('theta', np.float32), ('rho', np.float32),
                     ('valid', np.int8),('band_match_index', np.int64, (self.nPhases, ))])
 
@@ -419,14 +419,16 @@ def find_bands(self, patternsIn, verbose=0, chunksize=-1,  **kwargs):
       rdnNorm = self.radonPlan.radon_faster(patterns[chnk[0]:chnk[1],:,:], self.padding, fixArtifacts=False, background=self.backgroundsub)
       rdntime += timer() - tic1
       tic1 = timer()
-      rdnConv = self.rdn_conv(rdnNorm)
+      rdnConv, imageave = self.rdn_conv(rdnNorm)
       convtime += timer()-tic1
       tic1 = timer()
       lMaxRdn= self.rdn_local_max(rdnConv)
       lmaxtime +=  timer()-tic1
       tic1 = timer()
       bandDataChunk= self.band_label(chnk[1]-chnk[0], rdnConv, rdnNorm, lMaxRdn)
+      bandDataChunk['normmax'] /= imageave.clip(1e-7)
       bandData[chnk[0]:chnk[1]] = bandDataChunk
+
       if (verbose > 1) and (chnk[1] == nPats): # need to pull the radonconv off the gpu
         rdnConv = rdnConv[:,:,0:chnk[1]-chnk[0] ]
 
@@ -542,11 +544,14 @@ def rdn_conv(self, radonIn):
 
     #print(rdnConv.min(),rdnConv.max())
     mns = (rdnConv[self.padding[0]:shprdn[1]-self.padding[0],self.padding[1]:shprdn[1]-self.padding[1],:]).min(axis=0).min(axis=0)
+    ave = np.mean(rdnConv[self.padding[0]:shprdn[1] - self.padding[0], self.padding[1]:shprdn[1] - self.padding[1],:], axis=(0,1))
+
+    ave -= mns
 
     rdnConv -= mns.reshape((1,1, shp[2]))
     rdnConv = rdnConv.clip(min=0.0)
 
-    return rdnConv
+    return rdnConv, ave
 
   def rdn_local_max(self, rdn, clparams=None, rdn_gpu=None, use_gpu=False):
 
@@ -587,6 +592,7 @@ def band_label(self,nPats,rdnConvIn,rdnNormIn,lMaxRdnIn):
     )
 
     bandData['max']    = bdat[0][0:nPats, :]
+    bandData['normmax'] = bdat[0][0:nPats, :]
     bandData['avemax'] = bdat[1][0:nPats, :]
     bandData['maxloc'] = bdat[2][0:nPats, :, :]
     bandData['aveloc'] = bdat[3][0:nPats, :, :]
diff --git a/pyebsdindex/opencl/band_detect_cl.py b/pyebsdindex/opencl/band_detect_cl.py
index a62db8a..7b24c17 100644
--- a/pyebsdindex/opencl/band_detect_cl.py
+++ b/pyebsdindex/opencl/band_detect_cl.py
@@ -116,7 +116,7 @@ def find_bands(self, patternsIn, verbose=0, clparams=None, chunksize=528, useCPU
 
         rdntime += timer() - tic1
         tic1 = timer()
-        rdnConv, clparams = self.rdn_convCL2(rdnNorm, clparams=clparams, returnBuff=True, separableKernel=True)
+        rdnConv, imageave, clparams = self.rdn_convCL2(rdnNorm, clparams=clparams, returnBuff=True, separableKernel=True)
         rdnNorm.release()
 
         convtime += timer()-tic1
@@ -128,6 +128,8 @@ def find_bands(self, patternsIn, verbose=0, clparams=None, chunksize=528, useCPU
         bandDataChunk = self.band_labelCL(rdnConv, lMaxRdn, clparams=clparams)
         lMaxRdn.release()
         bandData['max'][chnk[0]:chnk[1]] = bandDataChunk[0][0:nPatsChunk, :]
+        bandData['normmax'][chnk[0]:chnk[1]] = (bandDataChunk[0][0:nPatsChunk, :] /
+                                                  imageave[0:nPatsChunk].reshape(nPatsChunk, 1).clip(1e-7))
         bandData['avemax'][chnk[0]:chnk[1]] = bandDataChunk[1][0:nPatsChunk, :]
         bandData['maxloc'][chnk[0]:chnk[1]] = bandDataChunk[2][0:nPatsChunk, :, :]
         bandData['aveloc'][chnk[0]:chnk[1]] = bandDataChunk[3][0:nPatsChunk, :, :]
@@ -154,11 +156,11 @@ def find_bands(self, patternsIn, verbose=0, clparams=None, chunksize=528, useCPU
         rdnConv = None
 
         blabeltime += timer() - tic1
-
-      #bandData['avemax'] *= pscale[1]
-      #bandData['avemax'] += pscale[0]
-      #bandData['max'] *= pscale[1]
-      #bandData['max'] += pscale[0]
+      # correct any scaling that happened due to float-int conversion.
+      bandData['avemax'] *= pscale[1]
+      bandData['avemax'] += pscale[0]
+      bandData['max'] *= pscale[1]
+      bandData['max'] += pscale[0]
       tottime = timer() - tic0
       # going to manually clear the clparams queue -- this should clear the memory of the queue off the GPU
 
@@ -472,6 +474,10 @@ def rdn_convCL2(self, radonIn, clparams=None, separableKernel=True, returnBuff =
 
     #rdn_gpu.release()
     mns.release()
+
+    imageave = np.ones((nImCL), dtype=np.float32)
+    cl.enqueue_copy(queue, imageave, ave, is_blocking=True)
+
     if kern_gpu is None:
       kern_gpu_y.release()
       kern_gpu_x.release()
@@ -485,9 +491,9 @@ def rdn_convCL2(self, radonIn, clparams=None, separableKernel=True, returnBuff =
       cl.enqueue_copy(queue, resultConv, rdnConv_gpu, is_blocking=True)
       rdnConv_gpu.release()
       rdnConv_gpu = None
-      return resultConv, clparams
+      return resultConv, imageave, clparams
     else:
-      return rdnConv_gpu, clparams
+      return rdnConv_gpu, imageave, clparams
 
 
 
diff --git a/pyebsdindex/opencl/clkernels.cl b/pyebsdindex/opencl/clkernels.cl
index c7039be..aaa7ffd 100644
--- a/pyebsdindex/opencl/clkernels.cl
+++ b/pyebsdindex/opencl/clkernels.cl
@@ -450,7 +450,7 @@ __kernel void imageSubMinNormWClip( __global float16 *im1,
   const float16 im1val = im1[indx];
   float16 value = im1val - imMin[z];
   value = select(value, (float16) (0.0f), (value < (float16) (0.0f)) );
-  value *= ((float16) (1.0))/imAve[z];
+  //value *= ((float16) (1.0))/imAve[z];
 
   //im1[indx] = (value < (float16) (0.0f)) ? (float16) (0.0f) : value;
   im1[indx] = value;