Merge branch 'main' into develop
* main:
  0.3.1 (USNavalResearchLaboratory#59)
  Prep for 0.3.0 release. (USNavalResearchLaboratory#58)
drowenhorst-nrl committed May 24, 2024
2 parents 7da8c0d + 8c09e6b commit a24f7a6
Showing 3 changed files with 43 additions and 30 deletions.
18 changes: 14 additions & 4 deletions CHANGELOG.rst
@@ -5,19 +5,29 @@ Changelog
All notable changes to PyEBSDIndex will be documented in this file. The format is based
on `Keep a Changelog <https://keepachangelog.com/en/1.1.0>`_.

-0.2.2dev
+0.3.1 (2024-05-24)
==================
-Added

+Fixed
-----
+- Fixed an issue when multiple OpenCL platforms are detected: PyEBSDIndex now defaults to the platform with the most discrete GPUs attached, and otherwise falls back to integrated graphics.
+
+
+0.3.0 (2024-05-23)
+==================
+Added
+-----
- NLPAR should now use the GPU if pyopencl is installed and a GPU is found. Expect a 2-10x improvement in speed (a usage sketch follows this diff).
- Faster band indexing. Should lead to increased pattern indexing speed.

Changed
-------

- PyEBSDIndex will now automatically select discrete GPUs if both integrated and discrete GPUs are found. If no discrete GPUs are found, it will use the integrated GPU.
- Numba will now cache in the directory ~/.pyebsdindex/. This *might* reduce recompiling after restarts.

Removed
-------

- Removed the ``band_vote`` module, as it is now wrapped into triplevote.

Fixed
-----
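For the NLPAR GPU entry above, a minimal usage sketch follows. It is not part of this commit: it assumes the NLPAR class and its calcnlpar() method in the pyebsdindex.nlpar module, and the file name and parameter values are placeholders.

# Hedged NLPAR sketch; 'scan.up2', lam, and searchradius are placeholder values.
from pyebsdindex import nlpar

nlobj = nlpar.NLPAR(filename='scan.up2', lam=0.9, searchradius=3)
nlobj.calcnlpar()  # as of 0.3.0, this runs on the GPU when pyopencl finds one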
2 changes: 1 addition & 1 deletion pyebsdindex/__init__.py
@@ -7,7 +7,7 @@
]
__description__ = "Python based tool for Radon based EBSD indexing"
__name__ = "pyebsdindex"
__version__ = "0.2.2dev"
__version__ = "0.3.1"


# Try to import only once - also will perform check that at least one GPU is found.
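The comment above refers to an import-time check that pyopencl is present and at least one GPU is found. Below is a standalone sketch of that kind of probe, using only public pyopencl calls; it is not the package's internal code.

# Count GPU devices across all OpenCL platforms; assumes pyopencl is installed.
import pyopencl as cl

ngpu = 0
for p in cl.get_platforms():
    try:
        ngpu += len(p.get_devices(device_type=cl.device_type.GPU))
    except cl.LogicError:
        pass  # this platform exposes no GPU devices
print(f"OpenCL GPUs found: {ngpu}")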
53 changes: 28 additions & 25 deletions pyebsdindex/opencl/openclparam.py
@@ -54,37 +54,40 @@ def __init__(self, gpu_id=0):
      print(e)

  def get_platform(self):
-    self.platform = cl.get_platforms()[0]
+    self.platform = cl.get_platforms()
    return self.platform
-  def get_gpu(self, get_integrated_and_discrete=False):

+  def get_gpu(self):
    if self.platform is None:
      self.get_platform()

-    gpu = self.platform.get_devices(device_type=cl.device_type.GPU)
-    if get_integrated_and_discrete == True: # get all GPU, regardless of integrated or not
-      self.gpu = gpu
-      self.ngpu = len(self.gpu)
+    if type(self.platform) is not list:
+      self.platform = [self.platform]
+
+    pgpudiscrete = np.zeros(len(self.platform), dtype=int)
+
+    for i in range(len(self.platform)):
+      g = self.platform[i].get_devices(device_type=cl.device_type.GPU)
+      if len(g) > 0:
+        discrete = np.zeros(len(g), dtype=int)
+        for j in range(len(g)):
+          discrete[j] = g[j].host_unified_memory == False
+          if discrete[j] > 0:
+            pgpudiscrete[i] += 1
+      else:
+        pgpudiscrete[i] = -1
+    gpu = []
+    if pgpudiscrete.max() > 0: # discrete graphics found
+      self.platform = [self.platform[pgpudiscrete.argmax()]]
+      g = self.platform[0].get_devices(device_type=cl.device_type.GPU)
+      for g1 in g:
+        if g1.host_unified_memory == False:
+          gpu.append(g1)
+    elif pgpudiscrete.max() == 0: # only integrated graphics available
+      self.platform = [self.platform[pgpudiscrete.argmax()]]
+      gpu.extend(self.platform[0].get_devices(device_type=cl.device_type.GPU))
+    else:
-    if len(gpu) == 1: # only one GPU -- keep it even if integrated.
-      self.gpu = gpu
-      self.ngpu = len(self.gpu)
-    elif len(gpu) > 1: # More than one gpu
-      gpukeep = []
-      gpudrop = []
-      for g in gpu:
-        if (g.host_unified_memory == 1): # these are integrated GPU
-          gpudrop.append(g)
-        else:
-          gpukeep.append(g) # these are discrete GPU
-      if len(gpukeep) > 0: # prefer to keep discrete
-        self.gpu = gpukeep
-      else:
-        self.gpu = gpudrop # but will take integrated if needed.
-      self.ngpu = len(self.gpu)
-    if len(self.gpu) - 1 < self.gpu_id:
-      self.gpu_id = len(self.gpu) - 1
+      pass
+    self.gpu = gpu
    return self.gpu


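A short example of exercising the selection logic above. The class name OpenClParam and the module path are assumptions based on the file name; the attribute names are taken from the diff itself.

from pyebsdindex.opencl.openclparam import OpenClParam

clparam = OpenClParam()  # __init__ calls get_gpu(), which performs the platform selection
for g in clparam.gpu:
    # host_unified_memory is True for integrated GPUs, False for discrete ones
    print(g.name, 'integrated' if g.host_unified_memory else 'discrete')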
