Merge branch 'master' into dependabot/pip/werkzeug-3.0.1
akenmorris authored Nov 7, 2023
2 parents 971b250 + ec843fb commit dc3b1aa
Showing 4 changed files with 50 additions and 33 deletions.
@@ -131,11 +131,11 @@
"\n",
"# Get particles path list\n",
"model_dir = data_dir + \"shape_models/femur/1024/\" \n",
"local_particle_list = []\n",
"world_particle_list = []\n",
"for file in os.listdir(model_dir):\n",
" if \"local\" in file:\n",
" local_particle_list.append(model_dir + file)\n",
"local_particle_list = sorted(local_particle_list)\n",
" if \"world\" in file:\n",
" world_particle_list.append(model_dir + file)\n",
"world_particle_list = sorted(world_particle_list)\n",
"\n",
"print(\"Total shapes in original dataset: \"+ str(len(img_list)))"
]
@@ -150,21 +150,19 @@
"\n",
"```python\n",
"DataAugmentationUtils.runDataAugmentation(out_dir, img_list, \n",
" local_point_list, num_samples, \n",
" world_point_list, num_samples, \n",
" num_dim, percent_variability, \n",
" sampler_type, mixture_num,\n",
" world_point_list)\n",
" sampler_type, mixture_num)\n",
"```\n",
"**Input arguments:**\n",
"\n",
"* `out_dir`: Path to the directory where augmented data will be stored\n",
"* `img_list`: List of paths to images of the original dataset.\n",
"* `local_point_list`: List of paths to local `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.\n",
"* `world_point_list`: List of paths to world `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.\n",
"* `num_dim`: The number of dimensions to reduce to in PCA embedding. If zero or not specified, the percent_variability option is used to select the numnber of dimensions.\n",
"* `percent_variability`: The proportion of variability in the data to be preserved in embedding. Used if `num_dim` is zero or not specified. Default value is 0.95 which preserves 95% of the varibaility in the data.\n",
"* `sampler_type`: The type of parametric distribution to fit and sample from. Options: `gaussian`, `mixture`, or `kde`. Default: `kde`.\n",
"* `mixture_num`: Only necessary if `sampler_type` is `mixture`. The number of clusters (i.e., mixture components) to be used in fitting a mixture model. If zero or not specified, the optimal number of clusters will be automatically determined using the [elbow method](https://en.wikipedia.org/wiki/Elbow_method_(clustering)).\n",
"* `world_point_list`: List of paths to world `.particles` files of the original dataset. This is optional and should be provided in cases where procrustes was used for the original optimization, resulting in a difference between world and local particle files. Note, this list should be ordered in correspondence with the `img_list` and `local_point_list`.\n",
"\n",
"\n",
"In this notebook we will keep most arguments the same and explore the effect of changing the `sampler_type`.\n",
@@ -191,7 +189,7 @@
"source": [
"output_directory = '../Output/GaussianAugmentation/'\n",
"sampler_type = \"gaussian\"\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, local_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, world_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"aug_data_csv = output_directory + \"/TotalData.csv\""
]
},
@@ -245,7 +243,7 @@
"source": [
"output_directory = '../Output/MixtureAugmentation/'\n",
"sampler_type = \"mixture\"\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, local_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, world_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"aug_data_csv = output_directory + \"/TotalData.csv\""
]
},
@@ -288,7 +286,7 @@
"source": [
"output_directory = '../Output/KDEAugmentation/'\n",
"sampler_type = \"kde\"\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, local_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, world_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"aug_data_csv = output_directory + \"/TotalData.csv\""
]
},
@@ -319,7 +317,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -333,7 +331,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
"version": "3.9.13"
},
"toc": {
"base_numbering": 1,
@@ -17,12 +17,19 @@
'''


def runDataAugmentation(out_dir, img_list, local_point_list, num_samples=3, num_dim=0, percent_variability=0.95, sampler_type="KDE", mixture_num=0, processes=1, world_point_list=None):
def runDataAugmentation(out_dir, img_list, world_point_list, num_samples=3, num_dim=0, percent_variability=0.95, sampler_type="KDE", mixture_num=0, processes=1):
sw_message("Running point based data augmentation.")
num_dim = DataAugmentation.point_based_aug(out_dir, img_list, world_point_list, num_samples, num_dim, percent_variability, sampler_type, mixture_num, processes)
sw_message("Done.")
return num_dim

def runLocalDataAugmentation(out_dir, img_list, local_point_list, world_point_list, num_samples=3, num_dim=0, percent_variability=0.95, sampler_type="KDE", mixture_num=0, processes=1):
sw_message("Running point based data augmentation.")
num_dim = DataAugmentation.point_based_aug(out_dir, img_list, local_point_list, num_samples, num_dim, percent_variability, sampler_type, mixture_num, processes, world_point_list)
sw_message("Done.")
return num_dim


def visualizeAugmentation(data_csv, viz_type='splom', show=True):
if viz_type == 'splom':
Visualize.splom(data_csv)
24 changes: 19 additions & 5 deletions docs/deep-learning/data-augmentation.md
@@ -25,23 +25,37 @@ To run the complete data augmentation process as detailed in [Data Augmentation

```python
DataAugmentationUtils.runDataAugmentation(out_dir, img_list,
local_point_list, num_samples,
world_point_list, num_samples,
num_dim, percent_variability,
sampler_type, mixture_num,
world_point_list)
sampler_type, mixture_num)
```

This generates image/particle pairs in the world coordinate system and assumes the images in `img_list` are groomed/aligned.
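
For context, a minimal sketch of assembling these two inputs (the `groomed_images/` and `shape_model/` directories are hypothetical placeholders, and the `"world"` filename filter simply mirrors the example notebook, not a requirement of the API):

```python
import os

# Hypothetical layout; point these at wherever your groomed images
# and world .particles files actually live.
image_dir = "groomed_images/"
model_dir = "shape_model/"

# Sort both lists so images and particle files stay in correspondence.
img_list = sorted(os.path.join(image_dir, f) for f in os.listdir(image_dir))
world_point_list = sorted(os.path.join(model_dir, f)
                          for f in os.listdir(model_dir) if "world" in f)
```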

Local image/particle pairs can also be generated using:

```python
DataAugmentationUtils.runLocalDataAugmentation(out_dir, img_list,
local_point_list, world_point_list,
num_samples, num_dim, percent_variability,
sampler_type, mixture_num)
```
This generates image/particle pairs in the local coordinate system and assumes the images in `img_list` are the original, unaligned images. The `world_point_list` must also be provided in this case so that PCA is done in the world coordinate system. New samples are generated by sampling the world PCA subspace, then mapping each sample to local points using the world-to-local transform of its closest real example.
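
As a rough illustration of that sampling-and-mapping step (not the library's implementation; the nearest-example search and the least-squares affine fit below are stand-ins for however the pipeline actually obtains each example's world-to-local transform):

```python
import numpy as np

def map_world_sample_to_local(world_sample, world_points, local_points):
    """Map one sampled world-space point set into local space.

    world_sample : (N, 3) array sampled from the world PCA subspace
    world_points : list of (N, 3) arrays, the real examples in world space
    local_points : list of (N, 3) arrays, the same examples in local space
    """
    # 1. Find the closest real example in world space.
    nearest = int(np.argmin([np.linalg.norm(world_sample - wp) for wp in world_points]))

    # 2. Estimate that example's world-to-local transform
    #    (a simple least-squares affine fit, purely for illustration).
    w = np.hstack([world_points[nearest], np.ones((len(world_points[nearest]), 1))])
    T, *_ = np.linalg.lstsq(w, local_points[nearest], rcond=None)

    # 3. Apply the transform to the new world-space sample.
    s = np.hstack([world_sample, np.ones((len(world_sample), 1))])
    return s @ T
```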



**Input arguments:**

* `out_dir`: Path to the directory where augmented data will be stored
* `img_list`: List of paths to images of the original dataset.
* `local_point_list`: List of paths to local `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.
* `world_point_list`: List of paths to the world `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.
* `num_dim`: The number of dimensions to reduce to in PCA embedding. If zero or not specified, the `percent_variability` option is used to select the number of dimensions.
* `percent_variability`: The proportion of variability in the data to be preserved in embedding. Used if `num_dim` is zero or not specified. Default value is 0.95, which preserves 95% of the variability in the data.
* `sampler_type`: The type of parametric distribution to fit and sample from. Options: `gaussian`, `mixture`, or `kde`. Default: `kde`.
* `mixture_num`: Only necessary if `sampler_type` is `mixture`. The number of clusters (i.e., mixture components) to be used in fitting a mixture model. If zero or not specified, the optimal number of clusters will be automatically determined using the [elbow method](https://en.wikipedia.org/wiki/Elbow_method_(clustering)).
* `world_point_list`: List of paths to world `.particles` files of the original dataset. This is optional and should be provided in cases where Procrustes was used for the original optimization, resulting in a difference between world and local particle files. Note, this list should be ordered in correspondence with the `img_list` and `local_point_list`.

For `runLocalDataAugmentation()` the following argument must also be provided:
* `local_point_list`: List of paths to local `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list` and `world_point_list`.
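
Putting the two entry points together, a usage sketch (here `img_list`, `local_point_list`, and `world_point_list` are assumed to be lists of corresponding paths as described above; the import, paths, and counts are placeholders to adjust for your setup):

```python
import DataAugmentationUtils  # assumed to be importable, as in the example notebook

out_dir = "../Output/Augmentation/"  # placeholder output location
num_samples = 300                    # placeholder sample count
num_dim = 0                          # 0: let percent_variability choose the dimension
percent_variability = 0.95

# World-space augmentation: groomed/aligned images + world particles.
embedded_dim = DataAugmentationUtils.runDataAugmentation(
    out_dir, img_list, world_point_list, num_samples,
    num_dim, percent_variability, sampler_type="kde")

# Local-space augmentation: original images + local particles,
# with world particles supplied so PCA is done in world space.
embedded_dim = DataAugmentationUtils.runLocalDataAugmentation(
    out_dir, img_list, local_point_list, world_point_list, num_samples,
    num_dim, percent_variability, sampler_type="kde")
```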

### Visualizing Data Augmentation

24 changes: 11 additions & 13 deletions docs/notebooks/getting-started-with-data-augmentation.ipynb
@@ -131,11 +131,11 @@
"\n",
"# Get particles path list\n",
"model_dir = data_dir + \"shape_models/femur/1024/\" \n",
"local_particle_list = []\n",
"world_particle_list = []\n",
"for file in os.listdir(model_dir):\n",
" if \"local\" in file:\n",
" local_particle_list.append(model_dir + file)\n",
"local_particle_list = sorted(local_particle_list)\n",
" if \"world\" in file:\n",
" world_particle_list.append(model_dir + file)\n",
"world_particle_list = sorted(world_particle_list)\n",
"\n",
"print(\"Total shapes in original dataset: \"+ str(len(img_list)))"
]
@@ -150,21 +150,19 @@
"\n",
"```python\n",
"DataAugmentationUtils.runDataAugmentation(out_dir, img_list, \n",
" local_point_list, num_samples, \n",
" world_point_list, num_samples, \n",
" num_dim, percent_variability, \n",
" sampler_type, mixture_num,\n",
" world_point_list)\n",
" sampler_type, mixture_num)\n",
"```\n",
"**Input arguments:**\n",
"\n",
"* `out_dir`: Path to the directory where augmented data will be stored\n",
"* `img_list`: List of paths to images of the original dataset.\n",
"* `local_point_list`: List of paths to local `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.\n",
"* `world_point_list`: List of paths to world `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.\n",
"* `num_dim`: The number of dimensions to reduce to in PCA embedding. If zero or not specified, the percent_variability option is used to select the numnber of dimensions.\n",
"* `percent_variability`: The proportion of variability in the data to be preserved in embedding. Used if `num_dim` is zero or not specified. Default value is 0.95 which preserves 95% of the varibaility in the data.\n",
"* `sampler_type`: The type of parametric distribution to fit and sample from. Options: `gaussian`, `mixture`, or `kde`. Default: `kde`.\n",
"* `mixture_num`: Only necessary if `sampler_type` is `mixture`. The number of clusters (i.e., mixture components) to be used in fitting a mixture model. If zero or not specified, the optimal number of clusters will be automatically determined using the [elbow method](https://en.wikipedia.org/wiki/Elbow_method_(clustering)).\n",
"* `world_point_list`: List of paths to world `.particles` files of the original dataset. This is optional and should be provided in cases where procrustes was used for the original optimization, resulting in a difference between world and local particle files. Note, this list should be ordered in correspondence with the `img_list` and `local_point_list`.\n",
"\n",
"\n",
"In this notebook we will keep most arguments the same and explore the effect of changing the `sampler_type`.\n",
@@ -191,7 +189,7 @@
"source": [
"output_directory = '../Output/GaussianAugmentation/'\n",
"sampler_type = \"gaussian\"\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, local_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, world_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"aug_data_csv = output_directory + \"/TotalData.csv\""
]
},
@@ -245,7 +243,7 @@
"source": [
"output_directory = '../Output/MixtureAugmentation/'\n",
"sampler_type = \"mixture\"\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, local_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, world_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"aug_data_csv = output_directory + \"/TotalData.csv\""
]
},
@@ -319,7 +317,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -333,7 +331,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
"version": "3.9.13"
},
"toc": {
"base_numbering": 1,
