Merge branch 'master' into dependabot/pip/werkzeug-3.0.1
akenmorris authored Nov 7, 2023
2 parents 971b250 + ec843fb commit dc3b1aa
Showing 4 changed files with 50 additions and 33 deletions.
@@ -131,11 +131,11 @@
"\n",
"# Get particles path list\n",
"model_dir = data_dir + \"shape_models/femur/1024/\" \n",
"local_particle_list = []\n",
"world_particle_list = []\n",
"for file in os.listdir(model_dir):\n",
" if \"local\" in file:\n",
" local_particle_list.append(model_dir + file)\n",
"local_particle_list = sorted(local_particle_list)\n",
" if \"world\" in file:\n",
" world_particle_list.append(model_dir + file)\n",
"world_particle_list = sorted(world_particle_list)\n",
"\n",
"print(\"Total shapes in original dataset: \"+ str(len(img_list)))"
]
@@ -150,21 +150,19 @@
"\n",
"```python\n",
"DataAugmentationUtils.runDataAugmentation(out_dir, img_list, \n",
" local_point_list, num_samples, \n",
" world_point_list, num_samples, \n",
" num_dim, percent_variability, \n",
" sampler_type, mixture_num,\n",
" world_point_list)\n",
" sampler_type, mixture_num)\n",
"```\n",
"**Input arguments:**\n",
"\n",
"* `out_dir`: Path to the directory where augmented data will be stored\n",
"* `img_list`: List of paths to images of the original dataset.\n",
"* `local_point_list`: List of paths to local `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.\n",
"* `world_point_list`: List of paths to world `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.\n",
"* `num_dim`: The number of dimensions to reduce to in PCA embedding. If zero or not specified, the percent_variability option is used to select the numnber of dimensions.\n",
"* `percent_variability`: The proportion of variability in the data to be preserved in embedding. Used if `num_dim` is zero or not specified. Default value is 0.95 which preserves 95% of the varibaility in the data.\n",
"* `sampler_type`: The type of parametric distribution to fit and sample from. Options: `gaussian`, `mixture`, or `kde`. Default: `kde`.\n",
"* `mixture_num`: Only necessary if `sampler_type` is `mixture`. The number of clusters (i.e., mixture components) to be used in fitting a mixture model. If zero or not specified, the optimal number of clusters will be automatically determined using the [elbow method](https://en.wikipedia.org/wiki/Elbow_method_(clustering)).\n",
"* `world_point_list`: List of paths to world `.particles` files of the original dataset. This is optional and should be provided in cases where procrustes was used for the original optimization, resulting in a difference between world and local particle files. Note, this list should be ordered in correspondence with the `img_list` and `local_point_list`.\n",
"\n",
"\n",
"In this notebook we will keep most arguments the same and explore the effect of changing the `sampler_type`.\n",
@@ -191,7 +189,7 @@
"source": [
"output_directory = '../Output/GaussianAugmentation/'\n",
"sampler_type = \"gaussian\"\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, local_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, world_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"aug_data_csv = output_directory + \"/TotalData.csv\""
]
},
@@ -245,7 +243,7 @@
"source": [
"output_directory = '../Output/MixtureAugmentation/'\n",
"sampler_type = \"mixture\"\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, local_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, world_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"aug_data_csv = output_directory + \"/TotalData.csv\""
]
},
@@ -288,7 +286,7 @@
"source": [
"output_directory = '../Output/KDEAugmentation/'\n",
"sampler_type = \"kde\"\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, local_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, world_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"aug_data_csv = output_directory + \"/TotalData.csv\""
]
},
@@ -319,7 +317,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -333,7 +331,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
"version": "3.9.13"
},
"toc": {
"base_numbering": 1,
@@ -17,12 +17,19 @@
'''


def runDataAugmentation(out_dir, img_list, local_point_list, num_samples=3, num_dim=0, percent_variability=0.95, sampler_type="KDE", mixture_num=0, processes=1, world_point_list=None):
def runDataAugmentation(out_dir, img_list, world_point_list, num_samples=3, num_dim=0, percent_variability=0.95, sampler_type="KDE", mixture_num=0, processes=1):
sw_message("Running point based data augmentation.")
num_dim = DataAugmentation.point_based_aug(out_dir, img_list, world_point_list, num_samples, num_dim, percent_variability, sampler_type, mixture_num, processes)
sw_message("Done.")
return num_dim

def runLocalDataAugmentation(out_dir, img_list, local_point_list, world_point_list, num_samples=3, num_dim=0, percent_variability=0.95, sampler_type="KDE", mixture_num=0, processes=1):
sw_message("Running point based data augmentation.")
num_dim = DataAugmentation.point_based_aug(out_dir, img_list, local_point_list, num_samples, num_dim, percent_variability, sampler_type, mixture_num, processes, world_point_list)
sw_message("Done.")
return num_dim


def visualizeAugmentation(data_csv, viz_type='splom', show=True):
if viz_type == 'splom':
Visualize.splom(data_csv)
24 changes: 19 additions & 5 deletions docs/deep-learning/data-augmentation.md
@@ -25,23 +25,37 @@ To run the complete data augmentation process as detailed in [Data Augmentation

```python
DataAugmentationUtils.runDataAugmentation(out_dir, img_list,
local_point_list, num_samples,
world_point_list, num_samples,
num_dim, percent_variability,
sampler_type, mixture_num,
world_point_list)
sampler_type, mixture_num)
```

This generates image/particle pairs in the world coordinate system and assumes the images in `img_list` are groomed/aligned.
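
For context, a minimal sketch of assembling these two inputs (the `groomed_images/` and `shape_model/` directories are hypothetical placeholders, and the `"world"` filename filter simply mirrors the example notebook, not a requirement of the API):

```python
import os

# Hypothetical layout; point these at wherever your groomed images
# and world .particles files actually live.
image_dir = "groomed_images/"
model_dir = "shape_model/"

# Sort both lists so images and particle files stay in correspondence.
img_list = sorted(os.path.join(image_dir, f) for f in os.listdir(image_dir))
world_point_list = sorted(os.path.join(model_dir, f)
                          for f in os.listdir(model_dir) if "world" in f)
```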

Local image/particle pairs can also be generated using:

```python
DataAugmentationUtils.runLocalDataAugmentation(out_dir, img_list,
local_point_list, world_point_list,
num_samples, num_dim, percent_variability,
sampler_type, mixture_num)
```
This generates image/particle pairs in the local coordinate system and assumes the images in `img_list` are the original, unaligned images. The `world_point_list` must also be provided in this case so that PCA is done in the world coordinate system. New samples are generated by sampling the world PCA subspace, then mapping each sample to local points using the world-to-local transform of its closest real example.
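
As a rough illustration of that sampling-and-mapping step (not the library's implementation; the nearest-example search and the least-squares affine fit below are stand-ins for however the pipeline actually obtains each example's world-to-local transform):

```python
import numpy as np

def map_world_sample_to_local(world_sample, world_points, local_points):
    """Map one sampled world-space point set into local space.

    world_sample : (N, 3) array sampled from the world PCA subspace
    world_points : list of (N, 3) arrays, the real examples in world space
    local_points : list of (N, 3) arrays, the same examples in local space
    """
    # 1. Find the closest real example in world space.
    nearest = int(np.argmin([np.linalg.norm(world_sample - wp) for wp in world_points]))

    # 2. Estimate that example's world-to-local transform
    #    (a simple least-squares affine fit, purely for illustration).
    w = np.hstack([world_points[nearest], np.ones((len(world_points[nearest]), 1))])
    T, *_ = np.linalg.lstsq(w, local_points[nearest], rcond=None)

    # 3. Apply the transform to the new world-space sample.
    s = np.hstack([world_sample, np.ones((len(world_sample), 1))])
    return s @ T
```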



**Input arguments:**

* `out_dir`: Path to the directory where augmented data will be stored
* `img_list`: List of paths to images of the original dataset.
* `local_point_list`: List of paths to local `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.
* `world_point_list`: List of paths to the world `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.
* `num_dim`: The number of dimensions to reduce to in PCA embedding. If zero or not specified, the `percent_variability` option is used to select the number of dimensions.
* `percent_variability`: The proportion of variability in the data to be preserved in embedding. Used if `num_dim` is zero or not specified. Default value is 0.95, which preserves 95% of the variability in the data.
* `sampler_type`: The type of parametric distribution to fit and sample from. Options: `gaussian`, `mixture`, or `kde`. Default: `kde`.
* `mixture_num`: Only necessary if `sampler_type` is `mixture`. The number of clusters (i.e., mixture components) to be used in fitting a mixture model. If zero or not specified, the optimal number of clusters will be automatically determined using the [elbow method](https://en.wikipedia.org/wiki/Elbow_method_(clustering)).
* `world_point_list`: List of paths to world `.particles` files of the original dataset. This is optional and should be provided in cases where Procrustes was used for the original optimization, resulting in a difference between world and local particle files. Note, this list should be ordered in correspondence with the `img_list` and `local_point_list`.

For `runLocalDataAugmentation()` the following argument must also be provided:
* `local_point_list`: List of paths to local `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list` and `world_point_list`.
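
Putting the two entry points together, a usage sketch (here `img_list`, `local_point_list`, and `world_point_list` are assumed to be lists of corresponding paths as described above; the import, paths, and counts are placeholders to adjust for your setup):

```python
import DataAugmentationUtils  # assumed to be importable, as in the example notebook

out_dir = "../Output/Augmentation/"  # placeholder output location
num_samples = 300                    # placeholder sample count
num_dim = 0                          # 0: let percent_variability choose the dimension
percent_variability = 0.95

# World-space augmentation: groomed/aligned images + world particles.
embedded_dim = DataAugmentationUtils.runDataAugmentation(
    out_dir, img_list, world_point_list, num_samples,
    num_dim, percent_variability, sampler_type="kde")

# Local-space augmentation: original images + local particles,
# with world particles supplied so PCA is done in world space.
embedded_dim = DataAugmentationUtils.runLocalDataAugmentation(
    out_dir, img_list, local_point_list, world_point_list, num_samples,
    num_dim, percent_variability, sampler_type="kde")
```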

### Visualizing Data Augmentation

24 changes: 11 additions & 13 deletions docs/notebooks/getting-started-with-data-augmentation.ipynb
@@ -131,11 +131,11 @@
"\n",
"# Get particles path list\n",
"model_dir = data_dir + \"shape_models/femur/1024/\" \n",
"local_particle_list = []\n",
"world_particle_list = []\n",
"for file in os.listdir(model_dir):\n",
" if \"local\" in file:\n",
" local_particle_list.append(model_dir + file)\n",
"local_particle_list = sorted(local_particle_list)\n",
" if \"world\" in file:\n",
" world_particle_list.append(model_dir + file)\n",
"world_particle_list = sorted(world_particle_list)\n",
"\n",
"print(\"Total shapes in original dataset: \"+ str(len(img_list)))"
]
@@ -150,21 +150,19 @@
"\n",
"```python\n",
"DataAugmentationUtils.runDataAugmentation(out_dir, img_list, \n",
" local_point_list, num_samples, \n",
" world_point_list, num_samples, \n",
" num_dim, percent_variability, \n",
" sampler_type, mixture_num,\n",
" world_point_list)\n",
" sampler_type, mixture_num)\n",
"```\n",
"**Input arguments:**\n",
"\n",
"* `out_dir`: Path to the directory where augmented data will be stored\n",
"* `img_list`: List of paths to images of the original dataset.\n",
"* `local_point_list`: List of paths to local `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.\n",
"* `world_point_list`: List of paths to world `.particles` files of the original dataset. Note, this list should be ordered in correspondence with the `img_list`.\n",
"* `num_dim`: The number of dimensions to reduce to in PCA embedding. If zero or not specified, the percent_variability option is used to select the numnber of dimensions.\n",
"* `percent_variability`: The proportion of variability in the data to be preserved in embedding. Used if `num_dim` is zero or not specified. Default value is 0.95 which preserves 95% of the varibaility in the data.\n",
"* `sampler_type`: The type of parametric distribution to fit and sample from. Options: `gaussian`, `mixture`, or `kde`. Default: `kde`.\n",
"* `mixture_num`: Only necessary if `sampler_type` is `mixture`. The number of clusters (i.e., mixture components) to be used in fitting a mixture model. If zero or not specified, the optimal number of clusters will be automatically determined using the [elbow method](https://en.wikipedia.org/wiki/Elbow_method_(clustering)).\n",
"* `world_point_list`: List of paths to world `.particles` files of the original dataset. This is optional and should be provided in cases where procrustes was used for the original optimization, resulting in a difference between world and local particle files. Note, this list should be ordered in correspondence with the `img_list` and `local_point_list`.\n",
"\n",
"\n",
"In this notebook we will keep most arguments the same and explore the effect of changing the `sampler_type`.\n",
@@ -191,7 +189,7 @@
"source": [
"output_directory = '../Output/GaussianAugmentation/'\n",
"sampler_type = \"gaussian\"\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, local_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, world_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"aug_data_csv = output_directory + \"/TotalData.csv\""
]
},
@@ -245,7 +243,7 @@
"source": [
"output_directory = '../Output/MixtureAugmentation/'\n",
"sampler_type = \"mixture\"\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, local_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"embedded_dim = DataAugmentationUtils.runDataAugmentation(output_directory, img_list, world_particle_list, num_samples, num_dim, percent_variability, sampler_type)\n",
"aug_data_csv = output_directory + \"/TotalData.csv\""
]
},
@@ -319,7 +317,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -333,7 +331,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
"version": "3.9.13"
},
"toc": {
"base_numbering": 1,
