Skip to content

Commit

Permalink
code cleanup, working version of NIFTI functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Ray Pomponio authored and Ray Pomponio committed Aug 7, 2020
1 parent ae99f42 commit 4931a4b
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 17 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ achieved with an external model, or by using the argument* `return_s_data`
Installation
------------

Latest version: ``0.3.x`` (August 2020)
Latest version: ``0.4.x`` (August 2020)

Requirements:

Expand Down
10 changes: 6 additions & 4 deletions neuroHarmonize/harmonizationApply.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ def harmonizationApply(data, covars, model):
# check sites are identical in training dataset
check_sites = info_dict['n_batch']==model['info_dict']['n_batch']
if not check_sites:
raise ValueError('Number of sites in holdout data not identical to training data.')
raise ValueError('Number of sites in holdout data not identical to training data. Check `covars` argument.')
check_sites = np.mean(batch_labels==model['SITE_labels'])
if check_sites!=1:
raise ValueError('Labels of sites in holdout data not identical to training data.')
raise ValueError('Labels of sites in holdout data not identical to training data. Check values in "SITE" column.')
# apply ComBat without re-learning model parameters
design = make_design_matrix(covars, batch_col, cat_cols, num_cols)
### additional setup if smoothing is performed
Expand All @@ -84,7 +84,7 @@ def harmonizationApply(data, covars, model):
# check formulas are identical in training dataset
check_formula = formula==smooth_model['formula']
if not check_formula:
raise ValueError('GAM formula for holdout data not identical to training data.')
raise ValueError('GAM formula for holdout data not identical to training data. Check arguments.')
# for matrix operations, a modified design matrix is required
design = np.concatenate((df_gam, bs_basis), axis=1)
###
Expand Down Expand Up @@ -134,6 +134,8 @@ def applyModelOne(data, covars, model):
# prep covariate data
batch_labels = model['SITE_labels']
batch_i = covars.SITE.values[0]
if batch_i not in batch_labels:
raise ValueError('Site Label "%s" not in the training set. Check `covars` argument.' % batch_i)
batch_level_i = np.argwhere(batch_i==batch_labels)[0]
batch_col = covars.columns.get_loc('SITE')
cat_cols = []
Expand Down Expand Up @@ -188,7 +190,7 @@ def loadHarmonizationModel(file_name):
by file_name using the pickle package.
"""
if not os.path.exists(file_name):
raise ValueError('Model file does not exist: %s' % file_name)
raise ValueError('Model file does not exist: %s. Did you run `saveHarmonizationModel`?' % file_name)
in_file = open(file_name,'rb')
model = pickle.load(in_file)
in_file.close()
Expand Down
7 changes: 3 additions & 4 deletions neuroHarmonize/harmonizationLearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,7 @@ def standardizeAcrossFeatures(X, design, info_dict, smooth_model):
df_gam = smooth_model['df_gam']

if X.shape[0] > 10:
print('\nWARNING: more than 10 variables will be harmonized with smoothing model.')
print(' Computation may take several minutes.')
print('\n[neuroHarmonize]: smoothing more than 10 variables may take several minutes of computation.')
# initialize penalization weight (not the final weight)
alpha = np.array([1.0] * len(smooth_cols))
# initialize an empty matrix for beta
Expand Down Expand Up @@ -244,13 +243,13 @@ def saveHarmonizationModel(model, file_name):
"""
if os.path.exists(file_name):
raise ValueError('Model file already exists: %s Change name or delete to save.' % file_name)
raise ValueError('Model file already exists: %s. Change name or delete to save.' % file_name)
# estimate size of out_file
est_size = 0
for key in ['design', 'B_hat', 'grand_mean', 'var_pooled',
'gamma_star', 'delta_star', 'gamma_hat', 'delta_hat']:
est_size += model[key].nbytes / 1e6
print('Saving model object, estimated size in MB: %4.2f' % est_size)
print('\n[neuroHarmonize]: Saving model object, estimated size in MB: %4.2f' % est_size)
out_file = open(file_name, 'wb')
pickle.dump(model, out_file)
out_file.close()
Expand Down
65 changes: 58 additions & 7 deletions neuroHarmonize/harmonizationNIFTI.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def createMaskNIFTI(paths, threshold=0.0, output_path='thresholded_mask.nii.gz')
nifti_i = nib.load(paths.PATH[i])
nifti_sum += nifti_i.get_fdata()
if (i==500):
print('PROGRESS: loaded %d of %d images...' % (i, n_images))
print('\n[neuroHarmonize]: loaded %d of %d images...' % (i, n_images))
# compute average intensities
nifti_avg = nifti_sum / n_images
# apply threshold
Expand All @@ -72,7 +72,7 @@ def flattenNIFTIs(paths, mask_path, output_path='flattened_NIFTI_array.npy'):
dimensions must be identical for all images
mask_path : a str
file path to the mask, must be created with `create_NIFTI_mask`
file path to the mask, must be created with `createMaskNIFTI`
output_path : a str, default "flattened_NIFTI_array.npy"
Expand All @@ -83,8 +83,7 @@ def flattenNIFTIs(paths, mask_path, output_path='flattened_NIFTI_array.npy'):
dimensions are N_Images x N_Masked_Voxels
"""
print('\nWARNING: this procedure will consume large amounts of memory.')
print(' Consider down-sampling, masking aggressively, and/or sub-sampling NIFTIs...')
print('\n[neuroHarmonize]: Flattening NIFTIs will consume large amounts of memory. Down-sampling may help.')
# load mask (1=GM tissue, 0=Non-GM)
nifti_mask = (nib.load(mask_path).get_fdata().astype(int)==1)
n_voxels_flattened = np.sum(nifti_mask)
Expand All @@ -93,14 +92,66 @@ def flattenNIFTIs(paths, mask_path, output_path='flattened_NIFTI_array.npy'):
# initialize empty container
nifti_array = np.zeros((n_images, n_voxels_flattened))
# iterate over images and fill container
print('Flattening %d NIFTI images with %d voxels...' % (n_images, n_voxels_flattened))
print('\n[neuroHarmonize]: Flattening %d NIFTI images with %d voxels...' % (n_images, n_voxels_flattened))
for i in range(0, n_images):
nifti_i = nib.load(paths.PATH[i]).get_fdata()
nifti_array[i, :] = nifti_i[nifti_mask]
if (i==500):
print('PROGRESS: loaded %d of %d images...' % (i, n_images))
print('\n[neuroHarmonize]: loaded %d of %d images...' % (i, n_images))
# save array of flattened images
print('Size of array in MB: %2.3f' % (nifti_array.nbytes / 1e6))
print('\n[neuroHarmonize]: Size of array in MB: %2.3f' % (nifti_array.nbytes / 1e6))
np.save(output_path, nifti_array)
return nifti_array

def applyModelNIFTIs(covars, model, paths, mask_path):
    """
    Applies a pre-trained harmonization model sequentially to NIFTI images.

    Loading and adjusting one image at a time reduces the burden on memory
    resources for large datasets.

    Arguments
    ---------
    covars : a pandas DataFrame
        contains covariates to control for during harmonization
        all covariates must be encoded numerically (no categorical variables)
        must contain a single column "SITE" with site labels for ComBat
        dimensions are N_samples x (N_covariates + 1)
    model : a dictionary of model parameters
        the output of a call to `harmonizationLearn`
    paths : a pandas DataFrame
        must contain a column "PATH" with file paths to NIFTIs and must also
        contain a column "PATH_NEW" with file paths to the new NIFTIs that
        will be created with this function
        dimensions must be identical for all images
    mask_path : a str
        file path to the mask, must be created with `createMaskNIFTI`

    Returns
    -------
    affine : a numpy array
        affine matrix of the first image; reusable when saving derived images
    """
    # load mask (1=include, 0=exclude)
    nifti_mask = (nib.load(mask_path).get_fdata().astype(int)==1)
    n_voxels_flattened = np.sum(nifti_mask)
    # count number of images
    n_images = paths.shape[0]
    # affine of the first image, returned as documented
    affine_0 = nib.load(paths.PATH[0]).affine
    # apply harmonization model one image at a time
    for i in range(0, n_images):
        path_new = paths.PATH_NEW.values[i]
        # BUGFIX: use a loop-local name for the i-th covariate row; the
        # original reassigned `covars` itself, shrinking it to one row so
        # every iteration after the first raised an IndexError.
        covars_i = covars.iloc[[i], :]
        nifti = nib.load(paths.PATH[i])
        # flatten the masked voxels into a 1 x n_voxels_flattened array
        nifti_array = nifti.get_fdata()[nifti_mask].reshape((1, n_voxels_flattened))
        affine = nifti.affine
        nifti_array_adj = applyModelOne(nifti_array, covars_i, model)
        # scatter adjusted values back into a 3D volume (zeros outside mask)
        nifti_out = nifti_mask.astype(float).copy()
        nifti_out[nifti_mask] = nifti_array_adj[0, :]
        nifti_out = nib.Nifti1Image(nifti_out, affine)
        nifti_out.to_filename(path_new)
        # BUGFIX: print progress every 500 images, not only at i==500
        if (i + 1) % 500 == 0:
            print('\n[neuroHarmonize]: saved %d of %d images...' % (i + 1, n_images))
    # BUGFIX: return the affine as promised by the docstring (was None)
    return affine_0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def readme():
return f.read()

setup(name='neuroHarmonize',
version='0.3.8',
version='0.4.0',
description='Harmonization tools for multi-center neuroimaging studies.',
long_description=readme(),
url='https://github.com/rpomponio/neuroHarmonize',
Expand Down

0 comments on commit 4931a4b

Please sign in to comment.