Skip to content

Commit

Permalink
code cleanup, working version of NIFTI functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Ray Pomponio authored and Ray Pomponio committed Aug 7, 2020
1 parent ae99f42 commit 4931a4b
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 17 deletions.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ achieved with an external model, or by using the argument* `return_s_data`
Installation
------------

Latest version: ``0.3.x`` (August 2020)
Latest version: ``0.4.x`` (August 2020)

Requirements:

Expand Down
10 changes: 6 additions & 4 deletions neuroHarmonize/harmonizationApply.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ def harmonizationApply(data, covars, model):
# check sites are identical in training dataset
check_sites = info_dict['n_batch']==model['info_dict']['n_batch']
if not check_sites:
raise ValueError('Number of sites in holdout data not identical to training data.')
raise ValueError('Number of sites in holdout data not identical to training data. Check `covars` argument.')
check_sites = np.mean(batch_labels==model['SITE_labels'])
if check_sites!=1:
raise ValueError('Labels of sites in holdout data not identical to training data.')
raise ValueError('Labels of sites in holdout data not identical to training data. Check values in "SITE" column.')
# apply ComBat without re-learning model parameters
design = make_design_matrix(covars, batch_col, cat_cols, num_cols)
### additional setup if smoothing is performed
Expand All @@ -84,7 +84,7 @@ def harmonizationApply(data, covars, model):
# check formulas are identical in training dataset
check_formula = formula==smooth_model['formula']
if not check_formula:
raise ValueError('GAM formula for holdout data not identical to training data.')
raise ValueError('GAM formula for holdout data not identical to training data. Check arguments.')
# for matrix operations, a modified design matrix is required
design = np.concatenate((df_gam, bs_basis), axis=1)
###
Expand Down Expand Up @@ -134,6 +134,8 @@ def applyModelOne(data, covars, model):
# prep covariate data
batch_labels = model['SITE_labels']
batch_i = covars.SITE.values[0]
if batch_i not in batch_labels:
raise ValueError('Site Label "%s" not in the training set. Check `covars` argument.' % batch_i)
batch_level_i = np.argwhere(batch_i==batch_labels)[0]
batch_col = covars.columns.get_loc('SITE')
cat_cols = []
Expand Down Expand Up @@ -188,7 +190,7 @@ def loadHarmonizationModel(file_name):
by file_name using the pickle package.
"""
if not os.path.exists(file_name):
raise ValueError('Model file does not exist: %s' % file_name)
raise ValueError('Model file does not exist: %s. Did you run `saveHarmonizationModel`?' % file_name)
in_file = open(file_name,'rb')
model = pickle.load(in_file)
in_file.close()
Expand Down
7 changes: 3 additions & 4 deletions neuroHarmonize/harmonizationLearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,7 @@ def standardizeAcrossFeatures(X, design, info_dict, smooth_model):
df_gam = smooth_model['df_gam']

if X.shape[0] > 10:
print('\nWARNING: more than 10 variables will be harmonized with smoothing model.')
print(' Computation may take several minutes.')
print('\n[neuroHarmonize]: smoothing more than 10 variables may take several minutes of computation.')
# initialize penalization weight (not the final weight)
alpha = np.array([1.0] * len(smooth_cols))
# initialize an empty matrix for beta
Expand Down Expand Up @@ -244,13 +243,13 @@ def saveHarmonizationModel(model, file_name):
"""
if os.path.exists(file_name):
raise ValueError('Model file already exists: %s Change name or delete to save.' % file_name)
raise ValueError('Model file already exists: %s. Change name or delete to save.' % file_name)
# estimate size of out_file
est_size = 0
for key in ['design', 'B_hat', 'grand_mean', 'var_pooled',
'gamma_star', 'delta_star', 'gamma_hat', 'delta_hat']:
est_size += model[key].nbytes / 1e6
print('Saving model object, estimated size in MB: %4.2f' % est_size)
print('\n[neuroHarmonize]: Saving model object, estimated size in MB: %4.2f' % est_size)
out_file = open(file_name, 'wb')
pickle.dump(model, out_file)
out_file.close()
Expand Down
65 changes: 58 additions & 7 deletions neuroHarmonize/harmonizationNIFTI.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def createMaskNIFTI(paths, threshold=0.0, output_path='thresholded_mask.nii.gz')
nifti_i = nib.load(paths.PATH[i])
nifti_sum += nifti_i.get_fdata()
if (i==500):
print('PROGRESS: loaded %d of %d images...' % (i, n_images))
print('\n[neuroHarmonize]: loaded %d of %d images...' % (i, n_images))
# compute average intensities
nifti_avg = nifti_sum / n_images
# apply threshold
Expand All @@ -72,7 +72,7 @@ def flattenNIFTIs(paths, mask_path, output_path='flattened_NIFTI_array.npy'):
dimensions must be identical for all images
mask_path : a str
file path to the mask, must be created with `create_NIFTI_mask`
file path to the mask, must be created with `createMaskNIFTI`
output_path : a str, default "flattened_NIFTI_array.npy"
Expand All @@ -83,8 +83,7 @@ def flattenNIFTIs(paths, mask_path, output_path='flattened_NIFTI_array.npy'):
dimensions are N_Images x N_Masked_Voxels
"""
print('\nWARNING: this procedure will consume large amounts of memory.')
print(' Consider down-sampling, masking aggressively, and/or sub-sampling NIFTIs...')
print('\n[neuroHarmonize]: Flattening NIFTIs will consume large amounts of memory. Down-sampling may help.')
# load mask (1=GM tissue, 0=Non-GM)
nifti_mask = (nib.load(mask_path).get_fdata().astype(int)==1)
n_voxels_flattened = np.sum(nifti_mask)
Expand All @@ -93,14 +92,66 @@ def flattenNIFTIs(paths, mask_path, output_path='flattened_NIFTI_array.npy'):
# initialize empty container
nifti_array = np.zeros((n_images, n_voxels_flattened))
# iterate over images and fill container
print('Flattening %d NIFTI images with %d voxels...' % (n_images, n_voxels_flattened))
print('\n[neuroHarmonize]: Flattening %d NIFTI images with %d voxels...' % (n_images, n_voxels_flattened))
for i in range(0, n_images):
nifti_i = nib.load(paths.PATH[i]).get_fdata()
nifti_array[i, :] = nifti_i[nifti_mask]
if (i==500):
print('PROGRESS: loaded %d of %d images...' % (i, n_images))
print('\n[neuroHarmonize]: loaded %d of %d images...' % (i, n_images))
# save array of flattened images
print('Size of array in MB: %2.3f' % (nifti_array.nbytes / 1e6))
print('\n[neuroHarmonize]: Size of array in MB: %2.3f' % (nifti_array.nbytes / 1e6))
np.save(output_path, nifti_array)
return nifti_array

def applyModelNIFTIs(covars, model, paths, mask_path):
    """
    Applies a pre-trained harmonization model sequentially to NIFTI images.

    Loading and adjusting one image at a time reduces the burden on memory
    resources for large datasets.

    Arguments
    ---------
    covars : a pandas DataFrame
        contains covariates to control for during harmonization
        all covariates must be encoded numerically (no categorical variables)
        must contain a single column "SITE" with site labels for ComBat
        dimensions are N_samples x (N_covariates + 1)
    model : a dictionary of model parameters
        the output of a call to `harmonizationLearn`
    paths : a pandas DataFrame
        must contain a column "PATH" with file paths to NIFTIs and must also
        contain a column "PATH_NEW" with file paths to the new NIFTIs that
        will be created with this function
        dimensions must be identical for all images
    mask_path : a str
        file path to the mask, must be created with `createMaskNIFTI`

    Returns
    -------
    affine : a numpy array
        affine matrix of the first image; reusable when saving derived images
    """
    # load mask (1=include, 0=exclude)
    nifti_mask = (nib.load(mask_path).get_fdata().astype(int)==1)
    n_voxels_flattened = np.sum(nifti_mask)
    # count number of images
    n_images = paths.shape[0]
    # affine of the first image, returned as documented
    affine_0 = nib.load(paths.PATH[0]).affine
    # apply harmonization model one image at a time
    for i in range(0, n_images):
        path_new = paths.PATH_NEW.values[i]
        # BUGFIX: use a loop-local name for the i-th covariate row; the
        # original reassigned `covars` itself, shrinking it to one row so
        # every iteration after the first raised an IndexError.
        covars_i = covars.iloc[[i], :]
        nifti = nib.load(paths.PATH[i])
        # flatten the masked voxels into a 1 x n_voxels_flattened array
        nifti_array = nifti.get_fdata()[nifti_mask].reshape((1, n_voxels_flattened))
        affine = nifti.affine
        nifti_array_adj = applyModelOne(nifti_array, covars_i, model)
        # scatter adjusted values back into a 3D volume (zeros outside mask)
        nifti_out = nifti_mask.astype(float).copy()
        nifti_out[nifti_mask] = nifti_array_adj[0, :]
        nifti_out = nib.Nifti1Image(nifti_out, affine)
        nifti_out.to_filename(path_new)
        # BUGFIX: print progress every 500 images, not only at i==500
        if (i + 1) % 500 == 0:
            print('\n[neuroHarmonize]: saved %d of %d images...' % (i + 1, n_images))
    # BUGFIX: return the affine as promised by the docstring (was None)
    return affine_0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def readme():
return f.read()

setup(name='neuroHarmonize',
version='0.3.8',
version='0.4.0',
description='Harmonization tools for multi-center neuroimaging studies.',
long_description=readme(),
url='https://github.com/rpomponio/neuroHarmonize',
Expand Down

0 comments on commit 4931a4b

Please sign in to comment.