diff --git a/idconn/io.py b/idconn/io.py
index b5f43e1..23b563c 100644
--- a/idconn/io.py
+++ b/idconn/io.py
@@ -35,7 +35,6 @@ def calc_fd(confounds):
     fd = np.sum([delta_x, delta_y, delta_z, delta_alpha, delta_beta, delta_gamma], axis=0)
     return fd
 
-
 def build_statsmodel_json(
     name,
     task,
@@ -132,7 +131,6 @@ def build_statsmodel_json(
         json.dump(statsmodel, outfile)
     return statsmodel_json
 
-
 def atlas_picker(atlas, path, key=None):
     """Takes in atlas name and path to file, if local, returns
     nifti-like object (usually file path to downloaded atlas),
@@ -192,8 +190,7 @@ def atlas_picker(atlas, path, key=None):
     return atlas, path
 
-
-def vectorize_corrmats(matrices):
+def vectorize_corrmats(matrices, diagonal=False):
    """Returns the vectorized upper triangles of a 3-dimensional array
    (i.e., node x node x matrix) of matrices. Output will be a 2-dimensional
    array (i.e., matrix x node^2)
@@ -210,11 +207,15 @@ def vectorize_corrmats(matrices):
         the input matrices.
     """
     # print(f'\n\n\n{matrices.shape}, {matrices.ndim}\n\n\n')
+    if diagonal:
+        k = 0
+    else:
+        k = 1
     num_node = matrices.shape[1]
-    upper_tri = np.triu_indices(num_node, k=1)
+    upper_tri = np.triu_indices(num_node, k=k)
     if matrices.ndim == 3:
         num_node = matrices.shape[1]
-        upper_tri = np.triu_indices(num_node, k=1)
+        upper_tri = np.triu_indices(num_node, k=k)
         num_matrices = matrices.shape[0]
         edge_vector = []
         for matrix in range(0, num_matrices):
@@ -234,7 +235,7 @@ def vectorize_corrmats(matrices):
     elif matrices.ndim == 1:
         if matrices[0].ndim == 2:
             num_node = matrices[0].shape[0]
-            upper_tri = np.triu_indices(num_node, k=1)
+            upper_tri = np.triu_indices(num_node, k=k)
             edge_vector = []
             for matrix in matrices:
                 vectorized = matrix[upper_tri]
@@ -248,7 +249,6 @@ def vectorize_corrmats(matrices):
     edge_vector = np.asarray(edge_vector)
     return edge_vector
 
-
 def read_corrmats(layout, task, deriv_name, atlas, z_score=True, vectorized=True, verbose=False):
     """Returns a node x node x (subject x session) matrix of correlation
     matrices from a BIDS derivative folder. Optionally returns a node^2 x
     (subject x session)
@@ -419,8 +419,7 @@ def read_corrmats(layout, task, deriv_name, atlas, z_score=True, vectorized=True
     ppt_df.replace({"": np.nan}, inplace=True)
     return ppt_df
 
-
-def undo_vectorize(edges, num_node=None):
+def undo_vectorize(edges, num_node=None, diagonal=False):
     """
     Puts an edge vector back into an adjacency matrix.
 
     Parameters
@@ -439,15 +438,25 @@ def undo_vectorize(edges, num_node=None):
     # num_node = (np.sqrt((8 * j) + 1) + 1) / 2
     if num_node == None:
         j = len(edges)
-        num_node = int((np.sqrt((8 * j) + 1) + 1) / 2)
+        if not diagonal:
+            num_node = int((np.sqrt((8 * j) + 1) + 1) / 2)
+        else:
+            num_node = int((np.sqrt((8 * j) + 1) - 1) / 2)
     else:
         num_node = int(num_node)
     X = np.zeros((num_node, num_node))
-    X[np.triu_indices(X.shape[0], k=1)] = edges
+    if diagonal:
+        k = 0
+    else:
+        k = 1
+    X[np.triu_indices(num_node, k=k)] = edges
+    diag_X = X[np.diag_indices(num_node, 2)]
     X = X + X.T
+    if diagonal:
+        X[np.diag_indices(num_node, 2)] = diag_X
+    # print('did undo_vectorize work?', np.allclose(X, X.T))
     return X
 
-
 def plot_edges(
     adj,
     atlas_nii,
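With the new diagonal flag, vectorize_corrmats and undo_vectorize should be inverses of each other on either triangle: n(n+1)/2 edges survive with the diagonal kept (k=0), n(n-1)/2 without it (k=1), which is exactly what the two num_node inversions above solve for. A quick round-trip sketch (toy values; assumes the elided loop body appends matrix[upper_tri] as the surrounding hunks suggest):

# Round-trip check for the diagonal=True path; illustrative only.
import numpy as np
from idconn import io

rng = np.random.default_rng(0)
A = rng.random((4, 4))
A = (A + A.T) / 2                        # symmetric toy "correlation" matrix
v = io.vectorize_corrmats(A[None, :, :], diagonal=True)
assert v.shape[-1] == 4 * 5 // 2         # 10 unique edges, diagonal included
B = io.undo_vectorize(v[0], diagonal=True)
assert np.allclose(A, B)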
@@ -499,7 +508,7 @@ def plot_edges(
     print("edge plotting threshold: ", threshold)
 
     if node_size == "strength":
-        node_strength = np.sum(adj, axis=0)
+        node_strength = np.abs(np.sum(adj, axis=0))
         # node_strength /= np.max(node_strength)
         # node_strength **= 4
         node_strength = node_strength / np.max(node_strength) * 60
@@ -535,7 +544,7 @@ def plot_edges(
         nimg = nib.load(atlas_nii)
         regn_sch_arr = nimg.get_fdata()
         for i in np.arange(0, num_node):
-            regn_sch_arr[np.where(regn_sch_arr == i + 1)] = np.sum(adj[i])
+            regn_sch_arr[np.where(regn_sch_arr == i + 1)] = np.sum((adj[i]))
         strength_nimg = nib.Nifti1Image(regn_sch_arr, nimg.affine)
         # replace this filename with BIDSy output
         # nib.save(strength_nimg, f'/Users/katherine.b/Dropbox/{title}predictive-strength.nii')
@@ -558,6 +567,7 @@ def plot_edges(
             i = plotting.plot_surf_stat_map(
                 fsaverage.pial_left,
                 texture_l,
+                bg_map=fsaverage.sulc_left,
                 symmetric_cbar=False,
                 threshold=0.5,
                 cmap=cmap,
@@ -568,6 +578,7 @@ def plot_edges(
             j = plotting.plot_surf_stat_map(
                 fsaverage.pial_left,
                 texture_l,
+                bg_map=fsaverage.sulc_left,
                 symmetric_cbar=False,
                 threshold=0.5,
                 cmap=cmap,
@@ -578,6 +589,7 @@ def plot_edges(
             k = plotting.plot_surf_stat_map(
                 fsaverage.pial_right,
                 texture_r,
+                bg_map=fsaverage.sulc_right,
                 symmetric_cbar=False,
                 threshold=0.5,
                 cmap=cmap,
@@ -588,6 +600,7 @@ def plot_edges(
             l = plotting.plot_surf_stat_map(
                 fsaverage.pial_right,
                 texture_r,
+                bg_map=fsaverage.sulc_right,
                 symmetric_cbar=False,
                 threshold=0.5,
                 cmap=cmap,
diff --git a/idconn/workflows/nbs_predict-e2.py b/idconn/workflows/nbs_predict-e2.py
new file mode 100644
index 0000000..c92d274
--- /dev/null
+++ b/idconn/workflows/nbs_predict-e2.py
@@ -0,0 +1,419 @@
+#!/usr/bin/env python3
+import pandas as pd
+import numpy as np
+import nibabel as nib
+import seaborn as sns
+import bids
+import matplotlib.pyplot as plt
+from os.path import join
+from datetime import datetime
+from time import strftime
+from scipy.stats import spearmanr
+from idconn import nbs, io
+
+from bct import threshold_proportional
+
+
+from sklearn.linear_model import LogisticRegression, Ridge
+from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, cross_validate
+from sklearn.preprocessing import Normalizer, StandardScaler
+from sklearn.metrics import mean_squared_error
+from matplotlib.colors import ListedColormap
+import matplotlib as mpl
+
+
+import warnings
+import json
+
+warnings.simplefilter("ignore")
+
+today = datetime.today()
+today_str = strftime("%m_%d_%Y")
+
+TRAIN_DSET = "/Users/katherine.b/Dropbox/Data/ds002674"
+TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset"
+DERIV_NAME = "IDConn"
+OUTCOME = "estradiol"
+CONFOUNDS = ["framewise_displacement"]
+TASK = "rest"
+ATLAS = "craddock2012"
+THRESH = 0.5
+alpha = 0.01
+atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz"
"/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz" + + +layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True) + +dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) + +keep = dat["adj"].dropna().index +dat = dat.loc[keep] + +groups = dat["bc"] +# print(dat['adj'].values.shape) +num_node = dat.iloc[0]["adj"].shape[0] + +matrices = np.vstack(dat["adj"].values).reshape((len(keep), num_node, num_node)) +upper_tri = np.triu_indices(num_node, k=1) + +outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1)) + +#print(len(np.unique(outcome))) + +if CONFOUNDS is not None: + confounds = dat[CONFOUNDS] + base_name = f"nbs-predict_outcome-{OUTCOME}_confounds-{CONFOUNDS}" +else: + confounds = None + base_name = f"nbs-predict_outcome-{OUTCOME}" +# print(dat['bc']) + +weighted_average, cv_results = nbs.kfold_nbs( + matrices, outcome, confounds, alpha, groups=groups, n_splits=3, n_iterations=3 +) + +fig, fig2, nimg = io.plot_edges( + weighted_average, + atlas_fname, + threshold="computed", + title=f"{OUTCOME} Precision-Weighted Average", + strength=True, + cmap="seismic", + node_size="strength", +) + +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.png"), dpi=400 +) +fig2.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.png"), + dpi=400, +) +nib.save( + nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}") +) + + +avg_df = pd.DataFrame( + weighted_average, + index=range(0, weighted_average.shape[0]), + columns=range(0, weighted_average.shape[1]), +) + +cv_results.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_models-{today_str}.tsv"), sep="\t" +) +avg_df.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t" +) + +best = cv_results.sort_values(by='score', ascending=False).iloc[0]['model'] + +# this uses the most predictive subnetwork as features in the model +# might replace with thresholded weighted_average +# or use _all_ the edges in weighted_average with KRR or ElasticNet... +# ORRR use thresholded weighted average edges with ElasticNet... +# - stays true to NBS-Predict +# - increases parsimony while handling multicollinearity... 
+# either way, I don't think cv_results is necessary
+
+# here is where we'd threshold the weighted average to use for elastic-net
+weighted_average = np.where(weighted_average > 0, weighted_average, 0)
+# print(np.sum(weighted_average))
+# nbs_vector = weighted_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+# filter = np.where(nbs_vector >= p75, True, False)
+# print(np.sum(filter))
+# print(nbs_vector.shape, filter.shape)
+
+thresh_average = threshold_proportional(weighted_average, THRESH)
+nbs_vector2 = thresh_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+filter = np.where(nbs_vector2 > 0, True, False)
+
+# mask = io.vectorize_corrmats(filter)
+edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter]
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_train = dat[CONFOUNDS].values
+    outcome_train = np.reshape(outcome, (outcome.shape[0],))
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_train)) <= 2:
+        resid_edges = nbs.residualize(X=edges_train, confounds=confounds_train)
+        train_outcome = outcome
+    elif len(np.unique(outcome_train)) > 3:
+        train_outcome, resid_edges = nbs.residualize(
+            X=edges_train, y=outcome_train, confounds=confounds_train
+        )
+    train_features = resid_edges
+else:
+    train_features = edges_train
+    train_outcome = outcome
+
+x_scaler = StandardScaler()
+y_scaler = StandardScaler()
+train_features = x_scaler.fit_transform(train_features)
+if len(np.unique(train_outcome)) <= 2:
+    pass
+else:
+    train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1))
+
+
+
+# run the model on the whole test dataset to get params
+
+# classification if the outcome is binary (for now)
+# could be extended to the multiclass case?
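The "regress out the confounds" comments carry the reasoning; here is a minimal sketch of the ordinary-least-squares residualization a helper like nbs.residualize presumably performs (signature and return order inferred from the calls above; the actual implementation may differ):

# Hypothetical sketch of confound residualization via OLS.
import numpy as np

def residualize_sketch(X, confounds, y=None):
    C = np.column_stack([np.ones(len(confounds)), confounds])  # add intercept
    beta_X, *_ = np.linalg.lstsq(C, X, rcond=None)
    X_resid = X - C @ beta_X                  # per-edge residuals
    if y is None:
        return X_resid
    beta_y, *_ = np.linalg.lstsq(C, y, rcond=None)
    return y - C @ beta_y, X_resid            # (outcome, edges) order as above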
+train_metrics = {} +if len(np.unique(outcome)) == 2: + model = LogisticRegression( + penalty="l2", + solver="saga", + C=best.C_[0] + ) + train_metrics["alpha"] = best.C_[0] + #train_metrics["l1_ratio"] = best.l1_ratio_ +else: + model = Ridge( + solver="auto", + alpha=best.alpha_, + fit_intercept=False, + ) + train_metrics["alpha"] = best.alpha_ + +cv = RepeatedKFold(n_splits=5, n_repeats=10) + + #train_metrics["l1_ratio"] = best.l1_ratio_ +#print(params) +#model.set_params(**params) +# train ElasticNet on full train dataset, using feature extraction from NBS-Predict +#fitted = model.fit(X=train_features, y=np.ravel(train_outcome)) +scores = cross_validate( + model, + train_features, + train_outcome, + groups=groups, + cv=cv, + return_estimator=True, + return_train_score=True + ) +train_metrics["in_sample_test"] = np.mean(scores['test_score']) +train_metrics["in_sample_train"] = np.mean(scores['train_score']) + +fitted = scores['estimator'][0] +y_pred = fitted.predict(X=train_features) +train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome) + +dat[f'{OUTCOME}_pred'] = y_pred +dat[f'{OUTCOME}_scaled'] = train_outcome + +Ys = dat[[f'{OUTCOME}_pred', f'{OUTCOME}_scaled', 'bc', 'cycle_day']] +Ys.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t') + +train_colors = ['#a08ad1', #light + '#685690', #medium + '#3f2d69' #dark + ] +light_cmap = sns.color_palette('dark:#a08ad1') +dark_cmap = sns.color_palette('dark:#685690') + +fig,ax = plt.subplots() +g = sns.scatterplot(x='cycle_day', + y=f'{OUTCOME}_pred', + style='bc', + data=Ys, + ax=ax, + palette=dark_cmap) +h = sns.scatterplot(x='cycle_day', + y=f'{OUTCOME}_scaled', + style='bc', + data=Ys, + ax=ax, + palette=light_cmap) +ax.legend(bbox_to_anchor=(1.0, 0.5)) +fig.savefig(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight') + +mse = mean_squared_error(train_outcome, y_pred) +train_metrics["mean squared error"] = mse +print("In-sample train score: ", train_metrics["in_sample_train"]) +print("In-sample test score: ", train_metrics["in_sample_test"]) +print("In-sample mean squared error: ", mse) +# print(np.mean(train_features)) +with open( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w" +) as fp: + json.dump(train_metrics, fp) + +# yoink the coefficients? for a more parsimonious figure? 
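One judgment call worth flagging: scores['estimator'][0] keeps whichever model happened to be fit on the first of the 50 folds. If the intent is the strongest fold's model, a one-line variant using the same names as above would be:

# Variant, not the script's current behavior: keep the best-scoring fold's model.
best_idx = int(np.argmax(scores["test_score"]))
fitted = scores["estimator"][best_idx]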
+# print(fitted.coef_.shape)
+# print(fitted.coef_)
+coeff_vec = np.zeros_like(filter, dtype=float)  # float dtype; plain zeros_like(filter) would inherit the mask's boolean dtype and truncate the betas
+j = 0
+for i in range(0, filter.shape[0]):
+    if filter[i]:
+        # print(j)
+        # print(fitted.coef_[0, j])
+        coeff_vec[i] = fitted.coef_[0, j]
+        j += 1
+    else:
+        pass
+
+# print(coeff_vec)
+print(coeff_vec)
+coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node)
+
+coef_df = pd.DataFrame(coef_mat, columns=avg_df.columns, index=avg_df.index)
+coef_df.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.csv"))
+
+fig, fig2, nimg = io.plot_edges(
+    coef_mat,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Coefficients",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.png"),
+    dpi=400,
+)
+nib.save(
+    nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}")
+)
+
+
+layout = bids.BIDSLayout(TEST_DSET, derivatives=True)
+
+test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+
+keep = test_df[[OUTCOME, "adj"]].dropna().index
+# print(keep)
+
+test_df = test_df.loc[keep]
+
+outcome_test = test_df[OUTCOME].values
+# print(test_df)
+
+# print(outcome_test)
+matrices_test = np.vstack(test_df["adj"].dropna().values).reshape(
+    (len(test_df["adj"].dropna().index), num_node, num_node)
+)
+edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter]
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_test = test_df[CONFOUNDS].values
+
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_test)) <= 2:
+        resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test)
+        test_outcome = outcome_test
+    elif len(np.unique(outcome_test)) > 3:
+        test_outcome, resid_edges = nbs.residualize(
+            X=edges_test, y=outcome_test, confounds=confounds_test
+        )
+    test_features = resid_edges
+else:
+    test_features = edges_test
+    test_outcome = outcome_test
+
+# scale after residualizing omg
+test_features = x_scaler.transform(test_features)
+if len(np.unique(test_outcome)) <= 2:
+    pass
+else:
+    test_outcome = y_scaler.transform(test_outcome.reshape(-1, 1))
+# print(test_features.shape)
+# if the model is a logistic regression, i.e. with a binary outcome
+# then score is prediction accuracy
+# if the model is a linear regression, i.e., with a continuous outcome
+# then the score is R^2 (coefficient of determination)
+
+# fit trained ElasticNet, initialized via warm_start
+# prob in CV?
+# fitted_test = fitted.fit(X=test_features, y=np.ravel(test_outcome))
+# score = fitted_test.score(X=test_features, y=np.ravel(test_outcome))
+test_metrics = {}
+
+# cross_validate(model, )
+y_pred = fitted.predict(X=test_features)
+score = fitted.score(X=test_features, y=np.ravel(test_outcome))
+if len(np.unique(test_outcome)) == 2:
+    test_metrics["accuracy"] = score
+else:
+    test_metrics["coefficient of determination"] = score
+corr = spearmanr(test_outcome, y_pred)
+test_metrics["pred_v_actual_corr"] = corr
+mse = mean_squared_error(test_outcome, y_pred)
+test_metrics["mean squared error"] = mse
+print("Out-of-sample prediction score:\t", score)
+print("Out-of-sample mean squared error:\t", mse)
+# print(np.mean(test_features))
+# pred_outcome = fitted.predict(test_features)
+test_df[f'{OUTCOME}_scaled'] = test_outcome
+test_df[f'{OUTCOME}_pred'] = y_pred
+Ys = test_df[[f'{OUTCOME}_scaled',
+              f'{OUTCOME}_pred',
+              'cycle_day',
+              'bc']]
+Ys.to_csv(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t')
+
+Ys['ppts'] = Ys.index.get_level_values(0)
+
+
+light_colors = ['#33ACE3',  # Bubbles
+                '#EA6964',  # Blossom
+                '#4AB62C']  # Buttercup
+dark_colors = ['#1278a6',
+               '#a11510',
+               '#228208']
+light = ListedColormap(light_colors, name='light_powderpuff')
+dark = ListedColormap(dark_colors, name='dark_powderpuff')
+mpl.colormaps.register(cmap=light)
+mpl.colormaps.register(cmap=dark)
+
+fig, ax = plt.subplots()
+g = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_pred',
+                    style='bc',
+                    data=Ys,
+                    hue='ppts',
+                    hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'],
+                    ax=ax,
+                    palette='light_powderpuff')
+h = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_scaled',
+                    style='bc',
+                    data=Ys,
+                    hue='ppts',
+                    hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'],
+                    ax=ax,
+                    palette='dark_powderpuff')
+ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')
+fig.savefig(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight')
+
+
+# print(test_outcome, "\n", y_pred)
+# print(pred_outcome)
+if len(np.unique(test_outcome)) > 2:
+    print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr)
+    test_metrics["spearman correlation"] = corr
+with open(
+    join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w"
+) as fp:
+    json.dump(test_metrics, fp)
+np.savetxt(join(TEST_DSET, f"{base_name}_predicted-values_fit-{today_str}.txt"), y_pred)
diff --git a/idconn/workflows/nbs_predict-e2xp4-bc.py b/idconn/workflows/nbs_predict-e2xp4-bc.py
new file mode 100644
index 0000000..ad6a6d8
--- /dev/null
+++ b/idconn/workflows/nbs_predict-e2xp4-bc.py
@@ -0,0 +1,422 @@
+#!/usr/bin/env python3
+import pandas as pd
+import numpy as np
+import nibabel as nib
+import seaborn as sns
+import bids
+import matplotlib.pyplot as plt
+from os.path import join
+from datetime import datetime
+from time import strftime
+from scipy.stats import spearmanr
+from idconn import nbs, io
+
+from bct import threshold_proportional
+
+
+from sklearn.linear_model import LogisticRegression, Ridge
+from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, cross_validate
+from sklearn.preprocessing import Normalizer, StandardScaler
+from sklearn.metrics import mean_squared_error
+from matplotlib.colors import ListedColormap
+import matplotlib as mpl
+
+
+import warnings
+import json
+
+warnings.simplefilter("ignore")
+
+today = datetime.today()
+today_str = strftime("%m_%d_%Y")
+
+TRAIN_DSET = "/Users/katherine.b/Dropbox/Data/ds002674"
+TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset"
+DERIV_NAME = "IDConn"
+OUTCOME = "estradiol÷progesterone"
+CONFOUNDS = ["framewise_displacement", "bc"]
+TASK = "rest"
+ATLAS = "craddock2012"
+THRESH = 0.5
+alpha = 0.01
+atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz"
+
+
+layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True)
+
+dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+
+dat['estradiol÷progesterone'] = dat['estradiol'] / dat['progesterone']
+
+keep = dat["adj"].dropna().index
+dat = dat.loc[keep]
+
+groups = dat["bc"]
+# print(dat['adj'].values.shape)
+num_node = dat.iloc[0]["adj"].shape[0]
+
+matrices = np.vstack(dat["adj"].values).reshape((len(keep), num_node, num_node))
+upper_tri = np.triu_indices(num_node, k=1)
+
+outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1))
+
+# print(len(np.unique(outcome)))
+
+if CONFOUNDS is not None:
+    confounds = dat[CONFOUNDS]
+    base_name = f"nbs-predict_outcome-{OUTCOME}_confounds-{CONFOUNDS}"
+else:
+    confounds = None
+    base_name = f"nbs-predict_outcome-{OUTCOME}"
+# print(dat['bc'])
+
+weighted_average, cv_results = nbs.kfold_nbs(
+    matrices, outcome, confounds, alpha, groups=groups, n_splits=5, n_iterations=1000
+)
+
+fig, fig2, nimg = io.plot_edges(
+    weighted_average,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Precision-Weighted Average",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.png"),
+    dpi=400,
+)
+nib.save(
+    nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}")
+)
+
+
+avg_df = pd.DataFrame(
+    weighted_average,
+    index=range(0, weighted_average.shape[0]),
+    columns=range(0, weighted_average.shape[1]),
+)
+
+cv_results.to_csv(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_models-{today_str}.tsv"), sep="\t"
+)
+avg_df.to_csv(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t"
+)
+
+best = cv_results.sort_values(by='score', ascending=False).iloc[0]['model']
+
+# this uses the most predictive subnetwork as features in the model
+# might replace with thresholded weighted_average
+# or use _all_ the edges in weighted_average with KRR or ElasticNet...
+# ORRR use thresholded weighted average edges with ElasticNet...
+# - stays true to NBS-Predict
+# - increases parsimony while handling multicollinearity...
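The ratio outcome defined above goes infinite or NaN wherever progesterone is zero or missing; a guarded variant (column names as in the script) might be:

# Hypothetical guard: mask non-positive progesterone before taking the ratio,
# so the dropna() calls remove those sessions instead of propagating inf.
p4 = dat['progesterone'].where(dat['progesterone'] > 0)
dat['estradiol÷progesterone'] = dat['estradiol'] / p4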
+# either way, I don't think cv_results is necessary
+
+# here is where we'd threshold the weighted average to use for elastic-net
+weighted_average = np.where(weighted_average > 0, weighted_average, 0)
+# print(np.sum(weighted_average))
+# nbs_vector = weighted_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+# filter = np.where(nbs_vector >= p75, True, False)
+# print(np.sum(filter))
+# print(nbs_vector.shape, filter.shape)
+
+thresh_average = threshold_proportional(weighted_average, THRESH)
+nbs_vector2 = thresh_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+filter = np.where(nbs_vector2 > 0, True, False)
+
+# mask = io.vectorize_corrmats(filter)
+edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter]
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_train = dat[CONFOUNDS].values
+    outcome_train = np.reshape(outcome, (outcome.shape[0],))
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_train)) <= 2:
+        resid_edges = nbs.residualize(X=edges_train, confounds=confounds_train)
+        train_outcome = outcome
+    elif len(np.unique(outcome_train)) > 3:
+        train_outcome, resid_edges = nbs.residualize(
+            X=edges_train, y=outcome_train, confounds=confounds_train
+        )
+    train_features = resid_edges
+else:
+    train_features = edges_train
+    train_outcome = outcome
+
+x_scaler = StandardScaler()
+y_scaler = StandardScaler()
+train_features = x_scaler.fit_transform(train_features)
+if len(np.unique(train_outcome)) <= 2:
+    pass
+else:
+    train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1))
+
+
+
+# run the model on the whole test dataset to get params
+
+# classification if the outcome is binary (for now)
+# could be extended to the multiclass case?
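A caveat on the cross-validation block that follows: groups is passed to cross_validate, but RepeatedKFold ignores the groups argument, so splits are fully random. If folds should keep the bc grouping intact, a group-aware splitter is needed, for example:

# Variant under the assumption that folds should respect the bc grouping;
# GroupKFold actually consumes the groups argument that (Repeated)KFold ignores.
from sklearn.model_selection import GroupKFold

cv = GroupKFold(n_splits=2)  # at most as many splits as there are unique groups
scores = cross_validate(model, train_features, train_outcome,
                        groups=groups, cv=cv,
                        return_estimator=True, return_train_score=True)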
+train_metrics = {}
+if len(np.unique(outcome)) == 2:
+    model = LogisticRegression(
+        penalty="l2",
+        solver="saga",
+        C=best.C_[0],
+    )
+    train_metrics["alpha"] = best.C_[0]
+    # train_metrics["l1_ratio"] = best.l1_ratio_
+else:
+    model = Ridge(
+        solver="auto",
+        alpha=best.alpha_,
+        fit_intercept=False,
+    )
+    train_metrics["alpha"] = best.alpha_
+
+cv = RepeatedKFold(n_splits=5, n_repeats=10)
+
+# train_metrics["l1_ratio"] = best.l1_ratio_
+# print(params)
+# model.set_params(**params)
+# train ElasticNet on full train dataset, using feature extraction from NBS-Predict
+# fitted = model.fit(X=train_features, y=np.ravel(train_outcome))
+scores = cross_validate(
+    model,
+    train_features,
+    train_outcome,
+    groups=groups,
+    cv=cv,
+    return_estimator=True,
+    return_train_score=True,
+)
+train_metrics["in_sample_test"] = np.mean(scores['test_score'])
+train_metrics["in_sample_train"] = np.mean(scores['train_score'])
+
+fitted = scores['estimator'][0]
+y_pred = fitted.predict(X=train_features)
+train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome)
+
+dat[f'{OUTCOME}_pred'] = y_pred
+dat[f'{OUTCOME}_scaled'] = train_outcome
+
+Ys = dat[[f'{OUTCOME}_pred', f'{OUTCOME}_scaled', 'bc', 'cycle_day']]
+Ys.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t')
+
+train_colors = ['#a08ad1',  # light
+                '#685690',  # medium
+                '#3f2d69']  # dark
+light_cmap = sns.color_palette('dark:#a08ad1')
+dark_cmap = sns.color_palette('dark:#685690')
+
+fig, ax = plt.subplots()
+g = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_pred',
+                    style='bc',
+                    data=Ys,
+                    ax=ax,
+                    palette=dark_cmap)
+h = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_scaled',
+                    style='bc',
+                    data=Ys,
+                    ax=ax,
+                    palette=light_cmap)
+ax.legend(bbox_to_anchor=(1.0, 0.5))
+fig.savefig(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight')
+
+mse = mean_squared_error(train_outcome, y_pred)
+train_metrics["mean squared error"] = mse
+print("In-sample train score: ", train_metrics["in_sample_train"])
+print("In-sample test score: ", train_metrics["in_sample_test"])
+print("In-sample mean squared error: ", mse)
+# print(np.mean(train_features))
+with open(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w"
+) as fp:
+    json.dump(train_metrics, fp)
+
+# yoink the coefficients? for a more parsimonious figure?
+# print(fitted.coef_.shape)
+# print(fitted.coef_)
+coeff_vec = np.zeros_like(filter, dtype=float)  # float dtype; plain zeros_like(filter) would inherit the mask's boolean dtype and truncate the betas
+j = 0
+for i in range(0, filter.shape[0]):
+    if filter[i]:
+        # print(j)
+        # print(fitted.coef_[0, j])
+        coeff_vec[i] = fitted.coef_[0, j]
+        j += 1
+    else:
+        pass
+
+# print(coeff_vec)
+print(coeff_vec)
+coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node)
+
+coef_df = pd.DataFrame(coef_mat, columns=avg_df.columns, index=avg_df.index)
+coef_df.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.csv"))
+
+fig, fig2, nimg = io.plot_edges(
+    coef_mat,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Coefficients",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.png"),
+    dpi=400,
+)
+nib.save(
+    nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}")
+)
+
+
+layout = bids.BIDSLayout(TEST_DSET, derivatives=True)
+
+test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+test_df['estradiol÷progesterone'] = test_df['estradiol'] / test_df['progesterone']
+
+keep = test_df[[OUTCOME, "adj"]].dropna().index
+# print(keep)
+
+test_df = test_df.loc[keep]
+
+outcome_test = test_df[OUTCOME].values
+# print(test_df)
+
+# print(outcome_test)
+matrices_test = np.vstack(test_df["adj"].dropna().values).reshape(
+    (len(test_df["adj"].dropna().index), num_node, num_node)
+)
+edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter]
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_test = test_df[CONFOUNDS].values
+
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_test)) <= 2:
+        resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test)
+        test_outcome = outcome_test
+    elif len(np.unique(outcome_test)) > 3:
+        test_outcome, resid_edges = nbs.residualize(
+            X=edges_test, y=outcome_test, confounds=confounds_test
+        )
+    test_features = resid_edges
+else:
+    test_features = edges_test
+    test_outcome = outcome_test
+
+# scale after residualizing omg
+test_features = x_scaler.transform(test_features)
+if len(np.unique(test_outcome)) <= 2:
+    pass
+else:
+    test_outcome = y_scaler.transform(test_outcome.reshape(-1, 1))
+# print(test_features.shape)
+# if the model is a logistic regression, i.e. with a binary outcome
+# then score is prediction accuracy
+# if the model is a linear regression, i.e., with a continuous outcome
+# then the score is R^2 (coefficient of determination)
+
+# fit trained ElasticNet, initialized via warm_start
+# prob in CV?
+# fitted_test = fitted.fit(X=test_features, y=np.ravel(test_outcome))
+# score = fitted_test.score(X=test_features, y=np.ravel(test_outcome))
+test_metrics = {}
+
+# cross_validate(model, )
+y_pred = fitted.predict(X=test_features)
+score = fitted.score(X=test_features, y=np.ravel(test_outcome))
+if len(np.unique(test_outcome)) == 2:
+    test_metrics["accuracy"] = score
+else:
+    test_metrics["coefficient of determination"] = score
+corr = spearmanr(test_outcome, y_pred)
+test_metrics["pred_v_actual_corr"] = corr
+mse = mean_squared_error(test_outcome, y_pred)
+test_metrics["mean squared error"] = mse
+print("Out-of-sample prediction score:\t", score)
+print("Out-of-sample mean squared error:\t", mse)
+# print(np.mean(test_features))
+# pred_outcome = fitted.predict(test_features)
+test_df[f'{OUTCOME}_scaled'] = test_outcome
+test_df[f'{OUTCOME}_pred'] = y_pred
+Ys = test_df[[f'{OUTCOME}_scaled',
+              f'{OUTCOME}_pred',
+              'cycle_day',
+              'bc']]
+Ys.to_csv(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t')
+
+Ys['ppts'] = Ys.index.get_level_values(0)
+
+
+light_colors = ['#33ACE3',  # Bubbles
+                '#EA6964',  # Blossom
+                '#4AB62C']  # Buttercup
+dark_colors = ['#1278a6',
+               '#a11510',
+               '#228208']
+light = ListedColormap(light_colors, name='light_powderpuff')
+dark = ListedColormap(dark_colors, name='dark_powderpuff')
+mpl.colormaps.register(cmap=light)
+mpl.colormaps.register(cmap=dark)
+
+fig, ax = plt.subplots()
+g = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_pred',
+                    style='bc',
+                    data=Ys,
+                    hue='ppts',
+                    hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'],
+                    ax=ax,
+                    palette='light_powderpuff')
+h = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_scaled',
+                    style='bc',
+                    data=Ys,
+                    hue='ppts',
+                    hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'],
+                    ax=ax,
+                    palette='dark_powderpuff')
+ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')
+fig.savefig(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight')
+
+
+# print(test_outcome, "\n", y_pred)
+# print(pred_outcome)
+if len(np.unique(test_outcome)) > 2:
+    print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr)
+    test_metrics["spearman correlation"] = corr
+with open(
+    join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w"
+) as fp:
+    json.dump(test_metrics, fp)
+np.savetxt(join(TEST_DSET, f"{base_name}_predicted-values_fit-{today_str}.txt"), y_pred)
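nbs_predict-e2.py, -e2xp4-bc.py, -e2xp4.py (below), and -p4.py differ only in OUTCOME, CONFOUNDS, and the NBS settings. A hedged refactor would hoist those into command-line arguments so one script serves all four runs; argument names here are illustrative, not an existing CLI:

# Sketch of a single parameterized entry point for the near-identical workflows.
import argparse

parser = argparse.ArgumentParser(description="NBS-Predict workflow")
parser.add_argument("--outcome", required=True)  # e.g. estradiol, progesterone
parser.add_argument("--confounds", nargs="*", default=["framewise_displacement"])
parser.add_argument("--n-splits", type=int, default=5)
parser.add_argument("--n-iterations", type=int, default=1000)
args = parser.parse_args()
OUTCOME, CONFOUNDS = args.outcome, args.confounds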
"/Users/katherine.b/Dropbox/Data/ds002674" +TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset" +DERIV_NAME = "IDConn" +OUTCOME = "estradiol÷progesterone" +CONFOUNDS = ["framewise_displacement"] +TASK = "rest" +ATLAS = "craddock2012" +THRESH = 0.5 +alpha = 0.01 +atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz" + + +layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True) + +dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) + +dat['estradiol÷progesterone'] = dat['estradiol'] / dat['progesterone'] + +keep = dat["adj"].dropna().index +dat = dat.loc[keep] + +groups = dat["bc"] +# print(dat['adj'].values.shape) +num_node = dat.iloc[0]["adj"].shape[0] + +matrices = np.vstack(dat["adj"].values).reshape((len(keep), num_node, num_node)) +upper_tri = np.triu_indices(num_node, k=1) + +outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1)) + +#print(len(np.unique(outcome))) + +if CONFOUNDS is not None: + confounds = dat[CONFOUNDS] + base_name = f"nbs-predict_outcome-{OUTCOME}_confounds-{CONFOUNDS}" +else: + confounds = None + base_name = f"nbs-predict_outcome-{OUTCOME}" +# print(dat['bc']) + +weighted_average, cv_results = nbs.kfold_nbs( + matrices, outcome, confounds, alpha, groups=groups, n_splits=5, n_iterations=1000 +) + +fig, fig2, nimg = io.plot_edges( + weighted_average, + atlas_fname, + threshold="computed", + title=f"{OUTCOME} Precision-Weighted Average", + strength=True, + cmap="seismic", + node_size="strength", +) + +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.png"), dpi=400 +) +fig2.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.png"), + dpi=400, +) +nib.save( + nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}") +) + + +avg_df = pd.DataFrame( + weighted_average, + index=range(0, weighted_average.shape[0]), + columns=range(0, weighted_average.shape[1]), +) + +cv_results.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_models-{today_str}.tsv"), sep="\t" +) +avg_df.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t" +) + +best = cv_results.sort_values(by='score', ascending=False).iloc[0]['model'] + +# this uses the most predictive subnetwork as features in the model +# might replace with thresholded weighted_average +# or use _all_ the edges in weighted_average with KRR or ElasticNet... +# ORRR use thresholded weighted average edges with ElasticNet... +# - stays true to NBS-Predict +# - increases parsimony while handling multicollinearity... 
+# either way, I don't think cv_results is necessary
+
+# here is where we'd threshold the weighted average to use for elastic-net
+weighted_average = np.where(weighted_average > 0, weighted_average, 0)
+# print(np.sum(weighted_average))
+# nbs_vector = weighted_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+# filter = np.where(nbs_vector >= p75, True, False)
+# print(np.sum(filter))
+# print(nbs_vector.shape, filter.shape)
+
+thresh_average = threshold_proportional(weighted_average, THRESH)
+nbs_vector2 = thresh_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+filter = np.where(nbs_vector2 > 0, True, False)
+
+# mask = io.vectorize_corrmats(filter)
+edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter]
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_train = dat[CONFOUNDS].values
+    outcome_train = np.reshape(outcome, (outcome.shape[0],))
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_train)) <= 2:
+        resid_edges = nbs.residualize(X=edges_train, confounds=confounds_train)
+        train_outcome = outcome
+    elif len(np.unique(outcome_train)) > 3:
+        train_outcome, resid_edges = nbs.residualize(
+            X=edges_train, y=outcome_train, confounds=confounds_train
+        )
+    train_features = resid_edges
+else:
+    train_features = edges_train
+    train_outcome = outcome
+
+x_scaler = StandardScaler()
+y_scaler = StandardScaler()
+train_features = x_scaler.fit_transform(train_features)
+if len(np.unique(train_outcome)) <= 2:
+    pass
+else:
+    train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1))
+
+
+
+# run the model on the whole test dataset to get params
+
+# classification if the outcome is binary (for now)
+# could be extended to the multiclass case?
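On the comment's open question: scikit-learn's LogisticRegression already handles more than two classes, so the binary branch below could plausibly generalize by keying on the class count rather than on exactly two labels. A hedged sketch, keeping the script's solver and penalty:

# Sketch of a multiclass-friendly dispatch; the 10-class cutoff is an
# arbitrary illustrative heuristic for "small label set means classification".
n_classes = len(np.unique(outcome))
if 2 <= n_classes <= 10:
    model = LogisticRegression(penalty="l2", solver="saga", C=best.C_[0])
else:
    model = Ridge(solver="auto", alpha=best.alpha_, fit_intercept=False)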
+train_metrics = {}
+if len(np.unique(outcome)) == 2:
+    model = LogisticRegression(
+        penalty="l2",
+        solver="saga",
+        C=best.C_[0],
+    )
+    train_metrics["alpha"] = best.C_[0]
+    # train_metrics["l1_ratio"] = best.l1_ratio_
+else:
+    model = Ridge(
+        solver="auto",
+        alpha=best.alpha_,
+        fit_intercept=False,
+    )
+    train_metrics["alpha"] = best.alpha_
+
+cv = RepeatedKFold(n_splits=5, n_repeats=10)
+
+# train_metrics["l1_ratio"] = best.l1_ratio_
+# print(params)
+# model.set_params(**params)
+# train ElasticNet on full train dataset, using feature extraction from NBS-Predict
+# fitted = model.fit(X=train_features, y=np.ravel(train_outcome))
+scores = cross_validate(
+    model,
+    train_features,
+    train_outcome,
+    groups=groups,
+    cv=cv,
+    return_estimator=True,
+    return_train_score=True,
+)
+train_metrics["in_sample_test"] = np.mean(scores['test_score'])
+train_metrics["in_sample_train"] = np.mean(scores['train_score'])
+
+fitted = scores['estimator'][0]
+y_pred = fitted.predict(X=train_features)
+train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome)
+
+dat[f'{OUTCOME}_pred'] = y_pred
+dat[f'{OUTCOME}_scaled'] = train_outcome
+
+Ys = dat[[f'{OUTCOME}_pred', f'{OUTCOME}_scaled', 'bc', 'cycle_day']]
+Ys.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t')
+
+train_colors = ['#a08ad1',  # light
+                '#685690',  # medium
+                '#3f2d69']  # dark
+light_cmap = sns.color_palette('dark:#a08ad1')
+dark_cmap = sns.color_palette('dark:#685690')
+
+fig, ax = plt.subplots()
+g = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_pred',
+                    style='bc',
+                    data=Ys,
+                    ax=ax,
+                    palette=dark_cmap)
+h = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_scaled',
+                    style='bc',
+                    data=Ys,
+                    ax=ax,
+                    palette=light_cmap)
+ax.legend(bbox_to_anchor=(1.0, 0.5))
+fig.savefig(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight')
+
+mse = mean_squared_error(train_outcome, y_pred)
+train_metrics["mean squared error"] = mse
+print("In-sample train score: ", train_metrics["in_sample_train"])
+print("In-sample test score: ", train_metrics["in_sample_test"])
+print("In-sample mean squared error: ", mse)
+# print(np.mean(train_features))
+with open(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w"
+) as fp:
+    json.dump(train_metrics, fp)
+
+# yoink the coefficients? for a more parsimonious figure?
+# print(fitted.coef_.shape)
+# print(fitted.coef_)
+coeff_vec = np.zeros_like(filter, dtype=float)  # float dtype; plain zeros_like(filter) would inherit the mask's boolean dtype and truncate the betas
+j = 0
+for i in range(0, filter.shape[0]):
+    if filter[i]:
+        # print(j)
+        # print(fitted.coef_[0, j])
+        coeff_vec[i] = fitted.coef_[0, j]
+        j += 1
+    else:
+        pass
+
+# print(coeff_vec)
+print(coeff_vec)
+coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node)
+
+coef_df = pd.DataFrame(coef_mat, columns=avg_df.columns, index=avg_df.index)
+coef_df.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.csv"))
+
+fig, fig2, nimg = io.plot_edges(
+    coef_mat,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Coefficients",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.png"),
+    dpi=400,
+)
+nib.save(
+    nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}")
+)
+
+
+layout = bids.BIDSLayout(TEST_DSET, derivatives=True)
+
+test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+test_df['estradiol÷progesterone'] = test_df['estradiol'] / test_df['progesterone']
+
+keep = test_df[[OUTCOME, "adj"]].dropna().index
+# print(keep)
+
+test_df = test_df.loc[keep]
+
+outcome_test = test_df[OUTCOME].values
+# print(test_df)
+
+# print(outcome_test)
+matrices_test = np.vstack(test_df["adj"].dropna().values).reshape(
+    (len(test_df["adj"].dropna().index), num_node, num_node)
+)
+edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter]
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_test = test_df[CONFOUNDS].values
+
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_test)) <= 2:
+        resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test)
+        test_outcome = outcome_test
+    elif len(np.unique(outcome_test)) > 3:
+        test_outcome, resid_edges = nbs.residualize(
+            X=edges_test, y=outcome_test, confounds=confounds_test
+        )
+    test_features = resid_edges
+else:
+    test_features = edges_test
+    test_outcome = outcome_test
+
+# scale after residualizing omg
+test_features = x_scaler.transform(test_features)
+if len(np.unique(test_outcome)) <= 2:
+    pass
+else:
+    test_outcome = y_scaler.transform(test_outcome.reshape(-1, 1))
+# print(test_features.shape)
+# if the model is a logistic regression, i.e. with a binary outcome
+# then score is prediction accuracy
+# if the model is a linear regression, i.e., with a continuous outcome
+# then the score is R^2 (coefficient of determination)
+
+# fit trained ElasticNet, initialized via warm_start
+# prob in CV?
+# fitted_test = fitted.fit(X=test_features, y=np.ravel(test_outcome))
+# score = fitted_test.score(X=test_features, y=np.ravel(test_outcome))
+test_metrics = {}
+
+# cross_validate(model, )
+y_pred = fitted.predict(X=test_features)
+score = fitted.score(X=test_features, y=np.ravel(test_outcome))
+if len(np.unique(test_outcome)) == 2:
+    test_metrics["accuracy"] = score
+else:
+    test_metrics["coefficient of determination"] = score
+corr = spearmanr(test_outcome, y_pred)
+test_metrics["pred_v_actual_corr"] = corr
+mse = mean_squared_error(test_outcome, y_pred)
+test_metrics["mean squared error"] = mse
+print("Out-of-sample prediction score:\t", score)
+print("Out-of-sample mean squared error:\t", mse)
+# print(np.mean(test_features))
+# pred_outcome = fitted.predict(test_features)
+test_df[f'{OUTCOME}_scaled'] = test_outcome
+test_df[f'{OUTCOME}_pred'] = y_pred
+Ys = test_df[[f'{OUTCOME}_scaled',
+              f'{OUTCOME}_pred',
+              'cycle_day',
+              'bc']]
+Ys.to_csv(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t')
+
+Ys['ppts'] = Ys.index.get_level_values(0)
+
+
+light_colors = ['#33ACE3',  # Bubbles
+                '#EA6964',  # Blossom
+                '#4AB62C']  # Buttercup
+dark_colors = ['#1278a6',
+               '#a11510',
+               '#228208']
+light = ListedColormap(light_colors, name='light_powderpuff')
+dark = ListedColormap(dark_colors, name='dark_powderpuff')
+mpl.colormaps.register(cmap=light)
+mpl.colormaps.register(cmap=dark)
+
+fig, ax = plt.subplots()
+g = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_pred',
+                    style='bc',
+                    data=Ys,
+                    hue='ppts',
+                    hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'],
+                    ax=ax,
+                    palette='light_powderpuff')
+h = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_scaled',
+                    style='bc',
+                    data=Ys,
+                    hue='ppts',
+                    hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'],
+                    ax=ax,
+                    palette='dark_powderpuff')
+ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')
+fig.savefig(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight')
+
+
+# print(test_outcome, "\n", y_pred)
+# print(pred_outcome)
+if len(np.unique(test_outcome)) > 2:
+    print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr)
+    test_metrics["spearman correlation"] = corr
+with open(
+    join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w"
+) as fp:
+    json.dump(test_metrics, fp)
+np.savetxt(join(TEST_DSET, f"{base_name}_predicted-values_fit-{today_str}.txt"), y_pred)
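A serialization note that applies to all four workflows: test_metrics["pred_v_actual_corr"] holds the result object from spearmanr, which json.dump flattens into a bare [rho, p] list. Storing named fields is friendlier to downstream readers (attribute names per recent scipy; older releases expose corr.correlation instead of corr.statistic):

# Assumes scipy >= 1.9 attribute names; fall back to corr.correlation if older.
test_metrics["pred_v_actual_corr"] = {"rho": float(corr.statistic),
                                      "p": float(corr.pvalue)}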
"/Users/katherine.b/Dropbox/Data/ds002674" +TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset" +DERIV_NAME = "IDConn" +OUTCOME = "progesterone" +CONFOUNDS = ["framewise_displacement"] +TASK = "rest" +ATLAS = "craddock2012" +THRESH = 0.5 +alpha = 0.01 +atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz" + + +layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True) + +dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False) + +keep = dat["adj"].dropna().index +dat = dat.loc[keep] + +groups = dat["bc"] +# print(dat['adj'].values.shape) +num_node = dat.iloc[0]["adj"].shape[0] + +matrices = np.vstack(dat["adj"].values).reshape((len(keep), num_node, num_node)) +upper_tri = np.triu_indices(num_node, k=1) + +outcome = np.reshape(dat[OUTCOME].values, (len(dat[OUTCOME]), 1)) + +#print(len(np.unique(outcome))) + +if CONFOUNDS is not None: + confounds = dat[CONFOUNDS] + base_name = f"nbs-predict_outcome-{OUTCOME}_confounds-{CONFOUNDS}" +else: + confounds = None + base_name = f"nbs-predict_outcome-{OUTCOME}" +# print(dat['bc']) + +weighted_average, cv_results = nbs.kfold_nbs( + matrices, outcome, confounds, alpha, groups=groups, n_splits=5, n_iterations=1000 +) + +fig, fig2, nimg = io.plot_edges( + weighted_average, + atlas_fname, + threshold="computed", + title=f"{OUTCOME} Precision-Weighted Average", + strength=True, + cmap="seismic", + node_size="strength", +) + +fig.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.png"), dpi=400 +) +fig2.savefig( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}.png"), + dpi=400, +) +nib.save( + nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-strength-{today_str}") +) + + +avg_df = pd.DataFrame( + weighted_average, + index=range(0, weighted_average.shape[0]), + columns=range(0, weighted_average.shape[1]), +) + +cv_results.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_models-{today_str}.tsv"), sep="\t" +) +avg_df.to_csv( + join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_weighted-{today_str}.tsv"), sep="\t" +) + +best = cv_results.sort_values(by='score', ascending=False).iloc[0]['model'] + +# this uses the most predictive subnetwork as features in the model +# might replace with thresholded weighted_average +# or use _all_ the edges in weighted_average with KRR or ElasticNet... +# ORRR use thresholded weighted average edges with ElasticNet... +# - stays true to NBS-Predict +# - increases parsimony while handling multicollinearity... 
+# either way, I don't think cv_results is necessary
+
+# here is where we'd threshold the weighted average to use for elastic-net
+weighted_average = np.where(weighted_average > 0, weighted_average, 0)
+# print(np.sum(weighted_average))
+# nbs_vector = weighted_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+# filter = np.where(nbs_vector >= p75, True, False)
+# print(np.sum(filter))
+# print(nbs_vector.shape, filter.shape)
+
+thresh_average = threshold_proportional(weighted_average, THRESH)
+nbs_vector2 = thresh_average[upper_tri]
+# p75 = np.percentile(nbs_vector, 75)
+filter = np.where(nbs_vector2 > 0, True, False)
+
+# mask = io.vectorize_corrmats(filter)
+edges_train = np.vstack(dat["edge_vector"].dropna().values)[:, filter]
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_train = dat[CONFOUNDS].values
+    outcome_train = np.reshape(outcome, (outcome.shape[0],))
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_train)) <= 2:
+        resid_edges = nbs.residualize(X=edges_train, confounds=confounds_train)
+        train_outcome = outcome
+    elif len(np.unique(outcome_train)) > 3:
+        train_outcome, resid_edges = nbs.residualize(
+            X=edges_train, y=outcome_train, confounds=confounds_train
+        )
+    train_features = resid_edges
+else:
+    train_features = edges_train
+    train_outcome = outcome
+
+x_scaler = StandardScaler()
+y_scaler = StandardScaler()
+train_features = x_scaler.fit_transform(train_features)
+if len(np.unique(train_outcome)) <= 2:
+    pass
+else:
+    train_outcome = y_scaler.fit_transform(train_outcome.reshape(-1, 1))
+
+
+
+# run the model on the whole test dataset to get params
+
+# classification if the outcome is binary (for now)
+# could be extended to the multiclass case?
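Because train_outcome is replaced by its standardized version just above, every "_scaled" column and MSE reported downstream is in z-units. Recovering original progesterone units would take the fitted scaler, e.g. (using the y_pred computed further down):

# Sketch: map standardized predictions back to original outcome units.
y_pred_raw = y_scaler.inverse_transform(np.asarray(y_pred).reshape(-1, 1))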
+train_metrics = {}
+if len(np.unique(outcome)) == 2:
+    model = LogisticRegression(
+        penalty="l2",
+        solver="saga",
+        C=best.C_[0],
+    )
+    train_metrics["alpha"] = best.C_[0]
+    # train_metrics["l1_ratio"] = best.l1_ratio_
+else:
+    model = Ridge(
+        solver="auto",
+        alpha=best.alpha_,
+        fit_intercept=False,
+    )
+    train_metrics["alpha"] = best.alpha_
+
+cv = RepeatedKFold(n_splits=5, n_repeats=10)
+
+# train_metrics["l1_ratio"] = best.l1_ratio_
+# print(params)
+# model.set_params(**params)
+# train ElasticNet on full train dataset, using feature extraction from NBS-Predict
+# fitted = model.fit(X=train_features, y=np.ravel(train_outcome))
+scores = cross_validate(
+    model,
+    train_features,
+    train_outcome,
+    groups=groups,
+    cv=cv,
+    return_estimator=True,
+    return_train_score=True,
+)
+train_metrics["in_sample_test"] = np.mean(scores['test_score'])
+train_metrics["in_sample_train"] = np.mean(scores['train_score'])
+
+fitted = scores['estimator'][0]
+y_pred = fitted.predict(X=train_features)
+train_metrics["true_v_pred_corr"] = spearmanr(y_pred, train_outcome)
+
+dat[f'{OUTCOME}_pred'] = y_pred
+dat[f'{OUTCOME}_scaled'] = train_outcome
+
+Ys = dat[[f'{OUTCOME}_pred', f'{OUTCOME}_scaled', 'bc', 'cycle_day']]
+Ys.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t')
+
+train_colors = ['#a08ad1',  # light
+                '#685690',  # medium
+                '#3f2d69']  # dark
+light_cmap = sns.color_palette('dark:#a08ad1')
+dark_cmap = sns.color_palette('dark:#685690')
+
+fig, ax = plt.subplots()
+g = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_pred',
+                    style='bc',
+                    data=Ys,
+                    ax=ax,
+                    palette=dark_cmap)
+h = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_scaled',
+                    style='bc',
+                    data=Ys,
+                    ax=ax,
+                    palette=light_cmap)
+ax.legend(bbox_to_anchor=(1.0, 0.5))
+fig.savefig(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight')
+
+mse = mean_squared_error(train_outcome, y_pred)
+train_metrics["mean squared error"] = mse
+print("In-sample train score: ", train_metrics["in_sample_train"])
+print("In-sample test score: ", train_metrics["in_sample_test"])
+print("In-sample mean squared error: ", mse)
+# print(np.mean(train_features))
+with open(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w"
+) as fp:
+    json.dump(train_metrics, fp)
+
+# yoink the coefficients? for a more parsimonious figure?
+# print(fitted.coef_.shape)
+coeff_vec = np.zeros_like(filter, dtype=float)  # float dtype; plain zeros_like(filter) would inherit the mask's boolean dtype and truncate the betas
+j = 0
+for i in range(0, filter.shape[0]):
+    if filter[i]:
+        # print(j)
+        coeff_vec[i] = fitted.coef_[0, j]
+        j += 1
+    else:
+        pass
+
+# print(coeff_vec)
+
+coef_mat = io.undo_vectorize(coeff_vec, num_node=num_node)
+coef_df = pd.DataFrame(coef_mat, columns=avg_df.columns, index=avg_df.index)
+coef_df.to_csv(join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.csv"))
+
+fig, fig2, nimg = io.plot_edges(
+    coef_mat,
+    atlas_fname,
+    threshold="computed",
+    title=f"{OUTCOME} Coefficients",
+    strength=True,
+    cmap="seismic",
+    node_size="strength",
+)
+
+fig.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-{today_str}.png"), dpi=400
+)
+fig2.savefig(
+    join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}.png"),
+    dpi=400,
+)
+nib.save(
+    nimg, join(TRAIN_DSET, "derivatives", DERIV_NAME, f"{base_name}_betas-strength-{today_str}")
+)
+
+
+layout = bids.BIDSLayout(TEST_DSET, derivatives=True)
+
+test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+
+keep = test_df[[OUTCOME, "adj"]].dropna().index
+# print(keep)
+
+test_df = test_df.loc[keep]
+
+outcome_test = test_df[OUTCOME].values
+# print(test_df)
+
+# print(outcome_test)
+matrices_test = np.vstack(test_df["adj"].dropna().values).reshape(
+    (len(test_df["adj"].dropna().index), num_node, num_node)
+)
+edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter]
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_test = test_df[CONFOUNDS].values
+
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_test)) <= 2:
+        resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test)
+        test_outcome = outcome_test
+    elif len(np.unique(outcome_test)) > 3:
+        test_outcome, resid_edges = nbs.residualize(
+            X=edges_test, y=outcome_test, confounds=confounds_test
+        )
+    test_features = resid_edges
+else:
+    test_features = edges_test
+    test_outcome = outcome_test
+
+# scale after residualizing omg
+test_features = x_scaler.transform(test_features)
+if len(np.unique(test_outcome)) <= 2:
+    pass
+else:
+    test_outcome = y_scaler.transform(test_outcome.reshape(-1, 1))
+# print(test_features.shape)
+# if the model is a logistic regression, i.e. with a binary outcome
+# then score is prediction accuracy
+# if the model is a linear regression, i.e., with a continuous outcome
+# then the score is R^2 (coefficient of determination)
+
+# fit trained ElasticNet, initialized via warm_start
+# prob in CV?
+# fitted_test = fitted.fit(X=test_features, y=np.ravel(test_outcome))
+# score = fitted_test.score(X=test_features, y=np.ravel(test_outcome))
+test_metrics = {}
+
+# cross_validate(model, )
+y_pred = fitted.predict(X=test_features)
+score = fitted.score(X=test_features, y=np.ravel(test_outcome))
+if len(np.unique(test_outcome)) == 2:
+    test_metrics["accuracy"] = score
+else:
+    test_metrics["coefficient of determination"] = score
+corr = spearmanr(test_outcome, y_pred)
+test_metrics["pred_v_actual_corr"] = corr
+mse = mean_squared_error(test_outcome, y_pred)
+test_metrics["mean squared error"] = mse
+print("Out-of-sample prediction score:\t", score)
+print("Out-of-sample mean squared error:\t", mse)
+# print(np.mean(test_features))
+# pred_outcome = fitted.predict(test_features)
+test_df[f'{OUTCOME}_scaled'] = test_outcome
+test_df[f'{OUTCOME}_pred'] = y_pred
+Ys = test_df[[f'{OUTCOME}_scaled',
+              f'{OUTCOME}_pred',
+              'cycle_day',
+              'bc']]
+Ys.to_csv(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.tsv"), sep='\t')
+
+Ys['ppts'] = Ys.index.get_level_values(0)
+
+
+light_colors = ['#33ACE3',  # Bubbles
+                '#EA6964',  # Blossom
+                '#4AB62C']  # Buttercup
+dark_colors = ['#1278a6',
+               '#a11510',
+               '#228208']
+light = ListedColormap(light_colors, name='light_powderpuff')
+dark = ListedColormap(dark_colors, name='dark_powderpuff')
+mpl.colormaps.register(cmap=light)
+mpl.colormaps.register(cmap=dark)
+
+fig, ax = plt.subplots()
+g = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_pred',
+                    style='bc',
+                    data=Ys,
+                    hue='ppts',
+                    hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'],
+                    ax=ax,
+                    palette='light_powderpuff')
+h = sns.scatterplot(x='cycle_day',
+                    y=f'{OUTCOME}_scaled',
+                    style='bc',
+                    data=Ys,
+                    hue='ppts',
+                    hue_order=['sub-Bubbles', 'sub-Blossom', 'sub-Buttercup'],
+                    ax=ax,
+                    palette='dark_powderpuff')
+ax.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')
+fig.savefig(join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_actual-predicted.png"), dpi=400, bbox_inches='tight')
+
+
+# print(test_outcome, "\n", y_pred)
+# print(pred_outcome)
+if len(np.unique(test_outcome)) > 2:
+    print(f"\nSpearman correlation between predicted and actual {OUTCOME}:\t", corr)
+    test_metrics["spearman correlation"] = corr
+with open(
+    join(TEST_DSET, "derivatives", DERIV_NAME, f"{base_name}_fit-{today_str}.json"), "w"
+) as fp:
+    json.dump(test_metrics, fp)
+np.savetxt(join(TEST_DSET, f"{base_name}_predicted-values_fit-{today_str}.txt"), y_pred)
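One ordering caveat for the nbs_predict.py hunks below: the test-set block is moved above the NBS run, but edges_test still indexes with filter, which is only computed after kfold_nbs and thresholding. The masking (and the residualization that consumes it) has to stay downstream; a minimal reordering sketch, using the script's own names:

# Sketch: load test edge vectors up front, defer the mask until filter exists.
edges_test_all = np.vstack(test_df["edge_vector"].dropna().values)
# ... run kfold_nbs and threshold the weighted average to define `filter` ...
edges_test = edges_test_all[:, filter]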
diff --git a/idconn/workflows/nbs_predict.py b/idconn/workflows/nbs_predict.py
index 50563e7..46e804c 100644
--- a/idconn/workflows/nbs_predict.py
+++ b/idconn/workflows/nbs_predict.py
@@ -29,21 +29,21 @@
 today = datetime.today()
 today_str = strftime("%m_%d_%Y")
 
-TRAIN_DSET = "/Users/katherine.b/Dropbox/Data/ds002674"
-TEST_DSET = "/Users/katherine.b/Dropbox/Data/diva-dset"
+TRAIN_DSET = ""
+TEST_DSET = ""
 DERIV_NAME = "IDConn"
-OUTCOME = "bc"
+OUTCOME = ""
 CONFOUNDS = "framewise_displacement"
 TASK = "rest"
 ATLAS = "craddock2012"
 THRESH = 0.5
 alpha = 0.05
-atlas_fname = "/Users/katherine.b/Dropbox/HPC-Backup-083019/physics-retrieval/craddock2012_tcorr05_2level_270_2mm.nii.gz"
+atlas_fname = "craddock2012_tcorr05_2level_270_2mm.nii.gz"
 
-layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True)
+train_layout = bids.BIDSLayout(TRAIN_DSET, derivatives=True)
 
-dat = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+dat = io.read_corrmats(train_layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
 
 keep = dat["adj"].dropna().index
 dat = dat.loc[keep]
@@ -65,6 +65,47 @@
 base_name = f"nbs-predict_outcome-{OUTCOME}"
 # print(dat['bc'])
 
+# load in test data
+test_layout = bids.BIDSLayout(TEST_DSET, derivatives=True)
+
+test_df = io.read_corrmats(test_layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
+
+keep = test_df[[OUTCOME, "adj"]].dropna().index
+# print(keep)
+
+test_df = test_df.loc[keep]
+
+outcome_test = test_df[OUTCOME].values
+# print(test_df)
+
+# print(outcome_test)
+matrices_test = np.vstack(test_df["adj"].dropna().values).reshape(
+    (len(test_df["adj"].dropna().index), num_node, num_node)
+)
+edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter]
+
+
+
+# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
+if CONFOUNDS is not None:
+    confounds_test = test_df[CONFOUNDS].values
+
+    # regress out the confounds from each edge and the outcome variable,
+    # use the residuals for the rest of the algorithm
+    # print(confounds.shape, outcome.shape)
+    if len(np.unique(outcome_test)) <= 2:
+        resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test)
+        test_outcome = outcome_test
+    elif len(np.unique(outcome_test)) > 3:
+        test_outcome, resid_edges = nbs.residualize(
+            X=edges_test, y=outcome_test, confounds=confounds_test
+        )
+    test_features = resid_edges
+else:
+    test_features = edges_test
+    test_outcome = outcome_test
+
+
 weighted_average, cv_results = nbs.kfold_nbs(
     matrices, outcome, confounds, alpha, groups=groups, n_splits=5, n_iterations=1000
 )
@@ -269,43 +310,6 @@
 )
 
-layout = bids.BIDSLayout(TEST_DSET, derivatives=True)
-
-test_df = io.read_corrmats(layout, task=TASK, deriv_name="IDConn", atlas=ATLAS, z_score=False)
-
-keep = test_df[[OUTCOME, "adj"]].dropna().index
-# print(keep)
-
-test_df = test_df.loc[keep]
-
-outcome_test = test_df[OUTCOME].values
-# print(test_df)
-
-# print(outcome_test)
-matrices_test = np.vstack(test_df["adj"].dropna().values).reshape(
-    (len(test_df["adj"].dropna().index), num_node, num_node)
-)
-edges_test = np.vstack(test_df["edge_vector"].dropna().values)[:, filter]
-
-# NEED TO RESIDUALIZE IF CONFOUNDS IS NOT NONE
-if CONFOUNDS is not None:
-    confounds_test = test_df[CONFOUNDS].values
-
-    # regress out the confounds from each edge and the outcome variable,
-    # use the residuals for the rest of the algorithm
-    # print(confounds.shape, outcome.shape)
-    if len(np.unique(outcome_test)) <= 2:
-        resid_edges = nbs.residualize(X=edges_test, confounds=confounds_test)
-        test_outcome = outcome_test
-    elif len(np.unique(outcome_test)) > 3:
-        test_outcome, resid_edges = nbs.residualize(
-            X=edges_test, y=outcome_test, confounds=confounds_test
-        )
-    test_features = resid_edges
-else:
-    test_features = edges_test
-    test_outcome = outcome_test
-
 # scale after residualizing omg
 test_features = x_scaler.transform(test_features)
 if len(np.unique(test_outcome)) <= 2: