Skip to content

Commit

Permalink
Depths (#50)
Browse files Browse the repository at this point in the history
* Adds functionality to export estimated depths of annual IRHs

* Incorporates layer depths functions into HPC scripts; sets random seed for reproducibility

* Updates SCRDIR assignment to nfs1 drive (lustre currently down)

* Adds a default depth-density profile for when no other densities are given

* Updates documentation on data download files

* Adds progress bar to results upload

* Adds option to output raw accum distribution data rather than summary stats

Co-authored-by: Durban Keeler <[email protected]>
  • Loading branch information
durbank and Durban Keeler authored Oct 21, 2021
1 parent 72c8639 commit 9cdb9c6
Show file tree
Hide file tree
Showing 13 changed files with 1,737 additions and 46 deletions.
5 changes: 3 additions & 2 deletions slurm/dataDMZ.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Script to create scratch directory and download data to it from gcloud
# This should be implemented in the Science DMZ nodes
# e.g. dtn01-dmz.chpc.utah.edu
# e.g. dtn07.chpc.utah.edu

# Define start time (for clocking execution speed)
t_start=`date +%s`
Expand All @@ -23,7 +23,8 @@ RHODIR="gcloud:CHPC/flight-density/"
echo "Creating scratch directory"

# Define and create scratch directory
SCRDIR="/scratch/general/lustre/u1046484/"
# SCRDIR="/scratch/general/lustre/u1046484/"
SCRDIR="/scratch/general/nfs1/u1046484/"
mkdir -p $SCRDIR

# Transfer input density data to scratch
Expand Down
2 changes: 1 addition & 1 deletion slurm/filter-file.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Example filter-file to subset which echogram directories to download
# Delete this file when all echogram directories are desired

# Include the 20111109 and 20161109 flightlines
# Flightline directories to include in data download
+ /20111109/**
+ /20161109/**
# Exclude everything else
Expand Down
14 changes: 3 additions & 11 deletions slurm/run_PAIPR.slrm
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,17 @@ SRCFILE="/uufs/chpc.utah.edu/common/home/u1046484/Codebase/PAIPR/src/scripts/pro
SRCDIR="/uufs/chpc.utah.edu/common/home/u1046484/Codebase/PAIPR/src/"
# directory to output results
OUTDIR=$startDIR
# Assign scratch directory to process data
SCRDIR="/scratch/general/lustre/u1046484/"

# create results directory in home space
#mkdir -p $OUTDIR
# Assign scratch directory to process data
SCRDIR="/scratch/general/nfs1/u1046484/"
# SCRDIR="/scratch/general/lustre/u1046484/"

# make scratch dir, copy input data there and cd to it
mkdir -p $SCRDIR #should already exist from dataDMZ.sh
cp $SRCFILE $SCRDIR
cp -r $SRCDIR $SCRDIR/src
cd $SCRDIR

# Data directory should have been assigned, created, and populated previously
# using the dataDMZ.sh script
#RHOFILE="gcloud:CHPC/flight/densities/..."
#DATADIR="gcloud:CHPC/IceBridge-raw/..."
#rclone copyto $RHOFILE $SCRDIR/rho_data.csv
#rclone copy $DATADIR $SCRDIR/Data/

# Create separate directory within scratch to store outputs
mkdir -p Outputs/

Expand Down
2 changes: 2 additions & 0 deletions slurm/slurm.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

# Development script for testing bash script functionality locally

# Define current working directory
startDIR=$(pwd)

Expand Down
2 changes: 1 addition & 1 deletion slurm/upload_results.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ t_start=`date +%s`

# Transfer sbatch source files and results to gcloud
echo "Transferring files to gcloud..."
rclone copy $SRC_DIR $CLOUD_DIR/$dir_name --transfers=8
rclone copy $SRC_DIR $CLOUD_DIR/$dir_name --transfers=8 --progress
echo "Transfer complete"

# Define end time and calculate execution time
Expand Down
8 changes: 4 additions & 4 deletions src/PAIPR-core/accum_GMix.m
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@

% If only one peak present, fall back to standard uni-modal
% Gaussian model
SMB_mu{j} = nanmean(SMB_mat(j,:));
SMB_std{j} = nanstd(SMB_mat(j,:));
SMB_mu{j} = mean(SMB_mat(j,:), 'omitnan');
SMB_std{j} = std(SMB_mat(j,:), 'omitnan');
SMB_scale{j} = 1;

end
Expand All @@ -145,8 +145,8 @@

% If mixture model fails, default to simple Gaussian
% distribution statistics
SMB_mu{j} = nanmean(SMB_mat(j,:));
SMB_std{j} = nanstd(SMB_mat(j,:));
SMB_mu{j} = mean(SMB_mat(j,:), 'omitnan');
SMB_std{j} = std(SMB_mat(j,:), 'omitnan');
SMB_scale{j} = 1;

end
Expand Down
43 changes: 43 additions & 0 deletions src/PAIPR-core/accum_raw.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
% ACCUM_RAW Reshapes raw Monte Carlo accumulation draws from a PAIPR
% radar results structure into long-form tables (one table per trace).
%
% INPUT:
%   radar: PAIPR results structure. Fields used here: collect_time, Lon,
%       Lat, elev (per-trace scalars), SMB_yr (cell array of year
%       vectors), and SMB (cell array of per-trace accumulation arrays,
%       one row per year and one column per MC draw)
% OUTPUT:
%   accum_table: 1xN cell array (N = number of traces); each element is
%       a table with one row per (year, MC draw) pair and variables
%       {collect_time, Lon, Lat, elev, Year, accum}, with Year cast to
%       int16 and positional/accum values cast to single to reduce the
%       size of exported files

function [accum_table] = accum_raw(radar)

% Preallocate cell for results
accum_table = cell(1,length(radar.SMB));

for i=1:length(radar.SMB)
    
    % Bundle this trace's scalar metadata with its year vector and
    % per-year draw vectors into a single-row intermediate table
    tbl_tmp = cell2table(...
        {radar.collect_time(i), radar.Lon(i), radar.Lat(i), ...
        radar.elev(i), radar.SMB_yr{i}, num2cell(radar.SMB{i},2)}, ...
        'VariableNames', ...
        {'collect_time', 'Lon', 'Lat', 'elev', 'Year', 'accum'});
    
    % Number of years (j_max) and MC draws per year (k_max) for trace i
    j_max = cellfun(@length, tbl_tmp.Year);
    k_max = max(cellfun(@(x) size(x,2), tbl_tmp.accum{:}));
    
    % Preallocate flattened (year x draw) output columns
    accum_i = zeros(j_max*k_max,1);
    year_i = zeros(j_max*k_max,1);
    
    % Flatten draws: block j of length k_max holds the draws for year j
    for j=1:j_max
        
        year_i((j-1)*k_max+1:(j-1)*k_max+k_max) = repmat(...
            tbl_tmp.Year{1}(j), k_max,1);
        accum_i((j-1)*k_max+1:(j-1)*k_max+k_max) = (tbl_tmp.accum{1}{j})';
        
    end
    
    % Replicate trace metadata to the flattened length and downcast to
    % compact types for smaller output files
    accum_table{i} = table(...
        repmat(tbl_tmp.collect_time, length(accum_i),1), ...
        cast(repmat(tbl_tmp.Lon, length(accum_i),1), 'single'), ...
        cast(repmat(tbl_tmp.Lat, length(accum_i),1), 'single'), ...
        cast(repmat(tbl_tmp.elev, length(accum_i),1), 'single'), ...
        cast(year_i, 'int16'), cast(accum_i, 'single'), ...
        'VariableNames', tbl_tmp.Properties.VariableNames);
    
end

end
69 changes: 69 additions & 0 deletions src/PAIPR-core/get_depths.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
% GET_DEPTHS Estimates the depth of each dated annual internal reflecting
% horizon (IRH) within distance-aggregated bins of a PAIPR radar results
% structure.
%
% INPUT:
%   radar: PAIPR results structure. Fields used here: dist (along-track
%       distance vector), SMB (cell array, one element per trace), ages
%       (depth x trace x simulation array of modeled ages), depth
%       (depth vector), collect_time, Lat, Lon, elev (per-trace values)
%   bin_size: requested along-track aggregation bin size, in meters
% OUTPUT:
%   yr_DepthTables: 1xB cell array (B = number of bins); each element is
%       a table with one row per retained year and variables
%       {collect_time, Lat, Lon, elev, IHR_year, IHR_depth (median),
%       IHR_std, IHR_cnt (non-NaN sample count)}
%       NOTE(review): "IHR_*" looks like a typo for "IRH_*" — left
%       unchanged since downstream consumers may key on these names

function [yr_DepthTables] = get_depths(radar, bin_size)

% Determine number of elements in each bin based on distance spacing and
% requested bin_size (bin_size in meters)
dist_interval = mean(diff(radar.dist));
stack_step = round(bin_size / dist_interval);

% Determine starting and stopping indices for different aggregated bins
% (the final bin absorbs any remainder shorter than stack_step)
start_idx = 1:stack_step:length(radar.SMB);
end_idx = [start_idx(2:end)-1 length(radar.SMB)];

% Preallocate cell array for annual layer depth tables
yr_DepthTables = cell(1, length(start_idx));


for i=1:length(yr_DepthTables)
    
    % Extract ages with current bin and reshape to 2D
    % (depth x [trace*simulation]; each column is one realization)
    ages_i = radar.ages(:,start_idx(i):end_idx(i),:);
    ages_i = reshape(ages_i, size(ages_i,1), []);
    
    % Get min/max ages in slice (years listed newest to oldest)
    yr_first = floor(max(max(ages_i)));
    yr_last = ceil(min(min(ages_i)));
    yr_list = (yr_first:-1:yr_last)';
    
    % Logical of integer year indices (true where the floored age
    % changes between adjacent depth samples, i.e. an annual crossing)
    yr_logic = logical(diff(floor(ages_i)));
    % Preallocate array for depth of annual IRHs
    yr_depths = NaN(length(yr_list), size(ages_i,2));
    for j=1:size(yr_logic,2)
        
        % Get depths of annual IRHs for each simulation
        % NOTE(review): row k of yr_depths is matched to yr_list(k);
        % this assumes every column's first crossing corresponds to
        % yr_first and that no column skips an intermediate year —
        % TODO confirm against how radar.ages is generated
        yr_idx = find(yr_logic(:,j));
        yr_depths(1:length(yr_idx),j) = radar.depth(yr_idx);
    end
    
    % Remove years with more than 25% NaN values
    nan_idx = sum(isnan(yr_depths),2)/size(yr_depths,2);
    yr_list = yr_list(nan_idx <= 0.25);
    yr_depths = yr_depths(nan_idx<=0.25,:);
    
    % Determine average bin values for remaining variables of interest
    yr_len = length(yr_list);
    time = mean(radar.collect_time(start_idx(i):end_idx(i)));
    lat = mean(radar.Lat(start_idx(i):end_idx(i)));
    lon = mean(radar.Lon(start_idx(i):end_idx(i)));
    elev = mean(radar.elev(start_idx(i):end_idx(i)));
    
    % Output years, median depth, and std. err for each bin
    yr_DepthTables{i} = table(...
        repmat(time, yr_len,1), repmat(lat, yr_len,1), ...
        repmat(lon, yr_len,1), repmat(elev, yr_len,1), ...
        yr_list, median(yr_depths,2, 'omitnan'), ...
        std(yr_depths,1,2,'omitnan'), ...
        sum(~isnan(yr_depths),2),...
        'VariableNames', {'collect_time', 'Lat', 'Lon', 'elev', ...
        'IHR_year', 'IHR_depth', 'IHR_std', 'IHR_cnt'});
    
    
end




end
56 changes: 46 additions & 10 deletions src/PAIPR-core/parsave.m
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
% loop (so as to preserve variable transparency). This saves both the .mat
% results of PAIPR as well as the .csv results of gamma-fitting

function [success] = parsave(mdata, csv_output, dist, bin_size, varargin)
function [success] = parsave(...
mdata, csv_output, dist, bin_size, mat_path, depth_path)

%% Fit distributions to results and save to disk

Expand Down Expand Up @@ -38,27 +39,62 @@

% Converts data to a long form space-time table
long_table = vertcat(table_cell{:});

case 'raw'
% Uses raw MC draws as output and formats results to match others
% as best as can be done
table_cell = accum_raw(mdata);

% Converts data to a long form space-time table
long_table = vertcat(table_cell{:});

end

% Create table for QC values of echogram image
QC_T = table(mdata.QC_flag, mdata.QC_med, mdata.QC_val, ...
mdata.QC_yr, 'VariableNames', ...
{'QC_flag', 'QC_med', 'QC_val', 'QC_yr'});

% Add variables for QC values of echogram image
full_table = [long_table repmat(QC_T, height(long_table), 1)];

% Save the file
writetable(full_table, csv_output);

if strcmp(dist,'raw')
[fn_dir, fn] = fileparts(csv_output);
QC_T.SMB_fn = fn;

QC_output = fullfile(fileparts(fn_dir), 'QC_tbl.csv');
if isfile(QC_output)
writetable(QC_T, QC_output, 'WriteMode', 'Append', ...
'WriteVariableNames', false);
else
writetable(QC_T, QC_output);
end

writetable(long_table, csv_output);
else

% Add variables for QC values of echogram image
full_table = [long_table repmat(QC_T, height(long_table), 1)];

% Save the file
writetable(full_table, csv_output);
end
%%

if length(varargin) == 1
mat_output = varargin{1};
save(mat_output, '-struct', 'mdata', '-v7.3')
% If full echogram file path given, save as .mat file
if ~isempty(mat_path)
save(mat_path, '-struct', 'mdata', '-v7.3')
end

% If layer_depth file path given, save as .csv
if ~isempty(depth_path)

% Calculate annual layer depths as save as table
IRH_depths = get_depths(mdata, bin_size);
depths_table = vertcat(IRH_depths{:});
writetable(depths_table, depth_path);
end




success = true;

end
56 changes: 45 additions & 11 deletions src/PAIPR-core/process_PAIPR.m
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,17 @@
RHO_PATH %string
OUTDIR %string
Ndraw %int16
NameValueArgs.VerboseOutput = false
NameValueArgs.VerboseOutput=false
NameValueArgs.LayerDepths=false
end

% Sets random number generator seed (for reproducibility)
rng(777)

% Directory containing raw OIB echograms
radar_dir = DATADIR;

% If VerboseOutput is flagged, create directory for full radar files
echo_out = fullfile(OUTDIR, 'echo');
if NameValueArgs.VerboseOutput == true
% Check for existence of echogram output dir and create as necessary
Expand All @@ -26,6 +31,14 @@
end
end

% If LayerDepths is flagged, create directory for depth files
depth_out = fullfile(OUTDIR, 'depth');
if NameValueArgs.LayerDepths == true
if 7~=exist(depth_out, 'dir')
mkdir(depth_out)
end
end

% Check for existence of .csv output directory and create as necessary
smb_out = fullfile(OUTDIR, 'smb');
if 7~=exist(smb_out, 'dir')
Expand Down Expand Up @@ -197,20 +210,41 @@
% Assign what distribution modeling to perform and bin size (in
% meters) to use
distribution = 'mixture';
distribution = 'raw';
bin_size = 200;

switch NameValueArgs.VerboseOutput
case true
mat_output = fullfile(echo_out, strcat(filename, '.mat'));
% Save output structures to disk
[success_codes(i)] = parsave(...
radar, csv_output, distribution, bin_size, mat_output);
case false
% Save output structures to disk
[success_codes(i)] = parsave(...
radar, csv_output, distribution, bin_size);
% Name echogram file path based on NameValueArgs
if (NameValueArgs.VerboseOutput==true)
mat_output = fullfile(echo_out, strcat(filename, '.mat'));
else
mat_output = [];
end

% Name layer-depth path based on NameValue Args
if (NameValueArgs.LayerDepths==true)
depth_output = fullfile(depth_out, strcat(filename, '.csv'));
else
depth_output = [];
end

% Save requested output structures
[success_codes(i)] = parsave(...
radar, csv_output, distribution, bin_size, ...
mat_output, depth_output)


% switch NameValueArgs.VerboseOutput
% case true
% mat_output = fullfile(echo_out, strcat(filename, '.mat'));
% % Save output structures to disk
% [success_codes(i)] = parsave(...
% radar, csv_output, distribution, bin_size, mat_output);
% case false
% % Save output structures to disk
% [success_codes(i)] = parsave(...
% radar, csv_output, distribution, bin_size);
% end

catch ME
fprintf(1, ['An error occurred while processing data in the '...
'following directory:\n %s\n'], files(i).folder);
Expand Down
Loading

0 comments on commit 9cdb9c6

Please sign in to comment.