-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add loading script to load experiment design data into database (#63)
* Add initial data loading script * Add script to load exp_design data * Add test for exp_design * remove unnecessary symbol * update bash path * update file permissions * Revert old change * update submodule * update submodule * Handle factor value column discrepancies * Add loop break * resolve duplicate constraint issue * add debug log Co-authored-by: Pedro Madrigal <[email protected]> --------- Co-authored-by: Pedro Madrigal <[email protected]>
- Loading branch information
1 parent
3d64dc2
commit 0ad240f
Showing
3 changed files
with
56 additions
and
1 deletion.
There are no files selected for viewing
Submodule atlas-schemas
updated
2 files
+21 −0 | flyway/gxa/migrations/V8__gxa-create-exp-design.sql | |
+20 −0 | flyway/scxa/migrations/V20__scxa-create-exp-design.sql |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#!/usr/bin/env bash
#
# Load experiment design data into the database.
#
# Inputs (each read from the environment variable, falling back to the
# positional argument when the variable is unset or empty):
#   dbConnection         / $1  database connection passed to psql; includes
#                              the database name
#   CONDENSED_SDRF_FILE  / $2  condensed SDRF file with the experiment design
#                              data
#   SDRF_FILE            / $3  SDRF file used to derive the column sequence of
#                              the experiment design

set -e

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
# shellcheck source=/dev/null
source "$scriptDir/db_scxa_common.sh"

dbConnection=${dbConnection:-$1}
condensed_sdrf_file=${CONDENSED_SDRF_FILE:-$2}
sdrf_file=${SDRF_FILE:-$3}

# Check that the necessary inputs are defined.
# The checks run on the resolved values (environment variable or positional
# fallback) and require them to be non-empty; testing only whether the
# environment variables are set would reject positional usage and would let
# an empty value through.
if [ -z "$dbConnection" ]; then
  echo "Env var dbConnection for the database connection needs to be defined. This includes the database name." >&2
  exit 1
fi
if [ -z "$condensed_sdrf_file" ]; then
  echo "Env var CONDENSED_SDRF_FILE for the experiment design data needs to be defined." >&2
  exit 1
fi
if [ -z "$sdrf_file" ]; then
  echo "Env var SDRF_FILE for column sequence of experiment design needs to be defined." >&2
  exit 1
fi
|
||
# The factor value column is spelled "Factor Value" in some SDRF files and
# "FactorValue" in others, so both prefixes are tried when searching for it.
FactorArray=( FactorValue "Factor Value" )

# Single quote character, used below to double embedded quotes so that values
# are safe inside SQL string literals.
sql_quote="'"

# For the experiment design column table we need unique experiment accession,
# column name and sample type, as they are the primary key for the table and we
# don't want to insert duplicate rows.
# Fields are split on tabs only (matching cut's delimiter) and read with -r so
# that spaces and backslashes inside a column name are preserved exactly.
cut -f 1,4,5 "$condensed_sdrf_file" | sort -u | while IFS=$'\t' read -r exp_acc sample_type col_name; do
  # Build the list of SDRF column headings that may hold this attribute.
  if [ "$sample_type" == 'characteristic' ]; then
    candidates=( "Characteristics[${col_name}]" )
  else
    candidates=()
    for element in "${FactorArray[@]}"; do
      candidates+=( "${element}[${col_name}]" )
    done
  fi

  # column_order is the 1-based index of the first field in the SDRF file that
  # equals a candidate heading. awk stops at the first match so that a heading
  # occurring more than once cannot produce a multi-line value. The heading is
  # passed through the environment rather than -v so backslashes in it are not
  # interpreted as escape sequences.
  column_order=""
  for search_column in "${candidates[@]}"; do
    column_order=$(SEARCH_COLUMN="$search_column" awk -F '\t' \
      '{ for (i = 1; i <= NF; i++) if ($i == ENVIRON["SEARCH_COLUMN"]) { print i; exit } }' \
      "$sdrf_file")
    if [[ -n "${column_order}" ]]; then
      break
    fi
  done

  if [[ -z "${column_order}" ]]; then
    # NOTE(review): the INSERT is still attempted with an empty column_order,
    # as before; whether the database accepts it depends on the table schema.
    echo "WARNING: no column found in $sdrf_file for $sample_type '$col_name' of $exp_acc" >&2
  fi

  # Double any single quotes so the values cannot terminate the SQL literals.
  exp_acc_sql=${exp_acc//$sql_quote/$sql_quote$sql_quote}
  col_name_sql=${col_name//$sql_quote/$sql_quote$sql_quote}
  sample_type_sql=${sample_type//$sql_quote/$sql_quote$sql_quote}

  printf '%s\n' "INSERT INTO exp_design_column (experiment_accession, column_name, sample_type, column_order) VALUES ('$exp_acc_sql', '$col_name_sql', '$sample_type_sql', '$column_order');" \
    | psql -v ON_ERROR_STOP=1 "$dbConnection"
done
|
||
# Insert one exp_design row per line of the condensed SDRF file, linking it to
# the matching exp_design_column row through a sub-select on
# (experiment_accession, column_name, sample_type).
#
# NOTE(review): tab is an IFS whitespace character, so consecutive tabs (empty
# fields) collapse into one separator. Six names are read here, while the
# column loader takes sample type and column name from fields 4 and 5; the two
# only line up if the file has an empty second column that collapses away.
# Confirm against the condensed SDRF format before changing the splitting.

# Single quote character, used to double embedded quotes so that values such as
# "Alzheimer's disease" are safe inside SQL string literals.
sql_quote="'"

while IFS=$'\t' read -r exp_acc sample sample_type col_name annot_value annot_url; do
  exp_acc_sql=${exp_acc//$sql_quote/$sql_quote$sql_quote}
  sample_sql=${sample//$sql_quote/$sql_quote$sql_quote}
  sample_type_sql=${sample_type//$sql_quote/$sql_quote$sql_quote}
  col_name_sql=${col_name//$sql_quote/$sql_quote$sql_quote}
  annot_value_sql=${annot_value//$sql_quote/$sql_quote$sql_quote}
  annot_url_sql=${annot_url//$sql_quote/$sql_quote$sql_quote}

  printf '%s\n' "INSERT INTO exp_design (sample, annot_value, annot_ont_uri, exp_design_column_id) VALUES ('$sample_sql', '$annot_value_sql', '$annot_url_sql', (SELECT id FROM exp_design_column WHERE experiment_accession='$exp_acc_sql' AND column_name='$col_name_sql' AND sample_type='$sample_type_sql'));" \
    | psql -v ON_ERROR_STOP=1 "$dbConnection"
done < "$condensed_sdrf_file"

echo "Experiment design data done loading for $condensed_sdrf_file"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters