Commit: Merge branch 'develop' into bugfix/conform-to-v19-migration
Showing 35 changed files with 528 additions and 211 deletions.
@@ -0,0 +1,13 @@
# Return the index of the first field that matches the given pattern, or 0 if it’s not found
{
    for (i = 1; i <= NF; ++i) {
        field = $i;
        if (field ~ pattern) {
            print i;
            exit;
        }
    }

    print 0;
    exit;
}
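Judging by how the rewritten loader below invokes it, this new 13-line file is presumably load_exp_design.awk: for the first input record it prints the index of the first field matching the regex passed in the pattern variable, or 0 if nothing matches, then exits. A minimal usage sketch; the file name, header text and pattern here are invented for illustration:

# Hypothetical SDRF header; the pattern tolerates both "Factor Value[...]" and "FactorValue[...]" spellings
printf 'Source Name\tFactorValue[age]\tCharacteristics[sex]\n' > example_sdrf_header.tsv
awk -F '\t' -v pattern='^Factor ?Value ?\\[age\\]$' -f load_exp_design.awk example_sdrf_header.tsv    # prints 2
awk -F '\t' -v pattern='^Characteristics ?\\[missing\\]$' -f load_exp_design.awk example_sdrf_header.tsv    # prints 0

The Bash script that drives it is reworked in the same commit: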
@@ -1,46 +1,59 @@
#!/usr/bin/env bash

set -e
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
source "${SCRIPT_DIR}/common_routines.sh"

scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
source $scriptDir/db_scxa_common.sh

# Alfonso is bothered about dbConnection, it shouldn’t be camelCased because:
# 1. It’s a constant, it should be DB_CONNECTION
# 2. We use snake_case for Bash variables
dbConnection=${dbConnection:-$1}
condensed_sdrf_file=${CONDENSED_SDRF_FILE:-$2}
sdrf_file=${SDRF_FILE:-$3}

# Check that necessary environment variables are defined.
[ -z ${dbConnection+x} ] && echo "Env var dbConnection for the database connection needs to be defined. This includes the database name." && exit 1
[ -z ${CONDENSED_SDRF_FILE+x} ] && echo "Env var CONDENSED_SDRF_FILE for the experiment design data needs to be defined." && exit 1
[ -z ${SDRF_FILE+x} ] && echo "Env var SDRF_FILE for column sequence of experiment design needs to be defined." && exit 1

# Reason for creating this array is to search factor value column
# In some sdrf files this column is mentioned as "Factor Value" and in some as "FactorValue"
FactorArray=( FactorValue "Factor Value" )

# for experiment design column table we need to have unique experiment accession, column name and sample type
# as they are the primary key for the table and we don't want to insert duplicate rows
cut -f 1,4,5 $condensed_sdrf_file | sort | uniq | while read exp_acc sample_type col_name;
do
    if [ $sample_type == 'characteristic' ]
    then
        search_column="Characteristics[${col_name}]"
        column_order=$(awk -v val="$search_column" -F '\t' '{for (i=1; i<=NF; i++) if ($i==val) {print i} }' $sdrf_file)
    else
        for element in "${FactorArray[@]}"; do
            search_column="$element[${col_name}]"
            column_order=$(awk -v val="$search_column" -F '\t' '{for (i=1; i<=NF; i++) if ($i==val) {print i} }' $sdrf_file)
            if [[ -n "${column_order}" ]]; then
                break
            fi
        done
    fi
    echo "INSERT INTO exp_design_column (experiment_accession, column_name, sample_type, column_order) VALUES ('$exp_acc', '$col_name', '$sample_type', '$column_order');" | psql -v ON_ERROR_STOP=1 $dbConnection
CONDENSED_SDRF_FILE=${CONDENSED_SDRF_FILE:-$2}
SDRF_FILE=${SDRF_FILE:-$3}

# Check that necessary environment variables are defined
require_env_var "dbConnection"
require_env_var "CONDENSED_SDRF_FILE"
require_env_var "SDRF_FILE"
checkDatabaseConnection "${dbConnection}"

EXPERIMENT_ACCESSION=$(head -1 "${CONDENSED_SDRF_FILE}" | cut -f 1)
DESTINATION_FILE=${SCRATCH_DIR:-${SCRIPT_DIR}}/${EXPERIMENT_ACCESSION}-exp-design.sql
# Remove DESTINATION_FILE if it exists
rm -f ${DESTINATION_FILE}

# Create the file and enclose all INSERT statements in a transaction
echo "BEGIN;" >> ${DESTINATION_FILE}

# In the experiment design column table we use the experiment accession, column name and sample type as the primary key
cut -f 1,4,5 "${CONDENSED_SDRF_FILE}" | sort | uniq | while read experiment_accession sample_type column_name; do
    if [ "$sample_type" == 'characteristic' ]; then
        sdrf_column_index=$(awk -F '\t' -v pattern="^Characteristics ?\\\[${column_name}\\\]$" -f ${SCRIPT_DIR}/load_exp_design.awk ${SDRF_FILE})
    else
        sdrf_column_index=$(awk -F '\t' -v pattern="^Factor ?Value ?\\\[${column_name}\\\]$" -f ${SCRIPT_DIR}/load_exp_design.awk ${SDRF_FILE})
    fi
    sql_statement="INSERT INTO exp_design_column (experiment_accession, sample_type, column_name, column_order) VALUES ('${experiment_accession}', '${sample_type}', '${column_name}', '${sdrf_column_index}');"
    echo "${sql_statement}" >> ${DESTINATION_FILE}
done

while IFS=$'\t' read exp_acc sample sample_type col_name annot_value annot_url
do
    echo "INSERT INTO exp_design (sample, annot_value, annot_ont_uri, exp_design_column_id) VALUES ('$sample', '$annot_value', '$annot_url', (SELECT id FROM exp_design_column WHERE experiment_accession='$exp_acc' AND column_name='$col_name' AND sample_type='$sample_type'));" | psql -v ON_ERROR_STOP=1 $dbConnection
done < $condensed_sdrf_file
# Add the columns from the condensed SDRF file.
# Fields in the condensed SDRF that aren’t in the SDRF are assigned a column_order value of 0 by the AWK script.
# We need to assign them a value that is greater than the maximum column_order value for the experiment.
# The column_order value is used to order the columns in the UI and is not used for the primary key, so it’s ok to have
# duplicates; we can order the fields with the same column_order by name if necessary.
sql_statement="UPDATE exp_design_column SET column_order=(SELECT MAX(column_order) FROM exp_design_column WHERE experiment_accession='${EXPERIMENT_ACCESSION}')+1 WHERE column_order=0 AND experiment_accession='${EXPERIMENT_ACCESSION}';"
echo "${sql_statement}" >> ${DESTINATION_FILE}

# Insert the experiment design data.
while IFS=$'\t' read -r experiment_accession sample sample_type column_name annotation_value annotation_url; do
    sql_statement="INSERT INTO exp_design (sample, annot_value, annot_ont_uri, exp_design_column_id) VALUES ('${sample}', '${annotation_value}', '${annotation_url}', (SELECT id FROM exp_design_column WHERE experiment_accession='${experiment_accession}' AND column_name='${column_name}' AND sample_type='${sample_type}'));"
    echo "${sql_statement}" >> ${DESTINATION_FILE}
done < "$CONDENSED_SDRF_FILE"

# Finish the transaction
echo "COMMIT;" >> ${DESTINATION_FILE}

PSQL_CMD="psql -qv ON_ERROR_STOP=1 ${dbConnection} -f ${DESTINATION_FILE}"
echo ${PSQL_CMD}
eval ${PSQL_CMD}

echo "Experiment design data done loading for $condensed_sdrf_file"
echo "$CONDENSED_SDRF_FILE: finished loading experiment design"