Merge branch 'develop' into bugfix/conform-to-v19-migration
alfonsomunozpomer authored Oct 13, 2023
2 parents bc40ee7 + 908307e commit b8a45e4
Showing 35 changed files with 528 additions and 211 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
-FROM quay.io/ebigxa/atlas-db-scxa-base:0.1.0
+FROM quay.io/ebigxa/atlas-db-scxa-base:0.15.0.0
 # debian
 
 ADD bin/* /usr/local/bin/
3 changes: 1 addition & 2 deletions bin/add_exps_to_collection.sh
@@ -2,8 +2,7 @@
 
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
 
 dbConnection=${dbConnection:-$1}
 COLL_ID=${COLL_ID:-$2}
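The same two-line change recurs in most of the scripts below: ${BASH_SOURCE[0]:-$0} falls back to $0 in shells where BASH_SOURCE is unset (zsh, for instance), and the source of the now-deleted bin/db_scxa_common.sh is either dropped or replaced with bin/common_routines.sh. A minimal sketch of the pattern:

    # Resolve the directory holding this script, falling back to $0 when
    # BASH_SOURCE is unavailable
    scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
    echo "Scripts live in $scriptDir"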
13 changes: 13 additions & 0 deletions bin/common_routines.sh
@@ -12,3 +12,16 @@ get_host_from_hostport() {
 get_port_from_hostport() {
   echo $(echo $1 | awk -F':' '{ print $2 }')
 }
+
+checkDatabaseConnection() {
+  pg_user=$(echo $1 | sed s+postgresql://++ | awk -F':' '{ print $1}')
+  pg_host_port=$(echo $1 | awk -F':' '{ print $3}' \
+    | awk -F'@' '{ print $2}' | awk -F'/' '{ print $1 }')
+  pg_host=$(echo $pg_host_port | awk -F':' '{print $1}')
+  pg_port=$(echo $pg_host_port | awk -F':' '{print $2}')
+  if [ ! -z "$pg_port" ]; then
+    pg_isready -U $pg_user -h $pg_host -p $pg_port || (echo "No db connection." && exit 1)
+  else
+    pg_isready -U $pg_user -h $pg_host || (echo "No db connection" && exit 1)
+  fi
+}
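A hedged usage sketch for the new helper and the existing host/port splitters (every connection detail below is made up; dbConnection is expected as a postgresql:// URI, the format the loader scripts pass in):

    source bin/common_routines.sh

    # Made-up credentials and hosts, for illustration only
    dbConnection="postgresql://atlas:secret@pg.example.org:5432/gxpscxadev"
    checkDatabaseConnection "$dbConnection"         # runs pg_isready; fails if the server is unreachable

    get_host_from_hostport "solr.example.org:8983"  # prints solr.example.org
    get_port_from_hostport "solr.example.org:8983"  # prints 8983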
4 changes: 2 additions & 2 deletions bin/create_collection.sh
@@ -2,8 +2,8 @@
 
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
+source $scriptDir/common_routines.sh
 
 dbConnection=${dbConnection:-$1}
 COLL_ID=${COLL_ID:-$2}
14 changes: 0 additions & 14 deletions bin/db_scxa_common.sh

This file was deleted.

3 changes: 1 addition & 2 deletions bin/delete_collection.sh
@@ -2,8 +2,7 @@
 
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
 
 dbConnection=${dbConnection:-$1}
 COLL_ID=${COLL_ID:-$2}
4 changes: 2 additions & 2 deletions bin/delete_exp_from_collection.sh
@@ -2,8 +2,8 @@
 
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
+source $scriptDir/common_routines.sh
 
 dbConnection=${dbConnection:-$1}
 COLL_ID=${COLL_ID:-$2}
3 changes: 1 addition & 2 deletions bin/get_experiment_info.sh
@@ -2,8 +2,7 @@
 
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
 
 postgres_scripts_dir=$scriptDir/../postgres_routines
 
4 changes: 2 additions & 2 deletions bin/load_db_scxa_analytics.sh
@@ -12,8 +12,8 @@
 # - Postprocess table and attach it to the main scxa-analytics table.
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
+source $scriptDir/common_routines.sh
 
 postgres_scripts_dir=$scriptDir/../postgres_routines
 
4 changes: 2 additions & 2 deletions bin/load_db_scxa_analytics_pg9.sh
@@ -10,8 +10,8 @@
 # PG10, which loads each experiment into a different partition.
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
+source $scriptDir/common_routines.sh
 
 dbConnection=${dbConnection:-$1}
 EXP_ID=${EXP_ID:-$2}
4 changes: 2 additions & 2 deletions bin/load_db_scxa_cell_clusters.sh
@@ -5,8 +5,8 @@
 # scxa_cell_group_membership table of AtlasProd.
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
+source $scriptDir/common_routines.sh
 
 dbConnection=${dbConnection:-$1}
 EXP_ID=${EXP_ID:-$2}
6 changes: 3 additions & 3 deletions bin/load_db_scxa_dimred.sh
@@ -5,15 +5,15 @@
 # parameterisations, and loads it into the scxa_coords table of AtlasProd.
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
+source $scriptDir/common_routines.sh
 
 dbConnection=${dbConnection:-$1}
 EXP_ID=${EXP_ID:-$2}
 DIMRED_TYPE=${DIMRED_TYPE:-$3}
 DIMRED_FILE_PATH=${DIMRED_FILE_PATH:-$4}
 DIMRED_PARAM_JSON=${DIMRED_PARAM_JSON:-$5}
-SCRATCH_DIR=${SCRATCH_DIR:-"$(dirname ${DIMRED_FILE_PATH})"}
+SCRATCH_DIR=${SCRATCH_DIR:-"$DIMRED_FILE_PATH"}
 
 # Check that necessary environment variables are defined.
 [ -n ${dbConnection+x} ] || (echo "Env var dbConnection for the database connection needs to be defined. This includes the database name." && exit 1)
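A hedged invocation sketch for this loader, passing everything positionally as the parameter block above allows (the connection string, accession, file path and JSON are all made up):

    bin/load_db_scxa_dimred.sh \
      "postgresql://atlas:secret@pg.example.org:5432/gxpscxadev" \
      "E-MTAB-0000" \
      "umap" \
      "/path/to/E-MTAB-0000.umap.tsv" \
      '{"n_neighbors": 15}'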
4 changes: 2 additions & 2 deletions bin/load_db_scxa_marker_genes.sh
@@ -6,8 +6,8 @@
 # scxa_cell_groups_marker_genes table of AtlasProd.
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
+source $scriptDir/common_routines.sh
 
 dbConnection=${dbConnection:-$1}
 EXP_ID=${EXP_ID:-$2}
13 changes: 13 additions & 0 deletions bin/load_exp_design.awk
@@ -0,0 +1,13 @@
+# Return the index of the first field that matches the given pattern, or 0 if it’s not found
+{
+  for (i = 1; i <= NF; ++i) {
+    field = $i;
+    if (field ~ pattern) {
+      print i;
+      exit;
+    }
+  }
+
+  print 0;
+  exit;
+}
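load_exp_design.sh below drives this helper over the SDRF file; because the program exits on the first record, only the header line is ever inspected. A stand-alone sketch (the file name and column are made up):

    # Prints the 1-based index of the first tab-separated header column
    # matching the pattern, or 0 if none does
    awk -F '\t' -v pattern='^Characteristics ?\\[organism\\]$' \
      -f bin/load_exp_design.awk E-MTAB-0000.sdrf.txt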
91 changes: 52 additions & 39 deletions bin/load_exp_design.sh
@@ -1,46 +1,59 @@
 #!/usr/bin/env bash
 
 set -e
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
+source "${SCRIPT_DIR}/common_routines.sh"
 
-scriptDir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
-source $scriptDir/db_scxa_common.sh
-
+# Alfonso is bothered about dbConnection, it shouldn’t be camelCased because:
+# 1. It’s a constant, it should be DB_CONNECTION
+# 2. We use snake_case for Bash variables
 dbConnection=${dbConnection:-$1}
-condensed_sdrf_file=${CONDENSED_SDRF_FILE:-$2}
-sdrf_file=${SDRF_FILE:-$3}
-
-# Check that necessary environment variables are defined.
-[ -z ${dbConnection+x} ] && echo "Env var dbConnection for the database connection needs to be defined. This includes the database name." && exit 1
-[ -z ${CONDENSED_SDRF_FILE+x} ] && echo "Env var CONDENSED_SDRF_FILE for the experiment design data needs to be defined." && exit 1
-[ -z ${SDRF_FILE+x} ] && echo "Env var SDRF_FILE for column sequence of experiment design needs to be defined." && exit 1
-
-# Reason for creating this array is to search factor value column
-# In some sdrf files this column is mentioned as "Factor Value" and in some as "FactorValue"
-FactorArray=( FactorValue "Factor Value" )
-
-# for experiment design column table we need to have unique experiment accession, column name and sample type
-# as they are the primary key for the table and we don't want to insert duplicate rows
-cut -f 1,4,5 $condensed_sdrf_file | sort | uniq | while read exp_acc sample_type col_name;
-do
-  if [ $sample_type == 'characteristic' ]
-  then
-    search_column="Characteristics[${col_name}]"
-    column_order=$(awk -v val="$search_column" -F '\t' '{for (i=1; i<=NF; i++) if ($i==val) {print i} }' $sdrf_file)
-  else
-    for element in "${FactorArray[@]}"; do
-      search_column="$element[${col_name}]"
-      column_order=$(awk -v val="$search_column" -F '\t' '{for (i=1; i<=NF; i++) if ($i==val) {print i} }' $sdrf_file)
-      if [[ -n "${column_order}" ]]; then
-        break
-      fi
-    done
-  fi
-  echo "INSERT INTO exp_design_column (experiment_accession, column_name, sample_type, column_order) VALUES ('$exp_acc', '$col_name', '$sample_type', '$column_order');" | psql -v ON_ERROR_STOP=1 $dbConnection
+CONDENSED_SDRF_FILE=${CONDENSED_SDRF_FILE:-$2}
+SDRF_FILE=${SDRF_FILE:-$3}
+
+# Check that necessary environment variables are defined
+require_env_var "dbConnection"
+require_env_var "CONDENSED_SDRF_FILE"
+require_env_var "SDRF_FILE"
+checkDatabaseConnection "${dbConnection}"
+
+EXPERIMENT_ACCESSION=$(head -1 "${CONDENSED_SDRF_FILE}" | cut -f 1)
+DESTINATION_FILE=${SCRATCH_DIR:-${SCRIPT_DIR}}/${EXPERIMENT_ACCESSION}-exp-design.sql
+# Remove DESTINATION_FILE if it exists
+rm -f ${DESTINATION_FILE}
+
+# Create the file and enclose all INSERT statements in a transaction
+echo "BEGIN;" >> ${DESTINATION_FILE}
+
+# In the experiment design column table we use the experiment accession, column name and sample type as the primary key
+cut -f 1,4,5 "${CONDENSED_SDRF_FILE}" | sort | uniq | while read experiment_accession sample_type column_name; do
+  if [ "$sample_type" == 'characteristic' ]; then
+    sdrf_column_index=$(awk -F '\t' -v pattern="^Characteristics ?\\\[${column_name}\\\]$" -f ${SCRIPT_DIR}/load_exp_design.awk ${SDRF_FILE})
+  else
+    sdrf_column_index=$(awk -F '\t' -v pattern="^Factor ?Value ?\\\[${column_name}\\\]$" -f ${SCRIPT_DIR}/load_exp_design.awk ${SDRF_FILE})
+  fi
+  sql_statement="INSERT INTO exp_design_column (experiment_accession, sample_type, column_name, column_order) VALUES ('${experiment_accession}', '${sample_type}', '${column_name}', '${sdrf_column_index}');"
+  echo "${sql_statement}" >> ${DESTINATION_FILE}
 done
 
-while IFS=$'\t' read exp_acc sample sample_type col_name annot_value annot_url
-do
-  echo "INSERT INTO exp_design (sample, annot_value, annot_ont_uri, exp_design_column_id) VALUES ('$sample', '$annot_value', '$annot_url', (SELECT id FROM exp_design_column WHERE experiment_accession='$exp_acc' AND column_name='$col_name' AND sample_type='$sample_type'));" | psql -v ON_ERROR_STOP=1 $dbConnection
-done < $condensed_sdrf_file
+# Add the columns from the condensed SDRF file.
+# Fields in the condensed SDRF that aren’t in the SDRF are assigned a column_order value of 0 by the AWK script.
+# We need to assign them a value that is greater than the maximum column_order value for the experiment.
+# The column_order value is used to order the columns in the UI and is not used for the primary key, so it’s ok to have
+# duplicates; we can order the fields with the same column_order by name if necessary.
+sql_statement="UPDATE exp_design_column SET column_order=(SELECT MAX(column_order) FROM exp_design_column WHERE experiment_accession='${EXPERIMENT_ACCESSION}')+1 WHERE column_order=0 AND experiment_accession='${EXPERIMENT_ACCESSION}';"
+echo "${sql_statement}" >> ${DESTINATION_FILE}
+
+# Insert the experiment design data.
+while IFS=$'\t' read -r experiment_accession sample sample_type column_name annotation_value annotation_url; do
+  sql_statement="INSERT INTO exp_design (sample, annot_value, annot_ont_uri, exp_design_column_id) VALUES ('${sample}', '${annotation_value}', '${annotation_url}', (SELECT id FROM exp_design_column WHERE experiment_accession='${experiment_accession}' AND column_name='${column_name}' AND sample_type='${sample_type}'));"
+  echo "${sql_statement}" >> ${DESTINATION_FILE}
+done < "$CONDENSED_SDRF_FILE"
+
+# Finish the transaction
+echo "COMMIT;" >> ${DESTINATION_FILE}
+
+PSQL_CMD="psql -qv ON_ERROR_STOP=1 ${dbConnection} -f ${DESTINATION_FILE}"
+echo ${PSQL_CMD}
+eval ${PSQL_CMD}
-
-echo "Experiment design data done loading for $condensed_sdrf_file"
+echo "$CONDENSED_SDRF_FILE: finished loading experiment design"
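For reference, a hedged sketch of how the rewritten loader might be driven (paths, accession and credentials are all made up; the experiment accession is actually read from column 1 of the condensed SDRF):

    export dbConnection="postgresql://atlas:secret@pg.example.org:5432/gxpscxadev"
    export CONDENSED_SDRF_FILE=/path/to/E-MTAB-0000.condensed-sdrf.tsv
    export SDRF_FILE=/path/to/E-MTAB-0000.sdrf.txt
    export SCRATCH_DIR=/tmp

    # Writes /tmp/E-MTAB-0000-exp-design.sql, then executes it in one psql transaction
    bin/load_exp_design.sh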
24 changes: 7 additions & 17 deletions bin/load_experiment_web_cli.sh
@@ -1,44 +1,34 @@
 #!/usr/bin/env bash
 #
 # This script:
-# - Checks if the experiment is loaded and stops it is already loaded.
-# - Adds the appropiate line to the experiments table if it doesn't exist.
-# - Generates the experiment design file from condensed SDRF and SDRF files in $EXPERIMENT_FILES/expdesign
+# - Checks if the experiment is loaded and stops if it is already loaded.
+# - Adds the appropriate line to the experiments table if it doesn't exist.
+# - Generates the experiment design file from condensed SDRF and SDRF files in $EXPERIMENT_DESIGN_FILES
 #
 # Most of the variables required for this are usually defined in the environment file for each setup (test, prod, etc).
-# The experiment designs file might need to be synced to an appropiate location at the web application instance disk
+# The experiment designs file might need to be synced to an appropriate location at the web application instance disk
 # depending on how the setup disk layout.
 
 jar_dir=$CONDA_PREFIX/share/atlas-cli
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
 source $scriptDir/common_routines.sh
 
 echo "CONDA_PREFIX: $CONDA_PREFIX"
 
 require_env_var "SOLR_HOST"
-require_env_var "ZK_HOST"
-require_env_var "ZK_PORT"
 require_env_var "BIOENTITIES"
-require_env_var "EXPERIMENT_FILES"
+require_env_var "EXPERIMENT_DESIGN_FILES"
 require_env_var "jdbc_url"
 require_env_var "jdbc_username"
 require_env_var "jdbc_password"
 
 # Either ACCESSIONS or PRIVATE_ACCESSIONS need to be provided
 #require_env_var "ACCESSIONS"
 
-SOLR_PORT=$(get_port_from_hostport $SOLR_HOST)
-SOLR_HOST=$(get_host_from_hostport $SOLR_HOST)
-
-require_env_var "SOLR_PORT"
-
 java_opts="-Dsolr.host=$SOLR_HOST"
-java_opts="$java_opts -Dsolr.port=$SOLR_PORT"
-java_opts="$java_opts -Dzk.host=$ZK_HOST"
-java_opts="$java_opts -Dzk.port=$ZK_PORT"
 java_opts="$java_opts -Ddata.files.location=$BIOENTITIES"
-java_opts="$java_opts -Dexperiment.files.location=$EXPERIMENT_FILES"
+java_opts="$java_opts -Dexperiment.design.location=$EXPERIMENT_DESIGN_FILES"
 java_opts="$java_opts -Djdbc.url=$jdbc_url"
 java_opts="$java_opts -Djdbc.username=$jdbc_username"
 java_opts="$java_opts -Djdbc.password=$jdbc_password"
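A hedged sketch of the environment such a run expects after this change (every value below is made up):

    # Hypothetical environment file for a test setup
    export SOLR_HOST=solr.example.org:8983
    export BIOENTITIES=/nfs/production/bioentity_properties
    export EXPERIMENT_DESIGN_FILES=/nfs/production/scxa/expdesign
    export jdbc_url=jdbc:postgresql://pg.example.org:5432/gxpscxadev
    export jdbc_username=atlas
    export jdbc_password=secret
    export ACCESSIONS=E-MTAB-0000    # or PRIVATE_ACCESSIONS

    bin/load_experiment_web_cli.sh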
3 changes: 1 addition & 2 deletions bin/modify_collection.sh
@@ -2,8 +2,7 @@
 
 set -e
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-source $scriptDir/db_scxa_common.sh
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
 
 dbConnection=${dbConnection:-$1}
 COLL_ID=${COLL_ID:-$2}
16 changes: 3 additions & 13 deletions bin/update_experiment_web_cli.sh
@@ -11,32 +11,22 @@
 
 jar_dir=$CONDA_PREFIX/share/atlas-cli
 
-scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+scriptDir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )
 source $scriptDir/common_routines.sh
 
 echo "CONDA_PREFIX: $CONDA_PREFIX"
 
 require_env_var "SOLR_HOST"
-require_env_var "ZK_HOST"
-require_env_var "ZK_PORT"
 require_env_var "BIOENTITIES"
-require_env_var "EXPERIMENT_FILES"
+require_env_var "EXPERIMENT_DESIGN_FILES"
 require_env_var "jdbc_url"
 require_env_var "jdbc_username"
 require_env_var "jdbc_password"
 require_env_var "ACCESSIONS"
 
-SOLR_PORT=$(get_port_from_hostport $SOLR_HOST)
-SOLR_HOST=$(get_host_from_hostport $SOLR_HOST)
-
-require_env_var "SOLR_PORT"
-
 java_opts="-Dsolr.host=$SOLR_HOST"
-java_opts="$java_opts -Dsolr.port=$SOLR_PORT"
-java_opts="$java_opts -Dzk.host=$ZK_HOST"
-java_opts="$java_opts -Dzk.port=$ZK_PORT"
 java_opts="$java_opts -Ddata.files.location=$BIOENTITIES"
-java_opts="$java_opts -Dexperiment.files.location=$EXPERIMENT_FILES"
+java_opts="$java_opts -Dexperiment.design.location=$EXPERIMENT_DESIGN_FILES"
 java_opts="$java_opts -Djdbc.url=$jdbc_url"
 java_opts="$java_opts -Djdbc.username=$jdbc_username"
 java_opts="$java_opts -Djdbc.password=$jdbc_password"
2 changes: 1 addition & 1 deletion fixtures/generate-fixtures.sh
@@ -22,7 +22,7 @@
 
 # https://stackoverflow.com/questions/59895/how-can-i-get-the-source-directory-of-a-bash-script-from-within-the-script-itsel
 # https://stackoverflow.com/a/246128
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )"
 
 export POSTGRES_HOST=${POSTGRES_HOST:-localhost}
 export POSTGRES_PORT=${POSTGRES_PORT:-5432}
2 changes: 1 addition & 1 deletion fixtures/generate-tsv-fixture.sh
@@ -10,7 +10,7 @@
 # We choose 100 genes that are expressed in those cells via analytics table
 
 set -e
-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]:-$0}" )" &> /dev/null && pwd )"
 
 source ${SCRIPT_DIR}/utils.sh
 