
Commit

…p/atlas-web-bulk into chore/add_back_experiment_design
ke4 committed Apr 10, 2024
2 parents fee8360 + 786a91f commit 98fa98f
Showing 10 changed files with 264 additions and 64 deletions.
15 changes: 13 additions & 2 deletions README.md
@@ -104,10 +104,21 @@ To create our PostGreSQL database and run the schema migrations up to the latest
```

### Solr
To create the collections, their schemas and populate them, please run the following script.
To create the collections and their schemas, and to populate them, please run the following scripts.

Currently, this step is split into two sub-steps, one per Solr collection.
There is an inconsistency between our web apps and the various shell scripts (which we use together with the Data Prod Team)
in how the `SOLR_HOST` and `SOLR_HOSTS` variables are used. We need to sort this out,
but until it is resolved we probably have to keep these two sub-steps, unless we find a way to merge them.
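
A minimal sketch of the inconsistency, with hypothetical values (the real ones come from the Docker Compose and env files in this repository): the same variable names are expected in different shapes by different scripts.

```bash
# Hypothetical values, for illustration only.
export SOLR_HOST="gxa-solrcloud-0:8983"                  # some scripts expect a bare host:port under this name
export SOLR_HOST="http://gxa-solrcloud-0:8983/solr"      # others expect a full URL under the same name
export SOLR_HOSTS="http://gxa-solrcloud-0:8983/solr,http://gxa-solrcloud-1:8983/solr"  # and SOLR_HOSTS is sometimes a comma-separated list of URLs
```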

To create and populate the `bioentities` collection:
```bash
./docker/prepare-dev-environment/solr/run.sh -r -l solr.log
./docker/prepare-dev-environment/solr-bioentities/run.sh -r -l solr-bioentities.log
```

To create and populate the `bulk-analytics` collection:
```bash
./docker/prepare-dev-environment/solr-analytics/run.sh -l solr-analytics.log
```

Run the script with the `-h` flag for more details.
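
For example (a usage sketch; the exact options are defined in each script's `print_usage` function):

```bash
./docker/prepare-dev-environment/solr-bioentities/run.sh -h
./docker/prepare-dev-environment/solr-analytics/run.sh -h
```
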
@@ -10,14 +10,12 @@
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Component;
import uk.ac.ebi.atlas.commons.readers.TsvStreamer;
import uk.ac.ebi.atlas.experimentpage.ExperimentDesignFile;
import uk.ac.ebi.atlas.experimentpage.ExternallyAvailableContentService;
import uk.ac.ebi.atlas.experimentpage.json.JsonBaselineExperimentController;
import uk.ac.ebi.atlas.experimentpage.qc.MicroarrayQcFiles;
import uk.ac.ebi.atlas.experimentpage.qc.QcReportController;
import uk.ac.ebi.atlas.model.download.ExternallyAvailableContent;
import uk.ac.ebi.atlas.model.experiment.Experiment;
import uk.ac.ebi.atlas.model.experiment.ExperimentDesignTable;
import uk.ac.ebi.atlas.model.experiment.ExperimentType;
import uk.ac.ebi.atlas.model.experiment.sample.ReportsGeneExpression;
import uk.ac.ebi.atlas.resource.DataFileHub;
@@ -85,12 +83,6 @@ private JsonObject experimentPageContentForExperiment(final Experiment<? extends
ExternallyAvailableContent.ContentType.PLOTS))));
}

if (dataFileHub.getExperimentFiles(experiment.getAccession()).experimentDesign.exists()) {
availableTabs.add(
experimentDesignTab(new ExperimentDesignTable(experiment).asJson(),
ExperimentDesignFile.makeUrl(experiment.getAccession(), accessKey)));
}

availableTabs.add(
customContentTab(
"multipart",
@@ -221,11 +213,4 @@ private JsonObject heatmapTab(JsonArray groups, String geneDistributionUrl, Json
props.add("availableDataUnits", availableDataUnits);
return customContentTab("heatmap", "Results", props);
}

private JsonObject experimentDesignTab(JsonObject table, String downloadUrl) {
JsonObject props = new JsonObject();
props.add("table", table);
props.addProperty("downloadUrl", downloadUrl);
return customContentTab("experiment-design", "Experiment Design", props);
}
}
10 changes: 9 additions & 1 deletion docker/docker-compose-solrcloud.yml
@@ -67,6 +67,8 @@ services:
SOLR_HEAP: 3g
SOLR_OPTS: -Denable.packages=true -Dsolr.max.booleanClauses=100000000
ZK_HOST: ${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_1_NAME}:2181,${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_2_NAME}:2181,${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_3_NAME}:2181
secrets:
- solrcloud.der
depends_on:
- gxa-solrcloud-zookeeper-0
- gxa-solrcloud-zookeeper-1
@@ -87,12 +89,18 @@ services:
SOLR_HEAP: 3g
SOLR_OPTS: -Denable.packages=true -Dsolr.max.booleanClauses=100000000
ZK_HOST: ${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_1_NAME}:2181,${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_2_NAME}:2181,${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_3_NAME}:2181
secrets:
- solrcloud.der
depends_on:
- gxa-solrcloud-zookeeper-0
- gxa-solrcloud-zookeeper-1
- gxa-solrcloud-zookeeper-2
command: "-q"

secrets:
solrcloud.der:
file: ${SOLR_PUBLIC_KEY:-/dev/null}

volumes:
zookeeper-0-data:
name: ${PROJECT_NAME}_solrcloud-zookeeper-0-data
@@ -113,4 +121,4 @@ volumes:

networks:
atlas-test-net:
name: atlas-test-net
name: atlas-test-net
23 changes: 23 additions & 0 deletions docker/prepare-dev-environment/solr-analytics/Dockerfile
@@ -0,0 +1,23 @@
FROM ubuntu:jammy

# Update package list and install necessary tools
# Python 3 is required by index-bioentities but comes as part of Ubuntu
RUN apt-get update && \
apt-get install -y --no-install-recommends \
git openjdk-11-jdk jq rsync curl

# Clean up APT cache and temporary files
RUN apt-get clean && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Create the necessary directory
RUN mkdir -p /root/bioentity-properties-jsonl
RUN mkdir -p /root/experiments-jsonl

# Clone the necessary repositories
WORKDIR /root
RUN git clone --depth 1 --recurse-submodules https://github.com/ebi-gene-expression-group/index-bioentities.git
RUN git clone --depth 1 --recurse-submodules https://github.com/ebi-gene-expression-group/atlas-web-bulk.git
RUN git clone --depth 1 --recurse-submodules https://github.com/ebi-gene-expression-group/solr-bulk.git

ENTRYPOINT ["/bin/bash", "-c"]
92 changes: 92 additions & 0 deletions docker/prepare-dev-environment/solr-analytics/docker-compose.yml
@@ -0,0 +1,92 @@
version: "3.6"

services:
solr-populator:
build:
context: ${DOCKERFILE_PATH}
no_cache: true
networks:
- atlas-test-net
ports:
- "8089:8089"
depends_on:
- gxa-postgres
- gxa-solrcloud-0
- gxa-solrcloud-1
volumes:
- gradle-wrapper-dists:/root/.gradle/wrapper/dists
- gradle-ro-dep-cache:/gradle-ro-dep-cache:ro
- atlas-data-bioentity-properties:/atlas-data/bioentity_properties
- atlas-data-exp:/atlas-data/exp
- atlas-data-expdesign:/atlas-data/expdesign
secrets:
- solrcloud.pem
environment:
JAVA_TOOL_OPTIONS: "-Dfile.encoding=UTF8"
JAVA_OPTS: "-Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory -Dbasicauth=${SOLR_USER}:${SOLR_PASSWORD}"
GRADLE_RO_DEP_CACHE: /gradle-ro-dep-cache
ZK_HOSTS: "${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_1_NAME}:2181,${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_2_NAME}:2181,${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_3_NAME}:2181"
SOLR_HOST: "http://${PROJECT_NAME}-${SOLR_CLOUD_CONTAINER_1_NAME}:8983/solr"
SOLR_NUM_SHARDS: 2
NUM_DOCS_PER_BATCH: 20000
SOLR_COLLECTION_BIOENTITIES: bioentities
SOLR_COLLECTION_BIOENTITIES_SCHEMA_VERSION: 1
SOLR_COLLECTION_BULK_ANALYTICS: bulk-analytics
SOLR_COLLECTION_BULK_ANALYTICS_SCHEMA_VERSION: 1
working_dir: /root
command:
- |
cd /root/atlas-web-bulk
./gradlew -PdataFilesLocation=/root \
-PexperimentFilesLocation=/atlas-data/exp \
-PexperimentDesignLocation=/atlas-data/expdesign \
-PjdbcUrl=jdbc:postgresql://${POSTGRES_HOST}:5432/${POSTGRES_DB} \
-PjdbcUsername=${POSTGRES_USER} \
-PjdbcPassword=${POSTGRES_PASSWORD} \
-PzkHosts=$${ZK_HOSTS} \
-PsolrHosts=$${SOLR_HOST} \
-PsolrUser=${SOLR_USER} \
-PsolrPassword=${SOLR_PASSWORD} \
:cli:bootRun --args="bulk-analytics-json --output=/root/experiments-jsonl -e $(echo ${EXP_IDS} ${PRIVATE_EXP_IDS} | sed -e "s/ /,/g")"
cd /root/solr-bulk/bin
export SOLR_HOST="${PROJECT_NAME}-${SOLR_CLOUD_CONTAINER_1_NAME}:8983"
./create-bulk-analytics-collection.sh
echo "bulk-analytics collection has been created"
./create-bulk-analytics-schema.sh
echo "bulk-analytics schema has been created"
cd /root/index-bioentities/bin
export SOLR_COLLECTION=$${SOLR_COLLECTION_BULK_ANALYTICS}
export SCHEMA_VERSION=$${SOLR_COLLECTION_BULK_ANALYTICS_SCHEMA_VERSION}
export SOLR_HOSTS="${PROJECT_NAME}-${SOLR_CLOUD_CONTAINER_1_NAME}:8983"
export SOLR_PROCESSORS=dedupe
for FILE in `ls /root/experiments-jsonl/*.jsonl`
do
INPUT_JSONL=$${FILE} ./solr-jsonl-chunk-loader.sh >> /dev/stdout 2>&1
done
secrets:
solrcloud.pem:
file: ${SOLR_PRIVATE_KEY:-/dev/null}

volumes:
gradle-wrapper-dists:
external: true
name: ${PROJECT_NAME}_${GRADLE_WRAPPER_DISTS_VOL_NAME}
gradle-ro-dep-cache:
external: true
name: ${PROJECT_NAME}_${GRADLE_RO_DEP_CACHE_VOL_NAME}
atlas-data-bioentity-properties:
external: true
name: ${PROJECT_NAME}_${ATLAS_DATA_BIOENTITY_PROPERTIES_VOL_NAME}
atlas-data-exp:
external: true
name: ${PROJECT_NAME}_${ATLAS_DATA_EXP_VOL_NAME}
atlas-data-expdesign:
external: true
name: ${PROJECT_NAME}_${ATLAS_DATA_EXPDESIGN_VOL_NAME}

networks:
atlas-test-net:
name: atlas-test-net
@@ -14,27 +14,22 @@ source ${ENV_FILE}
# print_error
source ${SCRIPT_DIR}/../utils.sh

REMOVE_VOLUMES=false
LOG_FILE=/dev/stdout
function print_usage() {
printf '\n%b\n' "Usage: ${0} [ -r ] [ -l FILE ]"
printf '\n%b\n' "Usage: ${0} [ -l FILE ]"
printf '\n%b\n' "Populate a Docker Compose SolrCloud 8 cluster with bulk Expression Atlas data."

printf '\n%b\n' "-r\t\tRemove volumes before creating them"
printf '\n%b\n' "-l FILE \tLog file (default is ${LOG_FILE})"
printf '%b\n\n' "-h\t\tDisplay usage instructions"
}


while getopts "k:o:l:rh" opt
while getopts "l:h" opt
do
case ${opt} in
l)
LOG_FILE=$OPTARG
;;
r)
REMOVE_VOLUMES=true
;;
h)
print_usage
exit 0
@@ -62,12 +57,6 @@ DOCKER_COMPOSE_SOLRCLOUD_COMMAND="docker compose \

DOCKER_COMPOSE_COMMAND_VARS="DOCKERFILE_PATH=${SCRIPT_DIR}"

if [ "${REMOVE_VOLUMES}" = "true" ]; then
countdown "🗑 Remove Docker Compose Solr and ZooKeeper volumes"
eval "${DOCKER_COMPOSE_SOLRCLOUD_COMMAND}" "down --volumes >> ${LOG_FILE} 2>&1"
print_done
fi

print_stage_name "🛫 Spin up containers to index bioentity annotations and test experiments metadata and data in Solr"
eval "${DOCKER_COMPOSE_COMMAND_VARS}" "${DOCKER_COMPOSE_COMMAND}" "up --build >> ${LOG_FILE} 2>&1"
print_done
@@ -76,7 +65,7 @@ print_stage_name "🛬 Bring down all services"
eval "${DOCKER_COMPOSE_COMMAND_VARS}" "${DOCKER_COMPOSE_COMMAND}" "down --rmi local >> ${LOG_FILE} 2>&1"
print_done

printf '%b\n' "🙂 All done! You can keep $(basename ${SOLR_PRIVATE_KEY}) and reuse it to sign any other Solr packages."
printf '%b\n' "🙂 All done!"
printf '%b\n' " Start the SolrCloud cluster again with the following command:"
printf '%b\n\n' " ${DOCKER_COMPOSE_SOLRCLOUD_COMMAND} up -d"
printf '%b\n\n' " You can point your browser at http://localhost:8983 to explore your SolrCloud instance."
@@ -15,8 +15,8 @@ RUN mkdir -p /root/bioentity-properties-jsonl

# Clone the necessary repositories
WORKDIR /root
RUN git clone --depth 1 https://github.com/ebi-gene-expression-group/index-bioentities.git
RUN git clone --depth 1 --recurse-submodules https://github.com/ebi-gene-expression-group/index-bioentities.git
RUN git clone --depth 1 --recurse-submodules https://github.com/ebi-gene-expression-group/atlas-web-bulk.git
RUN git clone --depth 1 https://github.com/ebi-gene-expression-group/solr-bulk.git
RUN git clone --depth 1 --recurse-submodules https://github.com/ebi-gene-expression-group/solr-bulk.git

ENTRYPOINT ["/bin/bash", "-c"]
@@ -2,9 +2,13 @@ version: "3.6"

services:
solr-populator:
build: ${DOCKERFILE_PATH}
build:
context: ${DOCKERFILE_PATH}
no_cache: true
networks:
- atlas-test-net
ports:
- "8089:8089"
depends_on:
- gxa-postgres
- gxa-solrcloud-0
@@ -15,11 +19,12 @@ services:
- atlas-data-bioentity-properties:/atlas-data/bioentity_properties
- atlas-data-exp:/atlas-data/exp
- atlas-data-expdesign:/atlas-data/expdesign
secrets:
- solrcloud.pem
environment:
JAVA_TOOL_OPTIONS: "-Dfile.encoding=UTF8"
JAVA_OPTS: "-Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory -Dbasicauth=${SOLR_USER}:${SOLR_PASSWORD}"
GRADLE_RO_DEP_CACHE: /gradle-ro-dep-cache
ZK_HOSTS: "${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_1_NAME}:2181,${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_2_NAME}:2181,${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_3_NAME}:2181"
SOLR_HOSTS: "http://${PROJECT_NAME}-${SOLR_CLOUD_CONTAINER_1_NAME}:8983/solr,http://${PROJECT_NAME}-${SOLR_CLOUD_CONTAINER_2_NAME}:8983/solr"
SOLR_HOST: ${PROJECT_NAME}-${SOLR_CLOUD_CONTAINER_1_NAME}:8983
SOLR_NUM_SHARDS: 2
NUM_DOCS_PER_BATCH: 20000
@@ -58,32 +63,9 @@ services:
./build-suggesters.sh
unset SOLR_COLLECTION
unset SCHEMA_VERSION
cd /root/atlas-web-bulk
./gradlew -PdataFilesLocation=/root \
-PexperimentFilesLocation=/atlas-data/exp \
-PexperimentDesignLocation=/atlas-data/expdesign \
-PzkHosts=$${ZK_HOSTS} \
-PsolrHosts="" \
-PjdbcUrl=jdbc:postgresql://${POSTGRES_HOST}:5432/${POSTGRES_DB} \
-PjdbcUsername=${POSTGRES_USER} \
-PjdbcPassword=${POSTGRES_PASSWORD} \
-PzkHosts=$${ZK_HOSTS} \
-PsolrHosts=$${SOLR_HOSTS} \
:cli:bootRun --args="bulk-analytics-json --output=/root/experiments-jsonl -e $(echo ${EXP_IDS} ${PRIVATE_EXP_IDS} | sed -e "s/ /,/g")"
cd /root/solr-bulk/bin
./create-bulk-analytics-collection.sh
./create-bulk-analytics-schema.sh
cd /root/index-bioentities/bin
export SOLR_COLLECTION=$${SOLR_COLLECTION_BULK_ANALYTICS}
export SCHEMA_VERSION=$${SOLR_COLLECTION_BULK_ANALYTICS_SCHEMA_VERSION}
export SOLR_PROCESSORS=dedupe
for FILE in `ls /root/experiments-jsonl/*.jsonl`
do
INPUT_JSONL=$${FILE} ./solr-jsonl-chunk-loader.sh >> /dev/stdout 2>&1
done
secrets:
solrcloud.pem:
file: ${SOLR_PRIVATE_KEY:-/dev/null}

volumes:
gradle-wrapper-dists: