diff --git a/README.md b/README.md index 56710707..ecab55e9 100644 --- a/README.md +++ b/README.md @@ -104,10 +104,21 @@ To create our PostGreSQL database and run the schema migrations up to the latest ``` ### Solr -To create the collections, their schemas and populate them, please run the following script. +To create the collections, their schemas and populate them, please run the following scripts. +Currently, this step is split into 2 sub-steps by Solr collection. +Our web apps and the various shell scripts that we use together with the Data Prod Team are inconsistent in +how they use the `SOLR_HOST` and `SOLR_HOSTS` variables. We need to sort this out, +but until that is resolved we probably have to keep these 2 sub-steps, unless we find a way to merge them. + +To create and populate the `bioentities` collection: ```bash -./docker/prepare-dev-environment/solr/run.sh -r -l solr.log +./docker/prepare-dev-environment/solr-bioentities/run.sh -r -l solr-bioentities.log +``` + +To create and populate the `bulk-analytics` collection: +```bash +./docker/prepare-dev-environment/solr-analytics/run.sh -l solr-analytics.log ``` Run the script with the `-h` flag for more details. 
diff --git a/app/src/main/java/uk/ac/ebi/atlas/experimentpage/content/ExperimentPageContentService.java b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/content/ExperimentPageContentService.java index daeac3f4..efd3c4f1 100644 --- a/app/src/main/java/uk/ac/ebi/atlas/experimentpage/content/ExperimentPageContentService.java +++ b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/content/ExperimentPageContentService.java @@ -10,14 +10,12 @@ import org.springframework.cache.annotation.Cacheable; import org.springframework.stereotype.Component; import uk.ac.ebi.atlas.commons.readers.TsvStreamer; -import uk.ac.ebi.atlas.experimentpage.ExperimentDesignFile; import uk.ac.ebi.atlas.experimentpage.ExternallyAvailableContentService; import uk.ac.ebi.atlas.experimentpage.json.JsonBaselineExperimentController; import uk.ac.ebi.atlas.experimentpage.qc.MicroarrayQcFiles; import uk.ac.ebi.atlas.experimentpage.qc.QcReportController; import uk.ac.ebi.atlas.model.download.ExternallyAvailableContent; import uk.ac.ebi.atlas.model.experiment.Experiment; -import uk.ac.ebi.atlas.model.experiment.ExperimentDesignTable; import uk.ac.ebi.atlas.model.experiment.ExperimentType; import uk.ac.ebi.atlas.model.experiment.sample.ReportsGeneExpression; import uk.ac.ebi.atlas.resource.DataFileHub; @@ -85,12 +83,6 @@ private JsonObject experimentPageContentForExperiment(final Experiment> /dev/stdout 2>&1 + done + +secrets: + solrcloud.pem: + file: ${SOLR_PRIVATE_KEY:-/dev/null} + +volumes: + gradle-wrapper-dists: + external: true + name: ${PROJECT_NAME}_${GRADLE_WRAPPER_DISTS_VOL_NAME} + gradle-ro-dep-cache: + external: true + name: ${PROJECT_NAME}_${GRADLE_RO_DEP_CACHE_VOL_NAME} + atlas-data-bioentity-properties: + external: true + name: ${PROJECT_NAME}_${ATLAS_DATA_BIOENTITY_PROPERTIES_VOL_NAME} + atlas-data-exp: + external: true + name: ${PROJECT_NAME}_${ATLAS_DATA_EXP_VOL_NAME} + atlas-data-expdesign: + external: true + name: ${PROJECT_NAME}_${ATLAS_DATA_EXPDESIGN_VOL_NAME} + +networks: + 
atlas-test-net: + name: atlas-test-net diff --git a/docker/prepare-dev-environment/solr/run.sh b/docker/prepare-dev-environment/solr-analytics/run.sh similarity index 79% rename from docker/prepare-dev-environment/solr/run.sh rename to docker/prepare-dev-environment/solr-analytics/run.sh index 2f2bec5b..ff2070fd 100755 --- a/docker/prepare-dev-environment/solr/run.sh +++ b/docker/prepare-dev-environment/solr-analytics/run.sh @@ -14,27 +14,22 @@ source ${ENV_FILE} # print_error source ${SCRIPT_DIR}/../utils.sh -REMOVE_VOLUMES=false LOG_FILE=/dev/stdout function print_usage() { - printf '\n%b\n' "Usage: ${0} [ -r ] [ -l FILE ]" + printf '\n%b\n' "Usage: ${0} [ -l FILE ]" printf '\n%b\n' "Populate a Docker Compose SolrCloud 8 cluster with bulk Expression Atlas data." - printf '\n%b\n' "-r\t\tRemove volumes before creating them" printf '\n%b\n' "-l FILE \tLog file (default is ${LOG_FILE})" printf '%b\n\n' "-h\t\tDisplay usage instructions" } -while getopts "k:o:l:rh" opt +while getopts "l:h" opt do case ${opt} in l) LOG_FILE=$OPTARG ;; - r) - REMOVE_VOLUMES=true - ;; h) print_usage exit 0 @@ -62,12 +57,6 @@ DOCKER_COMPOSE_SOLRCLOUD_COMMAND="docker compose \ DOCKER_COMPOSE_COMMAND_VARS="DOCKERFILE_PATH=${SCRIPT_DIR}" -if [ "${REMOVE_VOLUMES}" = "true" ]; then - countdown "🗑 Remove Docker Compose Solr and ZooKeeper volumes" - eval "${DOCKER_COMPOSE_SOLRCLOUD_COMMAND}" "down --volumes >> ${LOG_FILE} 2>&1" - print_done -fi - print_stage_name "🛫 Spin up containers to index bioentity annotations and test experiments metadata and data in Solr" eval "${DOCKER_COMPOSE_COMMAND_VARS}" "${DOCKER_COMPOSE_COMMAND}" "up --build >> ${LOG_FILE} 2>&1" print_done @@ -76,7 +65,7 @@ print_stage_name "🛬 Bring down all services" eval "${DOCKER_COMPOSE_COMMAND_VARS}" "${DOCKER_COMPOSE_COMMAND}" "down --rmi local >> ${LOG_FILE} 2>&1" print_done -printf '%b\n' "🙂 All done! You can keep $(basename ${SOLR_PRIVATE_KEY}) and reuse it to sign any other Solr packages." +printf '%b\n' "🙂 All done!" 
printf '%b\n' " Start the SolrCloud cluster again with the following command:" printf '%b\n\n' " ${DOCKER_COMPOSE_SOLRCLOUD_COMMAND} up -d" printf '%b\n\n' " You can point your browser at http://localhost:8983 to explore your SolrCloud instance." diff --git a/docker/prepare-dev-environment/solr/Dockerfile b/docker/prepare-dev-environment/solr-bioentities/Dockerfile similarity index 74% rename from docker/prepare-dev-environment/solr/Dockerfile rename to docker/prepare-dev-environment/solr-bioentities/Dockerfile index 6ac71b26..00d60011 100644 --- a/docker/prepare-dev-environment/solr/Dockerfile +++ b/docker/prepare-dev-environment/solr-bioentities/Dockerfile @@ -15,8 +15,8 @@ RUN mkdir -p /root/bioentity-properties-jsonl # Clone the necessary repositories WORKDIR /root -RUN git clone --depth 1 https://github.com/ebi-gene-expression-group/index-bioentities.git +RUN git clone --depth 1 --recurse-submodules https://github.com/ebi-gene-expression-group/index-bioentities.git RUN git clone --depth 1 --recurse-submodules https://github.com/ebi-gene-expression-group/atlas-web-bulk.git -RUN git clone --depth 1 https://github.com/ebi-gene-expression-group/solr-bulk.git +RUN git clone --depth 1 --recurse-submodules https://github.com/ebi-gene-expression-group/solr-bulk.git ENTRYPOINT ["/bin/bash", "-c"] diff --git a/docker/prepare-dev-environment/solr/docker-compose.yml b/docker/prepare-dev-environment/solr-bioentities/docker-compose.yml similarity index 64% rename from docker/prepare-dev-environment/solr/docker-compose.yml rename to docker/prepare-dev-environment/solr-bioentities/docker-compose.yml index 14d83e13..1cd954d1 100644 --- a/docker/prepare-dev-environment/solr/docker-compose.yml +++ b/docker/prepare-dev-environment/solr-bioentities/docker-compose.yml @@ -2,9 +2,13 @@ version: "3.6" services: solr-populator: - build: ${DOCKERFILE_PATH} + build: + context: ${DOCKERFILE_PATH} + no_cache: true networks: - atlas-test-net + ports: + - "8089:8089" depends_on: - 
gxa-postgres - gxa-solrcloud-0 @@ -15,11 +19,12 @@ services: - atlas-data-bioentity-properties:/atlas-data/bioentity_properties - atlas-data-exp:/atlas-data/exp - atlas-data-expdesign:/atlas-data/expdesign + secrets: + - solrcloud.pem environment: JAVA_TOOL_OPTIONS: "-Dfile.encoding=UTF8" + JAVA_OPTS: "-Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory -Dbasicauth=${SOLR_USER}:${SOLR_PASSWORD}" GRADLE_RO_DEP_CACHE: /gradle-ro-dep-cache - ZK_HOSTS: "${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_1_NAME}:2181,${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_2_NAME}:2181,${PROJECT_NAME}-${SOLR_CLOUD_ZK_CONTAINER_3_NAME}:2181" - SOLR_HOSTS: "http://${PROJECT_NAME}-${SOLR_CLOUD_CONTAINER_1_NAME}:8983/solr,http://${PROJECT_NAME}-${SOLR_CLOUD_CONTAINER_2_NAME}:8983/solr" SOLR_HOST: ${PROJECT_NAME}-${SOLR_CLOUD_CONTAINER_1_NAME}:8983 SOLR_NUM_SHARDS: 2 NUM_DOCS_PER_BATCH: 20000 @@ -58,32 +63,9 @@ services: ./build-suggesters.sh unset SOLR_COLLECTION unset SCHEMA_VERSION - - cd /root/atlas-web-bulk - ./gradlew -PdataFilesLocation=/root \ - -PexperimentFilesLocation=/atlas-data/exp \ - -PexperimentDesignLocation=/atlas-data/expdesign \ - -PzkHosts=$${ZK_HOSTS} \ - -PsolrHosts="" \ - -PjdbcUrl=jdbc:postgresql://${POSTGRES_HOST}:5432/${POSTGRES_DB} \ - -PjdbcUsername=${POSTGRES_USER} \ - -PjdbcPassword=${POSTGRES_PASSWORD} \ - -PzkHosts=$${ZK_HOSTS} \ - -PsolrHosts=$${SOLR_HOSTS} \ - :cli:bootRun --args="bulk-analytics-json --output=/root/experiments-jsonl -e $(echo ${EXP_IDS} ${PRIVATE_EXP_IDS} | sed -e "s/ /,/g")" - - cd /root/solr-bulk/bin - ./create-bulk-analytics-collection.sh - ./create-bulk-analytics-schema.sh - - cd /root/index-bioentities/bin - export SOLR_COLLECTION=$${SOLR_COLLECTION_BULK_ANALYTICS} - export SCHEMA_VERSION=$${SOLR_COLLECTION_BULK_ANALYTICS_SCHEMA_VERSION} - export SOLR_PROCESSORS=dedupe - for FILE in `ls /root/experiments-jsonl/*.jsonl` - do - INPUT_JSONL=$${FILE} ./solr-jsonl-chunk-loader.sh >> 
/dev/stdout 2>&1 - done +secrets: + solrcloud.pem: + file: ${SOLR_PRIVATE_KEY:-/dev/null} volumes: gradle-wrapper-dists: diff --git a/docker/prepare-dev-environment/solr-bioentities/run.sh b/docker/prepare-dev-environment/solr-bioentities/run.sh new file mode 100755 index 00000000..491a52e4 --- /dev/null +++ b/docker/prepare-dev-environment/solr-bioentities/run.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +set -e +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +# PROJECT_NAME +# SOLR_CLOUD_CONTAINER_1_NAME +# SOLR_CLOUD_CONTAINER_2_NAME +ENV_FILE=${SCRIPT_DIR}/../../dev.env +source ${ENV_FILE} + +# countdown +# print_stage_name +# print_done +# print_error +source ${SCRIPT_DIR}/../utils.sh + +SOLR_KEYS_DIRECTORY=${SCRIPT_DIR} +REMOVE_VOLUMES=false +LOG_FILE=/dev/stdout +function print_usage() { + printf '\n%b\n' "Usage: ${0} [ -r ] [ -l FILE ]" + printf '\n%b\n' "Populate a Docker Compose SolrCloud 8 cluster with bulk Expression Atlas data." + + printf '\n%b\n' "-r\t\tRemove volumes before creating them" + printf '\n%b\n' "-l FILE \tLog file (default is ${LOG_FILE})" + printf '%b\n\n' "-h\t\tDisplay usage instructions" +} + + +while getopts "l:rh" opt +do + case ${opt} in + l) + LOG_FILE=$OPTARG + ;; + r) + REMOVE_VOLUMES=true + ;; + h) + print_usage + exit 0 + ;; + \?) 
+ printf '%b\n' "Invalid option: -${OPTARG}" >&2 + print_usage + exit 2 + ;; + esac +done + +DOCKER_COMPOSE_COMMAND="docker compose \ +--project-name ${PROJECT_NAME} \ +--env-file ${ENV_FILE} \ +--env-file ${SCRIPT_DIR}/../test-data.env \ +--file ${SCRIPT_DIR}/../../docker-compose-postgres.yml \ +--file ${SCRIPT_DIR}/../../docker-compose-solrcloud.yml \ +--file ${SCRIPT_DIR}/docker-compose.yml" + +DOCKER_COMPOSE_SOLRCLOUD_COMMAND="docker compose \ +--project-name ${PROJECT_NAME} \ +--env-file ${ENV_FILE} \ +--file ${SCRIPT_DIR}/../../docker-compose-solrcloud.yml" + +SOLR_PRIVATE_KEY=${SOLR_KEYS_DIRECTORY}/solrcloud.pem +SOLR_PUBLIC_KEY=${SOLR_KEYS_DIRECTORY}/solrcloud.der +SOLR_USERFILES_PATH=/var/solr/data/userfiles/ +DOCKER_COMPOSE_COMMAND_VARS="DOCKERFILE_PATH=${SCRIPT_DIR}" + +if [ "${REMOVE_VOLUMES}" = "true" ]; then + countdown "🗑 Remove Docker Compose Solr and ZooKeeper volumes" + eval "${DOCKER_COMPOSE_SOLRCLOUD_COMMAND}" "down --volumes >> ${LOG_FILE} 2>&1" + print_done +fi + +print_stage_name "🔐 Generate RSA keypair to sign and verify Solr packages" +openssl genrsa -out ${SOLR_PRIVATE_KEY} 512 >> ${LOG_FILE} 2>&1 +openssl rsa -in ${SOLR_PRIVATE_KEY} -pubout -outform DER -out ${SOLR_PUBLIC_KEY} >> ${LOG_FILE} 2>&1 +print_done + +print_stage_name "🌅 Start Solr 8 cluster in Docker Compose" +eval "${DOCKER_COMPOSE_COMMAND_VARS}" "${DOCKER_COMPOSE_SOLRCLOUD_COMMAND}" "up -d >> ${LOG_FILE} 2>&1" +print_done + +print_stage_name "💤 Give Solr ten seconds to start up" +sleep 10 +print_done + +print_stage_name "🔏 Register ${SOLR_PUBLIC_KEY} in SolrCloud" +docker exec ${PROJECT_NAME}-${SOLR_CLOUD_CONTAINER_1_NAME} ./bin/solr package add-key /run/secrets/solrcloud.der >> ${LOG_FILE} 2>&1 +print_done + +print_stage_name "🌄 Stop Solr 8 cluster in Docker Compose" +eval "${DOCKER_COMPOSE_COMMAND_VARS}" "${DOCKER_COMPOSE_SOLRCLOUD_COMMAND}" "down >> ${LOG_FILE} 2>&1" +print_done + +print_stage_name "🛫 Spin up containers to index bioentity annotations and test experiments 
metadata and data in Solr" +eval "${DOCKER_COMPOSE_COMMAND_VARS}" "${DOCKER_COMPOSE_COMMAND}" "up --build >> ${LOG_FILE} 2>&1" +print_done + +print_stage_name "🛬 Bring down all services" +eval "${DOCKER_COMPOSE_COMMAND_VARS}" "${DOCKER_COMPOSE_COMMAND}" "down --rmi local >> ${LOG_FILE} 2>&1" +print_done + +printf '%b\n' "🙂 All done! You can keep $(basename ${SOLR_PRIVATE_KEY}) and reuse it to sign any other Solr packages." +printf '%b\n' " Start the SolrCloud cluster again with the following command:" +printf '%b\n\n' " ${DOCKER_COMPOSE_SOLRCLOUD_COMMAND} up -d" +printf '%b\n\n' " You can point your browser at http://localhost:8983 to explore your SolrCloud instance." +printf '%b\n' " Stop the SolrCloud cluster again with the following command:" +printf '%b\n' " ${DOCKER_COMPOSE_SOLRCLOUD_COMMAND} down" +