From ed55a47188352640e257aa85651b5bfcc5774815 Mon Sep 17 00:00:00 2001 From: Fabrice Jammes Date: Mon, 15 Jul 2024 15:14:52 +0200 Subject: [PATCH] Improve pip dependencies management Add Dockerfile to ciux source pathes Increase parameters management Add separate log level for spark Improve build script configuration --- .ciux | 1 + .github/workflows/e2e-common.yml | 2 +- Dockerfile | 7 ++- bin/distribute.py | 4 +- bin/raw2science.py | 1 + bin/stream2raw.py | 2 +- build.sh | 2 +- chart/templates/spark-fink-raw2science.yaml | 14 +++--- chart/templates/spark-fink-stream2raw.yaml | 8 ++-- chart/values-ci-noscience.yaml | 52 ++++++++++++++++---- chart/values-ci-science.yaml | 53 +++++++++++++++++++++ chart/values-ci.yaml | 19 -------- e2e/argocd.sh | 22 +++++++-- e2e/fink-start.sh | 2 - fink_broker/parser.py | 9 ++++ push-image.sh | 2 + 16 files changed, 149 insertions(+), 51 deletions(-) create mode 100644 chart/values-ci-science.yaml delete mode 100644 chart/values-ci.yaml diff --git a/.ciux b/.ciux index 03a0056b..9bbfce0b 100644 --- a/.ciux +++ b/.ciux @@ -1,6 +1,7 @@ apiVersion: v1alpha1 registry: gitlab-registry.in2p3.fr/astrolabsoftware/fink sourcePathes: + - Dockerfile - fink_broker - bin - deps diff --git a/.github/workflows/e2e-common.yml b/.github/workflows/e2e-common.yml index 84239116..81020aae 100644 --- a/.github/workflows/e2e-common.yml +++ b/.github/workflows/e2e-common.yml @@ -105,7 +105,7 @@ jobs: df -h sudo rm -rf /opt/hostedtoolcache/CodeQL df -h - sudo docker image prune --all --force + docker image prune --all --force df -h - uses: actions/setup-go@v4 with: diff --git a/Dockerfile b/Dockerfile index 4a7be739..7a6f9bb8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -66,13 +66,16 @@ ENV FINK_JARS "" ENV FINK_PACKAGES "" # pytest requirements ADD deps/requirements-test.txt $FINK_HOME/deps -RUN pip install -r $FINK_HOME/deps/requirements-test.txt +# Listing all requirements helps pip in computing a correct dependencies tree +# See additional explanation 
in https://github.com/astrolabsoftware/fink-broker/issues/865 +RUN pip install -r $FINK_HOME/deps/requirements.txt -r $FINK_HOME/deps/requirements-test.txt ADD --chown=${spark_uid} . $FINK_HOME/ FROM noscience AS full ADD deps/requirements-science.txt $FINK_HOME/ -RUN pip install -r $FINK_HOME/requirements-science.txt +# Listing all requirements helps pip in computing a correct dependencies tree +RUN pip install -r $FINK_HOME/deps/requirements.txt -r $FINK_HOME/deps/requirements-test.txt -r $FINK_HOME/requirements-science.txt ADD deps/requirements-science-no-deps.txt $FINK_HOME/ RUN pip install -r $FINK_HOME/requirements-science-no-deps.txt --no-deps diff --git a/bin/distribute.py b/bin/distribute.py index af73fec5..5a8eebb1 100644 --- a/bin/distribute.py +++ b/bin/distribute.py @@ -58,7 +58,9 @@ def main(): # Initialise Spark session spark = init_sparksession( - name="distribute_{}_{}".format(args.producer, args.night), shuffle_partitions=2 + name="distribute_{}_{}".format(args.producer, args.night), + shuffle_partitions=2, + log_level=args.spark_log_level, ) # The level here should be controlled by an argument. 
diff --git a/bin/raw2science.py b/bin/raw2science.py index 01633fb5..cbc4d663 100644 --- a/bin/raw2science.py +++ b/bin/raw2science.py @@ -55,6 +55,7 @@ def main(): name="raw2science_{}_{}".format(args.producer, args.night), shuffle_partitions=2, tz=tz, + log_level=args.spark_log_level, ) # Logger to print useful debug statements diff --git a/bin/stream2raw.py b/bin/stream2raw.py index 20ed3165..7a530c0d 100644 --- a/bin/stream2raw.py +++ b/bin/stream2raw.py @@ -60,7 +60,7 @@ def main(): name="stream2raw_{}_{}".format(args.producer, args.night), shuffle_partitions=2, tz=tz, - log_level=args.log_level, + log_level=args.spark_log_level, ) logger = init_logger(args.log_level) diff --git a/build.sh b/build.sh index 6c822d0a..32f6c8cd 100755 --- a/build.sh +++ b/build.sh @@ -25,7 +25,7 @@ set -euxo pipefail DIR=$(cd "$(dirname "$0")"; pwd -P) # This will avoid overriding user ciuxconfig during a build -export CIUXCONFIG=/tmp/ciux.build.sh +export CIUXCONFIG=$HOME/.ciux/ciux.build.sh usage() { cat << EOD diff --git a/chart/templates/spark-fink-raw2science.yaml b/chart/templates/spark-fink-raw2science.yaml index 989078f5..bdc56953 100644 --- a/chart/templates/spark-fink-raw2science.yaml +++ b/chart/templates/spark-fink-raw2science.yaml @@ -10,18 +10,18 @@ spec: - '{{ .Values.night }}' sparkConf: {{- include "fink.s3config" . | nindent 4 }} driver: - cores: {{ tpl .Values.distribution.cores . }} - coreRequest: "{{ tpl .Values.distribution.coreRequest . }}" - memory: "{{ tpl .Values.distribution.memory . }}" + cores: {{ tpl .Values.raw2science.cores . }} + coreRequest: "{{ tpl .Values.raw2science.coreRequest . }}" + memory: "{{ tpl .Values.raw2science.memory . }}" javaOptions: "-Divy.cache.dir=/tmp -Divy.home=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true" labels: version: 3.4.1 serviceAccount: spark executor: - cores: {{ tpl .Values.distribution.cores . }} - coreRequest: "{{ tpl .Values.distribution.coreRequest . }}" - memory: "{{ tpl .Values.distribution.memory . 
}}" + cores: {{ tpl .Values.raw2science.cores . }} + coreRequest: "{{ tpl .Values.raw2science.coreRequest . }}" + memory: "{{ tpl .Values.raw2science.memory . }}" javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true" - instances: {{ tpl .Values.distribution.instances . }} + instances: {{ tpl .Values.raw2science.instances . }} labels: version: 3.4.1 diff --git a/chart/templates/spark-fink-stream2raw.yaml b/chart/templates/spark-fink-stream2raw.yaml index 5a6e9834..60db869d 100644 --- a/chart/templates/spark-fink-stream2raw.yaml +++ b/chart/templates/spark-fink-stream2raw.yaml @@ -18,16 +18,16 @@ spec: sparkConf: {{- include "fink.s3config" . | nindent 4 }} driver: cores: {{ tpl .Values.distribution.cores . }} - coreRequest: "{{ tpl .Values.distribution.coreRequest . }}" - memory: "{{ tpl .Values.distribution.memory . }}" + coreRequest: "{{ tpl .Values.stream2raw.coreRequest . }}" + memory: "{{ tpl .Values.stream2raw.memory . }}" labels: version: 3.4.1 serviceAccount: spark javaOptions: "-Divy.cache.dir=/tmp -Divy.home=/tmp -Dcom.amazonaws.sdk.disableCertChecking=true" executor: cores: {{ tpl .Values.distribution.cores . }} - coreRequest: "{{ tpl .Values.distribution.coreRequest . }}" - memory: "{{ tpl .Values.distribution.memory . }}" + coreRequest: "{{ tpl .Values.stream2raw.coreRequest . }}" + memory: "{{ tpl .Values.stream2raw.memory . }}" instances: {{ tpl .Values.distribution.instances . }} javaOptions: "-Dcom.amazonaws.sdk.disableCertChecking=true" memory: "512m" diff --git a/chart/values-ci-noscience.yaml b/chart/values-ci-noscience.yaml index b0ffc21c..e0ac4ea0 100644 --- a/chart/values-ci-noscience.yaml +++ b/chart/values-ci-noscience.yaml @@ -1,21 +1,53 @@ -# Default values for chart. -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. 
+# Can be overriden in stream2raw, raw2science and distribution sections +cores: 1 +coreRequest: 0 +instances: 1 +memory: 1g +# instances: 1 -night: "20200101" +fink_trigger_update: "2" -image: - name: fink-broker-noscience +# Can be overriden using --image option + +# Default to s3a:// +# online_data_prefix: s3a://fink-broker-online +producer: sims + +log_level: INFO # # Parameters used to run the stream2raw task # stream2raw: + cores: "{{.Values.cores}}" + coreRequest: "{{.Values.coreRequest}}" + memory: "{{.Values.memory}}" + instances: "{{.Values.instances}}" + fink_alert_schema: /home/fink/fink-alert-schemas/ztf/ztf_public_20190903.schema.avro kafka: - topic: "ztf-stream-sim" + in_sockets: kafka-cluster-kafka-bootstrap.kafka:9092 + starting_offset: earliest + topic: ztf-stream-sim # -# Parameters used to access the S3 bucket +# Parameters used to run the raw2science task +# +raw2science: + cores: "{{.Values.cores}}" + coreRequest: "{{.Values.coreRequest}}" + memory: "{{.Values.memory}}" + instances: "{{.Values.instances}}" + # -s3: - bucket: "fink-broker-online" +# Parameters used to run the distribution task +# +distribution: + cores: "{{.Values.cores}}" + coreRequest: "{{.Values.coreRequest}}" + memory: "{{.Values.memory}}" + instances: "{{.Values.instances}}" + kafka: + out_sockets: "kafka-cluster-kafka-external-bootstrap.kafka:9094" + schema: "/home/fink/fink-alert-schemas/ztf/distribution_schema_0p2.avsc" + substream_prefix: "fink_" + diff --git a/chart/values-ci-science.yaml b/chart/values-ci-science.yaml new file mode 100644 index 00000000..b30099f5 --- /dev/null +++ b/chart/values-ci-science.yaml @@ -0,0 +1,53 @@ +# Can be overriden in stream2raw, raw2science and distribution sections +cores: 1 +coreRequest: 0 +instances: 1 +memory: 1g +# instances: 1 + +fink_trigger_update: "2" + +# Can be overriden using --image option + +# Default to s3a:// +# online_data_prefix: s3a://fink-broker-online +producer: sims + +log_level: INFO + +# +# Parameters used to 
run the stream2raw task +# +stream2raw: + cores: "{{.Values.cores}}" + coreRequest: "{{.Values.coreRequest}}" + memory: "{{.Values.memory}}" + instances: "{{.Values.instances}}" + fink_alert_schema: /home/fink/fink-alert-schemas/ztf/ztf_public_20190903.schema.avro + kafka: + in_sockets: kafka-cluster-kafka-bootstrap.kafka:9092 + starting_offset: earliest + topic: ztf-stream-sim + +# +# Parameters used to run the raw2science task +# +raw2science: + cores: "2" + coreRequest: "0" + memory: "3000m" + instances: "2" + +# +# Parameters used to run the distribution task +# +distribution: + cores: "{{.Values.cores}}" + coreRequest: "{{.Values.coreRequest}}" + memory: "{{.Values.memory}}" + instances: "{{.Values.instances}}" + kafka: + out_sockets: "kafka-cluster-kafka-external-bootstrap.kafka:9094" + schema: "/home/fink/fink-alert-schemas/ztf/distribution_schema_0p2.avsc" + substream_prefix: "fink_" + diff --git a/chart/values-ci.yaml b/chart/values-ci.yaml deleted file mode 100644 index 21557945..00000000 --- a/chart/values-ci.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# Default values for chart. -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. - -night: "20200101" - -# -# Parameters used to run the stream2raw task -# -stream2raw: - kafka: - topic: "ztf-stream-sim" - -# -# Parameters used to access the S3 bucket -# -s3: - bucket: "fink-broker-online" - diff --git a/e2e/argocd.sh b/e2e/argocd.sh index 9a0c1989..16d84e92 100755 --- a/e2e/argocd.sh +++ b/e2e/argocd.sh @@ -7,7 +7,9 @@ set -euxo pipefail -CIUXCONFIG=${CIUXCONFIG:-"$HOME/.ciuxconfig"} +DIR=$(cd "$(dirname "$0")"; pwd -P) + +CIUXCONFIG=${CIUXCONFIG:-"$HOME/.ciux/ciux.sh"} echo "CIUXCONFIG=${CIUXCONFIG}" . 
$CIUXCONFIG @@ -35,10 +37,20 @@ argocd login --core kubectl config set-context --current --namespace="$NS" # Create fink app +IMAGE="$CIUX_IMAGE_URL" +echo "Use CIUX_IMAGE_URL to set fink-broker image: $CIUX_IMAGE_URL" +if [[ "$IMAGE" =~ "-noscience" ]]; +then + valueFile=values-ci-noscience.yaml +else + valueFile=values-ci-science.yaml +fi argocd app create fink --dest-server https://kubernetes.default.svc \ --dest-namespace "$NS" \ --repo https://github.com/astrolabsoftware/fink-cd.git \ - --path apps --revision "$FINK_CD_WORKBRANCH" + --path apps --revision "$FINK_CD_WORKBRANCH" \ + -p finkbroker.revision="$FINK_BROKER_WORKBRANCH" \ + -p finkbroker.valueFile="$valueFile" # Sync fink app-of-apps argocd app sync fink @@ -47,7 +59,7 @@ argocd app sync fink argocd app sync strimzi minio-operator spark-operator # TODO Try to make it simpler, try a sync-wave on Strimzi Application? -# see https://github.com/argoproj/argo-cd/discussions/16729 +# see https://github.com/argoproj/argo-cd/discussions/16729 # and https://stackoverflow.com/questions/77750481/argocd-app-of-apps-ensuring-strimzi-child-app-health-before-kafka-app-sync retry kubectl wait --for condition=established --timeout=60s crd/kafkas.kafka.strimzi.io \ crd/kafkatopics.kafka.strimzi.io \ @@ -61,7 +73,11 @@ retry kubectl wait --for condition=established --timeout=60s crd/kafkas.kafka.strimzi.io argocd app set fink-broker -p image.repository="$CIUX_IMAGE_REGISTRY" \ -p image.name="$CIUX_IMAGE_NAME" \ -p image.tag="$CIUX_IMAGE_TAG" \ + -p log_level="DEBUG" \ -p night="20200101" +# TODO pass parameters using a valuefile here, and not in 'argocd app create fink' +# see https://argo-cd.readthedocs.io/en/stable/user-guide/commands/argocd_app_set/ + argocd app sync -l app.kubernetes.io/instance=fink # TODO Wait for kafkatopic to exist diff --git a/e2e/fink-start.sh b/e2e/fink-start.sh index 38834880..a69bd378 100755 --- a/e2e/fink-start.sh +++ b/e2e/fink-start.sh @@ -48,10 +48,8 @@ IMAGE="$CIUX_IMAGE_URL" 
echo "Use CIUX_IMAGE_URL to set fink-broker image: $CIUX_IMAGE_URL" if [[ "$IMAGE" =~ "-noscience" ]]; then - VALUE_FILE="$DIR/../chart/values-ci-noscience.yaml" FINKCONFIG="$DIR/finkconfig_noscience" else - VALUE_FILE="$DIR/../chart/values-ci.yaml" FINKCONFIG="$DIR/finkconfig" fi diff --git a/fink_broker/parser.py b/fink_broker/parser.py index 4673770f..47bfe4dd 100644 --- a/fink_broker/parser.py +++ b/fink_broker/parser.py @@ -126,6 +126,15 @@ def getargs(parser: argparse.ArgumentParser) -> argparse.Namespace: [LOG_LEVEL] """, ) + parser.add_argument( + "-spark_log_level", + type=str, + default="WARN", + help=""" + The minimum level of log for the Spark framework: OFF, DEBUG, INFO, WARN, ERROR, CRITICAL + [LOG_LEVEL] + """, + ) parser.add_argument( "-finkwebpath", type=str, diff --git a/push-image.sh b/push-image.sh index c6d0c4c9..1e99e8d4 100755 --- a/push-image.sh +++ b/push-image.sh @@ -7,6 +7,8 @@ set -euxo pipefail DIR=$(cd "$(dirname "$0")"; pwd -P) + +export CIUXCONFIG=$HOME/.ciux/ciux.build.sh . "$CIUXCONFIG" set -e