From 9830f4dea65180943ac8ac95f04f651c7f4401d4 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Wed, 16 Nov 2022 13:46:57 -0600 Subject: [PATCH 01/21] Update Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 7bac474..ea63e02 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM htcondor/execute:9.7-el7 +FROM htcondor/execute:9.12.0-el7 ENV container docker # Ge$t commonly used utilities From 1696183564ad5a5defe8fe46c0cd9c1e2857a25b Mon Sep 17 00:00:00 2001 From: bio-boris Date: Wed, 16 Nov 2022 13:50:19 -0600 Subject: [PATCH 02/21] Create manual-build.yml --- .github/workflows/manual-build.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/workflows/manual-build.yml diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml new file mode 100644 index 0000000..944f903 --- /dev/null +++ b/.github/workflows/manual-build.yml @@ -0,0 +1,11 @@ +--- +name: Manual Build & Push +on: + workflow_dispatch: +jobs: + build-push: + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + with: + name: '${{ github.event.repository.name }}-develop' + tags: br-${{ github.ref_name }} + secrets: inherit From 69c18b192d35722c1eee8b047d08f408c7a8e904 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Thu, 1 Dec 2022 14:35:10 -0600 Subject: [PATCH 03/21] Delete build --- hooks/build | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 hooks/build diff --git a/hooks/build b/hooks/build deleted file mode 100644 index 0b09918..0000000 --- a/hooks/build +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# $IMAGE_NAME var is injected into the build so the tag is correct. - -echo "Build hook running" -echo "IMAGE_NAME = ${IMAGE_NAME}" -echo "DOCKER_REPO = ${DOCKER_REPO}" -export BRANCH=${SOURCE_BRANCH:-`git symbolic-ref --short HEAD`} - -export DATE=`date -u +"%Y-%m-%dT%H:%M:%SZ"` -export COMMIT=${SOURCE_COMMIT:-`git rev-parse --short HEAD`} -docker build --build-arg BUILD_DATE=$DATE \ - --build-arg VCS_REF=$COMMIT \ - --build-arg BRANCH=$BRANCH \ - -t ${IMAGE_NAME} . && \ -echo "Built and tagged ${IMAGE_NAME}" From 4e8e11201c06ee548ad250ae4e5469b35abff1e1 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Thu, 1 Dec 2022 22:45:38 -0600 Subject: [PATCH 04/21] Create kbase_worker.conf --- kbase_worker.conf | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 kbase_worker.conf diff --git a/kbase_worker.conf b/kbase_worker.conf new file mode 100644 index 0000000..7f2dc97 --- /dev/null +++ b/kbase_worker.conf @@ -0,0 +1,15 @@ +# This contains info just for the worker + +# CGROUPS +BASE_CGROUP = htcondor +CGROUP_MEMORY_LIMIT_POLICY = soft + +# To stop cron jobs from polluting classads +REMOVE_SIGNIFICANT_ATTRIBUTES = HEALTH_STATUS_MESSAGE + +# This issuer value of a valid token must be set before generating any tokens, otherwise the tokens won't work +# Append to this file to override +TRUST_DOMAIN = condor:9618 + +# CM Daemons +DAEMON_LIST = MASTER STARTD From 052714762f31df50f450ce05880f867f000c0dd8 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Thu, 1 Dec 2022 22:47:26 -0600 Subject: [PATCH 05/21] Create pre-exec.sh --- pre-exec.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 pre-exec.sh diff --git a/pre-exec.sh b/pre-exec.sh new file mode 100644 index 0000000..a436515 --- /dev/null +++ b/pre-exec.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# See documentation at https://github.com/htcondor/htcondor/tree/main/build/docker/services#providing-additional-configuration + + +if [ "$CONDOR_JWT_TOKEN" ] ; then + echo "$CONDOR_JWT_TOKEN" > /etc/condor/tokens.d/JWT + chmod 600 /etc/condor/tokens.d/JWT +fi + +if [ "$TRUST_DOMAIN" ] ; then + echo "$TRUST_DOMAIN" >> /etc/condor/condor_config.local +fi + +if [ "$COLLECTOR_HOST" ] ; then + echo "$COLLECTOR_HOST" >> /etc/condor/condor_config.local +fi From 41348eed761d4e098d6caa328682c261439d85c6 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Thu, 1 Dec 2022 22:49:53 -0600 Subject: [PATCH 06/21] Update pre-exec.sh --- pre-exec.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pre-exec.sh b/pre-exec.sh index a436515..1eff766 100644 --- a/pre-exec.sh +++ b/pre-exec.sh @@ -8,10 +8,7 @@ if [ "$CONDOR_JWT_TOKEN" ] ; then chmod 600 /etc/condor/tokens.d/JWT fi -if [ "$TRUST_DOMAIN" ] ; then - echo "$TRUST_DOMAIN" >> /etc/condor/condor_config.local -fi - if [ "$COLLECTOR_HOST" ] ; then - echo "$COLLECTOR_HOST" >> /etc/condor/condor_config.local + echo "COLLECTOR_HOST = $COLLECTOR_HOST" >> /etc/condor/condor_config.local + /update-config fi From 4c54a1520cc1838b95890ea6941e9d04086dfa20 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Thu, 1 Dec 2022 22:53:43 -0600 Subject: [PATCH 07/21] Update Dockerfile --- Dockerfile | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/Dockerfile b/Dockerfile index ea63e02..90fd7ca 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,10 @@ FROM htcondor/execute:9.12.0-el7 ENV container docker +COPY pre-exec.sh /root/config/pre-exec.sh +# See https://www-auth.cs.wisc.edu/lists/htcondor-users/2014-August/msg00044.shtml +COPY kbase_central_manager.conf /etc/condor/condor_config.local -# Ge$t commonly used utilities +# Get commonly used utilities RUN yum install -y deltarpm RUN yum -y update && yum upgrade -y RUN yum -y install -y epel-release wget which git deltarpm gcc libcgroup libcgroup-tools stress-ng tmpwatch @@ -9,7 +12,6 @@ RUN yum -y install -y epel-release wget which git deltarpm gcc libcgroup libcgro # Install docker binaries RUN yum install -y yum-utils device-mapper-persistent-data lvm2 && yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo && yum install -y docker-ce - #Install Python3 and Libraries (source /root/miniconda/bin/activate) RUN yum install -y bzip2 \ && wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh \ @@ -29,9 +31,6 @@ RUN wget -N https://github.com/kbase/dockerize/raw/master/dockerize-linux-amd64- # Also add the user to the groups that map to "docker" on Linux and "daemon" on Mac RUN usermod -a -G 0 kbase && usermod -a -G 999 kbase -#ADD DIRS -RUN mkdir -p /var/run/condor && mkdir -p /var/log/condor && mkdir -p /var/lock/condor && mkdir -p /var/lib/condor/execute - # Maybe you want: rm -rf /var/cache/yum, to also free up space taken by orphaned data from disabled or removed repos RUN rm -rf /var/cache/yum @@ -51,13 +50,4 @@ LABEL org.label-schema.build-date=$BUILD_DATE \ us.kbase.vcs-branch=$BRANCH \ maintainer="Steve Chan sychan@lbl.gov" -ENTRYPOINT [ "/kb/deployment/bin/dockerize" ] -CMD [ "-template", "/kb/deployment/conf/.templates/deployment.cfg.templ:/kb/deployment/conf/deployment.cfg", \ - "-template", "/kb/deployment/conf/.templates/http.ini.templ:/kb/deployment/jettybase/start.d/http.ini", \ - "-template", "/kb/deployment/conf/.templates/server.ini.templ:/kb/deployment/jettybase/start.d/server.ini", \ - "-template", "/kb/deployment/conf/.templates/start_server.sh.templ:/kb/deployment/bin/start_server.sh", \ - "-template", "/kb/deployment/conf/.templates/condor_config.templ:/etc/condor/condor_config.local", \ - "-stdout", "/kb/deployment/jettybase/logs/request.log", \ - "/kb/deployment/bin/start_server.sh" ] - WORKDIR /kb/deployment/jettybase From e1c41787c6050fe55f4cbee8c991aeb591f70d0b Mon Sep 17 00:00:00 2001 From: bio-boris Date: Thu, 1 Dec 2022 22:55:09 -0600 Subject: [PATCH 08/21] Update Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 90fd7ca..f91810c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM htcondor/execute:9.12.0-el7 ENV container docker COPY pre-exec.sh /root/config/pre-exec.sh # See https://www-auth.cs.wisc.edu/lists/htcondor-users/2014-August/msg00044.shtml -COPY kbase_central_manager.conf /etc/condor/condor_config.local +COPY kbase_worker.conf /etc/condor/condor_config.local # Get commonly used utilities RUN yum install -y deltarpm From db807013e66cab368ac8abd547dba9c917bcd0d1 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Thu, 1 Dec 2022 23:10:11 -0600 Subject: [PATCH 09/21] Update kbase_worker.conf --- kbase_worker.conf | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/kbase_worker.conf b/kbase_worker.conf index 7f2dc97..780625f 100644 --- a/kbase_worker.conf +++ b/kbase_worker.conf @@ -1,15 +1,21 @@ # This contains info just for the worker +# Worker Daemons +DAEMON_LIST = MASTER STARTD + # CGROUPS BASE_CGROUP = htcondor CGROUP_MEMORY_LIMIT_POLICY = soft -# To stop cron jobs from polluting classads -REMOVE_SIGNIFICANT_ATTRIBUTES = HEALTH_STATUS_MESSAGE +# TODO TEMPLATIZE THIS +CLIENTGROUP = "njs,bigmem,extreme" -# This issuer value of a valid token must be set before generating any tokens, otherwise the tokens won't work -# Append to this file to override -TRUST_DOMAIN = condor:9618 - -# CM Daemons -DAEMON_LIST = MASTER STARTD +# Useful for running multiple workers on the same machine +EXECUTE = /cdr/staging + +# Add clientgroup to something that jobs can match against! +STARTD_ATTRS = CLIENTGROUP, $(STARTD_ATTRS) + +# Todo Enable This When Cronjobs are back! +#NODE_IS_HEALTHY = False +#START = (NODE_IS_HEALTHY =?= True) From 9f6a7286f6f9105604aa84f262cd81f4c2c3a510 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Mon, 30 Jan 2023 12:46:11 -0600 Subject: [PATCH 10/21] Update kbase_worker.conf --- kbase_worker.conf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kbase_worker.conf b/kbase_worker.conf index 780625f..47a2bad 100644 --- a/kbase_worker.conf +++ b/kbase_worker.conf @@ -19,3 +19,7 @@ STARTD_ATTRS = CLIENTGROUP, $(STARTD_ATTRS) # Todo Enable This When Cronjobs are back! #NODE_IS_HEALTHY = False #START = (NODE_IS_HEALTHY =?= True) + +# Advertise CPUS and Memory as detected +NUM_CPUS = $(DETECTED_CPUS) +MEMORY = $(DETECTED_MEMORY) From ff0691aab7f6527c821f1d9834af3d4ffe15463b Mon Sep 17 00:00:00 2001 From: Boris Date: Tue, 7 Feb 2023 10:24:38 -0600 Subject: [PATCH 11/21] Add more configs --- Dockerfile | 3 ++- kbase_worker.conf | 4 +++- pre-exec.sh | 25 +++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index f91810c..3926c26 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,8 @@ FROM htcondor/execute:9.12.0-el7 ENV container docker -COPY pre-exec.sh /root/config/pre-exec.sh + # See https://www-auth.cs.wisc.edu/lists/htcondor-users/2014-August/msg00044.shtml +COPY pre-exec.sh /root/config/pre-exec.sh COPY kbase_worker.conf /etc/condor/condor_config.local # Get commonly used utilities diff --git a/kbase_worker.conf b/kbase_worker.conf index 47a2bad..ce6dc37 100644 --- a/kbase_worker.conf +++ b/kbase_worker.conf @@ -7,7 +7,7 @@ DAEMON_LIST = MASTER STARTD BASE_CGROUP = htcondor CGROUP_MEMORY_LIMIT_POLICY = soft -# TODO TEMPLATIZE THIS +# CLIENTGROUP Default for allowing multiple jobs to run at once CLIENTGROUP = "njs,bigmem,extreme" # Useful for running multiple workers on the same machine @@ -23,3 +23,5 @@ STARTD_ATTRS = CLIENTGROUP, $(STARTD_ATTRS) # Advertise CPUS and Memory as detected NUM_CPUS = $(DETECTED_CPUS) MEMORY = $(DETECTED_MEMORY) + +# To Override any settings, append to this file diff --git a/pre-exec.sh b/pre-exec.sh index 1eff766..1182ed0 100644 --- a/pre-exec.sh +++ b/pre-exec.sh @@ -2,7 +2,6 @@ # See documentation at https://github.com/htcondor/htcondor/tree/main/build/docker/services#providing-additional-configuration - if [ "$CONDOR_JWT_TOKEN" ] ; then echo "$CONDOR_JWT_TOKEN" > /etc/condor/tokens.d/JWT chmod 600 /etc/condor/tokens.d/JWT @@ -10,5 +9,27 @@ fi if [ "$COLLECTOR_HOST" ] ; then echo "COLLECTOR_HOST = $COLLECTOR_HOST" >> /etc/condor/condor_config.local - /update-config fi + +if [ "$CLIENTGROUP" ] ; then + echo "CLIENTGROUP = \"$CLIENTGROUP\"" >> /etc/condor/condor_config.local +fi + +# To keep docker partition from filling up +if [ "$DOCKER_SYSTEM_PRUNE" ] ; then + docker system prune -a -f +fi + + +#TODO Make nobody user able to run jobs + + +# Ensure condor user can write to logs, since these are mounted onto host +chown condor $(condor_config_val log) $(condor_config_val lock) $(condor_config_val run) + + + + + + +/update-config \ No newline at end of file From a5733eb0e8face601b6e3fb4390271d5ffc879e7 Mon Sep 17 00:00:00 2001 From: bio-boris Date: Wed, 8 Feb 2023 19:31:11 -0600 Subject: [PATCH 12/21] Update pre-exec.sh --- pre-exec.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pre-exec.sh b/pre-exec.sh index 1182ed0..9d019ae 100644 --- a/pre-exec.sh +++ b/pre-exec.sh @@ -11,8 +11,9 @@ if [ "$COLLECTOR_HOST" ] ; then echo "COLLECTOR_HOST = $COLLECTOR_HOST" >> /etc/condor/condor_config.local fi +#Note the clientgroup will require quotation marks in the env variable if [ "$CLIENTGROUP" ] ; then - echo "CLIENTGROUP = \"$CLIENTGROUP\"" >> /etc/condor/condor_config.local + echo "CLIENTGROUP = $CLIENTGROUP >> /etc/condor/condor_config.local fi # To keep docker partition from filling up @@ -32,4 +33,4 @@ chown condor $(condor_config_val log) $(condor_config_val lock) $(condor_config_ -/update-config \ No newline at end of file +/update-config From 851b24f420b9f35b038313dbd11b2e14bdac1058 Mon Sep 17 00:00:00 2001 From: Boris Date: Tue, 14 Feb 2023 17:12:53 -0600 Subject: [PATCH 13/21] Up to parity with prod --- kbase_worker.conf | 10 ++++++++++ pre-exec.sh | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/kbase_worker.conf b/kbase_worker.conf index ce6dc37..169ab97 100644 --- a/kbase_worker.conf +++ b/kbase_worker.conf @@ -1,5 +1,15 @@ # This contains info just for the worker +#Setup Directories +LOCAL_DIR = /condor +LOG = $(LOCAL_DIR)/log +SPOOL = $(LOCAL_DIR)/spool +CONDOR_SHARED = /condor_shared +RUN = $(LOCAL_DIR)/run +EXECUTE = $(LOCAL_DIR)/execute +DIRS_TO_CREATE = $(LOCAL_DIR) $(LOG) $(SPOOL) $(CONDOR_SHARED) $(RUN) $(EXECUTE) + + # Worker Daemons DAEMON_LIST = MASTER STARTD diff --git a/pre-exec.sh b/pre-exec.sh index 1182ed0..29af0b9 100644 --- a/pre-exec.sh +++ b/pre-exec.sh @@ -22,6 +22,13 @@ fi #TODO Make nobody user able to run jobs +####################### HOST PATHS ############################################ +DIRS_TO_CREATE=$(condor_config_val DIRS_TO_CREATE) +mkdir -p $DIRS_TO_CREATE +chmod 770 $DIRS_TO_CREATE && chown root:condor $DIRS_TO_CREATE +/update-config +####################### HOST PATHS ############################################ + # Ensure condor user can write to logs, since these are mounted onto host From 565c330a93c6a0339ecae8ef916a77ad67024ded Mon Sep 17 00:00:00 2001 From: Boris Date: Tue, 14 Feb 2023 17:13:57 -0600 Subject: [PATCH 14/21] Up to parity with condor --- pre-exec.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pre-exec.sh b/pre-exec.sh index a41a001..01bf6cd 100644 --- a/pre-exec.sh +++ b/pre-exec.sh @@ -13,12 +13,12 @@ fi #Note the clientgroup will require quotation marks in the env variable if [ "$CLIENTGROUP" ] ; then - echo "CLIENTGROUP = $CLIENTGROUP >> /etc/condor/condor_config.local + echo "CLIENTGROUP = $CLIENTGROUP >> /etc/condor/condor_config.local" fi # To keep docker partition from filling up if [ "$DOCKER_SYSTEM_PRUNE" ] ; then - docker system prune -a -f + docker system prune -a -fg fi From 029e3fd7799310c0cf397e005c19f02f71188458 Mon Sep 17 00:00:00 2001 From: Boris Date: Tue, 14 Feb 2023 17:25:46 -0600 Subject: [PATCH 15/21] Up to parity with condor --- pre-exec.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pre-exec.sh b/pre-exec.sh index 01bf6cd..f8c5298 100644 --- a/pre-exec.sh +++ b/pre-exec.sh @@ -27,6 +27,8 @@ fi DIRS_TO_CREATE=$(condor_config_val DIRS_TO_CREATE) mkdir -p $DIRS_TO_CREATE chmod 770 $DIRS_TO_CREATE && chown root:condor $DIRS_TO_CREATE +# /condor/execute root-squashed or not condor-owned: requiring world-writability +chmod 01777 $(condor_config_val EXECUTE) /update-config ####################### HOST PATHS ############################################ From 8ebd1d974d3c846ef1c7a1cf5068e75338ff82fd Mon Sep 17 00:00:00 2001 From: Boris Date: Tue, 14 Feb 2023 17:44:26 -0600 Subject: [PATCH 16/21] Specify lockfile --- kbase_worker.conf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kbase_worker.conf b/kbase_worker.conf index 169ab97..839a2fd 100644 --- a/kbase_worker.conf +++ b/kbase_worker.conf @@ -8,7 +8,8 @@ CONDOR_SHARED = /condor_shared RUN = $(LOCAL_DIR)/run EXECUTE = $(LOCAL_DIR)/execute DIRS_TO_CREATE = $(LOCAL_DIR) $(LOG) $(SPOOL) $(CONDOR_SHARED) $(RUN) $(EXECUTE) - +#LOCKFILE +LOCK = $(LOCAL_DIR)/lock # Worker Daemons DAEMON_LIST = MASTER STARTD From 917540574401fd5ec0a62b8a479579b47adbe4f0 Mon Sep 17 00:00:00 2001 From: Boris Date: Wed, 15 Feb 2023 10:47:35 -0600 Subject: [PATCH 17/21] Disable shared port --- kbase_worker.conf | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kbase_worker.conf b/kbase_worker.conf index 839a2fd..caadcd3 100644 --- a/kbase_worker.conf +++ b/kbase_worker.conf @@ -1,13 +1,14 @@ # This contains info just for the worker #Setup Directories +CDR = /cdr LOCAL_DIR = /condor LOG = $(LOCAL_DIR)/log SPOOL = $(LOCAL_DIR)/spool CONDOR_SHARED = /condor_shared RUN = $(LOCAL_DIR)/run EXECUTE = $(LOCAL_DIR)/execute -DIRS_TO_CREATE = $(LOCAL_DIR) $(LOG) $(SPOOL) $(CONDOR_SHARED) $(RUN) $(EXECUTE) +DIRS_TO_CREATE = $(LOCAL_DIR) $(LOG) $(SPOOL) $(CONDOR_SHARED) $(RUN) $(EXECUTE) $(CDR) #LOCKFILE LOCK = $(LOCAL_DIR)/lock @@ -35,4 +36,7 @@ STARTD_ATTRS = CLIENTGROUP, $(STARTD_ATTRS) NUM_CPUS = $(DETECTED_CPUS) MEMORY = $(DETECTED_MEMORY) +# Prevent Shared Port Daemon from starting +USE_SHARED_PORT=false + # To Override any settings, append to this file From 4c884c3291b3c566d695775855655aa2e231a0bf Mon Sep 17 00:00:00 2001 From: Boris Date: Wed, 15 Feb 2023 14:13:11 -0600 Subject: [PATCH 18/21] Updates --- Dockerfile | 3 +- deployment/bin/install_python_dependencies.sh | 2 + deployment/bin/start-condor.sh | 94 +++++++++---------- .../conf/.templates/deployment.cfg.templ | 61 ------------ .../conf/.templates/shared_port_config.templ | 4 - .../conf/.templates/start_server.sh.templ | 19 ---- kbase_worker.conf | 5 + pre-exec.sh | 1 + 8 files changed, 57 insertions(+), 132 deletions(-) delete mode 100644 deployment/conf/.templates/deployment.cfg.templ delete mode 100644 deployment/conf/.templates/shared_port_config.templ delete mode 100755 deployment/conf/.templates/start_server.sh.templ diff --git a/Dockerfile b/Dockerfile index 3926c26..213ec7e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,4 @@ +Dockerfile FROM htcondor/execute:9.12.0-el7 ENV container docker @@ -39,7 +40,7 @@ COPY --chown=kbase deployment/ /kb/deployment/ # Install dependencies for JobRunner ENV PATH /miniconda/bin:$PATH -RUN wget https://raw.githubusercontent.com/kbase/JobRunner/master/requirements.txt && pip install -r requirements.txt && rm requirements.txt +#RUN wget https://raw.githubusercontent.com/kbase/JobRunner/ee2/requirements.txt && pip install -r requirements.txt && rm requirements.txt RUN /kb/deployment/bin/install_python_dependencies.sh # The BUILD_DATE value seem to bust the docker cache when the timestamp changes, move to diff --git a/deployment/bin/install_python_dependencies.sh b/deployment/bin/install_python_dependencies.sh index 401325c..003a2fa 100755 --- a/deployment/bin/install_python_dependencies.sh +++ b/deployment/bin/install_python_dependencies.sh @@ -4,3 +4,5 @@ #TODO Requirements.txt source /miniconda/bin/activate pip install requests docker slackclient htcondor psutil lockfile +pip install sanic==21.9.3 docker==3.6.0 + diff --git a/deployment/bin/start-condor.sh b/deployment/bin/start-condor.sh index d2d7ac2..7af05cc 100755 --- a/deployment/bin/start-condor.sh +++ b/deployment/bin/start-condor.sh @@ -3,50 +3,50 @@ # If there is an environment variable "POOL_PASSWORD" write it out to the pool # condor pool password -if [ "$GROUPMOD_DOCKER" ] ; then - groupmod -g $GROUPMOD_DOCKER docker -fi - -if [ "$POOL_PASSWORD" ] ; then - /usr/sbin/condor_store_cred -p "$POOL_PASSWORD" -f "$(condor_config_val SEC_PASSWORD_FILE)" - condor_store_cred -p "$POOL_PASSWORD" -c add - umask 0077; condor_token_create -identity condor@mypool > /etc/condor/tokens.d/condor@mypool -fi - -if [ "$SET_NOBODY_USER_GUID" ] ; then - usermod -a -G "$SET_NOBODY_USER_GUID" nobody - usermod -a -G "$SET_NOBODY_USER_GUID" condor -# For backwards compatibility for directories already created by the kbase user - usermod -a -G "kbase" nobody -fi - -if [ "$SET_NOBODY_USER_UID" ] ; then - usermod -u "$SET_NOBODY_USER_UID" nobody -o -fi - -# Set up directory for jobs to run in, as well as a place for logs to go after a job is done. -# Not sure which one of these paths will be used for logs yet - -if [ "$CONDOR_SUBMIT_WORKDIR" ] ; then - mkdir -p "${CONDOR_SUBMIT_WORKDIR}/${EXECUTE_SUFFIX}" - chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}" - chmod 01777 "$CONDOR_SUBMIT_WORKDIR/logs" - chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/logs" - chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/../logs" -else - mkdir -p "/cdr/${EXECUTE_SUFFIX}" - chmod 01777 "/cdr/${EXECUTE_SUFFIX}" - chmod 01777 "/cdr/${EXECUTE_SUFFIX}/logs" - chmod 01777 "/cdr/${EXECUTE_SUFFIX}/../logs" -fi - -# Ensure condor user can write to logs, since this is now mounted from host -# Ensure condor user can modify the lock files and run files as of 8.9.10 -chown condor $(condor_config_val log) $(condor_config_val lock) $(condor_config_val run) - - - - - -docker system prune -a -f -exec "$(condor_config_val MASTER)" -f -t 2>&1 +#if [ "$GROUPMOD_DOCKER" ] ; then +# groupmod -g $GROUPMOD_DOCKER docker +#fi + +#if [ "$POOL_PASSWORD" ] ; then +# /usr/sbin/condor_store_cred -p "$POOL_PASSWORD" -f "$(condor_config_val SEC_PASSWORD_FILE)" +# condor_store_cred -p "$POOL_PASSWORD" -c add +# umask 0077; condor_token_create -identity condor@mypool > /etc/condor/tokens.d/condor@mypool +#fi + +#if [ "$SET_NOBODY_USER_GUID" ] ; then +# usermod -a -G "$SET_NOBODY_USER_GUID" nobody +# usermod -a -G "$SET_NOBODY_USER_GUID" condor +## For backwards compatibility for directories already created by the kbase user +# usermod -a -G "kbase" nobody +#fi +# +#if [ "$SET_NOBODY_USER_UID" ] ; then +# usermod -u "$SET_NOBODY_USER_UID" nobody -o +#fi +# +## Set up directory for jobs to run in, as well as a place for logs to go after a job is done. +## Not sure which one of these paths will be used for logs yet +# +#if [ "$CONDOR_SUBMIT_WORKDIR" ] ; then +# mkdir -p "${CONDOR_SUBMIT_WORKDIR}/${EXECUTE_SUFFIX}" +# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}" +# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/logs" +# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/logs" +# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/../logs" +#else +# mkdir -p "/cdr/${EXECUTE_SUFFIX}" +# chmod 01777 "/cdr/${EXECUTE_SUFFIX}" +# chmod 01777 "/cdr/${EXECUTE_SUFFIX}/logs" +# chmod 01777 "/cdr/${EXECUTE_SUFFIX}/../logs" +#fi +# +## Ensure condor user can write to logs, since this is now mounted from host +## Ensure condor user can modify the lock files and run files as of 8.9.10 +#chown condor $(condor_config_val log) $(condor_config_val lock) $(condor_config_val run) +# +# +# + +# +#docker system prune -a -f +#exec "$(condor_config_val MASTER)" -f -t 2>&1 diff --git a/deployment/conf/.templates/deployment.cfg.templ b/deployment/conf/.templates/deployment.cfg.templ deleted file mode 100644 index e01e8c1..0000000 --- a/deployment/conf/.templates/deployment.cfg.templ +++ /dev/null @@ -1,61 +0,0 @@ -[NarrativeJobService] -port = {{ default .Env.port "8200" }} -# server thread count - this determines the number of requests that can be -# processed simultaneously. -server-threads = {{ default .Env.server_threads "20" }} -# Minimum memory size in MB. -min-memory = {{ default .Env.min_memory "1000" }} -# Maximum memory size in MB. -max-memory = {{ default .Env.max_memory "1500" }} - -queue.db.dir={{ default .Env.queue.db.dir "/tmp/njs/queue" }} -basedir={{ default .Env.basedir "njs_wrapper" }} -scratch={{ default .Env.scratch "/tmp" }} -ref.data.base={{ default .Env.ref_data_base "/kb/data" }} - -self.external.url={{ default .Env.self_external_url "https://ci.kbase.us/services/njs_wrapper" }} -kbase.endpoint={{ default .Env.kbase_endpoint "https://ci.kbase.us/services" }} -workspace.srv.url={{ default .Env.workspace_srv_url "https://ci.kbase.us/services/ws" }} -jobstatus.srv.url={{ default .Env.jobstatus_srv_url "https://ci.kbase.us/services/userandjobstate" }} -shock.url={{ default .Env.shock_url "https://ci.kbase.us/services/shock-api" }} -awe.srv.url={{ default .Env.awe_srv_url "http://ci.kbase.us/services/awe-api" }} -docker.registry.url={{ default .Env.docker_registry_url "dockerhub-ci.kbase.us" }} -awe.client.docker.uri={{ default .Env.awe_client_docker_uri "unix:///var/run/docker.sock" }} -catalog.srv.url={{ default .Env.catalog_srv_url "https://ci.kbase.us/services/catalog" }} -handle.url={{ default .Env.handle_url "https://ci.kbase.us/services/handle_service" }} -srv.wiz.url={{ default .Env.srv_wiz_url "https://ci.kbase.us/services/service_wizard" }} -auth-service-url = {{ default .Env.auth_service_url "https://ci.kbase.us/services/auth/api/legacy/KBase/Sessions/Login" }} -auth.service.url.v2 = {{ default .Env.auth_service_url_v2 "https://ci.kbase.us/services/auth/api/V2/token" }} -auth-service-url-allow-insecure={{ default .Env.auth_service_url_allow_insecure "false" }} - - -## This user can run list_running_apps method to get states -## of all running apps (running internally on wrapper side). -admin.user={{default .Env.admin_user "" }} - -# Following parameters define Catalog admin creds for pushing exec-stats: -catalog.admin.token={{ default .Env.catalog_token "" }} - -default.awe.client.groups={{ default .Env.default_awe_client_groups "ci" }} -awe.readonly.admin.token={{ default .Env.awe_token "" }} -awe.client.callback.networks={{ default .Env.awe_client_callback_networks "docker0,eth0" }} -running.tasks.per.user={{ default .Env.running_tasks_per_user "5" }} - -mongodb-host = {{ default .Env.mongodb_host "localhost:27017" }} -mongodb-database = {{ default .Env.mongodb_database "exec_engine" }} -mongodb-user = {{ default .Env.mongodb_user "" }} -mongodb-pwd = {{ default .Env.mongodb_pwd "" }} - -ujs-mongodb-host = {{ default .Env.ujs_mongodb_host "localhost:27017" }} -ujs-mongodb-database = {{ default .Env.ujs_mongodb_database "userjobstate" }} -ujs-mongodb-user = {{ default .Env.ujs_mongodb_user "" }} -ujs-mongodb-pwd = {{ default .Env.ujs_mongodb_pwd "" }} - -narrative.proxy.sharing.user={{ default .Env.narrative_proxy_sharing_user "narrativejoblistener" }} - -condor.mode={{ default .Env.condor_mode "1" }} -condor.submit.desc.file.path={{ default .Env.condor_submit_desc_file_path "/kb/deployment/misc/" }} -condor-submit-workdir={{ default .Env.condor_submit_workdir "/mnt/condor" }} - -## Formula = Token Expiration in ms - (Time_Before_Expiration / 60 ) * 1000 -time.before.expiration = {{ default .Env.time_before_expiration "10" }} \ No newline at end of file diff --git a/deployment/conf/.templates/shared_port_config.templ b/deployment/conf/.templates/shared_port_config.templ deleted file mode 100644 index 1ff2405..0000000 --- a/deployment/conf/.templates/shared_port_config.templ +++ /dev/null @@ -1,4 +0,0 @@ -SHARED_PORT_ARGS = -p {{ default .Env.SHARED_PORT_CONFIG "9618" }} -DAEMON_LIST = $(DAEMON_LIST), SHARED_PORT -COLLECTOR_HOST = $(CONDOR_HOST)?sock=collector -USE_SHARED_PORT = TRUE diff --git a/deployment/conf/.templates/start_server.sh.templ b/deployment/conf/.templates/start_server.sh.templ deleted file mode 100755 index 67aedf4..0000000 --- a/deployment/conf/.templates/start_server.sh.templ +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh -# - -# If there is an environment variable "POOL_PASSWORD" write it out to the -# condor pool password file before starting the java services -if [ "$POOL_PASSWORD" ] ; then - /usr/sbin/condor_store_cred -p "$POOL_PASSWORD" -f `condor_config_val SEC_PASSWORD_FILE` -fi - -chown kbase `condor_config_val SEC_PASSWORD_FILE` - -JAVA_OPTS="-Djava.awt.headless=true -server \ - -Xms{{ default .Env.min_memory "1000" }}m -Xmx{{ default .Env.max_memory "3000" }}m \ - -XX:+UseG1GC" - -# This script assumes that the docker base image includes a runnable JETTY environment -# that provides $JETTY_HOME. Currently the base image is built on library/jetty:jre8 -# su --preserve-environment kbase -c "exec java -DSTOP.PORT=8079 -DSTOP.KEY=foo -Djetty.home=$JETTY_HOME -jar $JETTY_HOME/start.jar" -exec java -DSTOP.PORT=8079 -DSTOP.KEY=foo -Djetty.home=$JETTY_HOME -jar $JETTY_HOME/start.jar diff --git a/kbase_worker.conf b/kbase_worker.conf index caadcd3..d659aa0 100644 --- a/kbase_worker.conf +++ b/kbase_worker.conf @@ -28,6 +28,9 @@ EXECUTE = /cdr/staging # Add clientgroup to something that jobs can match against! STARTD_ATTRS = CLIENTGROUP, $(STARTD_ATTRS) +# We don't need core files, but can enable them for persistent crashes +CREATE_CORE_FILES = false + # Todo Enable This When Cronjobs are back! #NODE_IS_HEALTHY = False #START = (NODE_IS_HEALTHY =?= True) @@ -39,4 +42,6 @@ MEMORY = $(DETECTED_MEMORY) # Prevent Shared Port Daemon from starting USE_SHARED_PORT=false + + # To Override any settings, append to this file diff --git a/pre-exec.sh b/pre-exec.sh index f8c5298..8e81de7 100644 --- a/pre-exec.sh +++ b/pre-exec.sh @@ -29,6 +29,7 @@ mkdir -p $DIRS_TO_CREATE chmod 770 $DIRS_TO_CREATE && chown root:condor $DIRS_TO_CREATE # /condor/execute root-squashed or not condor-owned: requiring world-writability chmod 01777 $(condor_config_val EXECUTE) +chmod 01777 /var/run/docker.sock /update-config ####################### HOST PATHS ############################################ From f8ae3416ad578ed6f2a69f86948ea00d4843ae4b Mon Sep 17 00:00:00 2001 From: Boris Date: Wed, 15 Feb 2023 14:14:14 -0600 Subject: [PATCH 19/21] Updates --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 213ec7e..0be1d60 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,3 @@ -Dockerfile FROM htcondor/execute:9.12.0-el7 ENV container docker From bcc13a6d15eac8365abbc3c42f709a451eacda92 Mon Sep 17 00:00:00 2001 From: Boris Date: Wed, 15 Feb 2023 16:26:19 -0600 Subject: [PATCH 20/21] Permissions --- kbase_worker.conf | 7 +++---- pre-exec.sh | 30 +++++++++++++++--------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/kbase_worker.conf b/kbase_worker.conf index d659aa0..676784e 100644 --- a/kbase_worker.conf +++ b/kbase_worker.conf @@ -9,8 +9,10 @@ CONDOR_SHARED = /condor_shared RUN = $(LOCAL_DIR)/run EXECUTE = $(LOCAL_DIR)/execute DIRS_TO_CREATE = $(LOCAL_DIR) $(LOG) $(SPOOL) $(CONDOR_SHARED) $(RUN) $(EXECUTE) $(CDR) -#LOCKFILE + +#These need to be 770 or 777 LOCK = $(LOCAL_DIR)/lock +DOCKER_SOCKET = /var/run/docker.sock # Worker Daemons DAEMON_LIST = MASTER STARTD @@ -22,9 +24,6 @@ CGROUP_MEMORY_LIMIT_POLICY = soft # CLIENTGROUP Default for allowing multiple jobs to run at once CLIENTGROUP = "njs,bigmem,extreme" -# Useful for running multiple workers on the same machine -EXECUTE = /cdr/staging - # Add clientgroup to something that jobs can match against! STARTD_ATTRS = CLIENTGROUP, $(STARTD_ATTRS) diff --git a/pre-exec.sh b/pre-exec.sh index 8e81de7..14e730d 100644 --- a/pre-exec.sh +++ b/pre-exec.sh @@ -2,6 +2,7 @@ # See documentation at https://github.com/htcondor/htcondor/tree/main/build/docker/services#providing-additional-configuration +######################## Required Values BEGIN ######################################## if [ "$CONDOR_JWT_TOKEN" ] ; then echo "$CONDOR_JWT_TOKEN" > /etc/condor/tokens.d/JWT chmod 600 /etc/condor/tokens.d/JWT @@ -11,6 +12,13 @@ if [ "$COLLECTOR_HOST" ] ; then echo "COLLECTOR_HOST = $COLLECTOR_HOST" >> /etc/condor/condor_config.local fi +# Change execute directory for multiple works on the same host, instead of the default /cdr/ dir +if [ "$EXECUTE" ] ; then + echo "EXECUTE = $EXECUTE >> /etc/condor/condor_config.local" + +fi +######################## Required Values END ########################################## + #Note the clientgroup will require quotation marks in the env variable if [ "$CLIENTGROUP" ] ; then echo "CLIENTGROUP = $CLIENTGROUP >> /etc/condor/condor_config.local" @@ -22,25 +30,17 @@ if [ "$DOCKER_SYSTEM_PRUNE" ] ; then fi + + + #TODO Make nobody user able to run jobs ####################### HOST PATHS ############################################ DIRS_TO_CREATE=$(condor_config_val DIRS_TO_CREATE) mkdir -p $DIRS_TO_CREATE -chmod 770 $DIRS_TO_CREATE && chown root:condor $DIRS_TO_CREATE +chmod 01777 $DIRS_TO_CREATE && chown root:condor $DIRS_TO_CREATE + # /condor/execute root-squashed or not condor-owned: requiring world-writability chmod 01777 $(condor_config_val EXECUTE) -chmod 01777 /var/run/docker.sock -/update-config +chmod 01777 $(condor_config_val DOCKER_SOCKET) ####################### HOST PATHS ############################################ - - - -# Ensure condor user can write to logs, since these are mounted onto host -chown condor $(condor_config_val log) $(condor_config_val lock) $(condor_config_val run) - - - - - - -/update-config +/update-config \ No newline at end of file From 2bf32a0cedef96dea5f88298f46182a0affcdb85 Mon Sep 17 00:00:00 2001 From: Boris Date: Wed, 15 Feb 2023 16:54:49 -0600 Subject: [PATCH 21/21] Permissions and mount dir --- pre-exec.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pre-exec.sh b/pre-exec.sh index 14e730d..429546a 100644 --- a/pre-exec.sh +++ b/pre-exec.sh @@ -12,16 +12,16 @@ if [ "$COLLECTOR_HOST" ] ; then echo "COLLECTOR_HOST = $COLLECTOR_HOST" >> /etc/condor/condor_config.local fi -# Change execute directory for multiple works on the same host, instead of the default /cdr/ dir +# This has to be the same exact path as the mount otherwise the JobRunner doesn't understand relative mounts +# e.g. You cannot mount /cdr/staging as /execute, you must mount /cdr/staging as /cdr/staging if [ "$EXECUTE" ] ; then - echo "EXECUTE = $EXECUTE >> /etc/condor/condor_config.local" - + echo "EXECUTE = $EXECUTE" >> /etc/condor/condor_config.local fi ######################## Required Values END ########################################## #Note the clientgroup will require quotation marks in the env variable if [ "$CLIENTGROUP" ] ; then - echo "CLIENTGROUP = $CLIENTGROUP >> /etc/condor/condor_config.local" + echo "CLIENTGROUP = $CLIENTGROUP" >> /etc/condor/condor_config.local fi # To keep docker partition from filling up