diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml new file mode 100644 index 0000000..944f903 --- /dev/null +++ b/.github/workflows/manual-build.yml @@ -0,0 +1,11 @@ +--- +name: Manual Build & Push +on: + workflow_dispatch: +jobs: + build-push: + uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + with: + name: '${{ github.event.repository.name }}-develop' + tags: br-${{ github.ref_name }} + secrets: inherit diff --git a/Dockerfile b/Dockerfile index 7bac474..0be1d60 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,11 @@ -FROM htcondor/execute:9.7-el7 +FROM htcondor/execute:9.12.0-el7 ENV container docker -# Ge$t commonly used utilities +# See https://www-auth.cs.wisc.edu/lists/htcondor-users/2014-August/msg00044.shtml +COPY pre-exec.sh /root/config/pre-exec.sh +COPY kbase_worker.conf /etc/condor/condor_config.local + +# Get commonly used utilities RUN yum install -y deltarpm RUN yum -y update && yum upgrade -y RUN yum -y install -y epel-release wget which git deltarpm gcc libcgroup libcgroup-tools stress-ng tmpwatch @@ -9,7 +13,6 @@ RUN yum -y install -y epel-release wget which git deltarpm gcc libcgroup libcgro # Install docker binaries RUN yum install -y yum-utils device-mapper-persistent-data lvm2 && yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo && yum install -y docker-ce - #Install Python3 and Libraries (source /root/miniconda/bin/activate) RUN yum install -y bzip2 \ && wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh \ @@ -29,9 +32,6 @@ RUN wget -N https://github.com/kbase/dockerize/raw/master/dockerize-linux-amd64- # Also add the user to the groups that map to "docker" on Linux and "daemon" on Mac RUN usermod -a -G 0 kbase && usermod -a -G 999 kbase -#ADD DIRS -RUN mkdir -p /var/run/condor && mkdir -p /var/log/condor && mkdir -p /var/lock/condor && mkdir -p /var/lib/condor/execute - # Maybe you want: rm -rf 
/var/cache/yum, to also free up space taken by orphaned data from disabled or removed repos RUN rm -rf /var/cache/yum @@ -39,7 +39,7 @@ COPY --chown=kbase deployment/ /kb/deployment/ # Install dependencies for JobRunner ENV PATH /miniconda/bin:$PATH -RUN wget https://raw.githubusercontent.com/kbase/JobRunner/master/requirements.txt && pip install -r requirements.txt && rm requirements.txt +#RUN wget https://raw.githubusercontent.com/kbase/JobRunner/ee2/requirements.txt && pip install -r requirements.txt && rm requirements.txt RUN /kb/deployment/bin/install_python_dependencies.sh # The BUILD_DATE value seem to bust the docker cache when the timestamp changes, move to @@ -51,13 +51,4 @@ LABEL org.label-schema.build-date=$BUILD_DATE \ us.kbase.vcs-branch=$BRANCH \ maintainer="Steve Chan sychan@lbl.gov" -ENTRYPOINT [ "/kb/deployment/bin/dockerize" ] -CMD [ "-template", "/kb/deployment/conf/.templates/deployment.cfg.templ:/kb/deployment/conf/deployment.cfg", \ - "-template", "/kb/deployment/conf/.templates/http.ini.templ:/kb/deployment/jettybase/start.d/http.ini", \ - "-template", "/kb/deployment/conf/.templates/server.ini.templ:/kb/deployment/jettybase/start.d/server.ini", \ - "-template", "/kb/deployment/conf/.templates/start_server.sh.templ:/kb/deployment/bin/start_server.sh", \ - "-template", "/kb/deployment/conf/.templates/condor_config.templ:/etc/condor/condor_config.local", \ - "-stdout", "/kb/deployment/jettybase/logs/request.log", \ - "/kb/deployment/bin/start_server.sh" ] - WORKDIR /kb/deployment/jettybase diff --git a/deployment/bin/install_python_dependencies.sh b/deployment/bin/install_python_dependencies.sh index 401325c..003a2fa 100755 --- a/deployment/bin/install_python_dependencies.sh +++ b/deployment/bin/install_python_dependencies.sh @@ -4,3 +4,5 @@ #TODO Requirements.txt source /miniconda/bin/activate pip install requests docker slackclient htcondor psutil lockfile +pip install sanic==21.9.3 docker==3.6.0 + diff --git a/deployment/bin/start-condor.sh 
b/deployment/bin/start-condor.sh index d2d7ac2..7af05cc 100755 --- a/deployment/bin/start-condor.sh +++ b/deployment/bin/start-condor.sh @@ -3,50 +3,50 @@ # If there is an environment variable "POOL_PASSWORD" write it out to the pool # condor pool password -if [ "$GROUPMOD_DOCKER" ] ; then - groupmod -g $GROUPMOD_DOCKER docker -fi - -if [ "$POOL_PASSWORD" ] ; then - /usr/sbin/condor_store_cred -p "$POOL_PASSWORD" -f "$(condor_config_val SEC_PASSWORD_FILE)" - condor_store_cred -p "$POOL_PASSWORD" -c add - umask 0077; condor_token_create -identity condor@mypool > /etc/condor/tokens.d/condor@mypool -fi - -if [ "$SET_NOBODY_USER_GUID" ] ; then - usermod -a -G "$SET_NOBODY_USER_GUID" nobody - usermod -a -G "$SET_NOBODY_USER_GUID" condor -# For backwards compatibility for directories already created by the kbase user - usermod -a -G "kbase" nobody -fi - -if [ "$SET_NOBODY_USER_UID" ] ; then - usermod -u "$SET_NOBODY_USER_UID" nobody -o -fi - -# Set up directory for jobs to run in, as well as a place for logs to go after a job is done. 
-# Not sure which one of these paths will be used for logs yet - -if [ "$CONDOR_SUBMIT_WORKDIR" ] ; then - mkdir -p "${CONDOR_SUBMIT_WORKDIR}/${EXECUTE_SUFFIX}" - chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}" - chmod 01777 "$CONDOR_SUBMIT_WORKDIR/logs" - chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/logs" - chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/../logs" -else - mkdir -p "/cdr/${EXECUTE_SUFFIX}" - chmod 01777 "/cdr/${EXECUTE_SUFFIX}" - chmod 01777 "/cdr/${EXECUTE_SUFFIX}/logs" - chmod 01777 "/cdr/${EXECUTE_SUFFIX}/../logs" -fi - -# Ensure condor user can write to logs, since this is now mounted from host -# Ensure condor user can modify the lock files and run files as of 8.9.10 -chown condor $(condor_config_val log) $(condor_config_val lock) $(condor_config_val run) - - - - - -docker system prune -a -f -exec "$(condor_config_val MASTER)" -f -t 2>&1 +#if [ "$GROUPMOD_DOCKER" ] ; then +# groupmod -g $GROUPMOD_DOCKER docker +#fi + +#if [ "$POOL_PASSWORD" ] ; then +# /usr/sbin/condor_store_cred -p "$POOL_PASSWORD" -f "$(condor_config_val SEC_PASSWORD_FILE)" +# condor_store_cred -p "$POOL_PASSWORD" -c add +# umask 0077; condor_token_create -identity condor@mypool > /etc/condor/tokens.d/condor@mypool +#fi + +#if [ "$SET_NOBODY_USER_GUID" ] ; then +# usermod -a -G "$SET_NOBODY_USER_GUID" nobody +# usermod -a -G "$SET_NOBODY_USER_GUID" condor +## For backwards compatibility for directories already created by the kbase user +# usermod -a -G "kbase" nobody +#fi +# +#if [ "$SET_NOBODY_USER_UID" ] ; then +# usermod -u "$SET_NOBODY_USER_UID" nobody -o +#fi +# +## Set up directory for jobs to run in, as well as a place for logs to go after a job is done. 
+## Not sure which one of these paths will be used for logs yet +# +#if [ "$CONDOR_SUBMIT_WORKDIR" ] ; then +# mkdir -p "${CONDOR_SUBMIT_WORKDIR}/${EXECUTE_SUFFIX}" +# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}" +# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/logs" +# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/logs" +# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/../logs" +#else +# mkdir -p "/cdr/${EXECUTE_SUFFIX}" +# chmod 01777 "/cdr/${EXECUTE_SUFFIX}" +# chmod 01777 "/cdr/${EXECUTE_SUFFIX}/logs" +# chmod 01777 "/cdr/${EXECUTE_SUFFIX}/../logs" +#fi +# +## Ensure condor user can write to logs, since this is now mounted from host +## Ensure condor user can modify the lock files and run files as of 8.9.10 +#chown condor $(condor_config_val log) $(condor_config_val lock) $(condor_config_val run) +# +# +# + +# +#docker system prune -a -f +#exec "$(condor_config_val MASTER)" -f -t 2>&1 diff --git a/deployment/conf/.templates/deployment.cfg.templ b/deployment/conf/.templates/deployment.cfg.templ deleted file mode 100644 index e01e8c1..0000000 --- a/deployment/conf/.templates/deployment.cfg.templ +++ /dev/null @@ -1,61 +0,0 @@ -[NarrativeJobService] -port = {{ default .Env.port "8200" }} -# server thread count - this determines the number of requests that can be -# processed simultaneously. -server-threads = {{ default .Env.server_threads "20" }} -# Minimum memory size in MB. -min-memory = {{ default .Env.min_memory "1000" }} -# Maximum memory size in MB. 
-max-memory = {{ default .Env.max_memory "1500" }} - -queue.db.dir={{ default .Env.queue.db.dir "/tmp/njs/queue" }} -basedir={{ default .Env.basedir "njs_wrapper" }} -scratch={{ default .Env.scratch "/tmp" }} -ref.data.base={{ default .Env.ref_data_base "/kb/data" }} - -self.external.url={{ default .Env.self_external_url "https://ci.kbase.us/services/njs_wrapper" }} -kbase.endpoint={{ default .Env.kbase_endpoint "https://ci.kbase.us/services" }} -workspace.srv.url={{ default .Env.workspace_srv_url "https://ci.kbase.us/services/ws" }} -jobstatus.srv.url={{ default .Env.jobstatus_srv_url "https://ci.kbase.us/services/userandjobstate" }} -shock.url={{ default .Env.shock_url "https://ci.kbase.us/services/shock-api" }} -awe.srv.url={{ default .Env.awe_srv_url "http://ci.kbase.us/services/awe-api" }} -docker.registry.url={{ default .Env.docker_registry_url "dockerhub-ci.kbase.us" }} -awe.client.docker.uri={{ default .Env.awe_client_docker_uri "unix:///var/run/docker.sock" }} -catalog.srv.url={{ default .Env.catalog_srv_url "https://ci.kbase.us/services/catalog" }} -handle.url={{ default .Env.handle_url "https://ci.kbase.us/services/handle_service" }} -srv.wiz.url={{ default .Env.srv_wiz_url "https://ci.kbase.us/services/service_wizard" }} -auth-service-url = {{ default .Env.auth_service_url "https://ci.kbase.us/services/auth/api/legacy/KBase/Sessions/Login" }} -auth.service.url.v2 = {{ default .Env.auth_service_url_v2 "https://ci.kbase.us/services/auth/api/V2/token" }} -auth-service-url-allow-insecure={{ default .Env.auth_service_url_allow_insecure "false" }} - - -## This user can run list_running_apps method to get states -## of all running apps (running internally on wrapper side). 
-admin.user={{default .Env.admin_user "" }} - -# Following parameters define Catalog admin creds for pushing exec-stats: -catalog.admin.token={{ default .Env.catalog_token "" }} - -default.awe.client.groups={{ default .Env.default_awe_client_groups "ci" }} -awe.readonly.admin.token={{ default .Env.awe_token "" }} -awe.client.callback.networks={{ default .Env.awe_client_callback_networks "docker0,eth0" }} -running.tasks.per.user={{ default .Env.running_tasks_per_user "5" }} - -mongodb-host = {{ default .Env.mongodb_host "localhost:27017" }} -mongodb-database = {{ default .Env.mongodb_database "exec_engine" }} -mongodb-user = {{ default .Env.mongodb_user "" }} -mongodb-pwd = {{ default .Env.mongodb_pwd "" }} - -ujs-mongodb-host = {{ default .Env.ujs_mongodb_host "localhost:27017" }} -ujs-mongodb-database = {{ default .Env.ujs_mongodb_database "userjobstate" }} -ujs-mongodb-user = {{ default .Env.ujs_mongodb_user "" }} -ujs-mongodb-pwd = {{ default .Env.ujs_mongodb_pwd "" }} - -narrative.proxy.sharing.user={{ default .Env.narrative_proxy_sharing_user "narrativejoblistener" }} - -condor.mode={{ default .Env.condor_mode "1" }} -condor.submit.desc.file.path={{ default .Env.condor_submit_desc_file_path "/kb/deployment/misc/" }} -condor-submit-workdir={{ default .Env.condor_submit_workdir "/mnt/condor" }} - -## Formula = Token Expiration in ms - (Time_Before_Expiration / 60 ) * 1000 -time.before.expiration = {{ default .Env.time_before_expiration "10" }} \ No newline at end of file diff --git a/deployment/conf/.templates/shared_port_config.templ b/deployment/conf/.templates/shared_port_config.templ deleted file mode 100644 index 1ff2405..0000000 --- a/deployment/conf/.templates/shared_port_config.templ +++ /dev/null @@ -1,4 +0,0 @@ -SHARED_PORT_ARGS = -p {{ default .Env.SHARED_PORT_CONFIG "9618" }} -DAEMON_LIST = $(DAEMON_LIST), SHARED_PORT -COLLECTOR_HOST = $(CONDOR_HOST)?sock=collector -USE_SHARED_PORT = TRUE diff --git a/deployment/conf/.templates/start_server.sh.templ 
b/deployment/conf/.templates/start_server.sh.templ deleted file mode 100755 index 67aedf4..0000000 --- a/deployment/conf/.templates/start_server.sh.templ +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh -# - -# If there is an environment variable "POOL_PASSWORD" write it out to the -# condor pool password file before starting the java services -if [ "$POOL_PASSWORD" ] ; then - /usr/sbin/condor_store_cred -p "$POOL_PASSWORD" -f `condor_config_val SEC_PASSWORD_FILE` -fi - -chown kbase `condor_config_val SEC_PASSWORD_FILE` - -JAVA_OPTS="-Djava.awt.headless=true -server \ - -Xms{{ default .Env.min_memory "1000" }}m -Xmx{{ default .Env.max_memory "3000" }}m \ - -XX:+UseG1GC" - -# This script assumes that the docker base image includes a runnable JETTY environment -# that provides $JETTY_HOME. Currently the base image is built on library/jetty:jre8 -# su --preserve-environment kbase -c "exec java -DSTOP.PORT=8079 -DSTOP.KEY=foo -Djetty.home=$JETTY_HOME -jar $JETTY_HOME/start.jar" -exec java -DSTOP.PORT=8079 -DSTOP.KEY=foo -Djetty.home=$JETTY_HOME -jar $JETTY_HOME/start.jar diff --git a/hooks/build b/hooks/build deleted file mode 100644 index 0b09918..0000000 --- a/hooks/build +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# $IMAGE_NAME var is injected into the build so the tag is correct. - -echo "Build hook running" -echo "IMAGE_NAME = ${IMAGE_NAME}" -echo "DOCKER_REPO = ${DOCKER_REPO}" -export BRANCH=${SOURCE_BRANCH:-`git symbolic-ref --short HEAD`} - -export DATE=`date -u +"%Y-%m-%dT%H:%M:%SZ"` -export COMMIT=${SOURCE_COMMIT:-`git rev-parse --short HEAD`} -docker build --build-arg BUILD_DATE=$DATE \ - --build-arg VCS_REF=$COMMIT \ - --build-arg BRANCH=$BRANCH \ - -t ${IMAGE_NAME} . 
&& \
-echo "Built and tagged ${IMAGE_NAME}"
diff --git a/kbase_worker.conf b/kbase_worker.conf
new file mode 100644
index 0000000..676784e
--- /dev/null
+++ b/kbase_worker.conf
@@ -0,0 +1,46 @@
+# HTCondor configuration for the KBase worker node only
+
+# Set up directories (pre-exec.sh creates everything listed in DIRS_TO_CREATE)
+CDR = /cdr
+LOCAL_DIR = /condor
+LOG = $(LOCAL_DIR)/log
+SPOOL = $(LOCAL_DIR)/spool
+CONDOR_SHARED = /condor_shared
+RUN = $(LOCAL_DIR)/run
+EXECUTE = $(LOCAL_DIR)/execute
+DIRS_TO_CREATE = $(LOCAL_DIR) $(LOG) $(SPOOL) $(CONDOR_SHARED) $(RUN) $(EXECUTE) $(CDR)
+
+#These need to be 770 or 777
+LOCK = $(LOCAL_DIR)/lock
+DOCKER_SOCKET = /var/run/docker.sock
+
+# Worker Daemons
+DAEMON_LIST = MASTER STARTD
+
+# CGROUPS
+BASE_CGROUP = htcondor
+CGROUP_MEMORY_LIMIT_POLICY = soft
+
+# CLIENTGROUP Default for allowing multiple jobs to run at once
+CLIENTGROUP = "njs,bigmem,extreme"
+
+# Add clientgroup to something that jobs can match against!
+STARTD_ATTRS = CLIENTGROUP, $(STARTD_ATTRS)
+
+# We don't need core files, but can enable them for persistent crashes
+CREATE_CORE_FILES = false
+
+# TODO: Enable this when cron jobs are back!
+#NODE_IS_HEALTHY = False
+#START = (NODE_IS_HEALTHY =?= True)
+
+# Advertise CPUS and Memory as detected
+NUM_CPUS = $(DETECTED_CPUS)
+MEMORY = $(DETECTED_MEMORY)
+
+# Prevent Shared Port Daemon from starting
+USE_SHARED_PORT=false
+
+
+
+# To override any settings, append to this file (pre-exec.sh appends env-provided values)
diff --git a/pre-exec.sh b/pre-exec.sh
new file mode 100644
index 0000000..429546a
--- /dev/null
+++ b/pre-exec.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# See documentation at https://github.com/htcondor/htcondor/tree/main/build/docker/services#providing-additional-configuration
+
+######################## Required Values BEGIN ########################################
+if [ "$CONDOR_JWT_TOKEN" ] ; then
+    echo "$CONDOR_JWT_TOKEN" > /etc/condor/tokens.d/JWT
+    chmod 600 /etc/condor/tokens.d/JWT
+fi
+
+if [ "$COLLECTOR_HOST" ] ; then
+    echo "COLLECTOR_HOST = $COLLECTOR_HOST" >> /etc/condor/condor_config.local
+fi
+
+# This has to be the same exact path as the mount otherwise the JobRunner doesn't understand relative mounts
+# e.g. You cannot mount /cdr/staging as /execute, you must mount /cdr/staging as /cdr/staging
+if [ "$EXECUTE" ] ; then
+    echo "EXECUTE = $EXECUTE" >> /etc/condor/condor_config.local
+fi
+######################## Required Values END ##########################################
+
+#Note the clientgroup will require quotation marks in the env variable
+if [ "$CLIENTGROUP" ] ; then
+    echo "CLIENTGROUP = $CLIENTGROUP" >> /etc/condor/condor_config.local
+fi
+
+# To keep the docker partition from filling up (-a: all unused images, -f: do not prompt)
+if [ "$DOCKER_SYSTEM_PRUNE" ] ; then
+    docker system prune -a -f
+fi
+
+
+
+
+
+#TODO Make nobody user able to run jobs
+####################### HOST PATHS ############################################
+DIRS_TO_CREATE=$(condor_config_val DIRS_TO_CREATE)
+mkdir -p $DIRS_TO_CREATE
+chmod 01777 $DIRS_TO_CREATE && chown root:condor $DIRS_TO_CREATE
+
+# /condor/execute root-squashed or not condor-owned: requiring world-writability
+chmod 01777 $(condor_config_val EXECUTE)
+chmod 01777 $(condor_config_val DOCKER_SOCKET)
+####################### HOST PATHS ############################################
+/update-config
\ No newline at end of file