Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Condor Version and add Manual Builds #56

Open
wants to merge 22 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/workflows/manual-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Manually triggered image build: run it from the Actions tab ("Run workflow")
# against any branch to build and push an image for that branch.
name: Manual Build & Push
on:
  workflow_dispatch:
jobs:
  build-push:
    # Delegates the actual build/push to the organisation-wide reusable workflow.
    uses: kbase/.github/.github/workflows/reusable_build-push.yml@main
    with:
      # Image name is "<repository>-develop".
      name: '${{ github.event.repository.name }}-develop'
      # Tag is "br-<ref name>" of the ref the workflow was dispatched on.
      # NOTE(review): github.ref_name can contain "/" (e.g. "feature/x"), which
      # is not valid in an image tag - confirm the reusable workflow sanitizes it.
      tags: br-${{ github.ref_name }}
    # Pass this repository's secrets through to the called workflow.
    secrets: inherit
23 changes: 7 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
FROM htcondor/execute:9.7-el7
FROM htcondor/execute:9.12.0-el7
ENV container docker

# Get commonly used utilities
# See https://www-auth.cs.wisc.edu/lists/htcondor-users/2014-August/msg00044.shtml
COPY pre-exec.sh /root/config/pre-exec.sh
COPY kbase_worker.conf /etc/condor/condor_config.local

# Get commonly used utilities
RUN yum install -y deltarpm
RUN yum -y update && yum upgrade -y
RUN yum -y install -y epel-release wget which git deltarpm gcc libcgroup libcgroup-tools stress-ng tmpwatch

# Install docker binaries
RUN yum install -y yum-utils device-mapper-persistent-data lvm2 && yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo && yum install -y docker-ce


#Install Python3 and Libraries (source /root/miniconda/bin/activate)
RUN yum install -y bzip2 \
&& wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh \
Expand All @@ -29,17 +32,14 @@ RUN wget -N https://github.com/kbase/dockerize/raw/master/dockerize-linux-amd64-
# Also add the user to the groups that map to "docker" on Linux and "daemon" on Mac
RUN usermod -a -G 0 kbase && usermod -a -G 999 kbase

#ADD DIRS
RUN mkdir -p /var/run/condor && mkdir -p /var/log/condor && mkdir -p /var/lock/condor && mkdir -p /var/lib/condor/execute

# Maybe you want: rm -rf /var/cache/yum, to also free up space taken by orphaned data from disabled or removed repos
RUN rm -rf /var/cache/yum

COPY --chown=kbase deployment/ /kb/deployment/

# Install dependencies for JobRunner
ENV PATH /miniconda/bin:$PATH
RUN wget https://raw.githubusercontent.com/kbase/JobRunner/master/requirements.txt && pip install -r requirements.txt && rm requirements.txt
#RUN wget https://raw.githubusercontent.com/kbase/JobRunner/ee2/requirements.txt && pip install -r requirements.txt && rm requirements.txt
RUN /kb/deployment/bin/install_python_dependencies.sh

# The BUILD_DATE value seems to bust the docker cache when the timestamp changes, move to
Expand All @@ -51,13 +51,4 @@ LABEL org.label-schema.build-date=$BUILD_DATE \
us.kbase.vcs-branch=$BRANCH \
maintainer="Steve Chan [email protected]"

ENTRYPOINT [ "/kb/deployment/bin/dockerize" ]
CMD [ "-template", "/kb/deployment/conf/.templates/deployment.cfg.templ:/kb/deployment/conf/deployment.cfg", \
"-template", "/kb/deployment/conf/.templates/http.ini.templ:/kb/deployment/jettybase/start.d/http.ini", \
"-template", "/kb/deployment/conf/.templates/server.ini.templ:/kb/deployment/jettybase/start.d/server.ini", \
"-template", "/kb/deployment/conf/.templates/start_server.sh.templ:/kb/deployment/bin/start_server.sh", \
"-template", "/kb/deployment/conf/.templates/condor_config.templ:/etc/condor/condor_config.local", \
"-stdout", "/kb/deployment/jettybase/logs/request.log", \
"/kb/deployment/bin/start_server.sh" ]

WORKDIR /kb/deployment/jettybase
2 changes: 2 additions & 0 deletions deployment/bin/install_python_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
#TODO Requirements.txt
source /miniconda/bin/activate
pip install requests docker slackclient htcondor psutil lockfile
pip install sanic==21.9.3 docker==3.6.0

94 changes: 47 additions & 47 deletions deployment/bin/start-condor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,50 +3,50 @@
# If the environment variable "POOL_PASSWORD" is set, store it as the
# condor pool password

if [ "$GROUPMOD_DOCKER" ] ; then
groupmod -g $GROUPMOD_DOCKER docker
fi

if [ "$POOL_PASSWORD" ] ; then
/usr/sbin/condor_store_cred -p "$POOL_PASSWORD" -f "$(condor_config_val SEC_PASSWORD_FILE)"
condor_store_cred -p "$POOL_PASSWORD" -c add
umask 0077; condor_token_create -identity condor@mypool > /etc/condor/tokens.d/condor@mypool
fi

if [ "$SET_NOBODY_USER_GUID" ] ; then
usermod -a -G "$SET_NOBODY_USER_GUID" nobody
usermod -a -G "$SET_NOBODY_USER_GUID" condor
# For backwards compatibility for directories already created by the kbase user
usermod -a -G "kbase" nobody
fi

if [ "$SET_NOBODY_USER_UID" ] ; then
usermod -u "$SET_NOBODY_USER_UID" nobody -o
fi

# Set up directory for jobs to run in, as well as a place for logs to go after a job is done.
# Not sure which one of these paths will be used for logs yet

if [ "$CONDOR_SUBMIT_WORKDIR" ] ; then
mkdir -p "${CONDOR_SUBMIT_WORKDIR}/${EXECUTE_SUFFIX}"
chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}"
chmod 01777 "$CONDOR_SUBMIT_WORKDIR/logs"
chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/logs"
chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/../logs"
else
mkdir -p "/cdr/${EXECUTE_SUFFIX}"
chmod 01777 "/cdr/${EXECUTE_SUFFIX}"
chmod 01777 "/cdr/${EXECUTE_SUFFIX}/logs"
chmod 01777 "/cdr/${EXECUTE_SUFFIX}/../logs"
fi

# Ensure condor user can write to logs, since this is now mounted from host
# Ensure condor user can modify the lock files and run files as of 8.9.10
chown condor $(condor_config_val log) $(condor_config_val lock) $(condor_config_val run)





docker system prune -a -f
exec "$(condor_config_val MASTER)" -f -t 2>&1
#if [ "$GROUPMOD_DOCKER" ] ; then
# groupmod -g $GROUPMOD_DOCKER docker
#fi

#if [ "$POOL_PASSWORD" ] ; then
# /usr/sbin/condor_store_cred -p "$POOL_PASSWORD" -f "$(condor_config_val SEC_PASSWORD_FILE)"
# condor_store_cred -p "$POOL_PASSWORD" -c add
# umask 0077; condor_token_create -identity condor@mypool > /etc/condor/tokens.d/condor@mypool
#fi

#if [ "$SET_NOBODY_USER_GUID" ] ; then
# usermod -a -G "$SET_NOBODY_USER_GUID" nobody
# usermod -a -G "$SET_NOBODY_USER_GUID" condor
## For backwards compatibility for directories already created by the kbase user
# usermod -a -G "kbase" nobody
#fi
#
#if [ "$SET_NOBODY_USER_UID" ] ; then
# usermod -u "$SET_NOBODY_USER_UID" nobody -o
#fi
#
## Set up directory for jobs to run in, as well as a place for logs to go after a job is done.
## Not sure which one of these paths will be used for logs yet
#
#if [ "$CONDOR_SUBMIT_WORKDIR" ] ; then
# mkdir -p "${CONDOR_SUBMIT_WORKDIR}/${EXECUTE_SUFFIX}"
# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}"
# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/logs"
# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/logs"
# chmod 01777 "$CONDOR_SUBMIT_WORKDIR/${EXECUTE_SUFFIX}/../logs"
#else
# mkdir -p "/cdr/${EXECUTE_SUFFIX}"
# chmod 01777 "/cdr/${EXECUTE_SUFFIX}"
# chmod 01777 "/cdr/${EXECUTE_SUFFIX}/logs"
# chmod 01777 "/cdr/${EXECUTE_SUFFIX}/../logs"
#fi
#
## Ensure condor user can write to logs, since this is now mounted from host
## Ensure condor user can modify the lock files and run files as of 8.9.10
#chown condor $(condor_config_val log) $(condor_config_val lock) $(condor_config_val run)
#
#
#

#
#docker system prune -a -f
#exec "$(condor_config_val MASTER)" -f -t 2>&1
61 changes: 0 additions & 61 deletions deployment/conf/.templates/deployment.cfg.templ

This file was deleted.

4 changes: 0 additions & 4 deletions deployment/conf/.templates/shared_port_config.templ

This file was deleted.

19 changes: 0 additions & 19 deletions deployment/conf/.templates/start_server.sh.templ

This file was deleted.

16 changes: 0 additions & 16 deletions hooks/build

This file was deleted.

46 changes: 46 additions & 0 deletions kbase_worker.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# This contains info just for the worker
# HTCondor configuration for the worker container. pre-exec.sh appends
# environment-driven overrides (COLLECTOR_HOST, EXECUTE, CLIENTGROUP) to
# /etc/condor/condor_config.local at container start.
# NOTE(review): presumably this file is what gets installed at that path -
# confirm against the Dockerfile COPY.

#Setup Directories
# All condor state lives under LOCAL_DIR; CDR and CONDOR_SHARED are separate
# top-level paths.
CDR = /cdr
LOCAL_DIR = /condor
LOG = $(LOCAL_DIR)/log
SPOOL = $(LOCAL_DIR)/spool
CONDOR_SHARED = /condor_shared
RUN = $(LOCAL_DIR)/run
# Default execute directory; pre-exec.sh overrides it when $EXECUTE is set.
EXECUTE = $(LOCAL_DIR)/execute
# Whitespace-separated list read by pre-exec.sh (via condor_config_val), which
# runs mkdir -p, chmod 01777 and chown root:condor on every entry.
DIRS_TO_CREATE = $(LOCAL_DIR) $(LOG) $(SPOOL) $(CONDOR_SHARED) $(RUN) $(EXECUTE) $(CDR)

#These need to be 770 or 777
# NOTE(review): LOCK is not part of DIRS_TO_CREATE above, so pre-exec.sh does
# not create or chmod it - confirm it is created elsewhere.
LOCK = $(LOCAL_DIR)/lock
# pre-exec.sh looks this path up and chmods it to 01777.
DOCKER_SOCKET = /var/run/docker.sock

# Worker Daemons
DAEMON_LIST = MASTER STARTD

# CGROUPS
BASE_CGROUP = htcondor
CGROUP_MEMORY_LIMIT_POLICY = soft

# CLIENTGROUP Default for allowing multiple jobs to run at once
# The quotation marks are part of the value; pre-exec.sh replaces this with
# $CLIENTGROUP when that environment variable is set (quotes must be supplied
# in the variable as well).
CLIENTGROUP = "njs,bigmem,extreme"

# Add clientgroup to something that jobs can match against!
STARTD_ATTRS = CLIENTGROUP, $(STARTD_ATTRS)

# We don't need core files, but can enable them for persistent crashes
CREATE_CORE_FILES = false

# TODO: enable this health gate once the cron jobs are back.
#NODE_IS_HEALTHY = False
#START = (NODE_IS_HEALTHY =?= True)

# Advertise CPUS and Memory as detected
NUM_CPUS = $(DETECTED_CPUS)
MEMORY = $(DETECTED_MEMORY)

# Prevent Shared Port Daemon from starting
USE_SHARED_PORT=false



# To override any settings, append to this file (later assignments win; this
# is how pre-exec.sh applies its overrides).
46 changes: 46 additions & 0 deletions pre-exec.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
#
# pre-exec.sh - start-up hook for the KBase HTCondor worker container.
#
# Turns environment variables into HTCondor configuration by appending to
# /etc/condor/condor_config.local, writes the JWT token file, optionally prunes
# Docker, prepares the directories listed in DIRS_TO_CREATE and finally runs
# /update-config.
#
# Environment variables read:
#   CONDOR_JWT_TOKEN     token text written to /etc/condor/tokens.d/JWT
#   COLLECTOR_HOST       appended as COLLECTOR_HOST
#   EXECUTE              appended as EXECUTE
#   CLIENTGROUP          appended as CLIENTGROUP (value must include its quotes)
#   DOCKER_SYSTEM_PRUNE  when non-empty, run "docker system prune -a -f"
#
# See documentation at https://github.com/htcondor/htcondor/tree/main/build/docker/services#providing-additional-configuration

CONDOR_LOCAL_CONFIG=/etc/condor/condor_config.local
JWT_TOKEN_FILE=/etc/condor/tokens.d/JWT

######################## Required Values BEGIN ########################################
if [ -n "$CONDOR_JWT_TOKEN" ] ; then
    # Create the file under a restrictive umask so the token is never readable
    # by other users, not even between the write and the chmod below.
    ( umask 077 && printf '%s\n' "$CONDOR_JWT_TOKEN" > "$JWT_TOKEN_FILE" )
    # Still needed when the file already existed with looser permissions.
    chmod 600 "$JWT_TOKEN_FILE"
fi

if [ -n "$COLLECTOR_HOST" ] ; then
    echo "COLLECTOR_HOST = $COLLECTOR_HOST" >> "$CONDOR_LOCAL_CONFIG"
fi

# This has to be the same exact path as the mount otherwise the JobRunner doesn't understand relative mounts
# e.g. You cannot mount /cdr/staging as /execute, you must mount /cdr/staging as /cdr/staging
if [ -n "$EXECUTE" ] ; then
    echo "EXECUTE = $EXECUTE" >> "$CONDOR_LOCAL_CONFIG"
fi
######################## Required Values END ##########################################

# Note the clientgroup will require quotation marks in the env variable
if [ -n "$CLIENTGROUP" ] ; then
    echo "CLIENTGROUP = $CLIENTGROUP" >> "$CONDOR_LOCAL_CONFIG"
fi

# To keep docker partition from filling up
if [ -n "$DOCKER_SYSTEM_PRUNE" ] ; then
    # Was "-a -fg": "docker system prune" has no -g flag, so the command failed
    # with a usage error and nothing was ever pruned.
    docker system prune -a -f
fi

#TODO Make nobody user able to run jobs
####################### HOST PATHS ############################################
DIRS_TO_CREATE=$(condor_config_val DIRS_TO_CREATE)
# DIRS_TO_CREATE is a whitespace-separated list of paths, so it is expanded
# unquoted on purpose to get one argument per directory.
# shellcheck disable=SC2086
mkdir -p $DIRS_TO_CREATE
# shellcheck disable=SC2086
chmod 01777 $DIRS_TO_CREATE && chown root:condor $DIRS_TO_CREATE

# /condor/execute root-squashed or not condor-owned: requiring world-writability
chmod 01777 "$(condor_config_val EXECUTE)"
# NOTE(review): this makes the Docker socket world-writable - confirm that is
# intended for this worker image.
chmod 01777 "$(condor_config_val DOCKER_SOCKET)"
####################### HOST PATHS ############################################
/update-config