Skip to content

Commit

Permalink
ENH Support launching Airflow through the SLURM submission script.
Browse files Browse the repository at this point in the history
  • Loading branch information
gadorlhiac committed Jul 1, 2024
1 parent 0f932a5 commit b5312bd
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
16 changes: 15 additions & 1 deletion launch_scripts/launch_airflow.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/sdf/group/lcls/ds/ana/sw/conda1/inst/envs/ana-4.0.60-py3/bin/python
#!/sdf/group/lcls/ds/ana/sw/conda1/inst/envs/ana-4.0.62-py3/bin/python

"""Script submitted by Automated Run Processor (ARP) to trigger an Airflow DAG.
Expand Down Expand Up @@ -118,7 +118,14 @@ def _request_arp_token(exp: str, lifetime: int = 300) -> str:
extra_args: List[str] # Should contain all SLURM arguments!
args, extra_args = parser.parse_known_args()
# Check if was submitted from ARP - look for token
use_kerberos: bool = False
if os.getenv("Authorization") is None:
use_kerberos = True
cache_file: Optional[str] = os.getenv("KRB5CCNAME")
if cache_file is None:
logger.info("No Kerberos cache. Try running `kinit` and resubmitting.")
sys.exit(-1)

if args.experiment is None or args.run is None:
logger.info(
(
Expand Down Expand Up @@ -265,6 +272,13 @@ def _request_arp_token(exp: str, lifetime: int = 300) -> str:
logger.info(f"DAG exited: {dag_state}")
break

if use_kerberos:
# We had to do some funny business to get Kerberos credentials...
# Cleanup now that we're done
logger.debug("Removing duplicate Kerberos credentials.")
os.remove(cache_file) # This should be defined if we get here
os.rmdir(f"{os.path.expanduser('~')}/.tmp_cache")

if dag_state == "failed":
sys.exit(1)
else:
Expand Down
8 changes: 8 additions & 0 deletions launch_scripts/submit_launch_airflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,18 @@ do
done
set -- "${POS[@]}"

# Bodge Kerberos credentials
# These duplicates are removed later by the workflow process
KERB_CACHE_PATH=$(klist -l | awk -F"FILE:" '{printf (NF>1)? $NF : ""}')
mkdir $HOME/.tmp_cache
cp $KERB_CACHE_PATH $HOME/.tmp_cache/kerbcache
export KRB5CCNAME="FILE:${HOME}/.tmp_cache/kerbcache"

CMD="${@}"
CMD="${CMD} --partition=${PARTITION} --account=${ACCOUNT}"
echo $CMD
CMD="/sdf/group/lcls/ds/tools/lute/lute_launcher ${CMD}"
SLURM_ARGS="--partition=${PARTITION} --account=${ACCOUNT} --ntasks=1"
echo "Running ${CMD} with ${SLURM_ARGS}"
sbatch $SLURM_ARGS --wrap "${CMD}"
export KRB5CCNAME="FILE:${KERB_CACHE_PATH}"
1 change: 1 addition & 0 deletions lute/managed_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Executor-managed Tasks with specific environment specifications are defined
here.
"""

from .execution.executor import *
from .io.config import *

Expand Down

0 comments on commit b5312bd

Please sign in to comment.