Skip to content

Commit

Permalink
[GSProcessing] Update EMRS image to 7.1.0, add file in image to ensure we recognize execution env.
Browse files Browse the repository at this point in the history
  • Loading branch information
thvasilo committed Jul 29, 2024
1 parent 8cbf6d5 commit 3073696
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ARG ARCH=x86_64
FROM public.ecr.aws/emr-serverless/spark/emr-7.0.0:20240206-${ARCH} as base
FROM public.ecr.aws/emr-serverless/spark/emr-7.1.0:20240528-${ARCH} as base

USER root
ENV PYTHON_VERSION=3.9.18
Expand Down Expand Up @@ -40,6 +40,8 @@ else \
python3 -c "from transformers import AutoModel; AutoModel.from_pretrained('${MODEL}')"; \
fi

# Create an empty marker file; the GSProcessing entry point checks for this
# path at startup to detect that it is running on EMR Serverless
RUN touch /usr/lib/spark/code/EMR_SERVERLESS_EXECUTION

# GSProcessing codebase
COPY code/ /usr/lib/spark/code/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -573,10 +573,10 @@ def main():
format="[GSPROCESSING] %(asctime)s %(levelname)-8s %(message)s",
)

# Determine if we're running within a SageMaker container
# Determine execution environment
if os.path.exists("/opt/ml/config/processingjobconfig.json"):
execution_env = ExecutionEnv.SAGEMAKER
elif os.path.exists("/emr-serverless-config.json"):
elif os.path.exists("/usr/lib/spark/code/EMR_SERVERLESS_EXECUTION"):
execution_env = ExecutionEnv.EMR_SERVERLESS
elif os.path.exists("/usr/lib/spark/code/EMR_EXECUTION"):
execution_env = ExecutionEnv.EMR_ON_EC2
Expand Down

0 comments on commit 3073696

Please sign in to comment.