From 0fbf0952c3b203a99f23d89b0fb477df6f36df70 Mon Sep 17 00:00:00 2001 From: Jon Massey Date: Wed, 1 May 2024 21:51:44 +0100 Subject: [PATCH 1/5] Replace Rocker R package library with OpenSAFELY R image library --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index faf0a99..c1f9c65 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,15 +24,15 @@ RUN --mount=type=cache,target=/var/cache/apt \ rm /usr/bin/python3 &&\ ln -s /usr/bin/python3.10 /usr/bin/python3 -# install renv -RUN --mount=type=cache,target=/cache,id=/cache-2004 R -e 'install.packages("renv", destdir="/cache"); renv::init(bare = TRUE)' +# Remove the packages shipped with the rocker image +RUN rm -rf /usr/library/lib/R/site-library/* -# copy the renv directory from the OpenSAFELY R action image +# copy the renv directory into the local site library from the OpenSAFELY R action image # # DL3022: hadolint can't access a network and doesn't behave # as expected when a reference is made to an external image. 
# hadolint ignore=DL3022 -COPY --from=ghcr.io/opensafely-core/r /renv/ /renv/ +COPY --chown=rstudio:rstudio --from=ghcr.io/opensafely-core/r:latest /renv/lib/R-4.0/x86_64-pc-linux-gnu/ /usr/local/lib/R/site-library # Copy the Python virtualenv from OpenSAFELY Python action image # From cdd84e6e5fcce8eb95826882da6e170f7e69ab68 Mon Sep 17 00:00:00 2001 From: Jon Massey Date: Thu, 2 May 2024 13:57:42 +0100 Subject: [PATCH 2/5] Don't require auth for local rstudio server --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index c1f9c65..94a721d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,6 +34,9 @@ RUN rm -rf /usr/library/lib/R/site-library/* # hadolint ignore=DL3022 COPY --chown=rstudio:rstudio --from=ghcr.io/opensafely-core/r:latest /renv/lib/R-4.0/x86_64-pc-linux-gnu/ /usr/local/lib/R/site-library +# Configure RStudio Server to run without auth +RUN echo "auth-none=1" >> /etc/rstudio/rserver.conf && echo "USER=rstudio" >> /etc/environment + # Copy the Python virtualenv from OpenSAFELY Python action image # # DL3022: hadolint can't access a network and doesn't behave From 1726c65646ba7a869da4ae845a23b2789cfe469b Mon Sep 17 00:00:00 2001 From: Jon Massey Date: Fri, 3 May 2024 11:33:16 +0100 Subject: [PATCH 3/5] Redirect system python to venv python --- Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 94a721d..c27fa12 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,7 +42,10 @@ RUN echo "auth-none=1" >> /etc/rstudio/rserver.conf && echo "USER=rstudio" >> /e # DL3022: hadolint can't access a network and doesn't behave # as expected when a reference is made to an external image. 
# hadolint ignore=DL3022 -COPY --from=ghcr.io/opensafely-core/python:v2 /opt/venv /opt/venv +COPY --chown=rstudio:rstudio --from=ghcr.io/opensafely-core/python:v2 /opt/venv /opt/venv + +# Create a fake system Python pointing at venv python +RUN echo 'exec /opt/venv/bin/python3.10 "$@"' > /usr/bin/python # Create a local user and give it sudo (aka root) permissions RUN usermod -aG sudo rstudio &&\ From dc5c86317c6c59f5a07e6910dfd38362928a3751 Mon Sep 17 00:00:00 2001 From: Lucy Bridges Date: Fri, 3 May 2024 11:54:00 +0100 Subject: [PATCH 4/5] Combine multiple RUN statements This reduces the number of layers in the Docker image, which should reduce the size although in practice it hasn't made a significant difference to this image. --- Dockerfile | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/Dockerfile b/Dockerfile index c27fa12..89c8bec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,9 +6,6 @@ LABEL org.opencontainers.image.source https://github.com/opensafely/research-tem # docker clean up that deletes that cache on every apt install RUN rm -f /etc/apt/apt.conf.d/docker-clean -# Install python 3.10. This is the version used by the python-docker -# image, used for analyses using the OpenSAFELY pipeline. -# # DL3042: we always want latest package versions when we rebuild # DL3013: using an apt cache on the host instead # hadolint ignore=DL3042,DL3013 @@ -17,15 +14,24 @@ RUN --mount=type=cache,target=/var/cache/apt \ echo "deb http://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu focal main" > /etc/apt/sources.list.d/deadsnakes-ppa.list &&\ /usr/lib/apt/apt-helper download-file 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf23c5a6cf475977595c89f51ba6932366a755776' /etc/apt/trusted.gpg.d/deadsnakes.asc &&\ apt-get update &&\ + # Install python 3.10. This is the version used by the python-docker + # image, used for analyses using the OpenSAFELY pipeline. 
apt-get install -y --no-install-recommends curl python3.10 python3.10-distutils python3.10-venv &&\ # Pip for Python 3.10 isn't included in deadsnakes, so install separately curl https://bootstrap.pypa.io/get-pip.py | python3.10 &&\ # Set default python, so that the Python virtualenv works as expected rm /usr/bin/python3 &&\ - ln -s /usr/bin/python3.10 /usr/bin/python3 - -# Remove the packages shipped with the rocker image -RUN rm -rf /usr/library/lib/R/site-library/* + ln -s /usr/bin/python3.10 /usr/bin/python3 &&\ + # Create a fake system Python pointing at venv python + echo 'exec /opt/venv/bin/python3.10 "$@"' > /usr/bin/python &&\ + # Configure RStudio Server to run without auth + echo "auth-none=1" >> /etc/rstudio/rserver.conf &&\ + echo "USER=rstudio" >> /etc/environment &&\ + # Remove the packages shipped with the rocker image + rm -rf /usr/library/lib/R/site-library/* &&\ + # Give the local user sudo (aka root) permissions + usermod -aG sudo rstudio &&\ + echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers # copy the renv directory into the local site library from the OpenSAFELY R action image # @@ -34,9 +40,6 @@ RUN rm -rf /usr/library/lib/R/site-library/* # hadolint ignore=DL3022 COPY --chown=rstudio:rstudio --from=ghcr.io/opensafely-core/r:latest /renv/lib/R-4.0/x86_64-pc-linux-gnu/ /usr/local/lib/R/site-library -# Configure RStudio Server to run without auth -RUN echo "auth-none=1" >> /etc/rstudio/rserver.conf && echo "USER=rstudio" >> /etc/environment - # Copy the Python virtualenv from OpenSAFELY Python action image # # DL3022: hadolint can't access a network and doesn't behave @@ -44,13 +47,6 @@ RUN echo "auth-none=1" >> /etc/rstudio/rserver.conf && echo "USER=rstudio" >> /e # hadolint ignore=DL3022 COPY --chown=rstudio:rstudio --from=ghcr.io/opensafely-core/python:v2 /opt/venv /opt/venv -# Create a fake system Python pointing at venv python -RUN echo 'exec /opt/venv/bin/python3.10 "$@"' > /usr/bin/python - -# Create a local user and give it sudo 
(aka root) permissions -RUN usermod -aG sudo rstudio &&\ - echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers - # Required for installing opensafely cli ENV PATH="/home/rstudio/.local/bin:${PATH}" From 26557994f4714f6ad403e5392318c7a502f3c065 Mon Sep 17 00:00:00 2001 From: Lucy Bridges Date: Fri, 3 May 2024 11:58:49 +0100 Subject: [PATCH 5/5] Move COPY from R image to after the Python COPY The R image is currently more frequently updated than the python one. Moving this statement lower in the Dockerfile means we're more likely to take advantage of caching to get slightly faster build times in most cases. --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 89c8bec..38f67b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,19 +33,19 @@ RUN --mount=type=cache,target=/var/cache/apt \ usermod -aG sudo rstudio &&\ echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers -# copy the renv directory into the local site library from the OpenSAFELY R action image +# Copy the Python virtualenv from OpenSAFELY Python action image # # DL3022: hadolint can't access a network and doesn't behave # as expected when a reference is made to an external image. # hadolint ignore=DL3022 -COPY --chown=rstudio:rstudio --from=ghcr.io/opensafely-core/r:latest /renv/lib/R-4.0/x86_64-pc-linux-gnu/ /usr/local/lib/R/site-library +COPY --chown=rstudio:rstudio --from=ghcr.io/opensafely-core/python:v2 /opt/venv /opt/venv -# Copy the Python virtualenv from OpenSAFELY Python action image +# copy the renv directory into the local site library from the OpenSAFELY R action image # # DL3022: hadolint can't access a network and doesn't behave # as expected when a reference is made to an external image. 
# hadolint ignore=DL3022 -COPY --chown=rstudio:rstudio --from=ghcr.io/opensafely-core/python:v2 /opt/venv /opt/venv +COPY --chown=rstudio:rstudio --from=ghcr.io/opensafely-core/r:latest /renv/lib/R-4.0/x86_64-pc-linux-gnu/ /usr/local/lib/R/site-library # Required for installing opensafely cli ENV PATH="/home/rstudio/.local/bin:${PATH}"