From 90f71e0dd41bc5e28b3f37036c89688a6e1ac248 Mon Sep 17 00:00:00 2001
From: Ben Zhang
Date: Tue, 15 Oct 2024 18:45:56 +0000
Subject: [PATCH 1/3] Install cvmfs_ducc command

---
 Dockerfile         | 27 ++++++++++++++++++++++++++-
 README.md          | 11 +++++++++++
 docker-compose.yml |  7 ++++++-
 3 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index c9279b7..d13fd3c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,29 @@
 # MARK: base
-FROM ubuntu:jammy-20240911.1@sha256:3d1556a8a18cf5307b121e0a98e93f1ddf1f3f8e092f1fddfd941254785b95d7 as base
+FROM ubuntu:jammy-20240911.1@sha256:3d1556a8a18cf5307b121e0a98e93f1ddf1f3f8e092f1fddfd941254785b95d7 AS base
+
+# MARK: ducc
+# bookworm is the codename for Debian 12, which is the base for Ubuntu 22.04 (Jammy)
+FROM golang:1.23.2-bookworm@sha256:18d2f940cc20497f85466fdbe6c3d7a52ed2db1d5a1a49a4508ffeee2dff1463 AS ducc
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+    # Required for the setcap command
+    libcap2-bin \
+    && rm -rf /var/lib/apt/lists/*
+
+# Inject a dummy sudo command to work around the hardcoded sudo requirement in the build script. We are already running as root. printf is used (not echo $'...') because RUN runs under /bin/sh, where $'...' quoting is not POSIX and would leave a stray "$" in front of the shebang.
+RUN printf '#!/bin/bash\nexec "$@"\n' > /usr/bin/sudo \
+    && chmod +x /usr/bin/sudo
+
+RUN mkdir cvmfs \
+    && cd cvmfs \
+    && git init \
+    && git remote add origin https://github.com/cvmfs/cvmfs.git \
+    && git fetch --depth 1 origin 73a1fc54940e18b612d8f49bf08835f305ebdcbd \
+    && git checkout FETCH_HEAD
+
+RUN cd cvmfs/ducc \
+    && make
 
 # MARK: courier
 # This stage is used to keep the cache valid across different systems (even when the file permissions change).
@@ -74,5 +98,6 @@ RUN python3 -m pip install -r /tmp/requirements.txt && rm /tmp/requirements.txt COPY --from=courier /server/src /app COPY --from=courier /server/rootfs / +COPY --from=ducc /go/cvmfs/ducc/cvmfs_ducc /usr/bin/cvmfs_ducc WORKDIR /app \ No newline at end of file diff --git a/README.md b/README.md index 1954356..dbdec3d 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,17 @@ cvmfs_server publish cvmfs_swissknife notify -p -u http://thor-slurm1.cluster.watonomous.ca:4929/api/v1 -r http://thor-slurm1.cluster.watonomous.ca:8080/cvmfs/cvmfs-server.example.local ``` +#### DUCC + +```bash +python3 main.py init-cvmfs-repo cvmfs-server.example.local +time cvmfs_ducc convert-single-image ghcr.io/watonomous/actions-runner-image:nightly cvmfs-server.example.local --skip-layers +time cvmfs_ducc convert-single-image ghcr.io/watonomous/infra-config:master cvmfs-server.example.local --skip-layers +``` + +This is the step that downloads image layers: +- https://github.com/cvmfs/cvmfs/blob/5c3777cf846cd2bb6e73d17af83384b37c2b6fa2/ducc/lib/conversion.go#L134 + ### Notifications diff --git a/docker-compose.yml b/docker-compose.yml index 5a981b0..cd5175c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,7 +26,12 @@ services: - /var/spool/cvmfs cap_add: - - SYS_ADMIN + - SYS_ADMIN # required for fuse and ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 + - DAC_OVERRIDE # required by ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 + - DAC_READ_SEARCH # required by ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 + - FOWNER # required by ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 + - CHOWN # required by ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 + - MKNOD # required by ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 security_opt: - apparmor:unconfined devices: From 
f0f8e722e3d2577407378c28909303efda1c6fe1 Mon Sep 17 00:00:00 2001 From: Ben Zhang Date: Tue, 15 Oct 2024 19:00:39 +0000 Subject: [PATCH 2/3] Add notes on cvmfs deduplication and ideas on how to speed up image conversion --- README.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/README.md b/README.md index dbdec3d..496c671 100644 --- a/README.md +++ b/README.md @@ -89,12 +89,40 @@ cvmfs_swissknife notify -p -u http://thor-slurm1.cluster.watonomous.ca:4929/api/ ```bash python3 main.py init-cvmfs-repo cvmfs-server.example.local time cvmfs_ducc convert-single-image ghcr.io/watonomous/actions-runner-image:nightly cvmfs-server.example.local --skip-layers +# real 1m6.331s +# user 0m52.700s +# sys 0m29.849s time cvmfs_ducc convert-single-image ghcr.io/watonomous/infra-config:master cvmfs-server.example.local --skip-layers +# real 0m49.238s +# user 0m36.520s +# sys 0m25.311s ``` This is the step that downloads image layers: - https://github.com/cvmfs/cvmfs/blob/5c3777cf846cd2bb6e73d17af83384b37c2b6fa2/ducc/lib/conversion.go#L134 +See the effects of deduplication: +``` +root@f5a15f3a5635:/app# df -h /srv +Filesystem Size Used Avail Use% Mounted on +tmpfs 95G 56K 95G 1% /srv + +root@f5a15f3a5635:/app# time cvmfs_ducc convert-single-image ghcr.io/watonomous/infra-config:master cvmfs-server.example.local --skip-layers + +root@f5a15f3a5635:/app# df -h /srv +Filesystem Size Used Avail Use% Mounted on +tmpfs 95G 1.5G 93G 2% /srv + +root@f5a15f3a5635:/app# time cvmfs_ducc convert-single-image ghcr.io/watonomous/infra-config@sha256:46b00a593dc4b93972501bad7b5cb0167f687d0791fa7611764b22dd31101a46 cvmfs-server.example.local --skip-layers + +root@f5a15f3a5635:/app# df -h /srv +Filesystem Size Used Avail Use% Mounted on +tmpfs 95G 1.7G 93G 2% /srv +``` + +File deduplication is nice, but it still takes a long time to convert images. 
+It appears that the conversion process is downloading/unpacking + [adding singularity metadata](https://github.com/cvmfs/cvmfs/blob/5c3777cf846cd2bb6e73d17af83384b37c2b6fa2/ducc/lib/conversion.go#L161-L172). +If we don't use a registry in the first place, there may be no need to convert the image into layers in the first place. Perhaps we can just zip up all of the files and add the metadata. ### Notifications From 00211142772bfb233e38a4e11299febaa3037429 Mon Sep 17 00:00:00 2001 From: Ben Zhang Date: Tue, 15 Oct 2024 19:15:29 +0000 Subject: [PATCH 3/3] Fix timing and clarify comment --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 496c671..67dbb2b 100644 --- a/README.md +++ b/README.md @@ -89,13 +89,13 @@ cvmfs_swissknife notify -p -u http://thor-slurm1.cluster.watonomous.ca:4929/api/ ```bash python3 main.py init-cvmfs-repo cvmfs-server.example.local time cvmfs_ducc convert-single-image ghcr.io/watonomous/actions-runner-image:nightly cvmfs-server.example.local --skip-layers -# real 1m6.331s -# user 0m52.700s -# sys 0m29.849s -time cvmfs_ducc convert-single-image ghcr.io/watonomous/infra-config:master cvmfs-server.example.local --skip-layers # real 0m49.238s # user 0m36.520s # sys 0m25.311s +time cvmfs_ducc convert-single-image ghcr.io/watonomous/infra-config:master cvmfs-server.example.local --skip-layers +# real 1m6.331s +# user 0m52.700s +# sys 0m29.849s ``` This is the step that downloads image layers: @@ -122,7 +122,7 @@ tmpfs 95G 1.7G 93G 2% /srv File deduplication is nice, but it still takes a long time to convert images. It appears that the conversion process is downloading/unpacking + [adding singularity metadata](https://github.com/cvmfs/cvmfs/blob/5c3777cf846cd2bb6e73d17af83384b37c2b6fa2/ducc/lib/conversion.go#L161-L172). -If we don't use a registry in the first place, there may be no need to convert the image into layers in the first place. 
Perhaps we can just zip up all of the files and add the metadata. +If we don't use a registry in the first place, there may be no need to convert the image into layers (i.e. docker save). Perhaps we can just zip up all of the files and add the metadata. ### Notifications