diff --git a/Dockerfile b/Dockerfile
index c9279b7..d13fd3c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,29 @@
 # MARK: base
-FROM ubuntu:jammy-20240911.1@sha256:3d1556a8a18cf5307b121e0a98e93f1ddf1f3f8e092f1fddfd941254785b95d7 as base
+FROM ubuntu:jammy-20240911.1@sha256:3d1556a8a18cf5307b121e0a98e93f1ddf1f3f8e092f1fddfd941254785b95d7 AS base
+
+# MARK: ducc
+# bookworm is the codename for Debian 12, which is the base for Ubuntu 22.04 (Jammy)
+FROM golang:1.23.2-bookworm@sha256:18d2f940cc20497f85466fdbe6c3d7a52ed2db1d5a1a49a4508ffeee2dff1463 AS ducc
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+    # Required for the setcap command
+    libcap2-bin \
+    && rm -rf /var/lib/apt/lists/*
+
+# Inject a dummy sudo command to work around the hardcoded sudo requirement in the build script (we are already running as root). printf is used because RUN executes under /bin/sh, where bash's $'...' quoting is not portable.
+RUN printf '#!/bin/sh\nexec "$@"\n' > /usr/bin/sudo \
+    && chmod +x /usr/bin/sudo
+
+RUN mkdir cvmfs \
+    && cd cvmfs \
+    && git init \
+    && git remote add origin https://github.com/cvmfs/cvmfs.git \
+    && git fetch --depth 1 origin 73a1fc54940e18b612d8f49bf08835f305ebdcbd \
+    && git checkout FETCH_HEAD
+
+RUN cd cvmfs/ducc \
+    && make
 
 # MARK: courier
 # This stage is used to keep the cache valid across different systems (even when the file permissions change).
@@ -74,5 +98,6 @@ RUN python3 -m pip install -r /tmp/requirements.txt && rm /tmp/requirements.txt COPY --from=courier /server/src /app COPY --from=courier /server/rootfs / +COPY --from=ducc /go/cvmfs/ducc/cvmfs_ducc /usr/bin/cvmfs_ducc WORKDIR /app \ No newline at end of file diff --git a/README.md b/README.md index 1954356..67dbb2b 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,45 @@ cvmfs_server publish cvmfs_swissknife notify -p -u http://thor-slurm1.cluster.watonomous.ca:4929/api/v1 -r http://thor-slurm1.cluster.watonomous.ca:8080/cvmfs/cvmfs-server.example.local ``` +#### DUCC + +```bash +python3 main.py init-cvmfs-repo cvmfs-server.example.local +time cvmfs_ducc convert-single-image ghcr.io/watonomous/actions-runner-image:nightly cvmfs-server.example.local --skip-layers +# real 0m49.238s +# user 0m36.520s +# sys 0m25.311s +time cvmfs_ducc convert-single-image ghcr.io/watonomous/infra-config:master cvmfs-server.example.local --skip-layers +# real 1m6.331s +# user 0m52.700s +# sys 0m29.849s +``` + +This is the step that downloads image layers: +- https://github.com/cvmfs/cvmfs/blob/5c3777cf846cd2bb6e73d17af83384b37c2b6fa2/ducc/lib/conversion.go#L134 + +See the effects of deduplication: +``` +root@f5a15f3a5635:/app# df -h /srv +Filesystem Size Used Avail Use% Mounted on +tmpfs 95G 56K 95G 1% /srv + +root@f5a15f3a5635:/app# time cvmfs_ducc convert-single-image ghcr.io/watonomous/infra-config:master cvmfs-server.example.local --skip-layers + +root@f5a15f3a5635:/app# df -h /srv +Filesystem Size Used Avail Use% Mounted on +tmpfs 95G 1.5G 93G 2% /srv + +root@f5a15f3a5635:/app# time cvmfs_ducc convert-single-image ghcr.io/watonomous/infra-config@sha256:46b00a593dc4b93972501bad7b5cb0167f687d0791fa7611764b22dd31101a46 cvmfs-server.example.local --skip-layers + +root@f5a15f3a5635:/app# df -h /srv +Filesystem Size Used Avail Use% Mounted on +tmpfs 95G 1.7G 93G 2% /srv +``` + +File deduplication is nice, but it still takes a long time to convert images. 
+It appears that the conversion process consists of downloading/unpacking the layers and then [adding Singularity metadata](https://github.com/cvmfs/cvmfs/blob/5c3777cf846cd2bb6e73d17af83384b37c2b6fa2/ducc/lib/conversion.go#L161-L172). +If we don't use a registry in the first place, there may be no need to convert the image into layers (i.e. docker save). Perhaps we can just zip up all of the files and add the metadata. ### Notifications diff --git a/docker-compose.yml b/docker-compose.yml index 5a981b0..cd5175c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,7 +26,12 @@ services: - /var/spool/cvmfs cap_add: - - SYS_ADMIN + - SYS_ADMIN # required for fuse and ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 + - DAC_OVERRIDE # required by ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 + - DAC_READ_SEARCH # required by ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 + - FOWNER # required by ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 + - CHOWN # required by ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 + - MKNOD # required by ducc: https://cernvm-forum.cern.ch/t/where-to-get-cvmfs-ducc-binary/366/2 security_opt: - apparmor:unconfined devices: