From cbcc890193b86fcd2b417cc83987b261e4d26beb Mon Sep 17 00:00:00 2001 From: Adam Joseph Date: Fri, 19 Jan 2024 20:01:58 -0800 Subject: [PATCH 01/34] gcc: link $lib/lib -> $lib/$targetConfig correctly and consistently When native-compiling, gcc will install libraries into: /nix/store/...-$targetConfig-gcc-$version-lib/lib When cross-compiling, gcc will install libraries into: /nix/store/...-$targetConfig-gcc-$version-lib/$targetConfig When cross-compiling, we intended to create a link from $lib/lib to $lib/$targetConfig, so that downstream users can always safely assume that "${lib.getLib stdenv.cc.cc}/lib" is where the gcc libraries are, regardless of whether `stdenv.cc.cc` is a cross compiler or a native compiler. Unfortunately, there were two problems with how we were trying to create these links: 1. The link would be created only when `enableLibGccOutput==true` 2. The link was being created from the incorrect source `$lib/lib/lib` instead of `$lib/lib`. Both of these mistakes are my fault. This commit corrects them by creating the link using `ln -Ts` (which is more predictable) and by creating the link from `gcc/common/builder.nix` rather than from `gcc/common/libgcc.nix`. 
--- pkgs/development/compilers/gcc/common/builder.nix | 8 ++++++++ pkgs/development/compilers/gcc/common/libgcc.nix | 4 ---- pkgs/development/compilers/gcc/default.nix | 4 +++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pkgs/development/compilers/gcc/common/builder.nix b/pkgs/development/compilers/gcc/common/builder.nix index 98525b5e237ef..25c5646338653 100644 --- a/pkgs/development/compilers/gcc/common/builder.nix +++ b/pkgs/development/compilers/gcc/common/builder.nix @@ -1,6 +1,7 @@ { lib , stdenv , enableMultilib +, targetConfig }: let @@ -196,6 +197,13 @@ originalAttrs: (stdenv.mkDerivation (finalAttrs: originalAttrs // { mkdir -p "$out/''${targetConfig}/lib" mkdir -p "''${!outputLib}/''${targetConfig}/lib" '' + + # if cross-compiling, link from $lib/lib to $lib/${targetConfig}. + # since native-compiles have $lib/lib as a directory (not a + # symlink), this ensures that in every case we can assume that + # $lib/lib contains the .so files + lib.optionalString (with stdenv; targetPlatform.config != hostPlatform.config) '' + ln -Ts "''${!outputLib}/''${targetConfig}/lib" $lib/lib + '' + # Make `lib64` symlinks to `lib`. 
lib.optionalString (!enableMultilib && stdenv.hostPlatform.is64bit && !stdenv.hostPlatform.isMips64n32) '' ln -s lib "$out/''${targetConfig}/lib64" diff --git a/pkgs/development/compilers/gcc/common/libgcc.nix b/pkgs/development/compilers/gcc/common/libgcc.nix index c8342ae90054a..a7de840adc8d0 100644 --- a/pkgs/development/compilers/gcc/common/libgcc.nix +++ b/pkgs/development/compilers/gcc/common/libgcc.nix @@ -83,10 +83,6 @@ in lib.optionalString (!langC) '' rm -f $out/lib/libgcc_s.so* '' - + lib.optionalString (hostPlatform != targetPlatform) '' - mkdir -p $lib/lib/ - ln -s ${targetPlatformSlash}lib $lib/lib - '' # TODO(amjoseph): remove the `libgcc_s.so` symlinks below and replace them # with a `-L${gccForLibs.libgcc}/lib` in cc-wrapper's diff --git a/pkgs/development/compilers/gcc/default.nix b/pkgs/development/compilers/gcc/default.nix index cc3546bed22cf..0144ab4cfff9b 100644 --- a/pkgs/development/compilers/gcc/default.nix +++ b/pkgs/development/compilers/gcc/default.nix @@ -103,6 +103,7 @@ let inherit version; disableBootstrap = atLeast11 && !stdenv.hostPlatform.isDarwin && (atLeast12 -> !profiledCompiler); inherit (stdenv) buildPlatform hostPlatform targetPlatform; + targetConfig = if targetPlatform != hostPlatform then targetPlatform.config else null; patches = callFile ./patches {}; @@ -124,6 +125,7 @@ let inherit version; buildPlatform hostPlatform targetPlatform + targetConfig patches crossMingw stageNameAddon @@ -329,7 +331,7 @@ lib.pipe ((callFile ./common/builder.nix {}) ({ ++ optional (is7 && targetPlatform.isAarch64) "--enable-fix-cortex-a53-843419" ++ optional (is7 && targetPlatform.isNetBSD) "--disable-libcilkrts"; - targetConfig = if targetPlatform != hostPlatform then targetPlatform.config else null; + inherit targetConfig; buildFlags = # we do not yet have Nix-driven profiling From cbde122958b83da0c89f322ef497bcfdf80a17c0 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 10 Jan 2024 02:32:25 +0000 Subject: [PATCH 02/34] cudaPackages: 
cross-compilation attempt 2 workaround bug in cross-compilation / meta.getExe cudaPackages.autoAddCudaCompatRunpathHook: correct meta.platforms reference cudaPackages.backendStdenv: use coreutils from buildPackages when cross-compiling --- .../cuda-modules/backend-stdenv.nix | 4 +- .../cuda-modules/cuda/overrides.nix | 9 ++- .../cuda-modules/cutensor/extension.nix | 4 +- pkgs/development/cuda-modules/flags.nix | 15 +++-- .../generic-builders/manifest.nix | 11 +++- .../generic-builders/multiplex.nix | 4 +- .../cuda-modules/setup-hooks/extension.nix | 17 ++--- .../cuda-modules/tensorrt/fixup.nix | 6 +- pkgs/top-level/cuda-packages.nix | 64 +++++++++---------- 9 files changed, 72 insertions(+), 62 deletions(-) diff --git a/pkgs/development/cuda-modules/backend-stdenv.nix b/pkgs/development/cuda-modules/backend-stdenv.nix index bcca7118b163b..32386ffbdd4c9 100644 --- a/pkgs/development/cuda-modules/backend-stdenv.nix +++ b/pkgs/development/cuda-modules/backend-stdenv.nix @@ -3,10 +3,8 @@ nvccCompatibilities, cudaVersion, pkgs, - overrideCC, stdenv, - wrapCCWith, - stdenvAdapters, + stdenvAdapters }: let diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index f43d649afbbf3..31d03dd0fc73f 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -1,4 +1,4 @@ -{cudaVersion, lib, addDriverRunpath}: +{cudaVersion, lib}: let inherit (lib) attrsets lists strings; # cudaVersionOlder : Version -> Boolean @@ -58,7 +58,7 @@ attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) { while IFS= read -r -d $'\0' path ; do sed -i \ -e "s|^libdir\s*=.*/lib\$|libdir=''${!outputLib}/lib/stubs|" \ - -e "s|^Libs\s*:\(.*\)\$|Libs: \1 -Wl,-rpath,${addDriverRunpath.driverLink}/lib|" \ + -e "s|^Libs\s*:\(.*\)\$|Libs: \1 -Wl,-rpath,${final.pkgs.addDriverRunpath.driverLink}/lib|" \ "$path" done < <(find -iname 'cuda-*.pc' -print0) '' @@ -92,6 +92,11 @@ 
attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) { "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" = !final.flags.isJetsonBuild; }; + meta = prevAttrs.meta // { + # For cross-compilation, we need the hostPlatform to be included in order to fetch and build the package. This + # doesn't change the fact that it won't work on non-Jetson devices, so we only add it when building for Jetson. + platforms = prevAttrs.meta.platforms ++ lib.optionals final.flags.isJetsonBuild [ "x86_64-linux" ]; + }; } ); diff --git a/pkgs/development/cuda-modules/cutensor/extension.nix b/pkgs/development/cuda-modules/cutensor/extension.nix index 534941887c6e4..38b0b03248aad 100644 --- a/pkgs/development/cuda-modules/cutensor/extension.nix +++ b/pkgs/development/cuda-modules/cutensor/extension.nix @@ -15,7 +15,7 @@ { cudaVersion, flags, - hostPlatform, + targetPlatform, lib, mkVersionedPackageName, }: @@ -93,7 +93,7 @@ let # LibPath are not constant across the same release -- one platform may support fewer # CUDA versions than another. # redistArch :: String - redistArch = flags.getRedistArch hostPlatform.system; + redistArch = flags.getRedistArch targetPlatform.system; # platformIsSupported :: Manifests -> Boolean platformIsSupported = {feature, ...}: diff --git a/pkgs/development/cuda-modules/flags.nix b/pkgs/development/cuda-modules/flags.nix index d5e01be01fd51..50a69d6fd1d1d 100644 --- a/pkgs/development/cuda-modules/flags.nix +++ b/pkgs/development/cuda-modules/flags.nix @@ -7,7 +7,9 @@ cudaForwardCompat ? 
(config.cudaForwardCompat or true), lib, cudaVersion, + buildPlatform, hostPlatform, + targetPlatform, # gpus :: List Gpu gpus, }: @@ -216,16 +218,19 @@ let lists.filter (cap: !(builtins.elem cap requestedJetsonDevices)) cudaCapabilities; jetsonBuildSufficientCondition = requestedJetsonDevices != []; - jetsonBuildNecessaryCondition = requestedNonJetsonDevices == [] && hostPlatform.isAarch64; + jetsonBuildNecessaryCondition = requestedNonJetsonDevices == [] && targetPlatform.isAarch64; in trivial.throwIf (jetsonBuildSufficientCondition && !jetsonBuildNecessaryCondition) '' - Jetson devices cannot be targeted with non-Jetson devices. Additionally, they require hostPlatform to be aarch64. - You requested ${builtins.toJSON cudaCapabilities} for host platform ${hostPlatform.system}. + Jetson devices cannot be targeted with non-Jetson devices. Additionally, they require targetPlatform to be aarch64. + You requested ${builtins.toJSON cudaCapabilities} for: + - Build platform ${buildPlatform.system} + - Host platform ${hostPlatform.system} + - Target platform ${targetPlatform.system} Requested Jetson devices: ${builtins.toJSON requestedJetsonDevices}. Requested non-Jetson devices: ${builtins.toJSON requestedNonJetsonDevices}. Exactly one of the following must be true: - - All CUDA capabilities belong to Jetson devices and hostPlatform is aarch64. + - All CUDA capabilities belong to Jetson devices and targetPlatform is aarch64. - No CUDA capabilities belong to Jetson devices. See ${./gpus.nix} for a list of architectures supported by this version of Nixpkgs. '' @@ -346,7 +351,7 @@ assert let in asserts.assertMsg # We can't do this test unless we're targeting aarch64 - (hostPlatform.isAarch64 -> (expected == actualWrapped)) + (targetPlatform.isAarch64 -> (expected == actualWrapped)) '' Jetson devices can only be built with other Jetson devices. Both 6.2 and 7.2 are Jetson devices. 
diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix index 4f40b7f01dc28..049c8936426d2 100644 --- a/pkgs/development/cuda-modules/generic-builders/manifest.nix +++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix @@ -10,7 +10,7 @@ markForCudatoolkitRootHook, flags, stdenv, - hostPlatform, + targetPlatform, # Builder-specific arguments # Short package name (e.g., "cuda_cccl") # pname : String @@ -46,7 +46,7 @@ let # redistArch :: String # The redistArch is the name of the architecture for which the redistributable is built. # It is `"unsupported"` if the redistributable is not supported on the target platform. - redistArch = flags.getRedistArch hostPlatform.system; + redistArch = flags.getRedistArch targetPlatform.system; sourceMatchesHost = flags.getNixSystem redistArch == stdenv.hostPlatform.system; in @@ -195,6 +195,8 @@ backendStdenv.mkDerivation ( # Check e.g. with `patchelf --print-rpath path/to/my/binary autoAddDriverRunpath markForCudatoolkitRootHook + # To create fat outputs from each component and find a version of `lndir` built for the host platform. + lndir ] # autoAddCudaCompatRunpath depends on cuda_compat and would cause # infinite recursion if applied to `cuda_compat` itself (beside the fact @@ -296,11 +298,14 @@ backendStdenv.mkDerivation ( # For each output, create a symlink to it in the out output. # NOTE: We must recreate the out output here, because the setup hook will have deleted it if it was empty. + # TODO: Previously we used `meta.getExe lndir` to get the path to lndir, but that doesn't work under + # cross-compilation -- whatever machinery Nixpkgs uses to get a version built for hostPlatform (so it can run + # during the build) doesn't extend to `meta.getExe`. 
postPatchelf = '' mkdir -p "$out" for output in $(getAllOutputNames); do if [[ "$output" != "out" ]]; then - ${meta.getExe lndir} "''${!output}" "$out" + lndir "''${!output}" "$out" fi done ''; diff --git a/pkgs/development/cuda-modules/generic-builders/multiplex.nix b/pkgs/development/cuda-modules/generic-builders/multiplex.nix index f2a9c6840ecd0..deeb2da6e0042 100644 --- a/pkgs/development/cuda-modules/generic-builders/multiplex.nix +++ b/pkgs/development/cuda-modules/generic-builders/multiplex.nix @@ -3,7 +3,7 @@ lib, cudaVersion, flags, - hostPlatform, + targetPlatform, # Expected to be passed by the caller mkVersionedPackageName, # pname :: String @@ -74,7 +74,7 @@ let # Get all of the packages for our given platform. # redistArch :: String # Value is `"unsupported"` if the platform is not supported. - redistArch = flags.getRedistArch hostPlatform.system; + redistArch = flags.getRedistArch targetPlatform.system; preferable = p1: p2: (isSupported p2 -> isSupported p1) && (strings.versionAtLeast p1.version p2.version); diff --git a/pkgs/development/cuda-modules/setup-hooks/extension.nix b/pkgs/development/cuda-modules/setup-hooks/extension.nix index ece70da52b027..b9afd4f1998cd 100644 --- a/pkgs/development/cuda-modules/setup-hooks/extension.nix +++ b/pkgs/development/cuda-modules/setup-hooks/extension.nix @@ -71,22 +71,19 @@ final: _: { autoAddCudaCompatRunpath = final.callPackage ( - {makeSetupHook, autoFixElfFiles, cuda_compat ? null }: + {makeSetupHook, autoFixElfFiles, lib, flags, cuda_compat ? 
null }: makeSetupHook { name = "auto-add-cuda-compat-runpath-hook"; propagatedBuildInputs = [autoFixElfFiles]; - substitutions = { - # Hotfix Ofborg evaluation - libcudaPath = if final.flags.isJetsonBuild then "${cuda_compat}/compat" else null; - }; - - meta.broken = !final.flags.isJetsonBuild; + substitutions.libcudaPath = lib.optionalString flags.isJetsonBuild "${cuda_compat}/compat"; - # Pre-cuda_compat CUDA release: - meta.badPlatforms = final.lib.optionals (cuda_compat == null) final.lib.platforms.all; - meta.platforms = cuda_compat.meta.platforms or [ ]; + meta = { + broken = !flags.isJetsonBuild; + badPlatforms = lib.optionals (cuda_compat == null) lib.platforms.all; + platforms = cuda_compat.meta.platforms or [ ]; + }; } ./auto-add-cuda-compat-runpath.sh ) diff --git a/pkgs/development/cuda-modules/tensorrt/fixup.nix b/pkgs/development/cuda-modules/tensorrt/fixup.nix index 51ca3d652bd1a..c6cbd137a0e4c 100644 --- a/pkgs/development/cuda-modules/tensorrt/fixup.nix +++ b/pkgs/development/cuda-modules/tensorrt/fixup.nix @@ -1,7 +1,7 @@ { cudaVersion, final, - hostPlatform, + targetPlatform, lib, mkVersionedPackageName, package, @@ -18,7 +18,7 @@ let versions ; # targetArch :: String - targetArch = attrsets.attrByPath [ hostPlatform.system ] "unsupported" { + targetArch = attrsets.attrByPath [ targetPlatform.system ] "unsupported" { x86_64-linux = "x86_64-linux-gnu"; aarch64-linux = "aarch64-linux-gnu"; }; @@ -106,7 +106,7 @@ finalAttrs: prevAttrs: { meta = prevAttrs.meta // { badPlatforms = prevAttrs.meta.badPlatforms or [ ] - ++ lib.optionals (targetArch == "unsupported") [ hostPlatform.system ]; + ++ lib.optionals (targetArch == "unsupported") [ targetPlatform.system ]; homepage = "https://developer.nvidia.com/tensorrt"; maintainers = prevAttrs.meta.maintainers ++ [maintainers.aidalgol]; }; diff --git a/pkgs/top-level/cuda-packages.nix b/pkgs/top-level/cuda-packages.nix index 4b8ad4646485e..eb0efcb10865f 100644 --- a/pkgs/top-level/cuda-packages.nix +++ 
b/pkgs/top-level/cuda-packages.nix @@ -21,17 +21,16 @@ # # I've (@connorbaker) attempted to do that, though I'm unsure of how this will interact with overrides. { - callPackage, + config, cudaVersion, + generateSplicesForMkScope, lib, - newScope, + makeScopeWithSplicing', pkgs, __attrsFailEvaluation ? true, }: let inherit (lib) - attrsets - customisation fixedPoints strings versions @@ -39,13 +38,13 @@ let # Backbone gpus = builtins.import ../development/cuda-modules/gpus.nix; nvccCompatibilities = builtins.import ../development/cuda-modules/nvcc-compatibilities.nix; - flags = callPackage ../development/cuda-modules/flags.nix {inherit cudaVersion gpus;}; passthruFunction = final: ( { inherit cudaVersion lib pkgs; - inherit gpus nvccCompatibilities flags; + inherit gpus nvccCompatibilities; + flags = final.callPackage ../development/cuda-modules/flags.nix {}; cudaMajorVersion = versions.major cudaVersion; cudaMajorMinorVersion = versions.majorMinor cudaVersion; cudaOlder = strings.versionOlder cudaVersion; @@ -58,7 +57,7 @@ let cudaPackages = final; # TODO(@connorbaker): `cudaFlags` is an alias for `flags` which should be removed in the future. - cudaFlags = flags; + cudaFlags = final.flags; # Exposed as cudaPackages.backendStdenv. 
# This is what nvcc uses as a backend, @@ -86,32 +85,33 @@ let ]; composedExtension = fixedPoints.composeManyExtensions [ - (import ../development/cuda-modules/setup-hooks/extension.nix) - (callPackage ../development/cuda-modules/cuda/extension.nix {inherit cudaVersion;}) - (callPackage ../development/cuda-modules/cuda/overrides.nix {inherit cudaVersion;}) - (callPackage ../development/cuda-modules/generic-builders/multiplex.nix { - inherit cudaVersion flags mkVersionedPackageName; - pname = "cudnn"; - releasesModule = ../development/cuda-modules/cudnn/releases.nix; - shimsFn = ../development/cuda-modules/cudnn/shims.nix; - fixupFn = ../development/cuda-modules/cudnn/fixup.nix; - }) - (callPackage ../development/cuda-modules/cutensor/extension.nix { - inherit cudaVersion flags mkVersionedPackageName; - }) - (callPackage ../development/cuda-modules/generic-builders/multiplex.nix { - inherit cudaVersion flags mkVersionedPackageName; - pname = "tensorrt"; - releasesModule = ../development/cuda-modules/tensorrt/releases.nix; - shimsFn = ../development/cuda-modules/tensorrt/shims.nix; - fixupFn = ../development/cuda-modules/tensorrt/fixup.nix; - }) - (callPackage ../development/cuda-modules/cuda-samples/extension.nix {inherit cudaVersion;}) - (callPackage ../development/cuda-modules/cuda-library-samples/extension.nix {}) + (builtins.import ../development/cuda-modules/setup-hooks/extension.nix) + (builtins.import ../development/cuda-modules/cuda/extension.nix {inherit cudaVersion lib;}) + (builtins.import ../development/cuda-modules/cuda/overrides.nix {inherit cudaVersion lib;}) + # (callPackage ../development/cuda-modules/generic-builders/multiplex.nix { + # inherit cudaVersion flags mkVersionedPackageName; + # pname = "cudnn"; + # releasesModule = ../development/cuda-modules/cudnn/releases.nix; + # shimsFn = ../development/cuda-modules/cudnn/shims.nix; + # fixupFn = ../development/cuda-modules/cudnn/fixup.nix; + # }) + # (callPackage 
../development/cuda-modules/cutensor/extension.nix { + # inherit cudaVersion flags mkVersionedPackageName; + # }) + # (callPackage ../development/cuda-modules/generic-builders/multiplex.nix { + # inherit cudaVersion flags mkVersionedPackageName; + # pname = "tensorrt"; + # releasesModule = ../development/cuda-modules/tensorrt/releases.nix; + # shimsFn = ../development/cuda-modules/tensorrt/shims.nix; + # fixupFn = ../development/cuda-modules/tensorrt/fixup.nix; + # }) + # (callPackage ../development/cuda-modules/cuda-samples/extension.nix {inherit cudaVersion;}) + # (callPackage ../development/cuda-modules/cuda-library-samples/extension.nix {}) ]; - cudaPackages = customisation.makeScope newScope ( - fixedPoints.extends composedExtension passthruFunction - ); + cudaPackages = makeScopeWithSplicing' { + otherSplices = generateSplicesForMkScope "cudaPackages"; + f = fixedPoints.extends composedExtension passthruFunction; + }; in cudaPackages // { inherit __attrsFailEvaluation; } From c86cead2478428d1d73535bf801120576b87df3f Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Tue, 23 Jan 2024 03:24:40 +0000 Subject: [PATCH 03/34] cuda-modules/setup-hooks: introduce helper function and add comments about callPackage --- .../cuda-modules/setup-hooks/extension.nix | 136 +++++++++--------- 1 file changed, 69 insertions(+), 67 deletions(-) diff --git a/pkgs/development/cuda-modules/setup-hooks/extension.nix b/pkgs/development/cuda-modules/setup-hooks/extension.nix index b9afd4f1998cd..32483c9e200eb 100644 --- a/pkgs/development/cuda-modules/setup-hooks/extension.nix +++ b/pkgs/development/cuda-modules/setup-hooks/extension.nix @@ -1,67 +1,67 @@ -final: _: { +let + createSetupHooks = + setupHooksAttrs: final: prev: + let + # It is imperative that we use `final.callPackage` to create these setup hooks, as it allows us access to the spliced + # package sets. 
+ inherit (final) callPackage; + + # NOTE(@connorbaker): We MUST use `lib` from `prev` because the attribute names CAN NOT depend on `final`. + inherit (prev.lib.attrsets) mapAttrs; + + aliases = { + # Deprecated: an alias kept for compatibility. Consider removing after 24.11 + autoAddOpenGLRunpathHook = final.autoAddDriverRunpath; + }; + in + mapAttrs (_: value: callPackage value { }) setupHooksAttrs // aliases; +in +createSetupHooks { # Helper hook used in both autoAddCudaCompatRunpath and # autoAddDriverRunpath that applies a generic patching action to all elf # files with a dynamic linking section. autoFixElfFiles = - final.callPackage - ( - {makeSetupHook}: - makeSetupHook - { - name = "auto-fix-elf-files"; - } - ./auto-fix-elf-files.sh - ) - {}; + { makeSetupHook }: makeSetupHook { name = "auto-fix-elf-files"; } ./auto-fix-elf-files.sh; # Internal hook, used by cudatoolkit and cuda redist packages # to accommodate automatic CUDAToolkit_ROOT construction markForCudatoolkitRootHook = - final.callPackage - ( - {makeSetupHook}: - makeSetupHook {name = "mark-for-cudatoolkit-root-hook";} ./mark-for-cudatoolkit-root-hook.sh - ) - {}; + { makeSetupHook }: + makeSetupHook { name = "mark-for-cudatoolkit-root-hook"; } ./mark-for-cudatoolkit-root-hook.sh; # Currently propagated by cuda_nvcc or cudatoolkit, rather than used directly setupCudaHook = - (final.callPackage - ( - {makeSetupHook, backendStdenv}: - makeSetupHook - { - name = "setup-cuda-hook"; - - substitutions.setupCudaHook = placeholder "out"; + { backendStdenv, makeSetupHook }: + makeSetupHook + { + name = "setup-cuda-hook"; - # Point NVCC at a compatible compiler - substitutions.ccRoot = "${backendStdenv.cc}"; - - # Required in addition to ccRoot as otherwise bin/gcc is looked up - # when building CMakeCUDACompilerId.cu - substitutions.ccFullPath = "${backendStdenv.cc}/bin/${backendStdenv.cc.targetPrefix}c++"; - } - ./setup-cuda-hook.sh - ) - {} - ); + substitutions = { + # Required in addition to ccRoot as 
otherwise bin/gcc is looked up + # when building CMakeCUDACompilerId.cu + ccFullPath = "${backendStdenv.cc}/bin/${backendStdenv.cc.targetPrefix}c++"; + # Point NVCC at a compatible compiler + ccRoot = "${backendStdenv.cc}"; + setupCudaHook = placeholder "out"; + }; + } + ./setup-cuda-hook.sh; autoAddDriverRunpath = - final.callPackage - ( - {addDriverRunpath, autoFixElfFiles, makeSetupHook}: - makeSetupHook - { - name = "auto-add-opengl-runpath-hook"; - propagatedBuildInputs = [addDriverRunpath autoFixElfFiles]; - } - ./auto-add-driver-runpath-hook.sh - ) - {}; - - # Deprecated: an alias kept for compatibility. Consider removing after 24.11 - autoAddOpenGLRunpathHook = final.autoAddDriverRunpath; + { + addDriverRunpath, + autoFixElfFiles, + makeSetupHook, + }: + makeSetupHook + { + name = "auto-add-opengl-runpath-hook"; + propagatedBuildInputs = [ + addDriverRunpath + autoFixElfFiles + ]; + } + ./auto-add-driver-runpath-hook.sh; # autoAddCudaCompatRunpath hook must be added AFTER `setupCudaHook`. Both # hooks prepend a path with `libcuda.so` to the `DT_RUNPATH` section of @@ -69,23 +69,25 @@ final: _: { # it doesn't have any effect) and thus appear first. Meaning this hook must be # executed last. autoAddCudaCompatRunpath = - final.callPackage - ( - {makeSetupHook, autoFixElfFiles, lib, flags, cuda_compat ? null }: - makeSetupHook - { - name = "auto-add-cuda-compat-runpath-hook"; - propagatedBuildInputs = [autoFixElfFiles]; + { + autoFixElfFiles, + cuda_compat ? 
null, + flags, + lib, + makeSetupHook, + }: + makeSetupHook + { + name = "auto-add-cuda-compat-runpath-hook"; + propagatedBuildInputs = [ autoFixElfFiles ]; - substitutions.libcudaPath = lib.optionalString flags.isJetsonBuild "${cuda_compat}/compat"; + substitutions.libcudaPath = lib.optionalString flags.isJetsonBuild "${cuda_compat}/compat"; - meta = { - broken = !flags.isJetsonBuild; - badPlatforms = lib.optionals (cuda_compat == null) lib.platforms.all; - platforms = cuda_compat.meta.platforms or [ ]; - }; - } - ./auto-add-cuda-compat-runpath.sh - ) - {}; + meta = { + broken = !flags.isJetsonBuild; + badPlatforms = lib.optionals (cuda_compat == null) lib.platforms.all; + platforms = cuda_compat.meta.platforms or [ ]; + }; + } + ./auto-add-cuda-compat-runpath.sh; } From 798c380ff1346c75adc751f6a3e09e1c066d6213 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Tue, 23 Jan 2024 03:28:33 +0000 Subject: [PATCH 04/34] cuda-modules/generic-builders/manifest: use hostTarget autoAddCudaCompatRunpathHook --- .../development/cuda-modules/generic-builders/manifest.nix | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix index 049c8936426d2..0a3c65f28fc30 100644 --- a/pkgs/development/cuda-modules/generic-builders/manifest.nix +++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix @@ -204,7 +204,12 @@ backendStdenv.mkDerivation ( ++ lib.optionals (pname != "cuda_compat" && flags.isJetsonBuild) [ # autoAddCudaCompatRunpath must appear AFTER autoAddDriverRunpath. # See its documentation in ./setup-hooks/extension.nix. - autoAddCudaCompatRunpath + # NOTE(@connorbaker): Because autoAddCudaCompatRunpath is in nativeBuildInputs, it tries to use toolchains + # from buildPlatform, but that's not what we want. We want to use our host/target toolchains! 
+ # To overcome this, we access the `__spliced` attribute and choose the `hostTarget` attribute. + # In the case the `__spliced` attribute doesn't exist, we just use the hook directly (because we're not + # cross-compiling). + autoAddCudaCompatRunpath.__spliced.hostTarget or autoAddCudaCompatRunpath ]; buildInputs = From 96cc89966d01d05e22cd36a3cde9eb79ef17794a Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Tue, 23 Jan 2024 03:33:38 +0000 Subject: [PATCH 05/34] cuda-modules/cuda/overrides: introduce helper function and add comments about callPackage --- .../cuda-modules/cuda/overrides.nix | 613 +++++++++++------- pkgs/top-level/cuda-packages.nix | 2 +- 2 files changed, 364 insertions(+), 251 deletions(-) diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index 31d03dd0fc73f..59d4639587ca5 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -1,284 +1,397 @@ -{cudaVersion, lib}: +# NOTE(@connorbaker): None of the functions in this attribute set should need to access _final or _prev. +# As such, they are prefixed with an underscore -- everything should be doable with the spliced package sets +# provided to each function in the attribute set by `final.callPackage`. let - inherit (lib) attrsets lists strings; - # cudaVersionOlder : Version -> Boolean - cudaVersionOlder = strings.versionOlder cudaVersion; - # cudaVersionAtLeast : Version -> Boolean - cudaVersionAtLeast = strings.versionAtLeast cudaVersion; + filterAndCreateOverrides = + createOverrideAttrs: final: prev: + let + # It is imperative that we use `final.callPackage` to create these overrides, as it allows us access to the spliced + # package sets. 
+ inherit (final) callPackage; - addBuildInputs = - drv: buildInputs: - drv.overrideAttrs (prevAttrs: {buildInputs = prevAttrs.buildInputs ++ buildInputs;}); + # NOTE(@connorbaker): We MUST use `lib` from `prev` because the attribute names CAN NOT depend on `final`. + inherit (prev.lib.attrsets) filterAttrs mapAttrs; + inherit (prev.lib.trivial) pipe; + in + pipe createOverrideAttrs [ + # NOTE: Filter out attributes that are not present in the previous version of + # the package set. This is necessary to prevent the appearance of attributes + # like `cuda_nvcc` in `cudaPackages_10_0, which predates redistributables. + (filterAttrs (name: _: prev ? ${name})) + # NOTE: It is imperative that we use `final.callPackage` to perform overrides, + # as it allows us access to the spliced package sets. + # Pass the previous version of the package to the override function. + (mapAttrs (name: value: callPackage value { ${name} = prev.${name}; })) + ]; in -# NOTE: Filter out attributes that are not present in the previous version of -# the package set. This is necessary to prevent the appearance of attributes -# like `cuda_nvcc` in `cudaPackages_10_0, which predates redistributables. -final: prev: -attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) { - libcufile = prev.libcufile.overrideAttrs ( - prevAttrs: { - buildInputs = prevAttrs.buildInputs ++ [ - final.libcublas.lib - final.pkgs.numactl - final.pkgs.rdma-core - ]; - # Before 11.7 libcufile depends on itself for some reason. - autoPatchelfIgnoreMissingDeps = - prevAttrs.autoPatchelfIgnoreMissingDeps - ++ lists.optionals (cudaVersionOlder "11.7") [ "libcufile.so.0" ]; - } - ); +filterAndCreateOverrides { + libcufile = + { + cudaOlder, + lib, + libcublas, + libcufile, + numactl, + rdma-core, + }: + libcufile.overrideAttrs ( + prevAttrs: { + buildInputs = prevAttrs.buildInputs ++ [ + libcublas.lib + numactl + rdma-core + ]; + # Before 11.7 libcufile depends on itself for some reason. 
+ autoPatchelfIgnoreMissingDeps = + prevAttrs.autoPatchelfIgnoreMissingDeps + ++ lib.lists.optionals (cudaOlder "11.7") [ "libcufile.so.0" ]; + } + ); - libcusolver = addBuildInputs prev.libcusolver ( - # Always depends on this - [final.libcublas.lib] - # Dependency from 12.0 and on - ++ lists.optionals (cudaVersionAtLeast "12.0") [final.libnvjitlink.lib] - # Dependency from 12.1 and on - ++ lists.optionals (cudaVersionAtLeast "12.1") [final.libcusparse.lib] - ); + libcusolver = + { + cudaAtLeast, + lib, + libcublas, + libcusolver, + libcusparse ? null, + libnvjitlink ? null, + }: + libcusolver.overrideAttrs ( + prevAttrs: { + buildInputs = + prevAttrs.buildInputs + # Always depends on this + ++ [ libcublas.lib ] + # Dependency from 12.0 and on + ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ] + # Dependency from 12.1 and on + ++ lib.lists.optionals (cudaAtLeast "12.1") [ libcusparse.lib ]; + } + ); - libcusparse = addBuildInputs prev.libcusparse ( - lists.optionals (cudaVersionAtLeast "12.0") [final.libnvjitlink.lib] - ); + libcusparse = + { + cudaAtLeast, + lib, + libcusparse, + libnvjitlink ? 
null, + }: + libcusparse.overrideAttrs ( + prevAttrs: { + buildInputs = + prevAttrs.buildInputs + # Dependency from 12.0 and on + ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ]; + } + ); - cuda_cudart = prev.cuda_cudart.overrideAttrs ( - prevAttrs: { - # Remove once cuda-find-redist-features has a special case for libcuda - outputs = - prevAttrs.outputs - ++ lists.optionals (!(builtins.elem "stubs" prevAttrs.outputs)) [ "stubs" ]; + cuda_cudart = + { + buildPackages, + cuda_cudart, + lib, + }: + cuda_cudart.overrideAttrs ( + prevAttrs: { + # Remove once cuda-find-redist-features has a special case for libcuda + outputs = + prevAttrs.outputs + ++ lib.lists.optionals (!(builtins.elem "stubs" prevAttrs.outputs)) [ "stubs" ]; - allowFHSReferences = false; + allowFHSReferences = false; - # The libcuda stub's pkg-config doesn't follow the general pattern: - postPatch = - prevAttrs.postPatch or "" - + '' - while IFS= read -r -d $'\0' path ; do - sed -i \ - -e "s|^libdir\s*=.*/lib\$|libdir=''${!outputLib}/lib/stubs|" \ - -e "s|^Libs\s*:\(.*\)\$|Libs: \1 -Wl,-rpath,${final.pkgs.addDriverRunpath.driverLink}/lib|" \ - "$path" - done < <(find -iname 'cuda-*.pc' -print0) - '' - + '' - # Namelink may not be enough, add a soname. - # Cf. https://gitlab.kitware.com/cmake/cmake/-/issues/25536 - if [[ -f lib/stubs/libcuda.so && ! -f lib/stubs/libcuda.so.1 ]] ; then - ln -s libcuda.so lib/stubs/libcuda.so.1 - fi - ''; + # The libcuda stub's pkg-config doesn't follow the general pattern: + postPatch = + prevAttrs.postPatch or "" + + '' + while IFS= read -r -d $'\0' path ; do + sed -i \ + -e "s|^libdir\s*=.*/lib\$|libdir=''${!outputLib}/lib/stubs|" \ + -e "s|^Libs\s*:\(.*\)\$|Libs: \1 -Wl,-rpath,${buildPackages.addDriverRunpath.driverLink}/lib|" \ + "$path" + done < <(find -iname 'cuda-*.pc' -print0) + '' + + '' + # Namelink may not be enough, add a soname. + # Cf. https://gitlab.kitware.com/cmake/cmake/-/issues/25536 + if [[ -f lib/stubs/libcuda.so && ! 
-f lib/stubs/libcuda.so.1 ]] ; then + ln -s libcuda.so lib/stubs/libcuda.so.1 + fi + ''; - postFixup = - prevAttrs.postFixup or "" - + '' - moveToOutput lib/stubs "$stubs" - ln -s "$stubs"/lib/stubs/* "$stubs"/lib/ - ln -s "$stubs"/lib/stubs "''${!outputLib}/lib/stubs" - ''; - } - ); + postFixup = + prevAttrs.postFixup or "" + + '' + moveToOutput lib/stubs "$stubs" + ln -s "$stubs"/lib/stubs/* "$stubs"/lib/ + ln -s "$stubs"/lib/stubs "''${!outputLib}/lib/stubs" + ''; + } + ); - cuda_compat = prev.cuda_compat.overrideAttrs ( - prevAttrs: { - autoPatchelfIgnoreMissingDeps = prevAttrs.autoPatchelfIgnoreMissingDeps ++ [ - "libnvrm_gpu.so" - "libnvrm_mem.so" - "libnvdla_runtime.so" - ]; - # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices. - badPlatformsConditions = prevAttrs.badPlatformsConditions // { - "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" = - !final.flags.isJetsonBuild; - }; - meta = prevAttrs.meta // { - # For cross-compilation, we need the hostPlatform to be included in order to fetch and build the package. This - # doesn't change the fact that it won't work on non-Jetson devices, so we only add it when building for Jetson. - platforms = prevAttrs.meta.platforms ++ lib.optionals final.flags.isJetsonBuild [ "x86_64-linux" ]; - }; - } - ); + cuda_compat = + { + cuda_compat, + flags, + lib, + }: + cuda_compat.overrideAttrs ( + prevAttrs: { + autoPatchelfIgnoreMissingDeps = prevAttrs.autoPatchelfIgnoreMissingDeps ++ [ + "libnvrm_gpu.so" + "libnvrm_mem.so" + "libnvdla_runtime.so" + ]; + # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices. + badPlatformsConditions = prevAttrs.badPlatformsConditions // { + "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" = !flags.isJetsonBuild; + }; + meta = prevAttrs.meta // { + # For cross-compilation, we need the hostPlatform to be included in order to fetch and build the package. 
This + # doesn't change the fact that it won't work on non-Jetson devices, so we only add it when building for Jetson. + platforms = prevAttrs.meta.platforms ++ lib.lists.optionals flags.isJetsonBuild [ "x86_64-linux" ]; + }; + } + ); - cuda_gdb = addBuildInputs prev.cuda_gdb ( - # x86_64 only needs gmp from 12.0 and on - lists.optionals (cudaVersionAtLeast "12.0") [final.pkgs.gmp] - ); + cuda_gdb = + { + cuda_gdb, + cudaAtLeast, + gmp, + lib, + }: + cuda_gdb.overrideAttrs ( + prevAttrs: { + buildInputs = + prevAttrs.buildInputs + # x86_64 only needs gmp from 12.0 and on + ++ lib.lists.optionals (cudaAtLeast "12.0") [ gmp ]; + } + ); - cuda_nvcc = prev.cuda_nvcc.overrideAttrs ( - oldAttrs: - let - # This replicates the logic in stdenvAdapters.useLibsFrom, except we use - # gcc from pkgsHostTarget and not from buildPackages. - ccForLibs-wrapper = final.pkgs.stdenv.cc; - gccMajorVersion = final.nvccCompatibilities.${cudaVersion}.gccMaxMajorVersion; - cc = final.pkgs.wrapCCWith { - cc = final.pkgs."gcc${gccMajorVersion}".cc; - useCcForLibs = true; - gccForLibs = ccForLibs-wrapper.cc; - }; - in + cuda_nvcc = { + backendStdenv, + buildPackages, + cuda_cudart, + cuda_nvcc, + cudaAtLeast, + cudaOlder, + lib, + setupCudaHook, + }: + cuda_nvcc.overrideAttrs ( + prevAttrs: { + # Remove once cuda-find-redist-features has a special case for libcuda + outputs = + prevAttrs.outputs + ++ lib.lists.optionals (!(builtins.elem "lib" prevAttrs.outputs)) [ "lib" ]; - outputs = oldAttrs.outputs ++ lists.optionals (!(builtins.elem "lib" oldAttrs.outputs)) [ "lib" ]; + # Patch the nvcc.profile. + # Syntax: + # - `=` for assignment, + # - `?=` for conditional assignment, + # - `+=` to "prepend", + # - `=+` to "append". - # Patch the nvcc.profile. - # Syntax: - # - `=` for assignment, - # - `?=` for conditional assignment, - # - `+=` to "prepend", - # - `=+` to "append". + # Cf.
https://web.archive.org/web/20230308044351/https://arcb.csc.ncsu.edu/~mueller/cluster/nvidia/2.0/nvcc_2.0.pdf - # Cf. https://web.archive.org/web/20230308044351/https://arcb.csc.ncsu.edu/~mueller/cluster/nvidia/2.0/nvcc_2.0.pdf + # We set all variables with the lowest priority (=+), but we do force + # nvcc to use the fixed backend toolchain. Cf. comments in + # backend-stdenv.nix - # We set all variables with the lowest priority (=+), but we do force - # nvcc to use the fixed backend toolchain. Cf. comments in - # backend-stdenv.nix + nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ backendStdenv.cc ]; - postPatch = - (oldAttrs.postPatch or "") - + '' - substituteInPlace bin/nvcc.profile \ - --replace \ - '$(TOP)/lib' \ - "''${!outputLib}/lib" \ - --replace \ - '$(TOP)/$(_NVVM_BRANCH_)' \ - "''${!outputBin}/nvvm" \ - --replace \ - '$(TOP)/$(_TARGET_DIR_)/include' \ - "''${!outputDev}/include" + postPatch = + (prevAttrs.postPatch or "") + + '' + echo "Running the cuda_nvcc postPatch" + substituteInPlace bin/nvcc.profile \ + --replace \ + '$(TOP)/lib' \ + "''${!outputLib}/lib" \ + --replace \ + '$(TOP)/$(_NVVM_BRANCH_)' \ + "''${!outputBin}/nvvm" \ + --replace \ + '$(TOP)/$(_TARGET_DIR_)/include' \ + "''${!outputDev}/include" - cat << EOF >> bin/nvcc.profile + cat << EOF >> bin/nvcc.profile - # Fix a compatible backend compiler - PATH += ${lib.getBin cc}/bin: + # Fix a compatible backend compiler + PATH += ${lib.getBin backendStdenv.cc}/bin: - # Expose the split-out nvvm - LIBRARIES =+ -L''${!outputBin}/nvvm/lib - INCLUDES =+ -I''${!outputBin}/nvvm/include + # Expose the split-out nvvm + LIBRARIES =+ -L''${!outputBin}/nvvm/lib + INCLUDES =+ -I''${!outputBin}/nvvm/include - # Expose cudart and the libcuda stubs - LIBRARIES =+ -L$static/lib" "-L${final.cuda_cudart.lib}/lib -L${final.cuda_cudart.lib}/lib/stubs - INCLUDES =+ -I${final.cuda_cudart.dev}/include - EOF - ''; + # Expose cudart and the libcuda stubs + LIBRARIES =+ -L$static/lib" "-L${cuda_cudart.lib}/lib 
-L${cuda_cudart.lib}/lib/stubs + INCLUDES =+ -I${cuda_cudart.dev}/include + EOF + ''; - propagatedBuildInputs = [ final.setupCudaHook ]; + propagatedNativeBuildInputs = [ setupCudaHook ]; - postInstall = - (oldAttrs.postInstall or "") - + '' - moveToOutput "nvvm" "''${!outputBin}" - ''; + postInstall = + (prevAttrs.postInstall or "") + + '' + moveToOutput "nvvm" "''${!outputBin}" + ''; - # The nvcc and cicc binaries contain hard-coded references to /usr - allowFHSReferences = true; + # The nvcc and cicc binaries contain hard-coded references to /usr + allowFHSReferences = true; - meta = (oldAttrs.meta or { }) // { - mainProgram = "nvcc"; - }; - } - ); + meta = (prevAttrs.meta or { }) // { + mainProgram = "nvcc"; + }; + } + ); - cuda_nvprof = prev.cuda_nvprof.overrideAttrs ( - prevAttrs: {buildInputs = prevAttrs.buildInputs ++ [final.cuda_cupti.lib];} - ); + cuda_nvprof = + { cuda_cupti, cuda_nvprof }: + cuda_nvprof.overrideAttrs ( + prevAttrs: { buildInputs = prevAttrs.buildInputs ++ [ cuda_cupti.lib ]; } + ); - cuda_demo_suite = addBuildInputs prev.cuda_demo_suite [ - final.pkgs.freeglut - final.pkgs.libGLU - final.pkgs.libglvnd - final.pkgs.mesa - final.libcufft.lib - final.libcurand.lib - ]; + cuda_demo_suite = + { + cuda_demo_suite, + freeglut, + lib, + libcufft, + libcurand, + libGLU, + libglvnd, + mesa, + }: + cuda_demo_suite.overrideAttrs ( + prevAttrs: { + buildInputs = prevAttrs.buildInputs ++ [ + freeglut + libcufft.lib + libcurand.lib + libGLU + libglvnd + mesa + ]; + } + ); - nsight_compute = prev.nsight_compute.overrideAttrs ( - prevAttrs: { - nativeBuildInputs = - prevAttrs.nativeBuildInputs - ++ ( - if (strings.versionOlder prev.nsight_compute.version "2022.2.0") then - [final.pkgs.qt5.wrapQtAppsHook] - else - [final.pkgs.qt6.wrapQtAppsHook] - ); - buildInputs = - prevAttrs.buildInputs - ++ ( - if (strings.versionOlder prev.nsight_compute.version "2022.2.0") then - [final.pkgs.qt5.qtwebview] - else - [final.pkgs.qt6.qtwebview] - ); - } - ); + 
nsight_compute = + { + lib, + nsight_compute, + qt5 ? null, + qt6 ? null, + }: + nsight_compute.overrideAttrs ( + prevAttrs: { + nativeBuildInputs = + prevAttrs.nativeBuildInputs + ++ ( + if (lib.strings.versionOlder prevAttrs.version "2022.2.0") then + [ qt5.wrapQtAppsHook ] + else + [ qt6.wrapQtAppsHook ] + ); + buildInputs = + prevAttrs.buildInputs + ++ ( + if (lib.strings.versionOlder prevAttrs.version "2022.2.0") then + [ qt5.qtwebview ] + else + [ qt6.qtwebview ] + ); + } + ); - nsight_systems = prev.nsight_systems.overrideAttrs ( - prevAttrs: - let - qt = if lib.versionOlder prevAttrs.version "2022.4.2.1" then final.pkgs.qt5 else final.pkgs.qt6; - qtwayland = - if lib.versions.major qt.qtbase.version == "5" then - lib.getBin qt.qtwayland - else - lib.getLib qt.qtwayland; - qtWaylandPlugins = "${qtwayland}/${qt.qtbase.qtPluginPrefix}"; - in + nsight_systems = { - # An ad hoc replacement for - # https://github.com/ConnorBaker/cuda-redist-find-features/issues/11 - env.rmPatterns = toString [ - "nsight-systems/*/*/libQt*" - "nsight-systems/*/*/libstdc*" - "nsight-systems/*/*/libboost*" - "nsight-systems/*/*/lib{ssl,ssh,crypto}*" - "nsight-systems/*/*/lib{arrow,jpeg}*" - "nsight-systems/*/*/Mesa" - "nsight-systems/*/*/python/bin/python" - "nsight-systems/*/*/libexec" - "nsight-systems/*/*/Plugins" - ]; - postPatch = - prevAttrs.postPatch or "" - + '' - for path in $rmPatterns ; do - rm -r "$path" - done - ''; - nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ qt.wrapQtAppsHook ]; - buildInputs = prevAttrs.buildInputs ++ [ - final.cuda_cudart.stubs - final.pkgs.alsa-lib - final.pkgs.boost178 - final.pkgs.e2fsprogs - final.pkgs.gst_all_1.gst-plugins-base - final.pkgs.gst_all_1.gstreamer - final.pkgs.nss - final.pkgs.numactl - final.pkgs.pulseaudio - final.pkgs.rdma-core - final.pkgs.ucx - final.pkgs.wayland - final.pkgs.xorg.libXcursor - final.pkgs.xorg.libXdamage - final.pkgs.xorg.libXrandr - final.pkgs.xorg.libXtst - qt.qtbase - (qt.qtdeclarative or qt.full) 
- (qt.qtsvg or qt.full) - qtWaylandPlugins - ]; + alsa-lib, + boost178, + cuda_cudart, + cudaOlder, + e2fsprogs, + gst_all_1, + lib, + nsight_systems, + nss, + numactl, + pulseaudio, + qt5 ? null, + qt6 ? null, + rdma-core, + ucx, + wayland, + xorg, + }: + nsight_systems.overrideAttrs ( + prevAttrs: + let + qt = if lib.strings.versionOlder prevAttrs.version "2022.4.2.1" then qt5 else qt6; + qtwayland = + if lib.versions.major qt.qtbase.version == "5" then + lib.getBin qt.qtwayland + else + lib.getLib qt.qtwayland; + qtWaylandPlugins = "${qtwayland}/${qt.qtbase.qtPluginPrefix}"; + in + { + # An ad hoc replacement for + # https://github.com/ConnorBaker/cuda-redist-find-features/issues/11 + env.rmPatterns = toString [ + "nsight-systems/*/*/libQt*" + "nsight-systems/*/*/libstdc*" + "nsight-systems/*/*/libboost*" + "nsight-systems/*/*/lib{ssl,ssh,crypto}*" + "nsight-systems/*/*/lib{arrow,jpeg}*" + "nsight-systems/*/*/Mesa" + "nsight-systems/*/*/python/bin/python" + "nsight-systems/*/*/libexec" + "nsight-systems/*/*/Plugins" + ]; + postPatch = + prevAttrs.postPatch or "" + + '' + for path in $rmPatterns ; do + rm -r "$path" + done + ''; + nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ qt.wrapQtAppsHook ]; + buildInputs = prevAttrs.buildInputs ++ [ + (qt.qtdeclarative or qt.full) + (qt.qtsvg or qt.full) + alsa-lib + boost178 + cuda_cudart.stubs + e2fsprogs + gst_all_1.gst-plugins-base + gst_all_1.gstreamer + nss + numactl + pulseaudio + qt.qtbase + qtWaylandPlugins + rdma-core + ucx + wayland + xorg.libXcursor + xorg.libXdamage + xorg.libXrandr + xorg.libXtst + ]; - # Older releases require boost 1.70 deprecated in Nixpkgs - meta.broken = prevAttrs.meta.broken or false || lib.versionOlder final.cudaVersion "11.8"; - } - ); + # Older releases require boost 1.70 deprecated in Nixpkgs + meta.broken = prevAttrs.meta.broken or false || cudaOlder "11.8"; + } + ); - nvidia_driver = prev.nvidia_driver.overrideAttrs { - # No need to support this package as we have drivers already - # in linuxPackages.
- meta.broken = true; - }; + nvidia_driver = + { nvidia_driver }: + nvidia_driver.overrideAttrs { + # No need to support this package as we have drivers already + # in linuxPackages. + meta.broken = true; + }; } diff --git a/pkgs/top-level/cuda-packages.nix b/pkgs/top-level/cuda-packages.nix index eb0efcb10865f..8191d6035cbb0 100644 --- a/pkgs/top-level/cuda-packages.nix +++ b/pkgs/top-level/cuda-packages.nix @@ -87,7 +87,7 @@ let composedExtension = fixedPoints.composeManyExtensions [ (builtins.import ../development/cuda-modules/setup-hooks/extension.nix) (builtins.import ../development/cuda-modules/cuda/extension.nix {inherit cudaVersion lib;}) - (builtins.import ../development/cuda-modules/cuda/overrides.nix {inherit cudaVersion lib;}) + (builtins.import ../development/cuda-modules/cuda/overrides.nix) # (callPackage ../development/cuda-modules/generic-builders/multiplex.nix { # inherit cudaVersion flags mkVersionedPackageName; # pname = "cudnn"; From 40aab07fb59ec88c396eb4959b1bc92fec1b3d69 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Tue, 23 Jan 2024 03:36:03 +0000 Subject: [PATCH 06/34] cuda-modules/cuda/overrides: add TODOs for @connorbaker --- pkgs/development/cuda-modules/cuda/overrides.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index 59d4639587ca5..f8c96a538be12 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -185,6 +185,8 @@ filterAndCreateOverrides { cuda_nvcc.overrideAttrs ( prevAttrs: { # Remove once cuda-find-redist-features has a special case for libcuda + # TODO(@connorbaker): The order of build outputs matters as we traverse them when creating split outputs. + # The `lib` output cannot come after `static` as it moves all the static libraries back to the `lib` output. 
outputs = prevAttrs.outputs ++ lib.lists.optionals (!(builtins.elem "lib" prevAttrs.outputs)) [ "lib" ]; @@ -204,6 +206,7 @@ filterAndCreateOverrides { nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ backendStdenv.cc ]; + # TODO(@connorbaker): We should specify the spliced version of backendStdenv and cuda_cudart to use here. postPatch = (prevAttrs.postPatch or "") + '' From 2057e243b20e3b34097b2fba0face2cb5e175e36 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Tue, 30 Jan 2024 17:38:09 +0000 Subject: [PATCH 07/34] cuda-modules: use hostPlatform when downloading binaries --- pkgs/development/cuda-modules/cutensor/extension.nix | 4 ++-- .../cuda-modules/generic-builders/manifest.nix | 8 +++----- .../cuda-modules/generic-builders/multiplex.nix | 4 ++-- pkgs/development/cuda-modules/tensorrt/fixup.nix | 6 +++--- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pkgs/development/cuda-modules/cutensor/extension.nix b/pkgs/development/cuda-modules/cutensor/extension.nix index 38b0b03248aad..29959fc013f99 100644 --- a/pkgs/development/cuda-modules/cutensor/extension.nix +++ b/pkgs/development/cuda-modules/cutensor/extension.nix @@ -13,9 +13,9 @@ # - Instead of providing different releases for each version of CUDA, CuTensor has multiple subdirectories in `lib` # -- one for each version of CUDA. { + backendStdenv, cudaVersion, flags, - targetPlatform, lib, mkVersionedPackageName, }: @@ -93,7 +93,7 @@ let # LibPath are not constant across the same release -- one platform may support fewer # CUDA versions than another. 
# redistArch :: String - redistArch = flags.getRedistArch targetPlatform.system; + redistArch = flags.getRedistArch backendStdenv.hostPlatform.system; # platformIsSupported :: Manifests -> Boolean platformIsSupported = {feature, ...}: diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix index 0a3c65f28fc30..fc1b899d1146c 100644 --- a/pkgs/development/cuda-modules/generic-builders/manifest.nix +++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix @@ -10,7 +10,6 @@ markForCudatoolkitRootHook, flags, stdenv, - targetPlatform, # Builder-specific arguments # Short package name (e.g., "cuda_cccl") # pname : String @@ -32,7 +31,6 @@ let inherit (lib) attrsets lists - meta strings trivial licenses @@ -45,10 +43,10 @@ let supportedRedistArchs = builtins.attrNames featureRelease; # redistArch :: String # The redistArch is the name of the architecture for which the redistributable is built. - # It is `"unsupported"` if the redistributable is not supported on the target platform. - redistArch = flags.getRedistArch targetPlatform.system; + # It is `"unsupported"` if the redistributable is not supported on the hostPlatform. 
+ redistArch = flags.getRedistArch backendStdenv.hostPlatform.system; - sourceMatchesHost = flags.getNixSystem redistArch == stdenv.hostPlatform.system; + sourceMatchesHost = flags.getNixSystem redistArch == backendStdenv.hostPlatform.system; in backendStdenv.mkDerivation ( finalAttrs: { diff --git a/pkgs/development/cuda-modules/generic-builders/multiplex.nix b/pkgs/development/cuda-modules/generic-builders/multiplex.nix index deeb2da6e0042..1cb6d8462b7dc 100644 --- a/pkgs/development/cuda-modules/generic-builders/multiplex.nix +++ b/pkgs/development/cuda-modules/generic-builders/multiplex.nix @@ -1,9 +1,9 @@ { # callPackage-provided arguments + backendStdenv, lib, cudaVersion, flags, - targetPlatform, # Expected to be passed by the caller mkVersionedPackageName, # pname :: String @@ -74,7 +74,7 @@ let # Get all of the packages for our given platform. # redistArch :: String # Value is `"unsupported"` if the platform is not supported. - redistArch = flags.getRedistArch targetPlatform.system; + redistArch = flags.getRedistArch backendStdenv.hostPlatform.system; preferable = p1: p2: (isSupported p2 -> isSupported p1) && (strings.versionAtLeast p1.version p2.version); diff --git a/pkgs/development/cuda-modules/tensorrt/fixup.nix b/pkgs/development/cuda-modules/tensorrt/fixup.nix index c6cbd137a0e4c..27851d3e1e56f 100644 --- a/pkgs/development/cuda-modules/tensorrt/fixup.nix +++ b/pkgs/development/cuda-modules/tensorrt/fixup.nix @@ -1,7 +1,7 @@ { + backendStdenv, cudaVersion, final, - targetPlatform, lib, mkVersionedPackageName, package, @@ -18,7 +18,7 @@ let versions ; # targetArch :: String - targetArch = attrsets.attrByPath [ targetPlatform.system ] "unsupported" { + targetArch = attrsets.attrByPath [ backendStdenv.hostPlatform.system ] "unsupported" { x86_64-linux = "x86_64-linux-gnu"; aarch64-linux = "aarch64-linux-gnu"; }; @@ -106,7 +106,7 @@ finalAttrs: prevAttrs: { meta = prevAttrs.meta // { badPlatforms = prevAttrs.meta.badPlatforms or [ ] - ++ lib.optionals 
(targetArch == "unsupported") [ targetPlatform.system ]; + ++ lib.optionals (targetArch == "unsupported") [ backendStdenv.hostPlatform.system ]; homepage = "https://developer.nvidia.com/tensorrt"; maintainers = prevAttrs.meta.maintainers ++ [maintainers.aidalgol]; }; From cd632d812adc51981c2662ce8b168d70ca342ac7 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 20 Mar 2024 17:18:17 +0000 Subject: [PATCH 08/34] cuda-modules/setup-hooks: switch to directory structure --- .../auto-add-cuda-compat-runpath-hook.sh} | 0 .../default.nix | 26 +++++ .../auto-add-driver-runpath-hook.sh | 0 .../auto-add-driver-runpath-hook/default.nix | 14 +++ .../auto-fix-elf-files-hook.sh} | 2 +- .../auto-fix-elf-files-hook/default.nix | 4 + .../cuda-modules/setup-hooks/extension.nix | 102 ++---------------- .../default.nix | 4 + .../mark-for-cudatoolkit-root-hook.sh | 0 .../setup-hooks/setup-cuda-hook/default.nix | 16 +++ .../{ => setup-cuda-hook}/setup-cuda-hook.sh | 0 11 files changed, 75 insertions(+), 93 deletions(-) rename pkgs/development/cuda-modules/setup-hooks/{auto-add-cuda-compat-runpath.sh => auto-add-cuda-compat-runpath-hook/auto-add-cuda-compat-runpath-hook.sh} (100%) create mode 100644 pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix rename pkgs/development/cuda-modules/setup-hooks/{ => auto-add-driver-runpath-hook}/auto-add-driver-runpath-hook.sh (100%) create mode 100644 pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/default.nix rename pkgs/development/cuda-modules/setup-hooks/{auto-fix-elf-files.sh => auto-fix-elf-files-hook/auto-fix-elf-files-hook.sh} (97%) create mode 100644 pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/default.nix create mode 100644 pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/default.nix rename pkgs/development/cuda-modules/setup-hooks/{ => mark-for-cudatoolkit-root-hook}/mark-for-cudatoolkit-root-hook.sh (100%) create mode 100644 
pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix rename pkgs/development/cuda-modules/setup-hooks/{ => setup-cuda-hook}/setup-cuda-hook.sh (100%) diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath.sh b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/auto-add-cuda-compat-runpath-hook.sh similarity index 100% rename from pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath.sh rename to pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/auto-add-cuda-compat-runpath-hook.sh diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix new file mode 100644 index 0000000000000..f253331fb24b0 --- /dev/null +++ b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix @@ -0,0 +1,26 @@ +# autoAddCudaCompatRunpath hook must be added AFTER `setupCudaHook`. Both +# hooks prepend a path with `libcuda.so` to the `DT_RUNPATH` section of +# patched elf files, but `cuda_compat` path must take precedence (otherwise, +# it doesn't have any effect) and thus appear first. Meaning this hook must be +# executed last. +{ + autoFixElfFiles, + cuda_compat ? 
null, + flags, + lib, + makeSetupHook, +}: +makeSetupHook + { + name = "auto-add-cuda-compat-runpath-hook"; + propagatedBuildInputs = [ autoFixElfFiles ]; + + substitutions.libcudaPath = lib.optionalString flags.isJetsonBuild "${cuda_compat}/compat"; + + meta = { + broken = !flags.isJetsonBuild; + badPlatforms = lib.optionals (cuda_compat == null) lib.platforms.all; + platforms = cuda_compat.meta.platforms or [ ]; + }; + } + ./auto-add-cuda-compat-runpath-hook.sh diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook.sh b/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/auto-add-driver-runpath-hook.sh similarity index 100% rename from pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook.sh rename to pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/auto-add-driver-runpath-hook.sh diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/default.nix new file mode 100644 index 0000000000000..97d020b2129d5 --- /dev/null +++ b/pkgs/development/cuda-modules/setup-hooks/auto-add-driver-runpath-hook/default.nix @@ -0,0 +1,14 @@ +{ + addDriverRunpath, + autoFixElfFiles, + makeSetupHook, +}: +makeSetupHook + { + name = "auto-add-opengl-runpath-hook"; + propagatedBuildInputs = [ + addDriverRunpath + autoFixElfFiles + ]; + } + ./auto-add-driver-runpath-hook.sh diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/auto-fix-elf-files-hook.sh similarity index 97% rename from pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh rename to pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/auto-fix-elf-files-hook.sh index 1d57dfb17a66d..084c14016fc0b 100644 --- a/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files.sh +++ 
b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/auto-fix-elf-files-hook.sh @@ -2,7 +2,7 @@ # List all dynamically linked ELF files in the outputs and apply a generic fix # action provided as a parameter (currently used to add the CUDA or the # cuda_compat driver to the runpath of binaries) -echo "Sourcing cuda/fix-elf-files.sh" +echo "Sourcing auto-fix-elf-files-hook" # Returns the exit code of patchelf --print-rpath. # A return code of 0 (success) means the ELF file has a dynamic section, while diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/default.nix new file mode 100644 index 0000000000000..4550dc80edaef --- /dev/null +++ b/pkgs/development/cuda-modules/setup-hooks/auto-fix-elf-files-hook/default.nix @@ -0,0 +1,4 @@ +# Helper hook used in both autoAddCudaCompatRunpath and +# autoAddDriverRunpath that applies a generic patching action to all elf +# files with a dynamic linking section. +{ makeSetupHook }: makeSetupHook { name = "auto-fix-elf-files-hook"; } ./auto-fix-elf-files-hook.sh diff --git a/pkgs/development/cuda-modules/setup-hooks/extension.nix b/pkgs/development/cuda-modules/setup-hooks/extension.nix index 32483c9e200eb..57dc92900e77c 100644 --- a/pkgs/development/cuda-modules/setup-hooks/extension.nix +++ b/pkgs/development/cuda-modules/setup-hooks/extension.nix @@ -1,93 +1,11 @@ -let - createSetupHooks = - setupHooksAttrs: final: prev: - let - # It is imperative that we use `final.callPackage` to create these setup hooks, as it allows us access to the spliced - # package sets. - inherit (final) callPackage; - - # NOTE(@connorbaker): We MUST use `lib` from `prev` because the attribute names CAN NOT depend on `final`. - inherit (prev.lib.attrsets) mapAttrs; - - aliases = { - # Deprecated: an alias kept for compatibility. 
Consider removing after 24.11 - autoAddOpenGLRunpathHook = final.autoAddDriverRunpath; - }; - in - mapAttrs (_: value: callPackage value { }) setupHooksAttrs // aliases; -in -createSetupHooks { - # Helper hook used in both autoAddCudaCompatRunpath and - # autoAddDriverRunpath that applies a generic patching action to all elf - # files with a dynamic linking section. - autoFixElfFiles = - { makeSetupHook }: makeSetupHook { name = "auto-fix-elf-files"; } ./auto-fix-elf-files.sh; - - # Internal hook, used by cudatoolkit and cuda redist packages - # to accommodate automatic CUDAToolkit_ROOT construction - markForCudatoolkitRootHook = - { makeSetupHook }: - makeSetupHook { name = "mark-for-cudatoolkit-root-hook"; } ./mark-for-cudatoolkit-root-hook.sh; - - # Currently propagated by cuda_nvcc or cudatoolkit, rather than used directly - setupCudaHook = - { backendStdenv, makeSetupHook }: - makeSetupHook - { - name = "setup-cuda-hook"; - - substitutions = { - # Required in addition to ccRoot as otherwise bin/gcc is looked up - # when building CMakeCUDACompilerId.cu - ccFullPath = "${backendStdenv.cc}/bin/${backendStdenv.cc.targetPrefix}c++"; - # Point NVCC at a compatible compiler - ccRoot = "${backendStdenv.cc}"; - setupCudaHook = placeholder "out"; - }; - } - ./setup-cuda-hook.sh; - - autoAddDriverRunpath = - { - addDriverRunpath, - autoFixElfFiles, - makeSetupHook, - }: - makeSetupHook - { - name = "auto-add-opengl-runpath-hook"; - propagatedBuildInputs = [ - addDriverRunpath - autoFixElfFiles - ]; - } - ./auto-add-driver-runpath-hook.sh; - - # autoAddCudaCompatRunpath hook must be added AFTER `setupCudaHook`. Both - # hooks prepend a path with `libcuda.so` to the `DT_RUNPATH` section of - # patched elf files, but `cuda_compat` path must take precedence (otherwise, - # it doesn't have any effect) and thus appear first. Meaning this hook must be - # executed last. - autoAddCudaCompatRunpath = - { - autoFixElfFiles, - cuda_compat ? 
null, - flags, - lib, - makeSetupHook, - }: - makeSetupHook - { - name = "auto-add-cuda-compat-runpath-hook"; - propagatedBuildInputs = [ autoFixElfFiles ]; - - substitutions.libcudaPath = lib.optionalString flags.isJetsonBuild "${cuda_compat}/compat"; - - meta = { - broken = !flags.isJetsonBuild; - badPlatforms = lib.optionals (cuda_compat == null) lib.platforms.all; - platforms = cuda_compat.meta.platforms or [ ]; - }; - } - ./auto-add-cuda-compat-runpath.sh; +final: _: { + autoAddCudaCompatRunpath = final.callPackage ./auto-add-cuda-compat-runpath-hook { }; + autoAddDriverRunpath = final.callPackage ./auto-add-driver-runpath-hook { }; + autoFixElfFiles = final.callPackage ./auto-fix-elf-files-hook { }; + markForCudatoolkitRootHook = final.callPackage ./mark-for-cudatoolkit-root-hook { }; + setupCudaHook = final.callPackage ./setup-cuda-hook { }; + + # Aliases + # Deprecated: an alias kept for compatibility. Consider removing after 24.11 + autoAddOpenGLRunpathHook = final.autoAddDriverRunpath; } diff --git a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/default.nix new file mode 100644 index 0000000000000..86ff28d6c41a1 --- /dev/null +++ b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/default.nix @@ -0,0 +1,4 @@ +# Internal hook, used by cudatoolkit and cuda redist packages +# to accommodate automatic CUDAToolkit_ROOT construction +{ makeSetupHook }: +makeSetupHook { name = "mark-for-cudatoolkit-root-hook"; } ./mark-for-cudatoolkit-root-hook.sh diff --git a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh similarity index 100% rename from pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook.sh rename to 
pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh diff --git a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix new file mode 100644 index 0000000000000..6c5f299d4418c --- /dev/null +++ b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix @@ -0,0 +1,16 @@ +# Currently propagated by cuda_nvcc or cudatoolkit, rather than used directly +{ backendStdenv, makeSetupHook }: +makeSetupHook + { + name = "setup-cuda-hook"; + + substitutions = { + # Required in addition to ccRoot as otherwise bin/gcc is looked up + # when building CMakeCUDACompilerId.cu + ccFullPath = "${backendStdenv.cc}/bin/${backendStdenv.cc.targetPrefix}c++"; + # Point NVCC at a compatible compiler + ccRoot = "${backendStdenv.cc}"; + setupCudaHook = placeholder "out"; + }; + } + ./setup-cuda-hook.sh diff --git a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook.sh b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh similarity index 100% rename from pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook.sh rename to pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh From 1aa56f017116b209784f770d65102e1f3fb7191a Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 20 Mar 2024 17:29:27 +0000 Subject: [PATCH 09/34] cuda-modules: always get *Platform from stdenv --- .../cuda-modules/cuda-library-samples/extension.nix | 6 ++++-- .../cuda-modules/cuda-samples/extension.nix | 4 ++-- pkgs/development/cuda-modules/cuda-samples/generic.nix | 3 ++- pkgs/development/cuda-modules/flags.nix | 10 +++++++--- .../cuda-modules/generic-builders/manifest.nix | 6 ++++-- pkgs/development/cuda-modules/tensorrt/fixup.nix | 7 +++++-- 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/pkgs/development/cuda-modules/cuda-library-samples/extension.nix 
b/pkgs/development/cuda-modules/cuda-library-samples/extension.nix index 4cb34af732095..9092a653bd5e9 100644 --- a/pkgs/development/cuda-modules/cuda-library-samples/extension.nix +++ b/pkgs/development/cuda-modules/cuda-library-samples/extension.nix @@ -1,8 +1,10 @@ -{hostPlatform, lib}: +{backendStdenv, lib}: let + inherit (backendStdenv.hostPlatform) isx86_64 isLinux; + # Samples are built around the CUDA Toolkit, which is not available for # aarch64. Check for both CUDA version and platform. - platformIsSupported = hostPlatform.isx86_64 && hostPlatform.isLinux; + platformIsSupported = isx86_64 && isLinux; # Build our extension extension = diff --git a/pkgs/development/cuda-modules/cuda-samples/extension.nix b/pkgs/development/cuda-modules/cuda-samples/extension.nix index d41da90cd5d0e..90a124f80fa73 100644 --- a/pkgs/development/cuda-modules/cuda-samples/extension.nix +++ b/pkgs/development/cuda-modules/cuda-samples/extension.nix @@ -1,6 +1,6 @@ { + backendStdenv, cudaVersion, - hostPlatform, lib, }: let @@ -26,7 +26,7 @@ let # Samples are built around the CUDA Toolkit, which is not available for # aarch64. Check for both CUDA version and platform. cudaVersionIsSupported = cudaVersionToHash ? 
${cudaVersion}; - platformIsSupported = hostPlatform.isx86_64; + platformIsSupported = backendStdenv.hostPlatform.isx86_64; isSupported = cudaVersionIsSupported && platformIsSupported; # Build our extension diff --git a/pkgs/development/cuda-modules/cuda-samples/generic.nix b/pkgs/development/cuda-modules/cuda-samples/generic.nix index 3d1dac015e16c..e2a33cd7839c9 100644 --- a/pkgs/development/cuda-modules/cuda-samples/generic.nix +++ b/pkgs/development/cuda-modules/cuda-samples/generic.nix @@ -14,6 +14,7 @@ }: let inherit (lib) lists strings; + inherit (backendStdenv.hostPlatform.parsed) cpu kernel; in backendStdenv.mkDerivation ( finalAttrs: { @@ -64,7 +65,7 @@ backendStdenv.mkDerivation ( installPhase = '' runHook preInstall - install -Dm755 -t $out/bin bin/${backendStdenv.hostPlatform.parsed.cpu.name}/${backendStdenv.hostPlatform.parsed.kernel.name}/release/* + install -Dm755 -t $out/bin bin/${cpu.name}/${kernel.name}/release/* runHook postInstall ''; diff --git a/pkgs/development/cuda-modules/flags.nix b/pkgs/development/cuda-modules/flags.nix index 50a69d6fd1d1d..89ddfe53aea96 100644 --- a/pkgs/development/cuda-modules/flags.nix +++ b/pkgs/development/cuda-modules/flags.nix @@ -2,14 +2,12 @@ # Gpu :: AttrSet # - See the documentation in ./gpus.nix. { + backendStdenv, config, cudaCapabilities ? (config.cudaCapabilities or []), cudaForwardCompat ? (config.cudaForwardCompat or true), lib, cudaVersion, - buildPlatform, - hostPlatform, - targetPlatform, # gpus :: List Gpu gpus, }: @@ -22,6 +20,12 @@ let trivial ; + inherit (backendStdenv) + buildPlatform + hostPlatform + targetPlatform + ; + # Flags are determined based on your CUDA toolkit by default. You may benefit # from improved performance, reduced file size, or greater hardware support by # passing a configuration based on your specific GPU environment. 
diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix index fc1b899d1146c..cb49f98d77597 100644 --- a/pkgs/development/cuda-modules/generic-builders/manifest.nix +++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix @@ -38,15 +38,17 @@ let sourceTypes ; + inherit (backendStdenv) hostPlatform; + # Get the redist architectures for which package provides distributables. # These are used by meta.platforms. supportedRedistArchs = builtins.attrNames featureRelease; # redistArch :: String # The redistArch is the name of the architecture for which the redistributable is built. # It is `"unsupported"` if the redistributable is not supported on the hostPlatform. - redistArch = flags.getRedistArch backendStdenv.hostPlatform.system; + redistArch = flags.getRedistArch hostPlatform.system; - sourceMatchesHost = flags.getNixSystem redistArch == backendStdenv.hostPlatform.system; + sourceMatchesHost = flags.getNixSystem redistArch == hostPlatform.system; in backendStdenv.mkDerivation ( finalAttrs: { diff --git a/pkgs/development/cuda-modules/tensorrt/fixup.nix b/pkgs/development/cuda-modules/tensorrt/fixup.nix index 27851d3e1e56f..f632f1b138d96 100644 --- a/pkgs/development/cuda-modules/tensorrt/fixup.nix +++ b/pkgs/development/cuda-modules/tensorrt/fixup.nix @@ -17,8 +17,11 @@ let strings versions ; + + inherit (backendStdenv) hostPlatform; + # targetArch :: String - targetArch = attrsets.attrByPath [ backendStdenv.hostPlatform.system ] "unsupported" { + targetArch = attrsets.attrByPath [ hostPlatform.system ] "unsupported" { x86_64-linux = "x86_64-linux-gnu"; aarch64-linux = "aarch64-linux-gnu"; }; @@ -106,7 +109,7 @@ finalAttrs: prevAttrs: { meta = prevAttrs.meta // { badPlatforms = prevAttrs.meta.badPlatforms or [ ] - ++ lib.optionals (targetArch == "unsupported") [ backendStdenv.hostPlatform.system ]; + ++ lib.optionals (targetArch == "unsupported") [ hostPlatform.system ]; homepage = 
"https://developer.nvidia.com/tensorrt"; maintainers = prevAttrs.meta.maintainers ++ [maintainers.aidalgol]; }; From 0fec676f01a2d159b60f410dcb5052273bf04aec Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 20 Mar 2024 17:46:48 +0000 Subject: [PATCH 10/34] cuda-modules/cuda/overrides: simplify callPackage then overrideAttrs pattern --- .../cuda-modules/cuda/overrides.nix | 520 +++++++++--------- 1 file changed, 246 insertions(+), 274 deletions(-) diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index f8c96a538be12..a8a2c173d3c17 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -1,268 +1,248 @@ -# NOTE(@connorbaker): None of the functions in this attribute set should need to access _final or _prev. -# As such, they are prefixed with an underscore -- everything should be doable with the spliced package sets -# provided to each function in the attribute set by `final.callPackage`. let filterAndCreateOverrides = createOverrideAttrs: final: prev: let - # It is imperative that we use `final.callPackage` to create these overrides, as it allows us access to the spliced - # package sets. + # It is imperative that we use `final.callPackage` to create these overrides + # as it allows us access to the spliced package sets. inherit (final) callPackage; - # NOTE(@connorbaker): We MUST use `lib` from `prev` because the attribute names CAN NOT depend on `final`. + # NOTE(@connorbaker): We MUST use `lib` from `prev` because the attribute + # names CAN NOT depend on `final`. inherit (prev.lib.attrsets) filterAttrs mapAttrs; inherit (prev.lib.trivial) pipe; - in - pipe createOverrideAttrs [ + # NOTE: Filter out attributes that are not present in the previous version of # the package set. This is necessary to prevent the appearance of attributes # like `cuda_nvcc` in `cudaPackages_10_0, which predates redistributables. 
- (filterAttrs (name: _: prev ? ${name})) + filterOutNewAttrs = filterAttrs (name: _: prev ? ${name}); + # NOTE: It is imperative that we use `final.callPackage` to perform overrides, # as it allows us access to the spliced package sets. - # Pass the previous version of the package to the override function. - (mapAttrs (name: value: callPackage value { ${name} = prev.${name}; })) + # Apply callPackage to each attribute value, yielding a value to be passed + # to overrideAttrs. + callPackageThenOverrideAttrs = mapAttrs ( + name: value: prev.${name}.overrideAttrs (callPackage value { }) + ); + in + pipe createOverrideAttrs [ + filterOutNewAttrs + callPackageThenOverrideAttrs ]; in +# Each attribute name is the name of an existing package in the previous version +# of the package set. +# The value is a function (to be provided to callPackage), which yields a value +# to be provided to overrideAttrs. This allows us to override the attributes of +# a package without losing access to the fixed point of the package set -- +# especially useful given that some packages may depend on each other! filterAndCreateOverrides { libcufile = { cudaOlder, lib, libcublas, - libcufile, numactl, rdma-core, }: - libcufile.overrideAttrs ( - prevAttrs: { - buildInputs = prevAttrs.buildInputs ++ [ - libcublas.lib - numactl - rdma-core - ]; - # Before 11.7 libcufile depends on itself for some reason. - autoPatchelfIgnoreMissingDeps = - prevAttrs.autoPatchelfIgnoreMissingDeps - ++ lib.lists.optionals (cudaOlder "11.7") [ "libcufile.so.0" ]; - } - ); + prevAttrs: { + buildInputs = prevAttrs.buildInputs ++ [ + libcublas.lib + numactl + rdma-core + ]; + # Before 11.7 libcufile depends on itself for some reason. + autoPatchelfIgnoreMissingDeps = + prevAttrs.autoPatchelfIgnoreMissingDeps + ++ lib.lists.optionals (cudaOlder "11.7") [ "libcufile.so.0" ]; + }; libcusolver = { cudaAtLeast, lib, libcublas, - libcusolver, libcusparse ? null, libnvjitlink ? 
null, }: - libcusolver.overrideAttrs ( - prevAttrs: { - buildInputs = - prevAttrs.buildInputs - # Always depends on this - ++ [ libcublas.lib ] - # Dependency from 12.0 and on - ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ] - # Dependency from 12.1 and on - ++ lib.lists.optionals (cudaAtLeast "12.1") [ libcusparse.lib ]; - } - ); + prevAttrs: { + buildInputs = + prevAttrs.buildInputs + # Always depends on this + ++ [ libcublas.lib ] + # Dependency from 12.0 and on + ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ] + # Dependency from 12.1 and on + ++ lib.lists.optionals (cudaAtLeast "12.1") [ libcusparse.lib ]; + }; libcusparse = { cudaAtLeast, lib, - libcusparse, libnvjitlink ? null, }: - libcusparse.overrideAttrs ( - prevAttrs: { - buildInputs = - prevAttrs.buildInputs - # Dependency from 12.0 and on - ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ]; - } - ); + prevAttrs: { + buildInputs = + prevAttrs.buildInputs + # Dependency from 12.0 and on + ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ]; + }; cuda_cudart = - { - buildPackages, - cuda_cudart, - lib, - }: - cuda_cudart.overrideAttrs ( - prevAttrs: { - # Remove once cuda-find-redist-features has a special case for libcuda - outputs = - prevAttrs.outputs - ++ lib.lists.optionals (!(builtins.elem "stubs" prevAttrs.outputs)) [ "stubs" ]; + { buildPackages, lib }: + prevAttrs: { + # Remove once cuda-find-redist-features has a special case for libcuda + outputs = + prevAttrs.outputs + ++ lib.lists.optionals (!(builtins.elem "stubs" prevAttrs.outputs)) [ "stubs" ]; - allowFHSReferences = false; + allowFHSReferences = false; - # The libcuda stub's pkg-config doesn't follow the general pattern: - postPatch = - prevAttrs.postPatch or "" - + '' - while IFS= read -r -d $'\0' path ; do - sed -i \ - -e "s|^libdir\s*=.*/lib\$|libdir=''${!outputLib}/lib/stubs|" \ - -e "s|^Libs\s*:\(.*\)\$|Libs: \1 
-Wl,-rpath,${buildPackages.addDriverRunpath.driverLink}/lib|" \ - "$path" - done < <(find -iname 'cuda-*.pc' -print0) - '' - + '' - # Namelink may not be enough, add a soname. - # Cf. https://gitlab.kitware.com/cmake/cmake/-/issues/25536 - if [[ -f lib/stubs/libcuda.so && ! -f lib/stubs/libcuda.so.1 ]] ; then - ln -s libcuda.so lib/stubs/libcuda.so.1 - fi - ''; + # The libcuda stub's pkg-config doesn't follow the general pattern: + postPatch = + prevAttrs.postPatch or "" + + '' + while IFS= read -r -d $'\0' path ; do + sed -i \ + -e "s|^libdir\s*=.*/lib\$|libdir=''${!outputLib}/lib/stubs|" \ + -e "s|^Libs\s*:\(.*\)\$|Libs: \1 -Wl,-rpath,${buildPackages.addDriverRunpath.driverLink}/lib|" \ + "$path" + done < <(find -iname 'cuda-*.pc' -print0) + '' + + '' + # Namelink may not be enough, add a soname. + # Cf. https://gitlab.kitware.com/cmake/cmake/-/issues/25536 + if [[ -f lib/stubs/libcuda.so && ! -f lib/stubs/libcuda.so.1 ]] ; then + ln -s libcuda.so lib/stubs/libcuda.so.1 + fi + ''; - postFixup = - prevAttrs.postFixup or "" - + '' - moveToOutput lib/stubs "$stubs" - ln -s "$stubs"/lib/stubs/* "$stubs"/lib/ - ln -s "$stubs"/lib/stubs "''${!outputLib}/lib/stubs" - ''; - } - ); + postFixup = + prevAttrs.postFixup or "" + + '' + moveToOutput lib/stubs "$stubs" + ln -s "$stubs"/lib/stubs/* "$stubs"/lib/ + ln -s "$stubs"/lib/stubs "''${!outputLib}/lib/stubs" + ''; + }; cuda_compat = - { - cuda_compat, - flags, - lib, - }: - cuda_compat.overrideAttrs ( - prevAttrs: { - autoPatchelfIgnoreMissingDeps = prevAttrs.autoPatchelfIgnoreMissingDeps ++ [ - "libnvrm_gpu.so" - "libnvrm_mem.so" - "libnvdla_runtime.so" - ]; - # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices. 
- badPlatformsConditions = prevAttrs.badPlatformsConditions // { - "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" = !flags.isJetsonBuild; - }; - meta = prevAttrs.meta // { - # For cross-compilation, we need the hostPlatform to be included in order to fetch and build the package. This - # doesn't change the fact that it won't work on non-Jetson devices, so we only add it when building for Jetson. - platforms = prevAttrs.meta.platforms ++ lib.lists.optionals flags.isJetsonBuild [ "x86_64-linux" ]; - }; - } - ); + { flags, lib }: + prevAttrs: { + autoPatchelfIgnoreMissingDeps = prevAttrs.autoPatchelfIgnoreMissingDeps ++ [ + "libnvrm_gpu.so" + "libnvrm_mem.so" + "libnvdla_runtime.so" + ]; + # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices. + badPlatformsConditions = prevAttrs.badPlatformsConditions // { + "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" = !flags.isJetsonBuild; + }; + meta = prevAttrs.meta // { + # For cross-compilation, we need the hostPlatform to be included in order to fetch and build the package. This + # doesn't change the fact that it won't work on non-Jetson devices, so we only add it when building for Jetson. 
+ platforms = prevAttrs.meta.platforms ++ lib.lists.optionals flags.isJetsonBuild [ "x86_64-linux" ]; + }; + }; cuda_gdb = { - cuda_gdb, cudaAtLeast, gmp, lib, }: - cuda_gdb.overrideAttrs ( - prevAttrs: { - buildInputs = - prevAttrs.buildInputs - # x86_64 only needs gmp from 12.0 and on - ++ lib.lists.optionals (cudaAtLeast "11.0") [ gmp ]; - } - ); + prevAttrs: { + buildInputs = + prevAttrs.buildInputs + # x86_64 only needs gmp from 12.0 and on + ++ lib.lists.optionals (cudaAtLeast "11.0") [ gmp ]; + }; cuda_nvcc = { backendStdenv, buildPackages, cuda_cudart, - cuda_nvcc, cudaAtLeast, cudaOlder, lib, setupCudaHook, }: - cuda_nvcc.overrideAttrs ( - prevAttrs: { - # Remove once cuda-find-redist-features has a special case for libcuda - # TODO(@connorbaker): The order of build outputs matters as we traverse them when creating split outputs. - # The `lib` output cannot come after `static` as it moves all the static libraries back to the `lib` output. - outputs = - prevAttrs.outputs - ++ lib.lists.optionals (!(builtins.elem "lib" prevAttrs.outputs)) [ "lib" ]; + prevAttrs: { + # Remove once cuda-find-redist-features has a special case for libcuda + # TODO(@connorbaker): The order of build outputs matters as we traverse them when creating split outputs. + # The `lib` output cannot come after `static` as it moves all the static libraries back to the `lib` output. + outputs = + prevAttrs.outputs + ++ lib.lists.optionals (!(builtins.elem "lib" prevAttrs.outputs)) [ "lib" ]; - # Patch the nvcc.profile. - # Syntax: - # - `=` for assignment, - # - `?=` for conditional assignment, - # - `+=` to "prepend", - # - `=+` to "append". + # Patch the nvcc.profile. + # Syntax: + # - `=` for assignment, + # - `?=` for conditional assignment, + # - `+=` to "prepend", + # - `=+` to "append". - # Cf. https://web.archive.org/web/20230308044351/https://arcb.csc.ncsu.edu/~mueller/cluster/nvidia/2.0/nvcc_2.0.pdf + # Cf. 
https://web.archive.org/web/20230308044351/https://arcb.csc.ncsu.edu/~mueller/cluster/nvidia/2.0/nvcc_2.0.pdf - # We set all variables with the lowest priority (=+), but we do force - # nvcc to use the fixed backend toolchain. Cf. comments in - # backend-stdenv.nix + # We set all variables with the lowest priority (=+), but we do force + # nvcc to use the fixed backend toolchain. Cf. comments in + # backend-stdenv.nix - nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ backendStdenv.cc ]; + nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ backendStdenv.cc ]; - # TODO(@connorbaker): We should specify the spliced version of backendStdenv and cuda_cudart to use here. - postPatch = - (prevAttrs.postPatch or "") - + '' - echo "Running the cuda_nvcc postPatch" - substituteInPlace bin/nvcc.profile \ - --replace \ - '$(TOP)/lib' \ - "''${!outputLib}/lib" \ - --replace \ - '$(TOP)/$(_NVVM_BRANCH_)' \ - "''${!outputBin}/nvvm" \ - --replace \ - '$(TOP)/$(_TARGET_DIR_)/include' \ - "''${!outputDev}/include" + # TODO(@connorbaker): We should specify the spliced version of backendStdenv and cuda_cudart to use here. 
+ postPatch = + (prevAttrs.postPatch or "") + + '' + echo "Running the cuda_nvcc postPatch" + substituteInPlace bin/nvcc.profile \ + --replace \ + '$(TOP)/lib' \ + "''${!outputLib}/lib" \ + --replace \ + '$(TOP)/$(_NVVM_BRANCH_)' \ + "''${!outputBin}/nvvm" \ + --replace \ + '$(TOP)/$(_TARGET_DIR_)/include' \ + "''${!outputDev}/include" - cat << EOF >> bin/nvcc.profile + cat << EOF >> bin/nvcc.profile - # Fix a compatible backend compiler - PATH += ${lib.getBin backendStdenv.cc}/bin: + # Fix a compatible backend compiler + PATH += ${lib.getBin backendStdenv.cc}/bin: - # Expose the split-out nvvm - LIBRARIES =+ -L''${!outputBin}/nvvm/lib - INCLUDES =+ -I''${!outputBin}/nvvm/include + # Expose the split-out nvvm + LIBRARIES =+ -L''${!outputBin}/nvvm/lib + INCLUDES =+ -I''${!outputBin}/nvvm/include - # Expose cudart and the libcuda stubs - LIBRARIES =+ -L$static/lib" "-L${cuda_cudart.lib}/lib -L${cuda_cudart.lib}/lib/stubs - INCLUDES =+ -I${cuda_cudart.dev}/include - EOF - ''; + # Expose cudart and the libcuda stubs + LIBRARIES =+ -L$static/lib" "-L${cuda_cudart.lib}/lib -L${cuda_cudart.lib}/lib/stubs + INCLUDES =+ -I${cuda_cudart.dev}/include + EOF + ''; - propagatedNativeBuildInputs = [ setupCudaHook ]; + propagatedNativeBuildInputs = [ setupCudaHook ]; - postInstall = - (prevAttrs.postInstall or "") - + '' - moveToOutput "nvvm" "''${!outputBin}" - ''; + postInstall = + (prevAttrs.postInstall or "") + + '' + moveToOutput "nvvm" "''${!outputBin}" + ''; - # The nvcc and cicc binaries contain hard-coded references to /usr - allowFHSReferences = true; + # The nvcc and cicc binaries contain hard-coded references to /usr + allowFHSReferences = true; - meta = (prevAttrs.meta or { }) // { - mainProgram = "nvcc"; - }; - } - ); + meta = (prevAttrs.meta or { }) // { + mainProgram = "nvcc"; + }; + }; cuda_nvprof = - { cuda_cupti, cuda_nvprof }: - cuda_nvprof.overrideAttrs ( - prevAttrs: { buildInputs = prevAttrs.buildInputs ++ [ cuda_cupti.lib ]; } - ); + { cuda_cupti }: 
prevAttrs: { buildInputs = prevAttrs.buildInputs ++ [ cuda_cupti.lib ]; }; cuda_demo_suite = { - cuda_demo_suite, freeglut, lib, libcufft, @@ -271,46 +251,41 @@ filterAndCreateOverrides { libglvnd, mesa, }: - cuda_demo_suite.overrideAttrs ( - prevAttrs: { - buildInputs = prevAttrs.buildInputs ++ [ - freeglut - libcufft.lib - libcurand.lib - libGLU - libglvnd - mesa - ]; - } - ); + prevAttrs: { + buildInputs = prevAttrs.buildInputs ++ [ + freeglut + libcufft.lib + libcurand.lib + libGLU + libglvnd + mesa + ]; + }; nsight_compute = { lib, - nsight_compute, qt5 ? null, qt6 ? null, }: - nsight_compute.overrideAttrs ( - prevAttrs: { - nativeBuildInputs = - prevAttrs.nativeBuildInputs - ++ ( - if (lib.strings.versionOlder prevAttrs.version "2022.2.0") then - [ qt5.wrapQtAppsHook ] - else - [ qt6.wrapQtAppsHook ] - ); - buildInputs = - prevAttrs.buildInputs - ++ ( - if (lib.strings.versionOlder prevAttrs.version "2022.2.0") then - [ qt5.qtwebview ] - else - [ qt6.qtwebview ] - ); - } - ); + prevAttrs: { + nativeBuildInputs = + prevAttrs.nativeBuildInputs + ++ ( + if (lib.strings.versionOlder prevAttrs.version "2022.2.0") then + [ qt5.wrapQtAppsHook ] + else + [ qt6.wrapQtAppsHook ] + ); + buildInputs = + prevAttrs.buildInputs + ++ ( + if (lib.strings.versionOlder prevAttrs.version "2022.2.0") then + [ qt5.qtwebview ] + else + [ qt6.qtwebview ] + ); + }; nsight_systems = { @@ -321,7 +296,6 @@ filterAndCreateOverrides { e2fsprogs, gst_all_1, lib, - nsight_systems, nss, numactl, pulseaudio, @@ -332,67 +306,65 @@ filterAndCreateOverrides { wayland, xorg, }: - nsight_systems.overrideAttrs ( - prevAttrs: - let - qt = if lib.strings.versionOlder prevAttrs.version "2022.4.2.1" then qt5 else qt6; - qtwayland = - if lib.versions.major qt.qtbase.version == "5" then - lib.getBin qt.qtwayland - else - lib.getLib qt.qtwayland; - qtWaylandPlugins = "${qtwayland}/${qt.qtbase.qtPluginPrefix}"; - in - { - # An ad hoc replacement for - # 
https://github.com/ConnorBaker/cuda-redist-find-features/issues/11 - env.rmPatterns = toString [ - "nsight-systems/*/*/libQt*" - "nsight-systems/*/*/libstdc*" - "nsight-systems/*/*/libboost*" - "nsight-systems/*/*/lib{ssl,ssh,crypto}*" - "nsight-systems/*/*/lib{arrow,jpeg}*" - "nsight-systems/*/*/Mesa" - "nsight-systems/*/*/python/bin/python" - "nsight-systems/*/*/libexec" - "nsight-systems/*/*/Plugins" - ]; - postPatch = - prevAttrs.postPatch or "" - + '' - for path in $rmPatterns ; do - rm -r "$path" - done - ''; - nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ qt.wrapQtAppsHook ]; - buildInputs = prevAttrs.buildInputs ++ [ - (qt.qtdeclarative or qt.full) - (qt.qtsvg or qt.full) - cuda_cudart.stubs - gst_all_1.gst-plugins-base - gst_all_1.gstreamer - nss - numactl - pulseaudio - qt.qtbase - qtWaylandPlugins - rdma-core - ucx - wayland - xorg.libXcursor - xorg.libXdamage - xorg.libXrandr - xorg.libXtst - ]; + prevAttrs: + let + qt = if lib.strings.versionOlder prevAttrs.version "2022.4.2.1" then qt5 else qt6; + qtwayland = + if lib.versions.major qt.qtbase.version == "5" then + lib.getBin qt.qtwayland + else + lib.getLib qt.qtwayland; + qtWaylandPlugins = "${qtwayland}/${qt.qtbase.qtPluginPrefix}"; + in + { + # An ad hoc replacement for + # https://github.com/ConnorBaker/cuda-redist-find-features/issues/11 + env.rmPatterns = toString [ + "nsight-systems/*/*/libQt*" + "nsight-systems/*/*/libstdc*" + "nsight-systems/*/*/libboost*" + "nsight-systems/*/*/lib{ssl,ssh,crypto}*" + "nsight-systems/*/*/lib{arrow,jpeg}*" + "nsight-systems/*/*/Mesa" + "nsight-systems/*/*/python/bin/python" + "nsight-systems/*/*/libexec" + "nsight-systems/*/*/Plugins" + ]; + postPatch = + prevAttrs.postPatch or "" + + '' + for path in $rmPatterns ; do + rm -r "$path" + done + ''; + nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ qt.wrapQtAppsHook ]; + buildInputs = prevAttrs.buildInputs ++ [ + (qt.qtdeclarative or qt.full) + (qt.qtsvg or qt.full) + cuda_cudart.stubs + 
gst_all_1.gst-plugins-base + gst_all_1.gstreamer + nss + numactl + pulseaudio + qt.qtbase + qtWaylandPlugins + rdma-core + ucx + wayland + xorg.libXcursor + xorg.libXdamage + xorg.libXrandr + xorg.libXtst + ]; - # Older releases require boost 1.70 deprecated in Nixpkgs - meta.broken = prevAttrs.meta.broken or false || cudaOlder "11.8"; - } - ); + # Older releases require boost 1.70 deprecated in Nixpkgs + meta.broken = prevAttrs.meta.broken or false || cudaOlder "11.8"; + }; nvidia_driver = - { nvidia_driver }: - nvidia_driver.overrideAttrs { + { }: + { # No need to support this package as we have drivers already # in linuxPackages. meta.broken = true; From ddcfff03cf907789a00aa0c4f95eac916fce9426 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 20 Mar 2024 18:16:01 +0000 Subject: [PATCH 11/34] cudaPackages.cuda_nvcc: lib must precede static in outputs --- .../cuda-modules/cuda/overrides.nix | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index a8a2c173d3c17..2d40ccd2de9b3 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -170,11 +170,29 @@ filterAndCreateOverrides { }: prevAttrs: { # Remove once cuda-find-redist-features has a special case for libcuda - # TODO(@connorbaker): The order of build outputs matters as we traverse them when creating split outputs. - # The `lib` output cannot come after `static` as it moves all the static libraries back to the `lib` output. outputs = - prevAttrs.outputs - ++ lib.lists.optionals (!(builtins.elem "lib" prevAttrs.outputs)) [ "lib" ]; + # NOTE: The order of build outputs matters as we traverse them when creating + # split outputs. The `lib` output cannot come after `static` as it moves all + # the static libraries back to the `lib` output. 
+ let + libOutputIsPresent = builtins.elem "lib" prevAttrs.outputs; + staticOutputPos = lib.lists.findFirstIndex (x: x == "static") null prevAttrs.outputs; + outputsBeforeStatic = lib.lists.take staticOutputPos prevAttrs.outputs; + outputsFromStaticAndLater = lib.lists.drop staticOutputPos prevAttrs.outputs; + newOutputs = + if libOutputIsPresent then + # If the lib output is present, we want to keep it in the same position + prevAttrs.outputs + else if staticOutputPos == null then + # If the static output is not present, location of the lib output + # doesn't matter and we can append it + prevAttrs.outputs ++ [ "lib" ] + else + # The lib output is missing and the static output is present. + # We need to insert the lib output before the static output. + outputsBeforeStatic ++ [ "lib" ] ++ outputsFromStaticAndLater; + in + newOutputs; # Patch the nvcc.profile. # Syntax: From 112d38bb8c21660f1aa7501319dec0789701021f Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 20 Mar 2024 18:25:51 +0000 Subject: [PATCH 12/34] cuda-modules: add check for duplicate/misordered outputs --- .../cuda-modules/generic-builders/manifest.nix | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix index cb49f98d77597..d47b979b7029a 100644 --- a/pkgs/development/cuda-modules/generic-builders/manifest.nix +++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix @@ -133,7 +133,18 @@ backendStdenv.mkDerivation ( # brokenConditions :: AttrSet Bool # Sets `meta.broken = true` if any of the conditions are true. # Example: Broken on a specific version of CUDA or when a dependency has a specific version. - brokenConditions = { }; + brokenConditions = { + # Unclear how this is handled by Nix internals. 
+ "Duplicate entries in outputs" = finalAttrs.outputs != lists.unique finalAttrs.outputs; + # Typically this results in the static output being empty, as all libraries are moved + # back to the lib output. + "lib output follows static output" = + let + libIndex = lists.findFirstIndex (x: x == "lib") null finalAttrs.outputs; + staticIndex = lists.findFirstIndex (x: x == "static") null finalAttrs.outputs; + in + libIndex != null && staticIndex != null && libIndex > staticIndex; + }; # badPlatformsConditions :: AttrSet Bool # Sets `meta.badPlatforms = meta.platforms` if any of the conditions are true. From 49676c7cf88588a29e781432a8f2a136aa41a7db Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 20 Mar 2024 18:31:20 +0000 Subject: [PATCH 13/34] cuda-modules: update note on use of lndir from path --- pkgs/development/cuda-modules/generic-builders/manifest.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix index d47b979b7029a..d0d023f565c60 100644 --- a/pkgs/development/cuda-modules/generic-builders/manifest.nix +++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix @@ -314,9 +314,9 @@ backendStdenv.mkDerivation ( # For each output, create a symlink to it in the out output. # NOTE: We must recreate the out output here, because the setup hook will have deleted it if it was empty. - # TODO: Previously we used `meta.getExe lndir` to get the path to lndir, but that doesn't work under - # cross-compilation -- whatever machinery Nixpkgs uses to get a version built for hostPlatform (so it can run - # during the build) doesn't extend to `meta.getExe`. + # NOTE: Rely on nativeBuildInputs adding lndir to the path because meta.getExe has no concept of spliced + # attributes and will select the hostPlatform variant instead of the buildPlatform variant. 
+ # TODO(@connorbaker): This should be removed when https://github.com/NixOS/nixpkgs/issues/271792 is resolved. postPatchelf = '' mkdir -p "$out" for output in $(getAllOutputNames); do From 868fa525d4eb16a253ac073a1dbddf6ef87ff0f5 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 20 Mar 2024 18:40:33 +0000 Subject: [PATCH 14/34] cudaPackages.saxpy: Jetson should be supported after CUDA 11.4 --- pkgs/development/cuda-modules/saxpy/default.nix | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pkgs/development/cuda-modules/saxpy/default.nix b/pkgs/development/cuda-modules/saxpy/default.nix index bc299dea006f4..be31ced11be77 100644 --- a/pkgs/development/cuda-modules/saxpy/default.nix +++ b/pkgs/development/cuda-modules/saxpy/default.nix @@ -10,11 +10,11 @@ let cuda_cccl cuda_cudart cuda_nvcc + cudaAtLeast + cudaOlder cudatoolkit - cudaVersion flags libcublas - setupCudaHook ; inherit (lib) getDev getLib getOutput; in @@ -31,18 +31,18 @@ backendStdenv.mkDerivation { cmake autoAddDriverRunpath ] - ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] - ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [cuda_nvcc]; + ++ lib.optionals (cudaOlder "11.4") [cudatoolkit] + ++ lib.optionals (cudaAtLeast "11.4") [cuda_nvcc]; buildInputs = - lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] - ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [ + lib.optionals (cudaOlder "11.4") [cudatoolkit] + ++ lib.optionals (cudaAtLeast "11.4") [ (getDev libcublas) (getLib libcublas) (getOutput "static" libcublas) cuda_cudart ] - ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0") [cuda_cccl]; + ++ lib.optionals (cudaAtLeast "12.0") [cuda_cccl]; cmakeFlags = [ (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true) @@ -56,6 +56,6 @@ backendStdenv.mkDerivation { license = lib.licenses.mit; maintainers = lib.teams.cuda.members; platforms = lib.platforms.unix; - badPlatforms = lib.optionals flags.isJetsonBuild 
platforms; + badPlatforms = lib.optionals (flags.isJetsonBuild && cudaOlder "11.4") platforms; }; } From 2b351b2d1c64be6442cf44da597b9f062fd97b65 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 20 Mar 2024 19:09:33 +0000 Subject: [PATCH 15/34] cuda-modules/cuda/overrides: remove unused callPackage arguments --- pkgs/development/cuda-modules/cuda/overrides.nix | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index 2d40ccd2de9b3..963003944f332 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -161,10 +161,7 @@ filterAndCreateOverrides { cuda_nvcc = { backendStdenv, - buildPackages, cuda_cudart, - cudaAtLeast, - cudaOlder, lib, setupCudaHook, }: @@ -262,7 +259,6 @@ filterAndCreateOverrides { cuda_demo_suite = { freeglut, - lib, libcufft, libcurand, libGLU, @@ -307,11 +303,8 @@ filterAndCreateOverrides { nsight_systems = { - alsa-lib, - boost178, cuda_cudart, cudaOlder, - e2fsprogs, gst_all_1, lib, nss, From 0304c9b726d1dc1e12fa8c14f681a1f521d242fd Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Mon, 25 Mar 2024 16:58:20 +0000 Subject: [PATCH 16/34] cuda-modules/flags: use cudaAtLeast when possible --- pkgs/development/cuda-modules/flags.nix | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkgs/development/cuda-modules/flags.nix b/pkgs/development/cuda-modules/flags.nix index 89ddfe53aea96..5f417287574d8 100644 --- a/pkgs/development/cuda-modules/flags.nix +++ b/pkgs/development/cuda-modules/flags.nix @@ -7,6 +7,7 @@ cudaCapabilities ? (config.cudaCapabilities or []), cudaForwardCompat ? 
(config.cudaForwardCompat or true), lib, + cudaAtLeast, cudaVersion, # gpus :: List Gpu gpus, @@ -48,7 +49,7 @@ let gpu: let inherit (gpu) minCudaVersion maxCudaVersion; - lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion; + lowerBoundSatisfied = cudaAtLeast minCudaVersion; upperBoundSatisfied = (maxCudaVersion == null) || !(strings.versionOlder maxCudaVersion cudaVersion); in @@ -286,7 +287,7 @@ assert let }; actualWrapped = (builtins.tryEval (builtins.deepSeq actual actual)).value; in -asserts.assertMsg ((strings.versionAtLeast cudaVersion "11.2") -> (expected == actualWrapped)) '' +asserts.assertMsg ((cudaAtLeast "11.2") -> (expected == actualWrapped)) '' This test should only fail when using a version of CUDA older than 11.2, the first to support 8.6. Expected: ${builtins.toJSON expected} From 5b65222a12d4cc530da82b78e9aed82e447fd2e8 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Mon, 25 Mar 2024 16:59:49 +0000 Subject: [PATCH 17/34] cuda-modules/flags: ignore platforms in throwIf in isJetsonBuild Since, even under cross-compilation, we evaluate this flag on multiple platforms, it makes more sense to move the platform check out of the throw condition and into the boolean return value. The alternative is to restrict all uses of this value to locations which guard evaluation so it does not occur when the host platform is still x86_64. 
--- pkgs/development/cuda-modules/flags.nix | 65 +++---------------------- 1 file changed, 6 insertions(+), 59 deletions(-) diff --git a/pkgs/development/cuda-modules/flags.nix b/pkgs/development/cuda-modules/flags.nix index 5f417287574d8..495ba647feb1b 100644 --- a/pkgs/development/cuda-modules/flags.nix +++ b/pkgs/development/cuda-modules/flags.nix @@ -223,24 +223,24 @@ let lists.filter (cap: !(builtins.elem cap requestedJetsonDevices)) cudaCapabilities; jetsonBuildSufficientCondition = requestedJetsonDevices != []; - jetsonBuildNecessaryCondition = requestedNonJetsonDevices == [] && targetPlatform.isAarch64; + jetsonBuildNecessaryCondition = requestedNonJetsonDevices == []; in trivial.throwIf (jetsonBuildSufficientCondition && !jetsonBuildNecessaryCondition) '' - Jetson devices cannot be targeted with non-Jetson devices. Additionally, they require targetPlatform to be aarch64. + Jetson devices cannot be targeted with non-Jetson devices. Additionally, host platform + and target platform must be aarch64. You requested ${builtins.toJSON cudaCapabilities} for: - Build platform ${buildPlatform.system} - Host platform ${hostPlatform.system} - Target platform ${targetPlatform.system} Requested Jetson devices: ${builtins.toJSON requestedJetsonDevices}. Requested non-Jetson devices: ${builtins.toJSON requestedNonJetsonDevices}. - Exactly one of the following must be true: - - All CUDA capabilities belong to Jetson devices and targetPlatform is aarch64. - - No CUDA capabilities belong to Jetson devices. See ${./gpus.nix} for a list of architectures supported by this version of Nixpkgs. 
'' jetsonBuildSufficientCondition - && jetsonBuildNecessaryCondition; + && jetsonBuildNecessaryCondition + && hostPlatform.isAarch64 + && targetPlatform.isAarch64; }; in # When changing names or formats: pause, validate, and update the assert @@ -310,59 +310,6 @@ asserts.assertMsg (expected == actualWrapped) '' Expected: ${builtins.toJSON expected} Actual: ${builtins.toJSON actualWrapped} ''; -# Check Jetson-only -assert let - expected = { - cudaCapabilities = [ - "6.2" - "7.2" - ]; - enableForwardCompat = true; - - archNames = [ - "Pascal" - "Volta" - ]; - realArches = [ - "sm_62" - "sm_72" - ]; - virtualArches = [ - "compute_62" - "compute_72" - ]; - arches = [ - "sm_62" - "sm_72" - "compute_72" - ]; - - gencode = [ - "-gencode=arch=compute_62,code=sm_62" - "-gencode=arch=compute_72,code=sm_72" - "-gencode=arch=compute_72,code=compute_72" - ]; - gencodeString = "-gencode=arch=compute_62,code=sm_62 -gencode=arch=compute_72,code=sm_72 -gencode=arch=compute_72,code=compute_72"; - - isJetsonBuild = true; - }; - actual = formatCapabilities { - cudaCapabilities = [ - "6.2" - "7.2" - ]; - }; - actualWrapped = (builtins.tryEval (builtins.deepSeq actual actual)).value; -in -asserts.assertMsg - # We can't do this test unless we're targeting aarch64 - (targetPlatform.isAarch64 -> (expected == actualWrapped)) - '' - Jetson devices can only be built with other Jetson devices. - Both 6.2 and 7.2 are Jetson devices. - Expected: ${builtins.toJSON expected} - Actual: ${builtins.toJSON actualWrapped} - ''; { # formatCapabilities :: { cudaCapabilities: List Capability, enableForwardCompat: Boolean } -> { ... 
} inherit formatCapabilities; From 969ee2bf21242e1eb4d7912081440bb52e83a3de Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Mon, 25 Mar 2024 17:04:03 +0000 Subject: [PATCH 18/34] cuda-modules: fix deprecated uses of substituteInPlace replace flag --- .../cuda-modules/cuda-library-samples/generic.nix | 2 +- pkgs/development/cuda-modules/cuda/overrides.nix | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkgs/development/cuda-modules/cuda-library-samples/generic.nix b/pkgs/development/cuda-modules/cuda-library-samples/generic.nix index d4182536654e1..3c080c8a9c382 100644 --- a/pkgs/development/cuda-modules/cuda-library-samples/generic.nix +++ b/pkgs/development/cuda-modules/cuda-library-samples/generic.nix @@ -76,7 +76,7 @@ in # CUTENSOR_ROOT is double escaped postPatch = '' substituteInPlace CMakeLists.txt \ - --replace "\''${CUTENSOR_ROOT}/include" "${cutensor.dev}/include" + --replace-fail "\''${CUTENSOR_ROOT}/include" "${cutensor.dev}/include" ''; CUTENSOR_ROOT = cutensor; diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index 963003944f332..5c041b9d8a077 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -212,13 +212,13 @@ filterAndCreateOverrides { + '' echo "Running the cuda_nvcc postPatch" substituteInPlace bin/nvcc.profile \ - --replace \ + --replace-fail \ '$(TOP)/lib' \ "''${!outputLib}/lib" \ - --replace \ + --replace-fail \ '$(TOP)/$(_NVVM_BRANCH_)' \ "''${!outputBin}/nvvm" \ - --replace \ + --replace-fail \ '$(TOP)/$(_TARGET_DIR_)/include' \ "''${!outputDev}/include" From 3b629a2ad3200a2f1934f4fc0f554de8f8f69bc5 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Mon, 25 Mar 2024 17:05:27 +0000 Subject: [PATCH 19/34] cuda-modules/cuda/overrides: backendStdenv.cc is already part of nativeBuildInputs --- pkgs/development/cuda-modules/cuda/overrides.nix | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index 5c041b9d8a077..917271f80e66a 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -204,8 +204,6 @@ filterAndCreateOverrides { # nvcc to use the fixed backend toolchain. Cf. comments in # backend-stdenv.nix - nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ backendStdenv.cc ]; - # TODO(@connorbaker): We should specify the spliced version of backendStdenv and cuda_cudart to use here. postPatch = (prevAttrs.postPatch or "") From e55a9c21a4cebcdc395b9667a9b3cc0caadc3e25 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Mon, 25 Mar 2024 17:14:42 +0000 Subject: [PATCH 20/34] cuda-modules/cuda/overrides: specify spliced packages for cuda_nvcc postPatch phase --- .../cuda-modules/cuda/overrides.nix | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index 917271f80e66a..460874b0e4978 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -204,8 +204,17 @@ filterAndCreateOverrides { # nvcc to use the fixed backend toolchain. Cf. comments in # backend-stdenv.nix - # TODO(@connorbaker): We should specify the spliced version of backendStdenv and cuda_cudart to use here. postPatch = + let + # CC must come from the host environment, not the target environment because it is + # used at build time. + ccBin = lib.getBin (backendStdenv.__spliced.buildHost.cc or backendStdenv.cc); + # CUDA runtime libraries must come from the host/target environment because they + # are used at runtime, not build time (outside of linking). 
+ cudartStatic = (cuda_cudart.__spliced.hostTarget or cuda_cudart).static; + cudartLib = lib.getLib (cuda_cudart.__spliced.hostTarget or cuda_cudart); + cudartDev = lib.getDev (cuda_cudart.__spliced.hostTarget or cuda_cudart); + in (prevAttrs.postPatch or "") + '' echo "Running the cuda_nvcc postPatch" @@ -223,19 +232,19 @@ filterAndCreateOverrides { cat << EOF >> bin/nvcc.profile # Fix a compatible backend compiler - PATH += ${lib.getBin backendStdenv.cc}/bin: + PATH += "${ccBin}/bin": # Expose the split-out nvvm - LIBRARIES =+ -L''${!outputBin}/nvvm/lib - INCLUDES =+ -I''${!outputBin}/nvvm/include + LIBRARIES =+ -L"''${!outputBin}/nvvm/lib" + INCLUDES =+ -I"''${!outputBin}/nvvm/include" # Expose cudart and the libcuda stubs - LIBRARIES =+ -L$static/lib" "-L${cuda_cudart.lib}/lib -L${cuda_cudart.lib}/lib/stubs - INCLUDES =+ -I${cuda_cudart.dev}/include + LIBRARIES =+ -L"$static/lib" -L"${cudartStatic}/lib" -L"${cudartLib}/lib" -L"${cudartLib}/lib/stubs" + INCLUDES =+ -I"${cudartDev}/include" EOF ''; - propagatedNativeBuildInputs = [ setupCudaHook ]; + propagatedNativeBuildInputs = (prevAttrs.propagatedNativeBuildInputs or [ ]) ++ [ setupCudaHook ]; postInstall = (prevAttrs.postInstall or "") From f85d321f170a2fb7b7391cce8d3d4b39a86f5b1f Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Mon, 25 Mar 2024 17:32:55 +0000 Subject: [PATCH 21/34] cuda-modules/generic-builders/manifest: wip cross-compilation --- .../generic-builders/manifest.nix | 114 +++++++++++------- 1 file changed, 70 insertions(+), 44 deletions(-) diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix index d0d023f565c60..591acfcc4c051 100644 --- a/pkgs/development/cuda-modules/generic-builders/manifest.nix +++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix @@ -38,17 +38,19 @@ let sourceTypes ; - inherit (backendStdenv) hostPlatform; + inherit (backendStdenv) buildPlatform hostPlatform targetPlatform; # Get 
the redist architectures for which package provides distributables. # These are used by meta.platforms. supportedRedistArchs = builtins.attrNames featureRelease; - # redistArch :: String - # The redistArch is the name of the architecture for which the redistributable is built. + + # hostPlatformRedistArch :: String + # The hostPlatformRedistArch is the name of the architecture for which the redistributable is built. # It is `"unsupported"` if the redistributable is not supported on the hostPlatform. - redistArch = flags.getRedistArch hostPlatform.system; + hostPlatformRedistArch = flags.getRedistArch hostPlatform.system; - sourceMatchesHost = flags.getNixSystem redistArch == hostPlatform.system; + # sourceMatchesHost :: Bool + sourceMatchesHost = flags.getNixSystem hostPlatformRedistArch == hostPlatform.system; in backendStdenv.mkDerivation ( finalAttrs: { @@ -76,7 +78,7 @@ backendStdenv.mkDerivation ( output: attrsets.attrByPath [ - redistArch + hostPlatformRedistArch "outputs" output ] @@ -96,12 +98,12 @@ backendStdenv.mkDerivation ( # NOTE: In the case the redistributable isn't supported on the target platform, # we will have `outputs = [ "out" ] ++ possibleOutputs`. This is of note because platforms which # aren't supported would otherwise have evaluation errors when trying to access outputs other than `out`. - # The alternative would be to have `outputs = [ "out" ]` when`redistArch = "unsupported"`, but that would + # The alternative would be to have `outputs = [ "out" ]` when`hostPlatformRedistArch = "unsupported"`, but that would # require adding guards throughout the entirety of the CUDA package set to ensure `cudaSupport` is true -- # recall that OfBorg will evaluate packages marked as broken and that `cudaPackages` will be evaluated with # `cudaSupport = false`! 
additionalOutputs = - if redistArch == "unsupported" + if hostPlatformRedistArch == "unsupported" then possibleOutputs else builtins.filter hasOutput possibleOutputs; # The out output is special -- it's the default output and we always include it. @@ -154,18 +156,35 @@ backendStdenv.mkDerivation ( }; # src :: Optional Derivation - src = trivial.pipe redistArch [ - # If redistArch doesn't exist in redistribRelease, return null. - (redistArch: redistribRelease.${redistArch} or null) - # If the release is non-null, fetch the source; otherwise, return null. - (trivial.mapNullable ( - { relative_path, sha256, ... }: - fetchurl { - url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}"; - inherit sha256; - } - )) - ]; + src = + # TODO(@connorbaker): Remove debugging lib.warn and inline this. + let + src = + trivial.mapNullable + ( + { relative_path, sha256, ... }: + fetchurl { + url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}"; + inherit sha256; + } + ) + (redistribRelease.${hostPlatformRedistArch} or null); + in + lib.warn + '' + Info: + - redistName: ${redistName} + - hostPlatformRedistArch: ${hostPlatformRedistArch} + - pname: ${finalAttrs.pname} + - version: ${finalAttrs.version} + - outputs: ${builtins.toJSON finalAttrs.outputs} + - brokenConditions: ${builtins.toJSON finalAttrs.brokenConditions} + - badPlatformsConditions: ${builtins.toJSON finalAttrs.badPlatformsConditions} + - buildPlatform: ${buildPlatform.system} + - hostPlatform: ${hostPlatform.system} + - targetPlatform: ${targetPlatform.system} + '' + src; # Handle the pkg-config files: # 1. No FHS @@ -198,30 +217,37 @@ backendStdenv.mkDerivation ( # We do need some other phases, like configurePhase, so the multiple-output setup hook works. 
dontBuild = true; - nativeBuildInputs = [ - autoPatchelfHook - # This hook will make sure libcuda can be found - # in typically /lib/opengl-driver by adding that - # directory to the rpath of all ELF binaries. - # Check e.g. with `patchelf --print-rpath path/to/my/binary - autoAddDriverRunpath - markForCudatoolkitRootHook - # To create fat outputs from each component and find a version of `lndir` built for the host platform. - lndir - ] - # autoAddCudaCompatRunpath depends on cuda_compat and would cause - # infinite recursion if applied to `cuda_compat` itself (beside the fact - # that it doesn't make sense in the first place) - ++ lib.optionals (pname != "cuda_compat" && flags.isJetsonBuild) [ - # autoAddCudaCompatRunpath must appear AFTER autoAddDriverRunpath. - # See its documentation in ./setup-hooks/extension.nix. - # NOTE(@connorbaker): Because autoAddCudaCompatRunpath is in nativeBuildInputs, it tries to use toolchains - # from buildPlatform, but that's not what we want. We want to use our host/target toolchains! - # To overcome this, we access the `__spliced` attribute and choose the `hostTarget` attribute. - # In the case the `__spliced` attribute doesn't exist, we just use the hook directly (because we're not - # cross-compiling). - autoAddCudaCompatRunpath.__spliced.hostTarget or autoAddCudaCompatRunpath - ]; + nativeBuildInputs = + [ + # To create fat outputs from each component and find a version of `lndir` built for the host platform. + lndir + ] + ++ [ + # Patchelf is used to fix the rpath of the binaries. + autoPatchelfHook + # (autoPatchelfHook.__spliced.buildHost or autoPatchelfHook) + + # This hook will make sure libcuda can be found in typically + # /lib/opengl-driver by adding that directory to the rpath of all ELF + # binaries. Check e.g. 
with `patchelf --print-rpath path/to/my/binary + autoAddDriverRunpath + # (autoAddDriverRunpath.__spliced.buildHost or autoAddDriverRunpath) + + # Mark the CUDA toolkit root directory for the CUDA compatibility libraries + markForCudatoolkitRootHook + # (markForCudatoolkitRootHook.__spliced.buildHost or markForCudatoolkitRootHook) + ] + # autoAddCudaCompatRunpath depends on cuda_compat and would cause + # infinite recursion if applied to `cuda_compat` itself (beside the fact + # that it doesn't make sense in the first place) + ++ lib.optionals (pname != "cuda_compat" && flags.isJetsonBuild) [ + # autoAddCudaCompatRunpath must appear AFTER autoAddDriverRunpath. + # See its documentation in ./setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix. + # NOTE(@connorbaker): Because autoAddCudaCompatRunpath is in nativeBuildInputs, it tries to use cuda_compat + # from buildPackages, but we need to use the one from targetPackages. + # We can either use autoAddCudaCompatRunpath.__spliced.hostTarget or move it to buildInputs. 
+ (autoAddCudaCompatRunpath.__spliced.hostTarget or autoAddCudaCompatRunpath) + ]; buildInputs = [ From 2b6a5a9107ba628715734cb2dda5a889090a986d Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Mon, 25 Mar 2024 17:35:04 +0000 Subject: [PATCH 22/34] cuda-modules/saxpy: remove CMAKE_VERBOSE_MAKEFILE --- pkgs/development/cuda-modules/saxpy/default.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/pkgs/development/cuda-modules/saxpy/default.nix b/pkgs/development/cuda-modules/saxpy/default.nix index be31ced11be77..e8701f0fd2df5 100644 --- a/pkgs/development/cuda-modules/saxpy/default.nix +++ b/pkgs/development/cuda-modules/saxpy/default.nix @@ -45,7 +45,6 @@ backendStdenv.mkDerivation { ++ lib.optionals (cudaAtLeast "12.0") [cuda_cccl]; cmakeFlags = [ - (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true) (lib.cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( with flags; lib.concatStringsSep ";" (lib.lists.map dropDot cudaCapabilities) )) From 77ea14bb7e1ef4bb2a001b3387a314d65498981a Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Mon, 25 Mar 2024 17:39:47 +0000 Subject: [PATCH 23/34] cuda-modules/setup-hooks/setup-cuda-hook: factor out cc access --- .../cuda-modules/setup-hooks/setup-cuda-hook/default.nix | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix index 6c5f299d4418c..f36e9339de5ce 100644 --- a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix +++ b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/default.nix @@ -1,15 +1,17 @@ # Currently propagated by cuda_nvcc or cudatoolkit, rather than used directly { backendStdenv, makeSetupHook }: +let + inherit (backendStdenv) cc; +in makeSetupHook { name = "setup-cuda-hook"; - substitutions = { # Required in addition to ccRoot as otherwise bin/gcc is looked up # when building CMakeCUDACompilerId.cu - ccFullPath = 
"${backendStdenv.cc}/bin/${backendStdenv.cc.targetPrefix}c++"; + ccFullPath = "${cc}/bin/${cc.targetPrefix}c++"; # Point NVCC at a compatible compiler - ccRoot = "${backendStdenv.cc}"; + ccRoot = "${cc}"; setupCudaHook = placeholder "out"; }; } From 37d2448db92b108b31d0e081cfc4e2d157d89ec3 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Mon, 25 Mar 2024 17:40:28 +0000 Subject: [PATCH 24/34] cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook: collapse body like other setup hooks --- .../setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix index f253331fb24b0..8209f02953c3e 100644 --- a/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix +++ b/pkgs/development/cuda-modules/setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix @@ -14,9 +14,7 @@ makeSetupHook { name = "auto-add-cuda-compat-runpath-hook"; propagatedBuildInputs = [ autoFixElfFiles ]; - substitutions.libcudaPath = lib.optionalString flags.isJetsonBuild "${cuda_compat}/compat"; - meta = { broken = !flags.isJetsonBuild; badPlatforms = lib.optionals (cuda_compat == null) lib.platforms.all; From de0bb6a996d08beec718e8c54249d4172a9cc71c Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Mon, 25 Mar 2024 18:21:35 +0000 Subject: [PATCH 25/34] cuda-modules/setup-hooks: wip rewrite and set NIX_DEBUG=1 --- .../mark-for-cudatoolkit-root-hook.sh | 43 +++++++-- .../setup-cuda-hook/setup-cuda-hook.sh | 95 +++++++++++-------- 2 files changed, 92 insertions(+), 46 deletions(-) diff --git a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh index ba04c2e0806af..92785ccf01ac1 100644 
--- a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh +++ b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh @@ -1,14 +1,43 @@ # shellcheck shell=bash -# Should we mimick cc-wrapper's "hygiene"? -[[ -z ${strictDeps-} ]] || (( "$hostOffset" < 0 )) || return 0 +guard=Sourcing +reason= -echo "Sourcing mark-for-cudatoolkit-root-hook" >&2 +export NIX_DEBUG=1 + +# Only run the hook from nativeBuildInputs. +# See the table under https://nixos.org/manual/nixpkgs/unstable/#dependency-propagation for information +# about the different target combinations and their offsets. +if (( "${hostOffset:?}" != -1 && "${targetOffset:?}" != 0 )); then + guard=Skipping + reason=" because the hook is not in nativeBuildInputs" +fi + +if (( "${NIX_DEBUG:-0}" >= 1 )); then + echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset mark-for-cudatoolkit-root-hook$reason" >&2 +else + echo "$guard mark-for-cudatoolkit-root-hook$reason" >&2 +fi + +[[ "$guard" = Sourcing ]] || return 0 markForCUDAToolkit_ROOT() { - mkdir -p "${prefix}/nix-support" - [[ -f "${prefix}/nix-support/include-in-cudatoolkit-root" ]] && return - echo "$pname-$output" > "${prefix}/nix-support/include-in-cudatoolkit-root" -} + local fnName=mark-for-cudatoolkit-root-hook::markForCUDAToolkit_ROOT + echo "$fnName: Running" >&2 + + mkdir -p "${prefix:?}/nix-support" + local markerPath="$prefix/nix-support/include-in-cudatoolkit-root" + if [[ -f "$markerPath" ]]; then + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: $markerPath exists, skipping" >&2 + return + fi + # Always create the file, even if it's empty, since setup-cuda-hook relies on its existence. + # However, only populate it if strictDeps is not set. 
+ touch "$markerPath" + if [[ -z ${strictDeps-} ]]; then + (( "${NIX_DEBUG:-0}" >= 1 )) || echo "$fnName: populating $markerPath" >&2 + echo "${pname:?}-${output:?}" > "$markerPath" + fi +} fixupOutputHooks+=(markForCUDAToolkit_ROOT) diff --git a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh index a4a444fcd2417..99e6e100cf332 100644 --- a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh +++ b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh @@ -1,15 +1,26 @@ # shellcheck shell=bash -# Only run the hook from nativeBuildInputs -(( "$hostOffset" == -1 && "$targetOffset" == 0)) || return 0 - guard=Sourcing reason= -[[ -n ${cudaSetupHookOnce-} ]] && guard=Skipping && reason=" because the hook has been propagated more than once" +export NIX_DEBUG=1 + +# Only run the hook from buildInputs: outside executables like cuda_nvcc, most +# CUDA dependencies are needed at runtime, not build-time. +# See the table under https://nixos.org/manual/nixpkgs/unstable/#dependency-propagation for information +# about the different target combinations and their offsets. 
+if (( "${hostOffset:?}" != -1 && "${targetOffset:?}" != 0 )); then + guard=Skipping + reason=" because the hook is not in nativeBuildInputs" +fi -if (( "${NIX_DEBUG:-0}" >= 1 )) ; then - echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset setupCudaHook$reason" >&2 +if [[ -n ${cudaSetupHookOnce-} ]]; then + guard=Skipping + reason=" because the hook has been propagated more than once" +fi + +if (( "${NIX_DEBUG:-0}" >= 1 )); then + echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset setup-cuda-hook$reason" >&2 else echo "$guard setup-cuda-hook$reason" >&2 fi @@ -20,13 +31,22 @@ declare -g cudaSetupHookOnce=1 declare -Ag cudaHostPathsSeen=() declare -Ag cudaOutputToPath=() -extendcudaHostPathsSeen() { - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "extendcudaHostPathsSeen $1" >&2 +extendCudaHostPathsSeen() { + local fnName=setup-cuda-hook::extendCudaHostPathsSeen + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: $1" >&2 local markerPath="$1/nix-support/include-in-cudatoolkit-root" - [[ ! -f "${markerPath}" ]] && return - [[ -v cudaHostPathsSeen[$1] ]] && return + if [[ ! -f "$markerPath" ]]; then + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: skipping since $markerPath exists" >&2 + return + fi + if [[ -v cudaHostPathsSeen[$1] ]]; then + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: skipping since $1 has already been seen" >&2 + return + fi + + # Add the path to the list of CUDA host paths. cudaHostPathsSeen["$1"]=1 # E.g. 
cuda_cudart-lib @@ -36,31 +56,32 @@ extendcudaHostPathsSeen() { [[ -z "$cudaOutputName" ]] && return local oldPath="${cudaOutputToPath[$cudaOutputName]-}" - [[ -n "$oldPath" ]] && echo "extendcudaHostPathsSeen: warning: overwriting $cudaOutputName from $oldPath to $1" >&2 + [[ -n "$oldPath" ]] && echo "$fnName: warning: overwriting $cudaOutputName from $oldPath to $1" >&2 cudaOutputToPath["$cudaOutputName"]="$1" } -addEnvHooks "$targetOffset" extendcudaHostPathsSeen +addEnvHooks "$targetOffset" extendCudaHostPathsSeen setupCUDAToolkit_ROOT() { - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "setupCUDAToolkit_ROOT: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 + local fnName=setup-cuda-hook::setupCUDAToolkit_ROOT + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 - for path in "${!cudaHostPathsSeen[@]}" ; do + for path in "${!cudaHostPathsSeen[@]}"; do addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path" - if [[ -d "$path/include" ]] ; then - addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$path/include" - fi + [[ -d "$path/include" ]] && addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$path/include" done - export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT" + export cmakeFlagsArray+=( + -DCUDAToolkit_INCLUDE_DIR="${CUDAToolkit_INCLUDE_DIR:-}" + -DCUDAToolkit_ROOT="${CUDAToolkit_ROOT:-}" + ) } preConfigureHooks+=(setupCUDAToolkit_ROOT) setupCUDAToolkitCompilers() { - echo Executing setupCUDAToolkitCompilers >&2 + local fnName=setup-cuda-hook::setupCUDAToolkitCompilers + echo "$fnName: Running" >&2 - if [[ -n "${dontSetupCUDAToolkitCompilers-}" ]] ; then - return - fi + [[ -n "${dontSetupCUDAToolkitCompilers-}" ]] && return # Point NVCC at a compatible compiler @@ -69,8 +90,10 @@ setupCUDAToolkitCompilers() { # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html # 
https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html - export cmakeFlags+=" -DCUDA_HOST_COMPILER=@ccFullPath@" - export cmakeFlags+=" -DCMAKE_CUDA_HOST_COMPILER=@ccFullPath@" + export cmakeFlagsArray+=( + -DCUDA_HOST_COMPILER="@ccFullPath@" + -DCMAKE_CUDA_HOST_COMPILER="@ccFullPath@" + ) # For non-CMake projects: # We prepend --compiler-bindir to nvcc flags. @@ -78,26 +101,23 @@ setupCUDAToolkitCompilers() { # uses the last --compiler-bindir it gets on the command line. # FIXME: this results in "incompatible redefinition" warnings. # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin - if [ -z "${CUDAHOSTCXX-}" ]; then - export CUDAHOSTCXX="@ccFullPath@"; - fi + [[ -z "${CUDAHOSTCXX-}" ]] && export CUDAHOSTCXX="@ccFullPath@" export NVCC_PREPEND_FLAGS+=" --compiler-bindir=@ccRoot@/bin" # NOTE: We set -Xfatbin=-compress-all, which reduces the size of the compiled - # binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as - # the default set of CUDA capabilities we build can regularly cause this to occur (for - # example, with Magma). + # binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as + # the default set of CUDA capabilities we build can regularly cause this to occur (for + # example, with Magma). 
# # @SomeoneSerge: original comment was made by @ConnorBaker in .../cudatoolkit/common.nix - if [[ -z "${dontCompressFatbin-}" ]]; then - export NVCC_PREPEND_FLAGS+=" -Xfatbin=-compress-all" - fi + [[ -z "${dontCompressFatbin-}" ]] && export NVCC_PREPEND_FLAGS+=" -Xfatbin=-compress-all" } preConfigureHooks+=(setupCUDAToolkitCompilers) propagateCudaLibraries() { - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "propagateCudaLibraries: cudaPropagateToOutput=$cudaPropagateToOutput cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 + local fnName=setup-cuda-hook::propagateCudaLibraries + (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: cudaPropagateToOutput=$cudaPropagateToOutput cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 [[ -z "${cudaPropagateToOutput-}" ]] && return @@ -106,11 +126,8 @@ propagateCudaLibraries() { echo "@setupCudaHook@" >> "${!cudaPropagateToOutput}/nix-support/propagated-native-build-inputs" local propagatedBuildInputs=( "${!cudaHostPathsSeen[@]}" ) - for output in $(getAllOutputNames) ; do - if [[ ! "$output" = "$cudaPropagateToOutput" ]] ; then - propagatedBuildInputs+=( "${!output}" ) - fi - break + for output in $(getAllOutputNames); do + [[ ! 
"$output" = "$cudaPropagateToOutput" ]] && propagatedBuildInputs+=( "${!output}" ) && break done # One'd expect this should be propagated-host-host-deps, but that doesn't seem to work From 3297d6fed39fbef2fb63a2861c3900fd28d608b0 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Tue, 26 Mar 2024 13:56:54 +0000 Subject: [PATCH 26/34] cudaPackages.cuda_nvcc: never has a lib output --- .../cuda-modules/cuda/overrides.nix | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index 460874b0e4978..d1f65fdd2a804 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -166,31 +166,6 @@ filterAndCreateOverrides { setupCudaHook, }: prevAttrs: { - # Remove once cuda-find-redist-features has a special case for libcuda - outputs = - # NOTE: The order of build outputs matters as we traverse them when creating - # split outputs. The `lib` output cannot come after `static` as it moves all - # the static libraries back to the `lib` output. - let - libOutputIsPresent = builtins.elem "lib" prevAttrs.outputs; - staticOutputPos = lib.lists.findFirstIndex (x: x == "static") null prevAttrs.outputs; - outputsBeforeStatic = lib.lists.take staticOutputPos prevAttrs.outputs; - outputsFromStaticAndLater = lib.lists.drop staticOutputPos prevAttrs.outputs; - newOutputs = - if libOutputIsPresent then - # If the lib output is present, we want to keep it in the same position - prevAttrs.outputs - else if staticOutputPos == null then - # If the static output is not present, location of the lib output - # doesn't matter and we can append it - prevAttrs.outputs ++ [ "lib" ] - else - # The lib output is missing and the static output is present. - # We need to insert the lib output before the static output. - outputsBeforeStatic ++ [ "lib" ] ++ outputsFromStaticAndLater; - in - newOutputs; - # Patch the nvcc.profile. 
# Syntax: # - `=` for assignment, @@ -219,9 +194,6 @@ filterAndCreateOverrides { + '' echo "Running the cuda_nvcc postPatch" substituteInPlace bin/nvcc.profile \ - --replace-fail \ - '$(TOP)/lib' \ - "''${!outputLib}/lib" \ --replace-fail \ '$(TOP)/$(_NVVM_BRANCH_)' \ "''${!outputBin}/nvvm" \ From 1ac762130a12a366b7638e594d79ba9bf832a2fd Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Tue, 26 Mar 2024 13:57:34 +0000 Subject: [PATCH 27/34] cuda-modules/setup-hooks: wip --- .../mark-for-cudatoolkit-root-hook.sh | 14 ++++++++++---- .../setup-cuda-hook/setup-cuda-hook.sh | 17 ++++++++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh index 92785ccf01ac1..475e19fb1db87 100644 --- a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh +++ b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh @@ -3,18 +3,24 @@ guard=Sourcing reason= -export NIX_DEBUG=1 +# export NIX_DEBUG=1 # Only run the hook from nativeBuildInputs. # See the table under https://nixos.org/manual/nixpkgs/unstable/#dependency-propagation for information # about the different target combinations and their offsets. -if (( "${hostOffset:?}" != -1 && "${targetOffset:?}" != 0 )); then + +# Skip setup hook if we're neither a build-time dep, nor, temporarily, doing a +# native compile. 
+if [[ -v ${strictDeps-} ]]; then + guard=Skipping + reason=" because strictDeps is set" +elif (( "${hostOffset:?}" < 0 )); then guard=Skipping - reason=" because the hook is not in nativeBuildInputs" + reason=" because the hook is not in buildInputs" fi if (( "${NIX_DEBUG:-0}" >= 1 )); then - echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset mark-for-cudatoolkit-root-hook$reason" >&2 + echo "$guard hostOffset=$hostOffset targetOffset=${targetOffset:?} mark-for-cudatoolkit-root-hook$reason" >&2 else echo "$guard mark-for-cudatoolkit-root-hook$reason" >&2 fi diff --git a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh index 99e6e100cf332..4c42a6dc26b48 100644 --- a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh +++ b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh @@ -3,24 +3,27 @@ guard=Sourcing reason= -export NIX_DEBUG=1 +# export NIX_DEBUG=1 # Only run the hook from buildInputs: outside executables like cuda_nvcc, most # CUDA dependencies are needed at runtime, not build-time. # See the table under https://nixos.org/manual/nixpkgs/unstable/#dependency-propagation for information # about the different target combinations and their offsets. -if (( "${hostOffset:?}" != -1 && "${targetOffset:?}" != 0 )); then +# Skip setup hook if we're neither a build-time dep, nor, temporarily, doing a +# native compile. 
+if [[ -v ${strictDeps-} ]]; then guard=Skipping - reason=" because the hook is not in nativeBuildInputs" -fi - -if [[ -n ${cudaSetupHookOnce-} ]]; then + reason=" because strictDeps is set" +elif (( "${hostOffset:?}" < 0 )); then + guard=Skipping + reason=" because the hook is not in buildInputs" +elif [[ -n ${cudaSetupHookOnce-} ]]; then guard=Skipping reason=" because the hook has been propagated more than once" fi if (( "${NIX_DEBUG:-0}" >= 1 )); then - echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset setup-cuda-hook$reason" >&2 + echo "$guard hostOffset=$hostOffset targetOffset=${targetOffset:?} setup-cuda-hook$reason" >&2 else echo "$guard setup-cuda-hook$reason" >&2 fi From a026c059ffc813cece0a81bcb91184878bbb2ce3 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 27 Mar 2024 03:02:25 +0000 Subject: [PATCH 28/34] cuda-modules/cuda/overrides: cuda_nvcc should not include references to cuda_cudart --- .../cuda-modules/cuda/overrides.nix | 69 +++++++++---------- 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index d1f65fdd2a804..f0bcc8cbdd1ed 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -88,8 +88,13 @@ filterAndCreateOverrides { ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ]; }; + # TODO(@connorbaker): cuda_cudart.dev depends on crt/host_config.h, which is from + # cuda_nvcc.dev. It would be nice to be able to encode that. 
cuda_cudart = - { buildPackages, lib }: + { addDriverRunpath, lib }: + let + inherit (addDriverRunpath.__spliced.buildHost or addDriverRunpath) driverLink; + in prevAttrs: { # Remove once cuda-find-redist-features has a special case for libcuda outputs = @@ -105,7 +110,7 @@ filterAndCreateOverrides { while IFS= read -r -d $'\0' path ; do sed -i \ -e "s|^libdir\s*=.*/lib\$|libdir=''${!outputLib}/lib/stubs|" \ - -e "s|^Libs\s*:\(.*\)\$|Libs: \1 -Wl,-rpath,${buildPackages.addDriverRunpath.driverLink}/lib|" \ + -e "s|^Libs\s*:\(.*\)\$|Libs: \1 -Wl,-rpath,${driverLink}/lib|" \ "$path" done < <(find -iname 'cuda-*.pc' -print0) '' @@ -165,6 +170,11 @@ filterAndCreateOverrides { lib, setupCudaHook, }: + let + # CC must come from the host environment, not the target environment because it is + # used at build time. + inherit (backendStdenv.__spliced.buildHost or backendStdenv) cc; + in prevAttrs: { # Patch the nvcc.profile. # Syntax: @@ -180,16 +190,6 @@ filterAndCreateOverrides { # backend-stdenv.nix postPatch = - let - # CC must come from the host environment, not the target environment because it is - # used at build time. - ccBin = lib.getBin (backendStdenv.__spliced.buildHost.cc or backendStdenv.cc); - # CUDA runtime libraries must come from the host/target environment because they - # are used at runtime, not build time (outside of linking). 
- cudartStatic = (cuda_cudart.__spliced.hostTarget or cuda_cudart).static; - cudartLib = lib.getLib (cuda_cudart.__spliced.hostTarget or cuda_cudart); - cudartDev = lib.getDev (cuda_cudart.__spliced.hostTarget or cuda_cudart); - in (prevAttrs.postPatch or "") + '' echo "Running the cuda_nvcc postPatch" @@ -204,19 +204,23 @@ filterAndCreateOverrides { cat << EOF >> bin/nvcc.profile # Fix a compatible backend compiler - PATH += "${ccBin}/bin": + PATH += "${cc}/bin": # Expose the split-out nvvm - LIBRARIES =+ -L"''${!outputBin}/nvvm/lib" - INCLUDES =+ -I"''${!outputBin}/nvvm/include" - - # Expose cudart and the libcuda stubs - LIBRARIES =+ -L"$static/lib" -L"${cudartStatic}/lib" -L"${cudartLib}/lib" -L"${cudartLib}/lib/stubs" - INCLUDES =+ -I"${cudartDev}/include" + LIBRARIES =+ "-L''${!outputBin}/nvvm/lib" + INCLUDES =+ "-I''${!outputBin}/nvvm/include" EOF ''; - propagatedNativeBuildInputs = (prevAttrs.propagatedNativeBuildInputs or [ ]) ++ [ setupCudaHook ]; + propagatedNativeBuildInputs = (prevAttrs.propagatedNativeBuildInputs or [ ]) ++ [ cc ]; + + # NOTE(@connorbaker): + # Though it might seem odd or counter-intuitive to add the setup hook to `propagatedBuildInputs` instead of + # `propagatedNativeBuildInputs`, it is necessary! If you move the setup hook from `propagatedBuildInputs` to + # `propagatedNativeBuildInputs`, it stops being propagated to downstream packages during their build because + # setup hooks in `propagatedNativeBuildInputs` are not designed to affect the runtime or build environment of + # dependencies; they are only meant to affect the build environment of the package that directly includes them. + propagatedBuildInputs = (prevAttrs.propagatedBuildInputs or [ ]) ++ [ setupCudaHook ]; postInstall = (prevAttrs.postInstall or "") @@ -261,23 +265,14 @@ filterAndCreateOverrides { qt5 ? null, qt6 ? 
null, }: - prevAttrs: { - nativeBuildInputs = - prevAttrs.nativeBuildInputs - ++ ( - if (lib.strings.versionOlder prevAttrs.version "2022.2.0") then - [ qt5.wrapQtAppsHook ] - else - [ qt6.wrapQtAppsHook ] - ); - buildInputs = - prevAttrs.buildInputs - ++ ( - if (lib.strings.versionOlder prevAttrs.version "2022.2.0") then - [ qt5.qtwebview ] - else - [ qt6.qtwebview ] - ); + prevAttrs: + let + qt = if lib.strings.versionOlder prevAttrs.version "2022.2.0" then qt5 else qt6; + inherit (qt) wrapQtAppsHook qtwebview; + in + { + nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [ wrapQtAppsHook ]; + buildInputs = prevAttrs.buildInputs ++ [ qtwebview ]; }; nsight_systems = From b340c3ff8811e2a11f596a5343a3d30160196559 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 27 Mar 2024 03:22:08 +0000 Subject: [PATCH 29/34] cudaPackages.nccl: remove unneeded makeFlags, specify splicing, and enable structuredAttrs --- .../development/cuda-modules/nccl/default.nix | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pkgs/development/cuda-modules/nccl/default.nix b/pkgs/development/cuda-modules/nccl/default.nix index e3d10b79386f9..f7a3f78b9bd15 100644 --- a/pkgs/development/cuda-modules/nccl/default.nix +++ b/pkgs/development/cuda-modules/nccl/default.nix @@ -35,6 +35,7 @@ backendStdenv.mkDerivation ( }; strictDeps = true; + __structuredAttrs = true; outputs = [ "out" @@ -66,22 +67,20 @@ backendStdenv.mkDerivation ( preConfigure = '' patchShebangs ./src/device/generate.py - makeFlagsArray+=( - "NVCC_GENCODE=${lib.concatStringsSep " " cudaFlags.gencode}" - ) ''; + # NOTE(@connorbaker): When referencing packages, make sure to use the spliced version corresponding to + # buildPackages instead of pkgs (the default). 
makeFlags = - ["PREFIX=$(out)"] + [ + "PREFIX=$(out)" + "NVCC_GENCODE=${lib.concatStringsSep " " cudaFlags.gencode}" + ] ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [ - "CUDA_HOME=${cudatoolkit}" - "CUDA_LIB=${lib.getLib cudatoolkit}/lib" - "CUDA_INC=${lib.getDev cudatoolkit}/include" + "CUDA_HOME=${cudatoolkit.__spliced.buildHost or cudatoolkit}" ] ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [ - "CUDA_HOME=${cuda_nvcc}" - "CUDA_LIB=${lib.getLib cuda_cudart}/lib" - "CUDA_INC=${lib.getDev cuda_cudart}/include" + "CUDA_HOME=${cuda_nvcc.__spliced.buildHost or cuda_nvcc}" ]; enableParallelBuilding = true; From 0fa534c40570fb28c0408833f14c5a82626b748c Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 27 Mar 2024 03:26:27 +0000 Subject: [PATCH 30/34] cuda-modules/generic-builders/manifest: cleanup --- .../generic-builders/manifest.nix | 62 +++++++------------ 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix index 591acfcc4c051..64850841d995b 100644 --- a/pkgs/development/cuda-modules/generic-builders/manifest.nix +++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix @@ -38,7 +38,7 @@ let sourceTypes ; - inherit (backendStdenv) buildPlatform hostPlatform targetPlatform; + inherit (backendStdenv) hostPlatform; # Get the redist architectures for which package provides distributables. # These are used by meta.platforms. @@ -157,39 +157,21 @@ backendStdenv.mkDerivation ( # src :: Optional Derivation src = - # TODO(@connorbaker): Remove debugging lib.warn and inline this. - let - src = - trivial.mapNullable - ( - { relative_path, sha256, ... 
}: - fetchurl { - url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}"; - inherit sha256; - } - ) - (redistribRelease.${hostPlatformRedistArch} or null); - in - lib.warn - '' - Info: - - redistName: ${redistName} - - hostPlatformRedistArch: ${hostPlatformRedistArch} - - pname: ${finalAttrs.pname} - - version: ${finalAttrs.version} - - outputs: ${builtins.toJSON finalAttrs.outputs} - - brokenConditions: ${builtins.toJSON finalAttrs.brokenConditions} - - badPlatformsConditions: ${builtins.toJSON finalAttrs.badPlatformsConditions} - - buildPlatform: ${buildPlatform.system} - - hostPlatform: ${hostPlatform.system} - - targetPlatform: ${targetPlatform.system} - '' - src; + trivial.mapNullable + ( + { relative_path, sha256, ... }: + fetchurl { + url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${relative_path}"; + inherit sha256; + } + ) + (redistribRelease.${hostPlatformRedistArch} or null); # Handle the pkg-config files: # 1. No FHS # 2. Location expected by the pkg-config wrapper # 3. Generate unversioned names too + # TODO(@connorbaker): Not all packages have a lib or dev output, so we should check for their existence. postPatch = '' for path in pkg-config pkgconfig ; do [[ -d "$path" ]] || continue @@ -236,17 +218,6 @@ backendStdenv.mkDerivation ( # Mark the CUDA toolkit root directory for the CUDA compatibility libraries markForCudatoolkitRootHook # (markForCudatoolkitRootHook.__spliced.buildHost or markForCudatoolkitRootHook) - ] - # autoAddCudaCompatRunpath depends on cuda_compat and would cause - # infinite recursion if applied to `cuda_compat` itself (beside the fact - # that it doesn't make sense in the first place) - ++ lib.optionals (pname != "cuda_compat" && flags.isJetsonBuild) [ - # autoAddCudaCompatRunpath must appear AFTER autoAddDriverRunpath. - # See its documentation in ./setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix. 
- # NOTE(@connorbaker): Because autoAddCudaCompatRunpath is in nativeBuildInputs, it tries to use cuda_compat - # from buildPackages, but we need to use the one from targetPackages. - # We can either use autoAddCudaCompatRunpath.__spliced.hostTarget or move it to buildInputs. - (autoAddCudaCompatRunpath.__spliced.hostTarget or autoAddCudaCompatRunpath) ]; buildInputs = @@ -256,6 +227,17 @@ backendStdenv.mkDerivation ( # nvcc forces us to use an older gcc # NB: We don't actually know if this is the right thing to do stdenv.cc.cc.lib + ] + # autoAddCudaCompatRunpath depends on cuda_compat and would cause + # infinite recursion if applied to `cuda_compat` itself (beside the fact + # that it doesn't make sense in the first place) + ++ lib.optionals (pname != "cuda_compat" && flags.isJetsonBuild) [ + # autoAddCudaCompatRunpath must appear AFTER autoAddDriverRunpath. + # See its documentation in ./setup-hooks/auto-add-cuda-compat-runpath-hook/default.nix. + # NOTE(@connorbaker): If autoAddCudaCompatRunpath is in nativeBuildInputs, it tries to use cuda_compat + # from buildPackages, but we need to use the one from pkgs (pkgsHostTarget). + # We can either use autoAddCudaCompatRunpath.__spliced.hostTarget or move it to buildInputs. 
+ autoAddCudaCompatRunpath ]; # Picked up by autoPatchelf From 1f077ac1323bad24214aca56ecf1ec72a27aaad6 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 27 Mar 2024 03:26:57 +0000 Subject: [PATCH 31/34] cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook: rewrite --- .../mark-for-cudatoolkit-root-hook.sh | 72 +++++++++++-------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh index 475e19fb1db87..67c4f5ecf51b0 100644 --- a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh +++ b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook/mark-for-cudatoolkit-root-hook.sh @@ -1,48 +1,64 @@ # shellcheck shell=bash -guard=Sourcing -reason= +# Guard helper function +# Returns 0 (success) if the hook should be run, 1 (failure) otherwise. +# This allows us to use short-circuit evaluation to avoid running the hook when it shouldn't be. +markForCUDAToolkit_ROOTGuard() { + local -i hostOffset=${hostOffset:?} + local -i targetOffset=${targetOffset:?} + local fnName="mark-for-cudatoolkit-root-hook::markForCUDAToolkit_ROOTGuard hostOffset=$hostOffset targetOffset=$targetOffset" + local guard=Skipping + local reason -# export NIX_DEBUG=1 + # This hook is meant only to add a stub file to the nix-support directory of the package including it in its + # nativeBuildInputs, so that the setup hook propagated by cuda_nvcc, setup-cuda-hook, can detect it and add the + # package to the CUDA toolkit root. Therefore, since it only modifies the package being built and will not be + # propagated, it should only ever be included in nativeBuildInputs. 
+ if (( hostOffset == -1 && targetOffset == 0)); then + guard=Sourcing + reason="because the hook is in nativeBuildInputs relative to the package being built" + fi + + echo "$fnName: $guard $reason" >&2 -# Only run the hook from nativeBuildInputs. -# See the table under https://nixos.org/manual/nixpkgs/unstable/#dependency-propagation for information -# about the different target combinations and their offsets. + # Recall that test commands return 0 for success and 1 for failure. + [[ "$guard" == Sourcing ]] + return $? +} -# Skip setup hook if we're neither a build-time dep, nor, temporarily, doing a -# native compile. -if [[ -v ${strictDeps-} ]]; then - guard=Skipping - reason=" because strictDeps is set" -elif (( "${hostOffset:?}" < 0 )); then - guard=Skipping - reason=" because the hook is not in buildInputs" -fi +# Guard against calling the hook at the wrong time. +markForCUDAToolkit_ROOTGuard || return 0 -if (( "${NIX_DEBUG:-0}" >= 1 )); then - echo "$guard hostOffset=$hostOffset targetOffset=${targetOffset:?} mark-for-cudatoolkit-root-hook$reason" >&2 -else - echo "$guard mark-for-cudatoolkit-root-hook$reason" >&2 -fi +# Make a copy of the current offsets, so that we can use them in information messages; this is necessary because the +# offsets are not consistently available in the environment during various phases of the build. +declare -g snapshotHostOffset="${hostOffset:?}" +declare -g snapshotTargetOffset="${targetOffset:?}" -[[ "$guard" = Sourcing ]] || return 0 +markForCUDAToolkit_ROOTGetFnName() { + local fnName="mark-for-cudatoolkit-root-hook::${1:?}" + local hostOffset="${hostOffset:-$snapshotHostOffset}" + local targetOffset="${targetOffset:-$snapshotTargetOffset}" + echo "$fnName hostOffset=$hostOffset targetOffset=$targetOffset" +} markForCUDAToolkit_ROOT() { - local fnName=mark-for-cudatoolkit-root-hook::markForCUDAToolkit_ROOT - echo "$fnName: Running" >&2 + # Name function never needs to have return value checked. 
+ # shellcheck disable=SC2155 + local fnName="$(markForCUDAToolkit_ROOTGetFnName markForCUDAToolkit_ROOT)" + echo "$fnName: Running on ${prefix:?}" >&2 - mkdir -p "${prefix:?}/nix-support" local markerPath="$prefix/nix-support/include-in-cudatoolkit-root" + mkdir -p "$(dirname "$markerPath")" if [[ -f "$markerPath" ]]; then - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: $markerPath exists, skipping" >&2 - return + (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: $markerPath exists, skipping" >&2 + return 0 fi # Always create the file, even if it's empty, since setup-cuda-hook relies on its existence. # However, only populate it if strictDeps is not set. touch "$markerPath" - if [[ -z ${strictDeps-} ]]; then - (( "${NIX_DEBUG:-0}" >= 1 )) || echo "$fnName: populating $markerPath" >&2 + if [[ -z "${strictDeps-}" ]]; then + (( ${NIX_DEBUG:-0} >= 1 )) || echo "$fnName: populating $markerPath" >&2 echo "${pname:?}-${output:?}" > "$markerPath" fi } From 3fc9e9baf4786acc679ff11e4cd0f2377b6c2e96 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 27 Mar 2024 03:28:32 +0000 Subject: [PATCH 32/34] cudaPackages.saxpy: getDev/getLib would not always select the desired output --- pkgs/development/cuda-modules/saxpy/default.nix | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pkgs/development/cuda-modules/saxpy/default.nix b/pkgs/development/cuda-modules/saxpy/default.nix index e8701f0fd2df5..158b4cf846cc2 100644 --- a/pkgs/development/cuda-modules/saxpy/default.nix +++ b/pkgs/development/cuda-modules/saxpy/default.nix @@ -16,7 +16,6 @@ let flags libcublas ; - inherit (lib) getDev getLib getOutput; in backendStdenv.mkDerivation { pname = "saxpy"; @@ -37,10 +36,9 @@ backendStdenv.mkDerivation { buildInputs = lib.optionals (cudaOlder "11.4") [cudatoolkit] ++ lib.optionals (cudaAtLeast "11.4") [ - (getDev libcublas) - (getLib libcublas) - (getOutput "static" libcublas) cuda_cudart + libcublas.dev + libcublas.lib ] ++ lib.optionals (cudaAtLeast "12.0") 
[cuda_cccl]; From 7fced1173fea4a484a68003482d75c66709e82d2 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 27 Mar 2024 03:37:38 +0000 Subject: [PATCH 33/34] cuda-modules/setup-hooks/setup-cuda-hook: rewrite --- .../setup-cuda-hook/setup-cuda-hook.sh | 123 +++++++++++------- 1 file changed, 76 insertions(+), 47 deletions(-) diff --git a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh index 4c42a6dc26b48..694a4b6cf0ef9 100644 --- a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh +++ b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook/setup-cuda-hook.sh @@ -1,72 +1,97 @@ # shellcheck shell=bash -guard=Sourcing -reason= - -# export NIX_DEBUG=1 - -# Only run the hook from buildInputs: outside executables like cuda_nvcc, most -# CUDA dependencies are needed at runtime, not build-time. -# See the table under https://nixos.org/manual/nixpkgs/unstable/#dependency-propagation for information -# about the different target combinations and their offsets. -# Skip setup hook if we're neither a build-time dep, nor, temporarily, doing a -# native compile. -if [[ -v ${strictDeps-} ]]; then - guard=Skipping - reason=" because strictDeps is set" -elif (( "${hostOffset:?}" < 0 )); then - guard=Skipping - reason=" because the hook is not in buildInputs" -elif [[ -n ${cudaSetupHookOnce-} ]]; then - guard=Skipping - reason=" because the hook has been propagated more than once" -fi - -if (( "${NIX_DEBUG:-0}" >= 1 )); then - echo "$guard hostOffset=$hostOffset targetOffset=${targetOffset:?} setup-cuda-hook$reason" >&2 -else - echo "$guard setup-cuda-hook$reason" >&2 -fi - -[[ "$guard" = Sourcing ]] || return 0 +# Guard helper function +# Returns 0 (success) if the hook should be run, 1 (failure) otherwise. +# This allows us to use short-circuit evaluation to avoid running the hook when it shouldn't be. 
+setupCudaHookGuard() { + local -i hostOffset=${hostOffset:?} + local -i targetOffset=${targetOffset:?} + local fnName="setup-cuda-hook::setupCudaHookGuard hostOffset=$hostOffset targetOffset=$targetOffset" + local guard=Skipping + local reason= + + # The once-check comes first: declaring cudaHostPathsSeen/cudaOutputToPath a second time would reset them, so a + # hook that has already been sourced must be skipped even when the offsets match. Otherwise, source the hook only + # when it is reached at hostOffset == -1 and targetOffset == 0 (the nativeBuildInputs position relative to the + # package being built); at any other offsets it is skipped. + if [[ -n "${cudaSetupHookOnce-}" ]]; then + guard=Skipping + reason="because the hook has been propagated more than once" + elif (( hostOffset == -1 && targetOffset == 0)); then + guard=Sourcing + reason="because the hook is in nativeBuildInputs relative to the package being built" + fi + + echo "$fnName: $guard $reason" >&2 + + # Recall that test commands return 0 for success and 1 for failure. + [[ "$guard" == Sourcing ]] + return $? +} + +# Guard against calling the hook at the wrong time. +setupCudaHookGuard || return 0 declare -g cudaSetupHookOnce=1 declare -Ag cudaHostPathsSeen=() declare -Ag cudaOutputToPath=() -extendCudaHostPathsSeen() { - local fnName=setup-cuda-hook::extendCudaHostPathsSeen - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: $1" >&2 +# Make a copy of the current offsets, so that we can use them in information messages; this is necessary because the +# offsets are not consistently available in the environment during various phases of the build. 
+declare -g snapshotHostOffset="${hostOffset:?}" +declare -g snapshotTargetOffset="${targetOffset:?}" +setupCudaHookGetFnName() { + local fnName="setup-cuda-hook::${1:?}" + local hostOffset="${hostOffset:-$snapshotHostOffset}" + local targetOffset="${targetOffset:-$snapshotTargetOffset}" + echo "$fnName hostOffset=$hostOffset targetOffset=$targetOffset" +} + +extendCudaHostPathsSeen() { + # Name function never needs to have return value checked. + # shellcheck disable=SC2155 + local fnName="$(setupCudaHookGetFnName extendCudaHostPathsSeen)" local markerPath="$1/nix-support/include-in-cudatoolkit-root" + (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: checking for existence of $markerPath" >&2 + if [[ ! -f "$markerPath" ]]; then - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: skipping since $markerPath exists" >&2 - return + (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: skipping since $markerPath does not exist" >&2 + return 0 fi - if [[ -v cudaHostPathsSeen[$1] ]]; then - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: skipping since $1 has already been seen" >&2 - return + if [[ -v cudaHostPathsSeen["$1"] ]]; then + (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: skipping since $1 has already been seen" >&2 + return 0 fi # Add the path to the list of CUDA host paths. cudaHostPathsSeen["$1"]=1 + (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: added $1 to cudaHostPathsSeen" >&2 + + # Only attempt to read the file referenced by markerPath if strictDeps is not set; otherwise it is blank and we + # don't need to read it. + [[ -n "${strictDeps-}" ]] && return 0 # E.g. cuda_cudart-lib local cudaOutputName - read -r cudaOutputName < "$markerPath" + # Fail gracefully if the file is empty. This may happen if the package was built with strictDeps set, + # but the current build does not have strictDeps set. 
+ read -r cudaOutputName < "$markerPath" || return 0 - [[ -z "$cudaOutputName" ]] && return + [[ -z "$cudaOutputName" ]] && return 0 local oldPath="${cudaOutputToPath[$cudaOutputName]-}" [[ -n "$oldPath" ]] && echo "$fnName: warning: overwriting $cudaOutputName from $oldPath to $1" >&2 cudaOutputToPath["$cudaOutputName"]="$1" } -addEnvHooks "$targetOffset" extendCudaHostPathsSeen +addEnvHooks "${targetOffset:?}" extendCudaHostPathsSeen setupCUDAToolkit_ROOT() { - local fnName=setup-cuda-hook::setupCUDAToolkit_ROOT - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 + # Name function never needs to have return value checked. + # shellcheck disable=SC2155 + local fnName="$(setupCudaHookGetFnName setupCUDAToolkit_ROOT)" + (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 for path in "${!cudaHostPathsSeen[@]}"; do addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path" @@ -81,10 +106,12 @@ setupCUDAToolkit_ROOT() { preConfigureHooks+=(setupCUDAToolkit_ROOT) setupCUDAToolkitCompilers() { - local fnName=setup-cuda-hook::setupCUDAToolkitCompilers + # Name function never needs to have return value checked. + # shellcheck disable=SC2155 + local fnName="$(setupCudaHookGetFnName setupCUDAToolkitCompilers)" echo "$fnName: Running" >&2 - [[ -n "${dontSetupCUDAToolkitCompilers-}" ]] && return + [[ -n "${dontSetupCUDAToolkitCompilers-}" ]] && return 0 # Point NVCC at a compatible compiler @@ -119,10 +146,12 @@ setupCUDAToolkitCompilers() { preConfigureHooks+=(setupCUDAToolkitCompilers) propagateCudaLibraries() { - local fnName=setup-cuda-hook::propagateCudaLibraries - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "$fnName: cudaPropagateToOutput=$cudaPropagateToOutput cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 + # Name function never needs to have return value checked. 
+ # shellcheck disable=SC2155 + local fnName="$(setupCudaHookGetFnName propagateCudaLibraries)" + (( ${NIX_DEBUG:-0} >= 1 )) && echo "$fnName: cudaPropagateToOutput=$cudaPropagateToOutput cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 - [[ -z "${cudaPropagateToOutput-}" ]] && return + [[ -z "${cudaPropagateToOutput-}" ]] && return 0 mkdir -p "${!cudaPropagateToOutput}/nix-support" # One'd expect this should be propagated-bulid-build-deps, but that doesn't seem to work From b43bf06bf052f326636df3f27415b14190461040 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Wed, 27 Mar 2024 19:35:19 +0000 Subject: [PATCH 34/34] cudaPackages.saxpy: attempt manually setting flags for cross --- .../cuda-modules/cuda/overrides.nix | 4 +- .../cuda-modules/saxpy/default.nix | 119 ++++++++++++++++-- 2 files changed, 110 insertions(+), 13 deletions(-) diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix index f0bcc8cbdd1ed..fe902e68f4000 100644 --- a/pkgs/development/cuda-modules/cuda/overrides.nix +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -212,7 +212,7 @@ filterAndCreateOverrides { EOF ''; - propagatedNativeBuildInputs = (prevAttrs.propagatedNativeBuildInputs or [ ]) ++ [ cc ]; + # propagatedNativeBuildInputs = (prevAttrs.propagatedNativeBuildInputs or [ ]) ++ [ cc ]; # NOTE(@connorbaker): # Though it might seem odd or counter-intuitive to add the setup hook to `propagatedBuildInputs` instead of @@ -220,7 +220,7 @@ filterAndCreateOverrides { # `propagatedNativeBuildInputs`, it stops being propagated to downstream packages during their build because # setup hooks in `propagatedNativeBuildInputs` are not designed to affect the runtime or build environment of # dependencies; they are only meant to affect the build environment of the package that directly includes them. 
- propagatedBuildInputs = (prevAttrs.propagatedBuildInputs or [ ]) ++ [ setupCudaHook ]; + # propagatedBuildInputs = (prevAttrs.propagatedBuildInputs or [ ]) ++ [ setupCudaHook ]; postInstall = (prevAttrs.postInstall or "") diff --git a/pkgs/development/cuda-modules/saxpy/default.nix b/pkgs/development/cuda-modules/saxpy/default.nix index 158b4cf846cc2..b457ee34b0586 100644 --- a/pkgs/development/cuda-modules/saxpy/default.nix +++ b/pkgs/development/cuda-modules/saxpy/default.nix @@ -30,17 +30,114 @@ backendStdenv.mkDerivation { cmake autoAddDriverRunpath ] - ++ lib.optionals (cudaOlder "11.4") [cudatoolkit] - ++ lib.optionals (cudaAtLeast "11.4") [cuda_nvcc]; - - buildInputs = - lib.optionals (cudaOlder "11.4") [cudatoolkit] - ++ lib.optionals (cudaAtLeast "11.4") [ - cuda_cudart - libcublas.dev - libcublas.lib - ] - ++ lib.optionals (cudaAtLeast "12.0") [cuda_cccl]; + ++ lib.optionals (cudaOlder "11.4") [ cudatoolkit ] + ++ lib.optionals (cudaAtLeast "11.4") [ cuda_nvcc ]; + + # buildInputs = + # lib.optionals (cudaOlder "11.4") [ cudatoolkit ] + # ++ lib.optionals (cudaAtLeast "11.4") [ + # cuda_cudart + # libcublas + # # libcublas.dev + # # libcublas.lib + # ] + # ++ lib.optionals (cudaAtLeast "12.0") [ cuda_cccl ]; + + # TODO: CMake tells us CUDA_HOST_COMPILER is an unused variable; CMAKE_CUDA_HOST_COMPILER is used and we can set it. + # TODO: CMake tells us CUDAToolkit_INCLUDE_DIR is an unused variable; CUDAToolkit_INCLUDE_DIRS is used and we can set it. + # TODO: What is the difference between CUDA_CUDA_COMPILER and CMAKE_CUDA_HOST_COMPILER, or CUDACXX and CUDAHOSTCXX? + # TODO: The CUDA compiler source identification process used by CMake requires building and running a test program. This is not possible in a cross-compilation environment. We can use CMAKE_CUDA_FLAGS_INIT to get around it. + # TODO: Why aren't any of these correctly configured by the environment? 
+ # TODO: See whether CUDAToolkit_INCLUDE_DIR etc is necessary, or just the LIBRARY_PATH and LD_LIBRARY_PATH. + # TODO: /nix/store/j2y057vz3i19yh4zjsan1s3q256q15rd-binutils-2.41/bin/ld: /nix/store/gh1azxmwdisz1q92h1hw20w9l72gwza7-libcublas-aarch64-unknown-linux-gnu-12.2.5.6-lib/lib/libcublas.so: error adding symbols: file in wrong format + preConfigure = + let + inherit (backendStdenv.__spliced.buildHost) cc; + ccFullPath = "${cc}/bin/${cc.targetPrefix}c++"; + ccRoot = "${cc}"; + nvccBuildHost = cuda_nvcc.__spliced.buildHost; + cudartBuildHost = cuda_cudart.__spliced.buildHost; + + cudartHostTarget = cuda_cudart.__spliced.hostTarget; + ccclHostTarget = cuda_cccl.__spliced.hostTarget; + libcublasHostTarget = libcublas.__spliced.hostTarget; + in + # Working (until linker error) + # export NVCC_PREPEND_FLAGS+=" -I${cudartHostTarget}/include -I${ccclHostTarget}/include -L${cudartHostTarget}/lib -L${ccclHostTarget}/lib" + # export LIBRARY_PATH+="${cudartHostTarget}/lib" + # export LD_LIBRARY_PATH+="${cudartHostTarget}/lib" + # export CPATH="$CUDAToolkit_INCLUDE_DIRS" + # + # Ripped from setup-cuda-hook::setupCUDAToolkitCompilers, added logging + '' + # Name function never needs to have return value checked. 
+ # shellcheck disable=SC2155 + + for path in "${cudartHostTarget}" "${ccclHostTarget}" "${libcublasHostTarget}" "${nvccBuildHost}"; do + if [[ -d "$path" ]]; then + echo "Adding $path to CUDAToolkit search path" + addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path" + echo "CUDAToolkit_ROOT is now $CUDAToolkit_ROOT" + else + echo "Skipping $path as it is not a directory" + fi + + if [[ -d "$path/include" ]]; then + echo "Adding $path/include to CUDAToolkit search path" + addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIRS "$path/include" + echo "CUDAToolkit_INCLUDE_DIRS is now $CUDAToolkit_INCLUDE_DIRS" + else + echo "Skipping $path/include as it is not a directory" + fi + done + + export cmakeFlagsArray+=( + -DCUDAToolkit_INCLUDE_DIRS="''${CUDAToolkit_INCLUDE_DIRS:-}" + -DCUDAToolkit_ROOT="''${CUDAToolkit_ROOT:-}" + ) + '' + # Try to export the include dirs to CPATH, replacing the semicolons with colons + + '' + export CPATH="''${CUDAToolkit_INCLUDE_DIRS//;/:}" + echo "CPATH is now $CPATH" + '' + # Ripped from setup-cuda-hook::setupCUDAToolkitCompilers + + '' + # Point NVCC at a compatible compiler + + # For CMake-based projects: + # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables + # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html + # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html + + export cmakeFlagsArray+=( + -DCMAKE_CUDA_HOST_COMPILER="${ccFullPath}" + ) + + # For non-CMake projects: + # We prepend --compiler-bindir to nvcc flags. + # Downstream packages can override these, because NVCC + # uses the last --compiler-bindir it gets on the command line. + # FIXME: this results in "incompatible redefinition" warnings. 
+ # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin + export CUDAHOSTCXX="${ccFullPath}" + + export NVCC_PREPEND_FLAGS+=" --compiler-bindir=${ccRoot}/bin" + + # NOTE: We set -Xfatbin=-compress-all, which reduces the size of the compiled + # binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as + # the default set of CUDA capabilities we build can regularly cause this to occur (for + # example, with Magma). + # + # @SomeoneSerge: original comment was made by @ConnorBaker in .../cudatoolkit/common.nix + export NVCC_PREPEND_FLAGS+=" -Xfatbin=-compress-all" + '' + # Try to get around compiler initialization via CMAKE_CUDA_FLAGS_INIT + + '' + export cmakeFlagsArray+=( + -DCMAKE_CUDA_FLAGS_INIT="-L${cudartBuildHost}/lib -I${cudartBuildHost}/include" + ) + ''; cmakeFlags = [ (lib.cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (