Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/develop' into staged-ci
Browse files Browse the repository at this point in the history
  • Loading branch information
masterleinad committed Oct 8, 2024
2 parents 29cfc1a + 4e88587 commit 49e9c6a
Show file tree
Hide file tree
Showing 84 changed files with 3,021 additions and 438 deletions.
2 changes: 2 additions & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Formatted CMake files with cmake-format
0247634f35e2f9e6b9dec3c80cae567b15027554
# Moved to clang-format-16
60fb9cc94b40e698cbc3278c5538f58dee721276
# Formatted the entire codebase with ClangFormat 8
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@461ef6c76dfe95d5c364de2f431ddbd31a417628 # v3.26.9
uses: github/codeql-action/init@6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea # v3.26.11
with:
languages: c-cpp

Expand All @@ -43,6 +43,6 @@ jobs:
cmake --build build --parallel 2

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@461ef6c76dfe95d5c364de2f431ddbd31a417628 # v3.26.9
uses: github/codeql-action/analyze@6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea # v3.26.11
with:
category: "/language:c-cpp"
2 changes: 1 addition & 1 deletion .github/workflows/continuous-integration-linux-hpx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
repository: STELLAR-GROUP/hpx
ref: v1.9.0
path: hpx
- uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
- uses: actions/cache@2cdf405574d6ef1f33a1d12acccd3ae82f47b3f2 # v4.1.0
id: cache-hpx
with:
path: ./hpx/install
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/continuous-integration-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
sudo cmake --build . --target install --parallel 2
- name: Checkout code
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
- uses: actions/cache@2cdf405574d6ef1f33a1d12acccd3ae82f47b3f2 # v4.1.0
with:
path: ~/.cache/ccache
key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{ github.ref }}-${{ github.sha }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/performance-benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
- uses: actions/cache@2cdf405574d6ef1f33a1d12acccd3ae82f47b3f2 # v4.1.0
with:
path: ~/.cache/ccache
key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }}-${{ github.ref }}-${{ github.sha }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scorecard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,6 @@ jobs:

# Upload the results to GitHub's code scanning dashboard.
- name: "Upload SARIF results to code scanning"
uses: github/codeql-action/upload-sarif@461ef6c76dfe95d5c364de2f431ddbd31a417628 # v3.26.9
uses: github/codeql-action/upload-sarif@6db8d6351fd0be61f9ed8ebd12ccd35dcec51fea # v3.26.11
with:
sarif_file: results.sarif
56 changes: 56 additions & 0 deletions .github/workflows/snl-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# SNL-CI workflow: configures, builds, installs and tests Kokkos with the CUDA
# backend and C++20 on the runner labelled below, for pull requests.
name: SNL-CI

# Runs when a pull request is opened, reopened, or updated (synchronize).
# Pull requests whose changes are limited to Markdown files are skipped.
on:
pull_request:
paths-ignore:
- '**/*.md'
types: [ opened, reopened, synchronize ]

# The workflow token is granted no access to repository contents.
permissions:
contents: none

# Cancels any in progress 'workflow' associated with this PR
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
# Single job; it is scheduled on a runner carrying the label listed in runs-on.
CUDA_12_2_CPP20:
name: SNL_CUDA_NVCC_12_2_CPP20
runs-on: [snl-kk-env-cuda-12.2.0-gcc-11.3.0-latest]

# Steps, in order:
#   checkout_kokkos          - checks out kokkos/kokkos at github.base_ref into ./kokkos.
#                              NOTE(review): base_ref is the pull request's target branch,
#                              so this looks like it builds the target branch rather than
#                              the PR's own changes - confirm this is intended.
#   configure_kokkos         - prints GPU status (nvidia-smi) and configures a static,
#                              C++20, CUDA build with Kokkos_ARCH_HOPPER90, with tests,
#                              examples and benchmarks enabled and deprecated code
#                              (and its warnings) disabled.
#   build_and_install_kokkos - builds with 36 parallel jobs and installs into the
#                              "install" prefix, relative to the kokkos directory.
#   test_kokkos              - runs ctest from kokkos/build, printing output of failing
#                              tests, with a 3600 second timeout per test.
steps:
- name: checkout_kokkos
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
repository: kokkos/kokkos
ref: ${{ github.base_ref }}
path: kokkos

- name: configure_kokkos
run: |
nvidia-smi
cd kokkos
cmake -B build \
-DCMAKE_CXX_STANDARD=20 \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ARCH_HOPPER90=ON \
-DCMAKE_CXX_EXTENSIONS=OFF \
-DBUILD_SHARED_LIBS=OFF \
-DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \
-DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \
-DKokkos_ENABLE_TESTS=ON \
-DKokkos_ENABLE_EXAMPLES=ON \
-DKokkos_ENABLE_BENCHMARKS=ON \
./
- name: build_and_install_kokkos
working-directory: kokkos
run: |
cmake --build build -j36
cmake --install build --prefix install
- name: test_kokkos
working-directory: kokkos/build
run: ctest --output-on-failure --timeout 3600

9 changes: 9 additions & 0 deletions .gitlab/hpsf-gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# GitLab CI job "test" (stage: test). It is picked up by runners tagged
# nvidia-gh200 and executes inside the masterleinad/kokkos-nvcc:12.6.1 image.
test:
stage: test
tags: [nvidia-gh200]
image: masterleinad/kokkos-nvcc:12.6.1
# Script: configure a CUDA build for Kokkos_ARCH_HOPPER90 with the
# IMPL_CUDA_UNIFIED_MEMORY option and tests enabled, build it with 48 parallel
# jobs, then run the test suite verbosely (ctest -V) from the build directory.
script:
- cmake -B build -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_HOPPER90=ON -DKokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY=ON -DKokkos_ENABLE_TESTS=ON
- cmake --build build -j48
- cd build
- ctest -V
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
* SIMD: Allow flexible vector width for 32 bit types [\#6802](https://github.com/kokkos/kokkos/pull/6802)
* Updates for `Kokkos::Array`: add `kokkos_swap(Array<T, N>)` specialization [\#6943](https://github.com/kokkos/kokkos/pull/6943), add `Kokkos::to_array` [\#6375](https://github.com/kokkos/kokkos/pull/6375), make `Kokkos::Array` equality-comparable [\#7148](https://github.com/kokkos/kokkos/pull/7148)
* Structured binding support for `Kokkos::complex` [\#7040](https://github.com/kokkos/kokkos/pull/7040)
* Introduce `KOKKOS_DEDUCTION_GUIDE` macro to allow for portable user-defined deduction guides [\#6954](https://github.com/kokkos/kokkos/pull/6954)

### Build System Changes
* Do not require OpenMP support for languages other than CXX [\#6965](https://github.com/kokkos/kokkos/pull/6965)
Expand Down
9 changes: 8 additions & 1 deletion Makefile.kokkos
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ KOKKOS_STANDALONE_CMAKE ?= "no"
# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,enable_malloc_async
KOKKOS_CUDA_OPTIONS ?= ""

# Options: rdc
# Options: rdc,enable_malloc_async
KOKKOS_HIP_OPTIONS ?= ""

# Default settings specific options.
Expand Down Expand Up @@ -96,6 +96,7 @@ KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPT
KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecation_warnings)

KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)
KOKKOS_INTERNAL_HIP_ENABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),enable_malloc_async)
KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE := $(call kokkos_has_string,$(KOKKOS_OPENACC_OPTIONS),force_host_as_device)

# Check for Kokkos Host Execution Spaces one of which must be on.
Expand Down Expand Up @@ -1228,6 +1229,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_CXXFLAGS+=-fno-gpu-rdc
KOKKOS_LDFLAGS+=-fno-gpu-rdc
endif

# Reflect the HIP "enable_malloc_async" option in the header text passed to
# kokkos_append_header: emit a define of KOKKOS_ENABLE_IMPL_HIP_MALLOC_ASYNC
# when the option was requested, otherwise a commented-out undef of it.
# NOTE(review): presumably $H expands to '#' and kokkos_append_header appends
# the line to the generated configuration header - confirm against their
# definitions elsewhere in this Makefile.
ifeq ($(KOKKOS_INTERNAL_HIP_ENABLE_MALLOC_ASYNC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_HIP_MALLOC_ASYNC")
else
tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_HIP_MALLOC_ASYNC */")
endif
endif

ifneq ($(KOKKOS_INTERNAL_USE_ARCH_AMD), 0)
Expand Down
1 change: 1 addition & 0 deletions algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wsuggest-override"

#if defined(KOKKOS_COMPILER_CLANG)
// Some versions of Clang fail to compile Thrust, failing with errors like
Expand Down
1 change: 1 addition & 0 deletions algorithms/src/sorting/impl/Kokkos_SortImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wsuggest-override"

#if defined(KOKKOS_COMPILER_CLANG)
// Some versions of Clang fail to compile Thrust, failing with errors like
Expand Down
4 changes: 4 additions & 0 deletions algorithms/unit_tests/TestBinSortA.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ void test_sort_integer_overflow() {
} // namespace BinSortSetA

TEST(TEST_CATEGORY, BinSortGenericTests) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif
using ExecutionSpace = TEST_EXECSPACE;
using key_type = unsigned;
constexpr int N = 171;
Expand Down
4 changes: 4 additions & 0 deletions algorithms/unit_tests/TestBinSortB.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ void run_for_rank2() {
} // namespace BinSortSetB

TEST(TEST_CATEGORY, BinSortUnsignedKeyLayoutStrideValues) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif
using ExeSpace = TEST_EXECSPACE;
using key_type = unsigned;
BinSortSetB::run_for_rank1<ExeSpace, key_type, int>();
Expand Down
10 changes: 10 additions & 0 deletions algorithms/unit_tests/TestNestedSort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,11 @@ void test_nested_sort_by_key(unsigned int N, KeyType minKey, KeyType maxKey,
} // namespace NestedSortImpl

TEST(TEST_CATEGORY, NestedSort) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif

using ExecutionSpace = TEST_EXECSPACE;
NestedSortImpl::test_nested_sort<ExecutionSpace, unsigned>(171, 0U, UINT_MAX);
NestedSortImpl::test_nested_sort<ExecutionSpace, float>(42, -1e6f, 1e6f);
Expand All @@ -394,6 +399,11 @@ TEST(TEST_CATEGORY, NestedSort) {
}

TEST(TEST_CATEGORY, NestedSortByKey) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif

using ExecutionSpace = TEST_EXECSPACE;

// Second/third template arguments are key and value respectively.
Expand Down
9 changes: 9 additions & 0 deletions algorithms/unit_tests/TestRandom.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,11 @@ void test_duplicate_stream() {
} // namespace AlgoRandomImpl

TEST(TEST_CATEGORY, Random_XorShift64) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif

using ExecutionSpace = TEST_EXECSPACE;

#if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \
Expand All @@ -562,6 +567,10 @@ TEST(TEST_CATEGORY, Random_XorShift64) {

TEST(TEST_CATEGORY, Random_XorShift1024_0) {
using ExecutionSpace = TEST_EXECSPACE;
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif

#if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \
defined(KOKKOS_ENABLE_HIP)
Expand Down
2 changes: 1 addition & 1 deletion algorithms/unit_tests/TestRandomAccessIterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace stdalgos {

struct random_access_iterator_test : std_algorithms_test {
public:
virtual void SetUp() {
void SetUp() override {
Kokkos::parallel_for(m_static_view.extent(0),
AssignIndexFunctor<static_view_t>(m_static_view));

Expand Down
8 changes: 8 additions & 0 deletions algorithms/unit_tests/TestSort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ void test_sort_integer_overflow() {
} // namespace SortImpl

TEST(TEST_CATEGORY, SortUnsignedValueType) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif
using ExecutionSpace = TEST_EXECSPACE;
using key_type = unsigned;
constexpr int N = 171;
Expand All @@ -224,6 +228,10 @@ TEST(TEST_CATEGORY, SortUnsignedValueType) {
}

TEST(TEST_CATEGORY, SortEmptyView) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif
using ExecutionSpace = TEST_EXECSPACE;

// does not matter if we use int or something else
Expand Down
4 changes: 3 additions & 1 deletion algorithms/unit_tests/TestSortByKey.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,9 @@ TEST(TEST_CATEGORY, SortByKeyWithStrides) {
ASSERT_EQ(sort_fails, 0u);
}

TEST(TEST_CATEGORY, SortByKeyKeysLargerThanValues) {
TEST(TEST_CATEGORY_DEATH, SortByKeyKeysLargerThanValues) {
::testing::FLAGS_gtest_death_test_style = "threadsafe";

using ExecutionSpace = TEST_EXECSPACE;

// does not matter if we use int or something else
Expand Down
2 changes: 1 addition & 1 deletion algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace stdalgos {

struct std_algorithms_mod_seq_ops_test : std_algorithms_test {
public:
virtual void SetUp() {
void SetUp() override {
Kokkos::parallel_for(m_static_view.extent(0),
AssignIndexFunctor<static_view_t>(m_static_view));
}
Expand Down
1 change: 1 addition & 0 deletions cmake/KokkosCore_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
#cmakedefine KOKKOS_ENABLE_IMPL_HIP_MALLOC_ASYNC
#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
#cmakedefine KOKKOS_ENABLE_SYCL_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED
Expand Down
3 changes: 3 additions & 0 deletions cmake/kokkos_arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ if(KOKKOS_ENABLE_COMPILER_WARNINGS)
"-Wsign-compare"
"-Wtype-limits"
"-Wuninitialized"
"-Wsuggest-override"
)

# NOTE KOKKOS_ prefixed variable (all uppercase) is not set yet because TPLs are processed after ARCH
Expand Down Expand Up @@ -762,6 +763,8 @@ if(KOKKOS_ENABLE_SYCL)
compiler_specific_flags(DEFAULT -fsycl-device-code-split=off -DDESUL_SYCL_DEVICE_GLOBAL_SUPPORTED)
endif()
endif()

check_cxx_symbol_exists(SYCL_EXT_ONEAPI_GRAPH "sycl/sycl.hpp" KOKKOS_IMPL_HAVE_SYCL_EXT_ONEAPI_GRAPH)
endif()

set(CUDA_ARCH_ALREADY_SPECIFIED "")
Expand Down
9 changes: 8 additions & 1 deletion cmake/kokkos_enable_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ kokkos_enable_option(
"Whether multiple kernels are instantiated at compile time - improve performance but increase compile time"
)
kokkos_enable_option(IMPL_HIP_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for HIP")
kokkos_enable_option(IMPL_HIP_MALLOC_ASYNC OFF "Whether to enable hipMallocAsync")
kokkos_enable_option(OPENACC_FORCE_HOST_AS_DEVICE OFF "Whether to force to use host as a target device for OpenACC")

# This option will go away eventually, but allows fallback to old implementation when needed.
Expand Down Expand Up @@ -167,7 +168,13 @@ check_device_specific_options(
IMPL_CUDA_UNIFIED_MEMORY
)
check_device_specific_options(
DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE HIP_MULTIPLE_KERNEL_INSTANTIATIONS IMPL_HIP_UNIFIED_MEMORY
DEVICE
HIP
OPTIONS
HIP_RELOCATABLE_DEVICE_CODE
HIP_MULTIPLE_KERNEL_INSTANTIATIONS
IMPL_HIP_UNIFIED_MEMORY
IMPL_HIP_MALLOC_ASYNC
)
check_device_specific_options(DEVICE HPX OPTIONS IMPL_HPX_ASYNC_DISPATCH)
check_device_specific_options(DEVICE OPENACC OPTIONS OPENACC_FORCE_HOST_AS_DEVICE)
Expand Down
24 changes: 2 additions & 22 deletions containers/src/Kokkos_DynRankView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1361,26 +1361,6 @@ struct MirrorDRViewType {
std::conditional_t<is_same_memspace, src_view_type, dest_view_type>;
};

// Trait describing a mirror of Kokkos::DynRankView<T, P...> in the memory
// space of Space. It exposes the source view type, the target memory space,
// whether the two memory spaces coincide, and the DynRankView type to use as
// the mirror.
template <class Space, class T, class... P>
struct MirrorDRVType {
// The incoming view type (the view being mirrored)
using src_view_type = typename Kokkos::DynRankView<T, P...>;
// The memory space for the mirror view
using memory_space = typename Space::memory_space;
// Whether the mirror's memory space is the same as the source view's.
// This is only reported; it does not influence view_type below.
enum {
is_same_memspace =
std::is_same_v<memory_space, typename src_view_type::memory_space>
};
// The array_layout, taken unchanged from the source view
using array_layout = typename src_view_type::array_layout;
// The data type (we probably want it non-const since otherwise we can't even
// deep_copy to it).
using data_type = typename src_view_type::non_const_data_type;
// The destination view type: a DynRankView in Space with the source's layout
// and non-const data type. It is defined unconditionally, i.e. also when the
// memory spaces are the same.
using view_type = Kokkos::DynRankView<data_type, array_layout, Space>;
};

} // namespace Impl

namespace Impl {
Expand All @@ -1397,9 +1377,9 @@ inline auto create_mirror(const DynRankView<T, P...>& src,
arg_prop, std::string(src.label()).append("_mirror"));

if constexpr (Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
using dst_type = typename Impl::MirrorDRVType<
using dst_type = typename Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type;
P...>::dest_view_type;
return dst_type(prop_copy,
Impl::reconstructLayout(src.layout(), src.rank()));
} else {
Expand Down
Loading

0 comments on commit 49e9c6a

Please sign in to comment.