Skip to content

Commit

Permalink
Merge branch 'develop' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
dalg24 authored Oct 1, 2024
2 parents 9572296 + f90a1e1 commit 7e8140c
Show file tree
Hide file tree
Showing 27 changed files with 739 additions and 87 deletions.
3 changes: 0 additions & 3 deletions .jenkins
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,6 @@ pipeline {
-DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \
-DKokkos_ENABLE_TESTS=ON \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_OPENMP=ON \
.. && \
make -j8 && ctest --verbose'''
Expand Down Expand Up @@ -430,7 +429,6 @@ pipeline {
-DKokkos_ENABLE_TESTS=ON \
-DKokkos_ENABLE_BENCHMARKS=ON \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_TUNING=ON \
-DKokkos_ARCH_VOLTA70=ON \
.. && \
Expand Down Expand Up @@ -470,7 +468,6 @@ pipeline {
-DKokkos_ENABLE_TESTS=ON \
-DKokkos_ENABLE_BENCHMARKS=ON \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_LIBDL=OFF \
-DKokkos_ENABLE_OPENMP=ON \
-DKokkos_ENABLE_IMPL_MDSPAN=OFF \
Expand Down
2 changes: 2 additions & 0 deletions Makefile.targets
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ Kokkos_HIP_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp
Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
Kokkos_HIP_ZeroMemset.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_ZeroMemset.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_ZeroMemset.cpp
Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
endif
Expand Down
2 changes: 1 addition & 1 deletion containers/unit_tests/TestDynRankViewTypedefs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ constexpr bool test_view_typedefs_impl() {
static_assert(std::is_same_v<typename ViewType::data_handle_type, typename ViewType::pointer_type>);
static_assert(std::is_same_v<typename ViewType::reference, typename ViewType::reference_type>);
return true;
};
}

// Helper function to unpack data type and other args from the View, and pass them on
template<class T, class ... ViewArgs>
Expand Down
5 changes: 1 addition & 4 deletions containers/unit_tests/TestErrorReporter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ struct ErrorReporterDriver : public ErrorReporterDriverBase<DeviceType> {
}
};

#if !defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA)
template <typename DeviceType>
struct ErrorReporterDriverUseLambda
: public ErrorReporterDriverBase<DeviceType> {
Expand Down Expand Up @@ -178,7 +177,6 @@ struct ErrorReporterDriverUseLambda
driver_base::check_expectations(reporter_capacity, test_size);
}
};
#endif

#ifdef KOKKOS_ENABLE_OPENMP
struct ErrorReporterDriverNativeOpenMP
Expand All @@ -205,8 +203,7 @@ struct ErrorReporterDriverNativeOpenMP

// FIXME_MSVC MSVC just gets confused when using the base class in the
// KOKKOS_CLASS_LAMBDA
#if !defined(KOKKOS_COMPILER_MSVC) && \
(!defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA))
#ifndef KOKKOS_COMPILER_MSVC
// Runs TestErrorReporter with the lambda-based driver
// (ErrorReporterDriverUseLambda) instantiated for TEST_EXECSPACE.
TEST(TEST_CATEGORY, ErrorReporterViaLambda) {
TestErrorReporter<ErrorReporterDriverUseLambda<TEST_EXECSPACE>>();
}
Expand Down
14 changes: 0 additions & 14 deletions containers/unit_tests/TestOffsetView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ void test_offsetview_construction() {
ASSERT_EQ(ov.extent(0), 5u);
ASSERT_EQ(ov.extent(1), 5u);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
{
Kokkos::Experimental::OffsetView<Scalar*, Device> offsetV1("OneDOffsetView",
range0);
Expand Down Expand Up @@ -156,7 +155,6 @@ void test_offsetview_construction() {
}

ASSERT_EQ(OVResult, answer) << "Bad data found in OffsetView";
#endif

{
offset_view_type ovCopy(ov);
Expand Down Expand Up @@ -191,7 +189,6 @@ void test_offsetview_construction() {
range3_type rangePolicy3DZero(point3_type{{0, 0, 0}},
point3_type{{extent0, extent1, extent2}});

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
int view3DSum = 0;
Kokkos::parallel_reduce(
rangePolicy3DZero,
Expand All @@ -214,7 +211,6 @@ void test_offsetview_construction() {

ASSERT_EQ(view3DSum, offsetView3DSum)
<< "construction of OffsetView from View and begins array broken.";
#endif
}
view_type viewFromOV = ov.view();

Expand All @@ -239,7 +235,6 @@ void test_offsetview_construction() {
view_type aView("aView", ov.extent(0), ov.extent(1));
Kokkos::deep_copy(aView, ov);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
int sum = 0;
Kokkos::parallel_reduce(
rangePolicy2D,
Expand All @@ -249,7 +244,6 @@ void test_offsetview_construction() {
sum);

ASSERT_EQ(sum, 0) << "deep_copy(view, offsetView) broken.";
#endif
}

{ // test view to offsetview deep copy
Expand All @@ -258,7 +252,6 @@ void test_offsetview_construction() {
Kokkos::deep_copy(aView, 99);
Kokkos::deep_copy(ov, aView);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
int sum = 0;
Kokkos::parallel_reduce(
rangePolicy2D,
Expand All @@ -268,7 +261,6 @@ void test_offsetview_construction() {
sum);

ASSERT_EQ(sum, 0) << "deep_copy(offsetView, view) broken.";
#endif
}
}

Expand Down Expand Up @@ -521,7 +513,6 @@ void test_offsetview_subview() {
ASSERT_EQ(offsetSubview.begin(1), 0);
ASSERT_EQ(offsetSubview.end(1), 9);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
using range_type = Kokkos::MDRangePolicy<Device, Kokkos::Rank<2>,
Kokkos::IndexType<int> >;
using point_type = typename range_type::point_type;
Expand All @@ -547,7 +538,6 @@ void test_offsetview_subview() {
sum);

ASSERT_EQ(sum, 6 * (e0 - b0) * (e1 - b1));
#endif
}

// slice 2
Expand Down Expand Up @@ -644,7 +634,6 @@ void test_offsetview_subview() {
}
}

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
template <class InputIt, class T, class BinaryOperation>
KOKKOS_INLINE_FUNCTION T std_accumulate(InputIt first, InputIt last, T init,
BinaryOperation op) {
Expand Down Expand Up @@ -748,7 +737,6 @@ void test_offsetview_offsets_rank3() {

ASSERT_EQ(0, errors);
}
#endif

TEST(TEST_CATEGORY, offsetview_construction) {
test_offsetview_construction<int, TEST_EXECSPACE>();
Expand All @@ -767,7 +755,6 @@ TEST(TEST_CATEGORY, offsetview_subview) {
test_offsetview_subview<int, TEST_EXECSPACE>();
}

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
// Runs the rank-1 OffsetView offsets test for TEST_EXECSPACE.
TEST(TEST_CATEGORY, offsetview_offsets_rank1) {
test_offsetview_offsets_rank1<TEST_EXECSPACE>();
}
Expand All @@ -779,7 +766,6 @@ TEST(TEST_CATEGORY, offsetview_offsets_rank2) {
// Runs the rank-3 OffsetView offsets test for TEST_EXECSPACE.
TEST(TEST_CATEGORY, offsetview_offsets_rank3) {
test_offsetview_offsets_rank3<TEST_EXECSPACE>();
}
#endif

} // namespace Test

Expand Down
3 changes: 0 additions & 3 deletions containers/unit_tests/TestUnorderedMap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,6 @@ TEST(TEST_CATEGORY, UnorderedMap_shallow_copyable_on_device) {
ASSERT_EQ(1u, test_map_copy.m_map.size());
}

#if !defined(KOKKOS_ENABLE_CUDA) || \
(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_LAMBDA))
void test_unordered_map_device_capture() {
TestMapCopy::map_type map;

Expand All @@ -549,7 +547,6 @@ void test_unordered_map_device_capture() {
// Runs test_unordered_map_device_capture(), the lambda-capture check for
// UnorderedMap.
TEST(TEST_CATEGORY, UnorderedMap_lambda_capturable) {
test_unordered_map_device_capture();
}
#endif

/**
* @test This test ensures that an @ref UnorderedMap can be built
Expand Down
4 changes: 1 addition & 3 deletions core/perf_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,7 @@ endif()

kokkos_add_benchmark(PerformanceTest_Benchmark SOURCES ${BENCHMARK_SOURCES})

if(NOT KOKKOS_ENABLE_CUDA OR KOKKOS_ENABLE_CUDA_LAMBDA)
kokkos_add_benchmark(Benchmark_Atomic_MinMax SOURCES test_atomic_minmax_simple.cpp)
endif()
kokkos_add_benchmark(Benchmark_Atomic_MinMax SOURCES test_atomic_minmax_simple.cpp)

# FIXME_NVHPC
if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
Expand Down
2 changes: 0 additions & 2 deletions core/perf_test/PerfTest_ViewCopy_Raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

namespace Test {

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewDeepCopy_Raw<Kokkos::LayoutLeft, Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(10)
Expand All @@ -38,6 +37,5 @@ BENCHMARK(ViewDeepCopy_Raw<Kokkos::LayoutRight, Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(10)
->UseManualTime();
#endif

} // namespace Test
2 changes: 0 additions & 2 deletions core/perf_test/PerfTest_ViewFill_Raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

namespace Test {

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewFill_Raw<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
Expand All @@ -28,6 +27,5 @@ BENCHMARK(ViewFill_Raw<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();
#endif

} // namespace Test
2 changes: 0 additions & 2 deletions core/perf_test/PerfTest_ViewResize_Raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

namespace Test {

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewResize_NoInit_Raw<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
Expand All @@ -30,6 +29,5 @@ BENCHMARK(ViewResize_NoInit_Raw<Kokkos::LayoutRight>)
->Arg(N)
->UseManualTime()
->Iterations(R);
#endif

} // namespace Test
10 changes: 0 additions & 10 deletions core/src/Cuda/Kokkos_Cuda_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,16 +687,6 @@ void Cuda::print_configuration(std::ostream &os, bool /*verbose*/) const {
os << " KOKKOS_ENABLE_CUDA: yes\n";

os << "Cuda Options:\n";
os << " KOKKOS_ENABLE_CUDA_LAMBDA: ";
#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
os << "yes\n";
#else
os << "no\n";
#endif
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
os << " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC: ";
os << "yes\n";
#endif
os << " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE: ";
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
os << "yes\n";
Expand Down
36 changes: 36 additions & 0 deletions core/src/HIP/Kokkos_HIP_ZeroMemset.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#endif

#include <HIP/Kokkos_HIP_ZeroMemset.hpp>
#include <HIP/Kokkos_HIP_ParallelFor_Range.hpp>

namespace Kokkos {
namespace Impl {

// alternative to hipMemsetAsync, which sets the first `cnt` bytes of `dst` to 0
void zero_with_hip_kernel(const HIP& exec_space, void* dst, size_t cnt) {
Kokkos::parallel_for(
"Kokkos::ZeroMemset via parallel_for",
Kokkos::RangePolicy<Kokkos::HIP>(exec_space, 0, cnt),
KOKKOS_LAMBDA(size_t i) { static_cast<char*>(dst)[i] = 0; });
}

} // namespace Impl
} // namespace Kokkos
11 changes: 11 additions & 0 deletions core/src/HIP/Kokkos_HIP_ZeroMemset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,22 @@
namespace Kokkos {
namespace Impl {

// Zeroes the first `cnt` bytes of `dst` using a kernel submitted to
// `exec_space`. Used below in place of hipMemsetAsync, which sets the first
// `cnt` bytes of `dst` to the provided value.
void zero_with_hip_kernel(const HIP& exec_space, void* dst, size_t cnt);

// ZeroMemset specialization for the HIP backend: constructing it clears the
// first `cnt` bytes of `dst` on behalf of `exec_space`.
template <>
struct ZeroMemset<HIP> {
  ZeroMemset(const HIP& exec_space, void* dst, size_t cnt) {
#if !defined(KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY) || \
    !defined(KOKKOS_ARCH_AMD_GFX942)
    // Regular path: have the HIP runtime clear the buffer on the instance's
    // stream.
    KOKKOS_IMPL_HIP_SAFE_CALL(
        hipMemsetAsync(dst, 0, cnt, exec_space.hip_stream()));
#else
    // Unified-memory build for GFX942: in ROCm <= 6.2.0, hipMemsetAsync on a
    // host-allocated pointer returns an invalid value error, but accessing
    // the data via a GPU kernel works, so clear the buffer with a kernel.
    zero_with_hip_kernel(exec_space, dst, cnt);
#endif
  }
};

Expand Down
Loading

0 comments on commit 7e8140c

Please sign in to comment.