diff --git a/CMakeLists.txt b/CMakeLists.txt index 706d2e12..b92d11a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ cmake_minimum_required(VERSION 3.23) project(CARE LANGUAGES C CXX - VERSION 0.13.2) + VERSION 0.13.3) include(${PROJECT_SOURCE_DIR}/cmake/Setup.cmake) diff --git a/README.md b/README.md index 31080231..37644989 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [comment]: # (SPDX-License-Identifier: BSD-3-Clause) [comment]: # (#################################################################) -# CARE v0.13.2 +# CARE v0.13.3 CARE: CHAI and RAJA Extensions =============================== diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 4d2fffef..1b2382ce 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -12,6 +12,12 @@ in this file. The format of this file is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). +## [Version 0.13.3] - Release date 2024-07-31 + +### Fixed +- Replaced loop\_work alias with seq\_work (loop\_work was removed in RAJA v2024.02.2) +- Fixed CHUNKED loop macro implementations + ## [Version 0.13.2] - Release date 2024-07-29 ### Changed diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py index 63e5eb79..092950af 100644 --- a/docs/sphinx/conf.py +++ b/docs/sphinx/conf.py @@ -57,7 +57,7 @@ # The short X.Y version. version = '0.13' # The full version, including alpha/beta/rc tags. -release = '0.13.2' +release = '0.13.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/scripts/make_release_tarball.sh b/scripts/make_release_tarball.sh index c1fc4c4e..4ef78739 100755 --- a/scripts/make_release_tarball.sh +++ b/scripts/make_release_tarball.sh @@ -8,7 +8,7 @@ ############################################################################## TAR_CMD=gtar -VERSION=0.13.2 +VERSION=0.13.3 git archive --prefix=care-${VERSION}/ -o care-${VERSION}.tar HEAD 2> /dev/null diff --git a/src/care/DefaultMacros.h b/src/care/DefaultMacros.h index 6ca2b0b5..03df49ad 100644 --- a/src/care/DefaultMacros.h +++ b/src/care/DefaultMacros.h @@ -313,7 +313,7 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _ /// @arg[in] CHECK The variable to check that the start and end macros match /// //////////////////////////////////////////////////////////////////////////////// -#define CARE_CHECKED_CHUNKED_GPU_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) CARE_CHECKED_OPENMP_FOR_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) +#define CARE_CHECKED_CHUNKED_GPU_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) CARE_CHECKED_CHUNKED_OPENMP_FOR_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) #define CARE_CHECKED_CHUNKED_GPU_LOOP_END(CHECK) CARE_CHECKED_CHUNKED_OPENMP_FOR_LOOP_END(CHECK) @@ -366,12 +366,12 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _ /// @arg[in] CHECK The variable to check that the start and end macros match /// //////////////////////////////////////////////////////////////////////////////// -#define CARE_CHECKED_CHUNKED_PARALLEL_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) CARE_CHECKED_OPENMP_FOR_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) +#define CARE_CHECKED_CHUNKED_PARALLEL_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) CARE_CHECKED_CHUNKED_OPENMP_FOR_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) -#define CARE_CHECKED_CHUNKED_PARALLEL_LOOP_END(CHECK) 
CARE_CHECKED_OPENMP_FOR_LOOP_END(CHECK) +#define CARE_CHECKED_CHUNKED_PARALLEL_LOOP_END(CHECK) CARE_CHECKED_CHUNKED_OPENMP_FOR_LOOP_END(CHECK) #define CARE_CHECKED_CHUNKED_REDUCE_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) \ - CARE_CHECKED_PARALLEL_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) + CARE_CHECKED_CHUNKED_PARALLEL_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) #define CARE_CHECKED_CHUNKED_REDUCE_LOOP_END(CHECK) CARE_CHECKED_CHUNKED_PARALLEL_LOOP_END(CHECK) @@ -901,9 +901,9 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _ /// @arg[in] CHUNK_SIZE Maximum kernel size /// //////////////////////////////////////////////////////////////////////////////// -#define CARE_CHUNKED_LOOP(POLICY, INDEX, START_INDEX, END_INDEX, CHUNK_SIZE) CARE_CHECKED_LOOP_START(POLICY, INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, care_loop_chunked_check) +#define CARE_CHUNKED_LOOP(POLICY, INDEX, START_INDEX, END_INDEX, CHUNK_SIZE) CARE_CHECKED_CHUNKED_LOOP_START(POLICY, INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, care_loop_chunked_check) -#define CARE_CHUNKED_LOOP_END CARE_CHECKED_LOOP_END(care_loop_chunked_check) +#define CARE_CHUNKED_LOOP_END CARE_CHECKED_CHUNKED_LOOP_END(care_loop_chunked_check) //////////////////////////////////////////////////////////////////////////////// /// @@ -1041,7 +1041,7 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _ /// @arg[in] CHUNK_SIZE Maximum kernel size /// //////////////////////////////////////////////////////////////////////////////// -#define CARE_CHUNKED_GPU_LOOP(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE) CARE_CHECKED_GPU_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, care_gpu_loop_chunked_check) +#define CARE_CHUNKED_GPU_LOOP(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE) CARE_CHECKED_CHUNKED_GPU_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, care_gpu_loop_chunked_check) #define CARE_CHUNKED_GPU_LOOP_END 
CARE_CHECKED_CHUNKED_GPU_LOOP_END(care_gpu_loop_chunked_check) @@ -1149,7 +1149,7 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _ /// @arg[in] CHUNK_SIZE Maximum kernel size /// //////////////////////////////////////////////////////////////////////////////// -#define CARE_CHUNKED_WORK_LOOP(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE) CARE_CHECKED_PARALLEL_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, care_work_loop_chunked_check) +#define CARE_CHUNKED_WORK_LOOP(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE) CARE_CHECKED_CHUNKED_PARALLEL_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, care_work_loop_chunked_check) #define CARE_CHUNKED_WORK_LOOP_END CARE_CHECKED_CHUNKED_PARALLEL_LOOP_END(care_work_loop_chunked_check) @@ -1223,7 +1223,7 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _ /// @arg[in] CHUNK_SIZE Maximum kernel size /// //////////////////////////////////////////////////////////////////////////////// -#define CARE_CHUNKED_REDUCE_LOOP(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE) CARE_CHECKED_REDUCE_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, care_reduce_loop_chunked_check) +#define CARE_CHUNKED_REDUCE_LOOP(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE) CARE_CHECKED_CHUNKED_REDUCE_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, care_reduce_loop_chunked_check) #define CARE_CHUNKED_REDUCE_LOOP_END CARE_CHECKED_CHUNKED_REDUCE_LOOP_END(care_reduce_loop_chunked_check) diff --git a/src/care/LoopFuser.h b/src/care/LoopFuser.h index 49262f29..f770b7ca 100644 --- a/src/care/LoopFuser.h +++ b/src/care/LoopFuser.h @@ -26,6 +26,8 @@ constexpr double CARE_DEFAULT_PHASE = -FLT_MAX/2.0; #if CARE_ENABLE_LOOP_FUSER +#include "RAJA/RAJA.hpp" + #include "umpire/Allocator.hpp" #include "umpire/TypedAllocator.hpp" @@ -581,11 +583,11 @@ class LoopFuser : public FusedActions { RAJA::constant_stride_array_of_objects >; #else using workgroup_policy = RAJA::WorkGroupPolicy < - RAJA::loop_work, + 
RAJA::seq_work, RAJA::ordered, RAJA::ragged_array_of_objects >; using workgroup_ordered_policy = RAJA::WorkGroupPolicy < - RAJA::loop_work, + RAJA::seq_work, RAJA::ordered, RAJA::ragged_array_of_objects >; #endif diff --git a/test/TestForall.cpp b/test/TestForall.cpp index 8cadc3bc..fbfe0aed 100644 --- a/test/TestForall.cpp +++ b/test/TestForall.cpp @@ -79,7 +79,7 @@ CPU_TEST(forall, chunked_dynamic_policy) const int length = 10; care::host_device_ptr<int> temp(length, "temp"); - CARE_LOOP(care::Policy::sequential, i, 0, length, batch_size) { + CARE_CHUNKED_LOOP(care::Policy::sequential, i, 0, length, batch_size) { temp[i] = i; } CARE_CHUNKED_LOOP_END @@ -185,6 +185,5 @@ GPU_TEST(forall, chunked_dynamic_policy) temp.free(); } - #endif // CARE_GPUCC