From f056a5daf2455b04bab142a9bbf5017e3f8a30fa Mon Sep 17 00:00:00 2001 From: mutsuki Date: Wed, 10 Nov 2021 18:41:42 +0900 Subject: [PATCH 1/5] Add `CULiP_cublas*hemm` --- include/CULiP/cublas.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/CULiP/cublas.hpp b/include/CULiP/cublas.hpp index d4f8d51..f576931 100644 --- a/include/CULiP/cublas.hpp +++ b/include/CULiP/cublas.hpp @@ -57,6 +57,8 @@ enum CULiP_cublas_control_t { CULiP_cublasStrsmBatched, CULiP_cublasCtrsmBatched, CULiP_cublasZtrsmBatched, + CULiP_cublasChemm, + CULiP_cublasZhemm, CULiP_cublas_enum_length }; From 4f3c6aaaa2bd2e338cef537ff653a538826fbe8d Mon Sep 17 00:00:00 2001 From: mutsuki Date: Wed, 10 Nov 2021 18:42:08 +0900 Subject: [PATCH 2/5] Add CULiP hemm --- src/cublas.cu | 20 +++++++++++++++++++ src/cublas.hemm.template.h | 40 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 src/cublas.hemm.template.h diff --git a/src/cublas.cu b/src/cublas.cu index 1aff72e..d43b45d 100644 --- a/src/cublas.cu +++ b/src/cublas.cu @@ -779,4 +779,24 @@ cublasStatus_t cublasGemmStridedBatchedEx(cublasHandle_t handle, #undef CULIP_FUNC_NAME #undef CULIP_FUNC_ENUM_NAME #undef CULIP_TYPE + +// ------------------------------------------------- +// HEMM +// ------------------------------------------------- + +#define CULIP_FUNC_NAME cublasChemm +#define CULIP_FUNC_ENUM_NAME CULiP_cublasChemm +#define CULIP_TYPE cuComplex +#include "cublas.hemm.template.h" +#undef CULIP_FUNC_NAME +#undef CULIP_FUNC_ENUM_NAME +#undef CULIP_TYPE + +#define CULIP_FUNC_NAME cublasZhemm +#define CULIP_FUNC_ENUM_NAME CULiP_cublasZhemm +#define CULIP_TYPE cuDoubleComplex +#include "cublas.hemm.template.h" +#undef CULIP_FUNC_NAME +#undef CULIP_FUNC_ENUM_NAME +#undef CULIP_TYPE } // extern "C" diff --git a/src/cublas.hemm.template.h b/src/cublas.hemm.template.h new file mode 100644 index 0000000..0e53256 --- /dev/null +++ b/src/cublas.hemm.template.h @@ -0,0 +1,40 @@ +cublasStatus_t CULIP_FUNC_NAME(cublasHandle_t handle, + cublasSideMode_t side, cublasFillMode_t uplo, + int m, int n, + const CULIP_TYPE *alpha, const CULIP_TYPE *A, int lda, + const CULIP_TYPE *B, int ldb, const CULIP_TYPE *beta, CULIP_TYPE *C, + int ldc) { + const int profiling_flag = (CULiP_profiling_control_array[CULIP_FUNC_ENUM_NAME] == 0) && CULiP_is_profiling_enabled(CULIP_CUBLAS_DISABLE_ENV_NAME); + + // Get the function pointer + cublasStatus_t (*cublas_lib_func)(cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int, const CULIP_TYPE*, const CULIP_TYPE*, int, const CULIP_TYPE*, int, const CULIP_TYPE*, CULIP_TYPE*, int); + *(void**)(&cublas_lib_func) = CULiP_get_function_pointer(CULIP_CUBLAS_LIBRARY_NAME, CULIP_CUBLAS_ENV_NAME, __func__, &CULiP_cublas_lib_handle_cache); + + cudaStream_t cuda_stream; + struct CULiP_profile_result profile_result; + + if (profiling_flag) { + // Get current cuda stream + cublasGetStream(handle, &cuda_stream); + + // Profile result structure + snprintf(profile_result.function_name, profile_result.function_name_length - 1, "%s-%s%s-m%d-n%d", __func__, CULiP_get_cublasSideMode_t_string(side), CULiP_get_cublasFillMode_t_string(uplo), m, n); + + // Record start rimestamp + CULiP_launch_function(cuda_stream, &CULiP_record_timestamp, (void*)&profile_result.start_timestamp); + } + + // Call the function + const cublasStatus_t result = (*cublas_lib_func)(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); + CULIBPROFILER_DEBUG_PRINT(printf("[CULiP Debug][%s] executed\n", __func__)); + + if (profiling_flag) { + // Record end rimestamp + CULiP_launch_function(cuda_stream, &CULiP_record_timestamp, (void*)&profile_result.end_timestamp); + + // Print result + CULiP_launch_function(cuda_stream, &CULiP_print_profile_result, (void*)&profile_result); + } + + return result; +} From 230a41215c0f5d88d1dbb18ecfee8d11f2ad3fe9 Mon Sep 17 00:00:00 2001 From: mutsuki Date: Wed, 10 Nov 2021 18:42:24 +0900 Subject: [PATCH 3/5] Add tests for hemm --- tests/cublas_test.cu | 60 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tests/cublas_test.cu b/tests/cublas_test.cu index e6acbb5..764976f 100644 --- a/tests/cublas_test.cu +++ b/tests/cublas_test.cu @@ -385,6 +385,29 @@ GEMM_OP_TRSM_BATCHED(D, double); GEMM_OP_TRSM_BATCHED(C, cuComplex); GEMM_OP_TRSM_BATCHED(Z, cuDoubleComplex); +// ----------------------------------------------------- +// hemm +// ----------------------------------------------------- +template +cublasStatus_t hemm(cublasHandle_t handle, + cublasSideMode_t side, cublasFillMode_t uplo, + int m, int n, + const T *alpha, const T *A, int lda, + const T *B, int ldb, const T *beta, T *C, + int ldc); +#define GEMM_OP_HEMM(short_type, type)\ +template <>\ +cublasStatus_t hemm(cublasHandle_t handle,\ + cublasSideMode_t side, cublasFillMode_t uplo, \ + int m, int n, \ + const type *alpha, const type *A, int lda,\ + const type *B, int ldb, const type *beta, type *C,\ + int ldc) {\ + return cublas##short_type##hemm(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);\ +} +GEMM_OP_HEMM(C, cuComplex); +GEMM_OP_HEMM(Z, cuDoubleComplex); + // ------------- // Gemm3m // ------------- @@ -846,6 +869,40 @@ void trsm_batched_test() { cudaFreeHost(mat_b_array); } +template +void hemm_test() { + const std::size_t n = 1lu << 10; + const auto alpha = convert(1); + const auto beta = convert(0); + + T* mat_a; + T* mat_b; + T* mat_c; + + cudaMalloc(&mat_a, sizeof(T) * n * n); + cudaMalloc(&mat_b, sizeof(T) * n * n); + cudaMalloc(&mat_c, sizeof(T) * n * n); + + cublasHandle_t cublas_handle; + cublasCreate(&cublas_handle); + + hemm( + cublas_handle, + CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, + n, n, + &alpha, + mat_a, n, + mat_b, n, + &beta, + mat_c, n + ); + + cublasDestroy(cublas_handle); + cudaFree(mat_a); + cudaFree(mat_b); + cudaFree(mat_c); +} + template void gemm3m_test() { const std::size_t n = 1lu << 10; @@ -959,6 +1016,9 @@ void test_all() { trsm_batched_test(); trsm_batched_test(); + hemm_test(); + hemm_test(); + gemm3m_test(); gemm3m_test(); } From f7f0f776dacef001f28b616dd2cc9ab780a2ff07 Mon Sep 17 00:00:00 2001 From: mutsuki Date: Wed, 10 Nov 2021 18:44:57 +0900 Subject: [PATCH 4/5] Add hemm to supported functions list --- docs/cublas.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/cublas.md b/docs/cublas.md index c52b6d0..72fc785 100644 --- a/docs/cublas.md +++ b/docs/cublas.md @@ -21,6 +21,11 @@ - `cublasCgemmStridedBatched` - `cublasZgemmStridedBatched` - `cublasGemmStridedExBatched` +- SYMM + - `cublasDsymm` + - `cublasSsymm` + - `cublasCsymm` + - `cublasZsymm` - GEMM3M - `cublasHgemm3m` - `cublasCgemm3m` @@ -64,8 +69,6 @@ - `cublasStrsmBatched` - `cublasCtrsmBatched` - `cublasZtrsmBatched` -- SYMM - - `cublasDsymm` - - `cublasSsymm` - - `cublasCsymm` - - `cublasZsymm` +- HEMM + - `cublasChemm` + - `cublasZhemm` From 3e782d0c3dae1f25b3743d623953b906f99aa588 Mon Sep 17 00:00:00 2001 From: mutsuki Date: Wed, 10 Nov 2021 18:48:55 +0900 Subject: [PATCH 5/5] Update hemm result string format --- src/cublas.hemm.template.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cublas.hemm.template.h b/src/cublas.hemm.template.h index 0e53256..e8b96db 100644 --- a/src/cublas.hemm.template.h +++ b/src/cublas.hemm.template.h @@ -18,7 +18,7 @@ cublasStatus_t CULIP_FUNC_NAME(cublasHandle_t handle, cublasGetStream(handle, &cuda_stream); // Profile result structure - snprintf(profile_result.function_name, profile_result.function_name_length - 1, "%s-%s%s-m%d-n%d", __func__, CULiP_get_cublasSideMode_t_string(side), CULiP_get_cublasFillMode_t_string(uplo), m, n); + snprintf(profile_result.function_name, profile_result.function_name_length - 1, "%s-%s-%s-m%d-n%d", __func__, CULiP_get_cublasSideMode_t_string(side), CULiP_get_cublasFillMode_t_string(uplo), m, n); // Record start rimestamp CULiP_launch_function(cuda_stream, &CULiP_record_timestamp, (void*)&profile_result.start_timestamp);