separate hipBLAS compiler, update MMV_Y, move CXX/CC print

separate hipBLAS compiler, update MMV_Y value, move the section that prints CXX and CC compiler name
YellowRoseCx · Nov 5, 2023 · 53bee88 · 53bee88
1 parent a3d63f4
commit 53bee88
Showing 1 changed file with 10 additions and 14 deletions.
diff --git a/Makefile b/Makefile
@@ -196,16 +196,13 @@ endif # LLAMA_CUBLAS
 
 ifdef LLAMA_HIPBLAS
 	ROCM_PATH	?= /opt/rocm
-	CC         := $(ROCM_PATH)/llvm/bin/clang
-	CXX        := $(ROCM_PATH)/llvm/bin/clang++
+	HCC         := $(ROCM_PATH)/llvm/bin/clang
+	HCXX        := $(ROCM_PATH)/llvm/bin/clang++
 	GPU_TARGETS ?= gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
 	LLAMA_CUDA_DMMV_X ?= 32
-	LLAMA_CUDA_MMV_Y ?= 2
+	LLAMA_CUDA_MMV_Y ?= 1
 	LLAMA_CUDA_KQUANTS_ITER ?= 2
 	HIPFLAGS   += -DGGML_USE_HIPBLAS -DGGML_USE_CUBLAS $(shell $(ROCM_PATH)/bin/hipconfig -C)
-ifdef LLAMA_CUDA_FORCE_DMMV
-	HIPFLAGS 	+= -DGGML_CUDA_FORCE_DMMV
-endif # LLAMA_CUDA_FORCE_DMMV
 	HIPLDFLAGS    += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib -lhipblas -lamdhip64 -lrocblas
 	HIP_OBJS       += ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
 ggml-cuda.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
@@ -221,11 +218,11 @@ ggml_v2-cuda-legacy.o: HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) \
                         -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) \
                         -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
-	$(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
+	$(HCXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
 ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h
-	$(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
+	$(HCXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
 ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h
-	$(CXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
+	$(HCXX) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
 endif # LLAMA_HIPBLAS
 
 
@@ -259,8 +256,6 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
 	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
 endif
 
-CCV := $(shell $(CC) --version | head -n 1)
-CXXV := $(shell $(CXX) --version | head -n 1)
 
 DEFAULT_BUILD =
 FAILSAFE_BUILD =
@@ -281,7 +276,7 @@ ifeq ($(OS),Windows_NT)
 		CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o [email protected] $(CUBLASLD_FLAGS) $(LDFLAGS)
 	endif
 	ifdef LLAMA_HIPBLAS
-		HIPBLAS_BUILD = $(CXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o [email protected] $(HIPLDFLAGS) $(LDFLAGS)
+		HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o [email protected] $(HIPLDFLAGS) $(LDFLAGS)
 	endif
 else
 	DEFAULT_BUILD = $(CXX) $(CXXFLAGS)  $^ -shared -o [email protected] $(LDFLAGS)
@@ -300,7 +295,7 @@ else
 		CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o [email protected] $(CUBLASLD_FLAGS) $(LDFLAGS)
 	endif
 	ifdef LLAMA_HIPBLAS
-		HIPBLAS_BUILD = $(CXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o [email protected] $(HIPLDFLAGS) $(LDFLAGS)
+		HIPBLAS_BUILD = $(HCXX) $(CXXFLAGS) $(HIPFLAGS) $^ -shared -o [email protected] $(HIPLDFLAGS) $(LDFLAGS)
 	endif
 
 	ifndef LLAMA_OPENBLAS
@@ -314,7 +309,8 @@ else
 	endif
 endif
 
-
+CCV := $(shell $(CC) --version | head -n 1)
+CXXV := $(shell $(CXX) --version | head -n 1)
 
 #
 # Print build information