-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathMakefile
119 lines (88 loc) · 3.94 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Target GPU architectures.
# The "change to ..." notes sit on their own lines on purpose: make keeps the
# whitespace between a value and a trailing `#`, so an inline comment would
# leave a trailing space inside NVIDIA_ARCH / AMD_ARCH.
# GPUARCH is consumed by NVCCFLAGS; its compute_70/sm_70 pair is spelled out
# literally and is not derived from NVIDIA_ARCH.
GPUARCH=-m64 -gencode arch=compute_70,code=sm_70
# change to the target NVIDIA GPU arch
NVIDIA_ARCH=sm_70
# change to the target AMD GPU arch
AMD_ARCH=gfx908
# Benchmark program names. MAIN picks benchmarks/$(MAIN).cc and is part of the
# name of every executable the rules write. MAIN1 and MAIN2 are not referenced
# by any rule in the part of the file reviewed here.
MAIN=Benchmark_su3
MAIN1=Benchmark_su3_debug
MAIN2=Benchmark_su3_mapper
# Translation units handed to the compiler by every build target: the selected
# benchmark driver first, then the Grid sources compiled together with it.
SRCS  = benchmarks/$(MAIN).cc
SRCS += Grid/util/Init.cc
SRCS += Grid/communicator/SharedMemory.cc
SRCS += Grid/communicator/SharedMemoryNone.cc
SRCS += Grid/allocator/AlignedAllocator.cc
SRCS += Grid/communicator/Communicator_base.cc
SRCS += Grid/communicator/Communicator_none.cc
SRCS += Grid/log/Log.cc
# Header search path: the current directory plus $(CUDA_ROOT)/include.
# CUDA_ROOT is not assigned in the part of the file reviewed here, so it has
# to come from the environment or the make command line.
INCLUDES  = -I./
INCLUDES += -I$(CUDA_ROOT)/include
# Library search path: $(CUDA_ROOT)/lib64.
LDFLAGS = -L$(CUDA_ROOT)/lib64
# Preprocessor macros for Grid; $(DEFS) appears on every compile line below.
DEFS  = -DGEN
DEFS += -DGEN_SIMD_WIDTH=16
DEFS += -DHAVE_MALLOC_H
DEFS += -DGRID_COMMS_NONE
DEFS += -DGRID_DEFAULT_PRECISION_DOUBLE
DEFS += -DRNG_RANLUX
# Uncomment the next line to spot check for correctness. If SPOT_CHECK is
# specified with a number, that site index will be checked. Otherwise, site 1
# will be checked.
#DEFS += -DSPOT_CHECK=4
# OpenMP target-offload switches, passed by the OpenMP build rules.
#   OMPTARGET_MANAGED uses cudaMallocManaged or hipMallocManaged as the memory allocator
#   OMPTARGET_MAP     uses manual data copying using the "map" clauses
#   OMPTARGET_UVM     uses the built-in unified shared memory support -- NOT WORKING
# NOTE(review): MAP and MANAGED are both switched on below; confirm that the
# combination is intended.
OMPFLAGS  = -DOMPTARGET
OMPFLAGS += -DOMPTARGET_MAP
OMPFLAGS += -DOMPTARGET_MANAGED
#OMPFLAGS += -DOMPTARGET_UVM
## xlC
# Compiles all of $(SRCS) in a single xlC_r invocation and writes
# xl-$(MAIN).x. The recipe never creates a file called `xl`, so the target is
# declared phony: a stray file or directory of that name can then never make
# the build look up to date.
# This is the first rule in the part of the file reviewed here, which makes it
# what a bare `make` builds.
# NOTE(review): -lcudart is placed ahead of the sources on the command line;
# confirm the link still resolves on toolchains that link with --as-needed.
XLCXXFLAGS=-qsmp=omp -qoffload -Ofast -std=c++11 -lcudart
.PHONY: xl
xl: $(SRCS)
	xlC_r $(INCLUDES) $(LDFLAGS) $(XLCXXFLAGS) $(OMPFLAGS) $(DEFS) $(SRCS) -o xl-$(MAIN).x
## icpx
# Compiles all of $(SRCS) with icpx, offloading OpenMP to the spir64 target,
# and writes intel-$(MAIN).x. Declared phony because no file named `intel` is
# ever produced; without that, an existing file or directory called `intel`
# could suppress the build.
ICPXFLAGS=-std=c++14 -O2 -fiopenmp -fopenmp-targets=spir64
.PHONY: intel
intel: $(SRCS)
	icpx $(INCLUDES) $(LDFLAGS) $(ICPXFLAGS) $(OMPFLAGS) $(DEFS) $(SRCS) -o intel-$(MAIN).x
## PGI/NVHPC for OpenACC
# Compiles all of $(SRCS) with nvc++ in OpenACC mode and writes
# nvacc-$(MAIN).x. $(OMPFLAGS) is deliberately absent from this command line,
# as in the original rule.
# The compute capability (cc70) is hard-coded in -ta= and does not follow
# NVIDIA_ARCH.
# Declared phony because the recipe never creates a file named `nv-acc`.
NVACCFLAGS=-fast --c++14 -acc -Mnollvm -Minfo=accel -ta=tesla:cc70,managed -Mlarge_arrays --no_exceptions
.PHONY: nv-acc
nv-acc: $(SRCS)
	nvc++ $(INCLUDES) $(LDFLAGS) $(NVACCFLAGS) $(DEFS) $(SRCS) -o nvacc-$(MAIN).x
## Main LLVM/Clang
# Compiles all of $(SRCS) with clang++, offloading OpenMP to
# nvptx64-nvidia-cuda, and writes llvm-$(MAIN).x.
# Declared phony because the recipe never creates a file named `clang-nvidia`.
# NOTE(review): -lcudart is placed ahead of the sources on the command line;
# confirm the link still resolves on toolchains that link with --as-needed.
CXXFLAGS=-std=c++14 -g -fopenmp -fopenmp-cuda-mode -O3 -fopenmp-targets=nvptx64-nvidia-cuda -lcudart
.PHONY: clang-nvidia
clang-nvidia: $(SRCS)
	clang++ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) $(OMPFLAGS) $(DEFS) $(SRCS) -o llvm-$(MAIN).x
# Optional extras for CXXFLAGS, kept commented out exactly as in the original:
##CXXFLAGS += -Xclang -fdump-record-layouts
#CXXFLAGS += -DOMPTARGET_UVM
#CXXFLAGS +=-DOMPTARGET_MANAGED
##CXXFLAGS += -DVECTOR_LOOPS
##LLVMFLAGS = -S -emit-llvm
##CXXFLAGS += -DVECTOR_LOOPS -Xclang -fdump-record-layouts-simple
##CXXFLAGS += -DDEBUG
##AOMP Clang - NVIDIA V100 GPU
#CXX=clang++
#CXXFLAGS = -std=c++14 -O3 -target x86_64-pc-linux-gnu -fopenmp -fopenmp-version=50
#CXXFLAGS += -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=$(NVIDIA_ARCH)
#CXXFLAGS += -DOMPTARGET
## AOMP Clang - AMD GPU
# Compiles all of $(SRCS) with clang++, offloading OpenMP to amdgcn-amd-amdhsa
# for the architecture named by AMD_ARCH, and writes amd-$(MAIN).x.
# Declared phony because the recipe never creates a file named `clang-amd`.
AMDFLAGS = -std=c++14 -O3 -target x86_64-pc-linux-gnu -fopenmp \
           -fopenmp-targets=amdgcn-amd-amdhsa \
           -Xopenmp-target=amdgcn-amd-amdhsa \
           -march=$(AMD_ARCH) \
           -fopenmp-cuda-mode
.PHONY: clang-amd
clang-amd: $(SRCS)
	clang++ $(INCLUDES) $(LDFLAGS) $(AMDFLAGS) $(OMPFLAGS) $(DEFS) $(SRCS) -o amd-$(MAIN).x
## NVCC
# Compiles all of $(SRCS) as CUDA (--x cu) with nvcc, using g++ as the host
# compiler (-ccbin), and writes cuda-$(MAIN).x. The GPU code generation
# options come from GPUARCH. $(OMPFLAGS) is deliberately absent from this
# command line, as in the original rule.
# Declared phony because the recipe never creates a file named `nvcc`.
NVCCFLAGS=--x cu ${GPUARCH} -I. -ccbin g++ -rdc=true --expt-extended-lambda --expt-relaxed-constexpr -std=c++14
.PHONY: nvcc
nvcc: $(SRCS)
	nvcc $(NVCCFLAGS) $(INCLUDES) $(LDFLAGS) $(DEFS) $(SRCS) -o cuda-$(MAIN).x
## NVC++ for OpenMP offloading
# Compiles all of $(SRCS) with nvc++ in OpenMP GPU mode (-mp=gpu) and writes
# nvhpc-$(MAIN).x.
# The compute capability (cc70) is hard-coded in -gpu= and does not follow
# NVIDIA_ARCH.
# Declared phony because the recipe never creates a file named `nv-omp`.
NVOMPFLAGS = -std=c++14 -mp=gpu -gpu=cc70 -cuda -Minfo=all
.PHONY: nv-omp
nv-omp: $(SRCS)
	nvc++ $(INCLUDES) $(LDFLAGS) $(NVOMPFLAGS) $(OMPFLAGS) $(DEFS) $(SRCS) -o nvhpc-$(MAIN).x
# Earlier CXX/CXXFLAGS form of this configuration, kept commented out:
#CXX=nvc++
#CXXFLAGS=-std=c++14 -mp=gpu -gpu=cc70 -cuda -DOMPTARGET_MANAGED -DOMPTARGET -Minfo
## GCC
# Compiles all of $(SRCS) with g++, offloading OpenMP to nvptx-none, and
# writes gcc-$(MAIN).x.
# Declared phony because the recipe never creates a file named `gcc-omp`.
# NOTE(review): -lcudart is placed ahead of the sources on the command line;
# confirm the link still resolves on toolchains that link with --as-needed.
GCCFLAGS=-std=c++14 -O3 -fopenmp -foffload=nvptx-none -lcudart
.PHONY: gcc-omp
gcc-omp: $(SRCS)
	g++ $(INCLUDES) $(LDFLAGS) $(GCCFLAGS) $(OMPFLAGS) $(DEFS) $(SRCS) -o gcc-$(MAIN).x
# Earlier CXX/CXXFLAGS form of this configuration, kept commented out:
#CXX=g++
#CXXFLAGS=-std=c++14 -O3 -fopenmp -foffload=nvptx-none -DOMPTARGET #-DOMPTARGET_MANAGED -DDEBUG -lcudart
## CRAY CCE
# Compiles all of $(SRCS) with the Cray `CC` driver, offloading OpenMP to
# nvptx64 for the architecture named by NVIDIA_ARCH, and writes
# cray-$(MAIN).x.
# Declared phony because the recipe never creates a file named `cray-omp`.
CRAYFLAGS = -std=c++14 -fopenmp -fopenmp-targets=nvptx64 -Xopenmp-target -march=$(NVIDIA_ARCH)
.PHONY: cray-omp
cray-omp: $(SRCS)
	CC $(INCLUDES) $(LDFLAGS) $(CRAYFLAGS) $(OMPFLAGS) $(DEFS) $(SRCS) -o cray-$(MAIN).x
# Removes the executables (*.x) and any object files (*.o) from the current
# directory, listing each file as it is deleted.
# -f keeps `make clean` from failing when one of the globs matches nothing
# (no rule in the part of the file reviewed here writes *.o files).
# Phony so that a file named `clean` cannot disable the target.
.PHONY: clean
clean:
	rm -fv *.x *.o