forked from RussWong/CUDATutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCMakeLists.txt
252 lines (247 loc) · 5.03 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# Build configuration for the CUDATutorial examples (host C++ plus CUDA).
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project(CUDATutorial LANGUAGES CXX CUDA)

# The CUDA toolkit (10.0 or newer) must be downloaded and installed first.
# NOTE(review): the FindCUDA module is deprecated upstream in favour of
# FindCUDAToolkit (CMake 3.17+); it is kept here so the 3.8 minimum still holds.
find_package(CUDA 10.0 REQUIRED)
# Toolkit root; used further down to build the include and library paths.
set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})

# Compiler flags: have nvcc pass -Wall on to the host compiler.
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall")

# Target GPU architectures. Exactly one mechanism is used so that the
# architectures are never passed to nvcc twice:
#   * CMake <  3.18 does not know CMAKE_CUDA_ARCHITECTURES, so the -gencode
#     options (SASS + PTX per architecture) are appended to the flags by hand.
#   * CMake >= 3.18 derives the equivalent options from
#     CMAKE_CUDA_ARCHITECTURES; a value supplied by the user (for example
#     -DCMAKE_CUDA_ARCHITECTURES=89) takes precedence over the defaults.
set(cudatutorial_default_archs 70 75 80 86)
if(CMAKE_VERSION VERSION_LESS 3.18)
  foreach(arch IN LISTS cudatutorial_default_archs)
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${arch},code=\\\"sm_${arch},compute_${arch}\\\"")
  endforeach()
elseif(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES ${cudatutorial_default_archs})
endif()
# Built executables are written to <build dir>/bin.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")

# Header search paths shared by every target: the project root, the 15_gemv
# directory, and the CUDA toolkit headers.
set(COMMON_HEADER_DIRS "")
list(APPEND COMMON_HEADER_DIRS
  "${PROJECT_SOURCE_DIR}"
  "${PROJECT_SOURCE_DIR}/15_gemv"
  "${CUDA_PATH}/include"
)

# Library search path shared by every target: the CUDA toolkit lib64 directory.
set(COMMON_LIB_DIRS "${CUDA_PATH}/lib64")

# Register the header search paths for all targets defined below.
include_directories(${COMMON_HEADER_DIRS})

# Register the library search path for all targets defined below.
link_directories(${COMMON_LIB_DIRS})
# cudatutorial_add_example(<target> <source>...)
#
# Defines one tutorial executable and links it against the CUDA runtime
# libraries that every example in this file uses.
#
#   <target>     name of the executable to create
#   <source>...  one or more source files, relative to this directory
#
# The libraries are given as plain names so that CMake renders them in the
# form the platform linker expects; they are found through the library search
# path registered with link_directories() earlier in this file.
# PRIVATE is used because nothing links against an executable.
#
# Example:
#   cudatutorial_add_example(reduce_v0 5_reduce/reduce_v0.cu)
function(cudatutorial_add_example target)
  if(ARGC LESS 2)
    message(FATAL_ERROR
      "cudatutorial_add_example(${target}): at least one source file is required")
  endif()
  add_executable(${target} ${ARGN})
  target_link_libraries(${target} PRIVATE cudart cudadevrt)
endfunction()

# 1
cudatutorial_add_example(1_hellocuda 1_hellocuda.cu)

# 2
cudatutorial_add_example(2_indexing 2_indexing.cu)

# 3
cudatutorial_add_example(3_vectorAdd 3_vectorAdd/3_vectorAdd.cu)
cudatutorial_add_example(3_1_vectorAdd 3_vectorAdd/3_1_vectorized_vectorAdd.cu)

# 4
cudatutorial_add_example(4_device_query 4_device_query.cu)

# 5. reduce
cudatutorial_add_example(reduce_baseline 5_reduce/reduce_baseline.cu)
cudatutorial_add_example(reduce_v0 5_reduce/reduce_v0.cu)
cudatutorial_add_example(reduce_v1 5_reduce/reduce_v1.cu)
cudatutorial_add_example(reduce_v2 5_reduce/reduce_v2.cu)
cudatutorial_add_example(reduce_v3 5_reduce/reduce_v3.cu)
cudatutorial_add_example(reduce_v4 5_reduce/reduce_v4.cu)
cudatutorial_add_example(reduce_v5 5_reduce/reduce_v5.cu)
cudatutorial_add_example(reduce_v6 5_reduce/reduce_v6.cu)

# 6
cudatutorial_add_example(6_warp_level_reduce 6_warp_level_reduce.cu)

# 7
cudatutorial_add_example(7_histogram 7_histogram.cu)
cudatutorial_add_example(7_1_histogram 7_1_histogram.cu)

# 8
cudatutorial_add_example(8_copy_if 8_copy_if.cu)

# 9
cudatutorial_add_example(9_gelu 9_gelu.cu)

# 10 fp32
cudatutorial_add_example(10_fp32
  10_fused_bias_mask_scale_and_add/10_fused_bias_mask_scale_and_add_fp32.cu)

# 10 fp16
cudatutorial_add_example(10_fp16
  10_fused_bias_mask_scale_and_add/10_fused_bias_mask_scale_and_add_fp16.cu)

# 11
cudatutorial_add_example(11_softmax 11_softmax.cu)

# 12
cudatutorial_add_example(12_measure_GPU_peak_perf 12_measure_GPU_peak_perf.cu)

# 13 CUDA stream
cudatutorial_add_example(13_1_CUDAstream
  13_CUDAstream/13_1_CUDAstream_kernels_overlap.cu)
cudatutorial_add_example(13_2_CUDAstream
  13_CUDAstream/13_2_CUDAstream_kernel_copy_overlap.cu)

# 14
cudatutorial_add_example(14_quantize 14_quantize.cu)

# 15 gemv
cudatutorial_add_example(15_1_gemv 15_gemv/15_1_fp32_fp16_gemv.cu)
cudatutorial_add_example(15_2_gemv 15_gemv/15_2_fp32_fp16_gemv.cu)

# 16 dropout
cudatutorial_add_example(16_dropout 16_fused_dropout/16_dropout.cu)