diff --git a/.gitignore b/.gitignore index 11652fae..e54e7d78 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,8 @@ ckp tests/samples/matrixMul tests/samples/bandwidthTest tests/samples/nbody +*.code-workspace + # Auto-generated by rpcgen cpu/cpu_rpc_prot_svc_mod.c diff --git a/tests/gdb_client_cmds b/tests/gdb_client_cmds index 825cfdd6..5fe44184 100644 --- a/tests/gdb_client_cmds +++ b/tests/gdb_client_cmds @@ -1,3 +1,6 @@ -python gdb.execute("set environment CRICKET_NOHASH=yes") +python gdb.execute("set environment CRICKET_RPCID=3") python gdb.execute("set environment REMOTE_GPU_ADDRESS=localhost") -python gdb.execute("set environment LD_PRELOAD=../../cpu/cricket-client.so") \ No newline at end of file +python gdb.execute("set environment LD_PRELOAD=../../cpu/cricket-client.so") +#python gdb.execute('set environment LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.5:/cricket/cpu/cricket-client.so') +#python gdb.execute("set environment ASAN_OPTIONS=protect_shadow_gap=0) +#python gdb.execute("set environment LSAN_OPTIONS=fast_unwind_on_malloc=0") diff --git a/tests/samples/.gitignore b/tests/samples/.gitignore index 33a20c36..64e04dc5 100644 --- a/tests/samples/.gitignore +++ b/tests/samples/.gitignore @@ -1,2 +1,3 @@ +cudnn-samples samples-bin samples diff --git a/tests/test_apps/Makefile b/tests/test_apps/Makefile index dafae5a3..be3d4c34 100644 --- a/tests/test_apps/Makefile +++ b/tests/test_apps/Makefile @@ -6,11 +6,11 @@ HOST_CFLAGS = -Wall -std=gnu99 CC = nvcc -ccbin g++ ARCH = sm_61 -CFLAGS = -arch=$(ARCH) -cudart shared +DEBUG_FLAGS = -g -G +CFLAGS = -arch=$(ARCH) -cudart shared $(DEBUG_FLAGS) #CFLAGS = -arch=$(ARCH) LD = nvcc -ccbin g++ -LDFLAGS = -arch=$(ARCH) -cudart shared -DEBUG_FLAGS = #-g -G +LDFLAGS = -arch=$(ARCH) -cudart shared $(DEBUG_FLAGS) #LDFLAGS = -lcuda -arch=$(ARCH) TEST_CPU_BIN = cpu.testapp TEST_CPU_O = test_cpu.o @@ -20,6 +20,9 @@ TEST_KERNEL_BIN = kernel.testapp TEST_KERNEL_O = test_kernel.o BINARY = cricket.testapp +TEST_CUDNN_BACKEND_BIN = cudnn-backend.testapp +TEST_CUDNN_BACKEND_O = cudnn-backend-test.o + TEST_KERNEL_LIB_O = test_kernel_lib.o TEST_KERNEL_LIB = test_kernel.so TEST_KERNEL_LIB_CALL_O = test_kernel_call.o @@ -57,6 +60,9 @@ $(TEST_KERNEL_BIN) : $(TEST_KERNEL_O) $(BINARY) : $(FILES) $(LD) $(LDFLAGS) -o $@ $^ + +$(TEST_CUDNN_BACKEND_BIN) : $(TEST_CUDNN_BACKEND_O) + $(LD) $(LDFLAGS) -o $@ $^ -lcudnn $(LIBCUDA_OBJ) : $(LIBCUDA_OBJ:.o=.c) $(HOST_CC) -c -fpic -o $@ $< $(LIBCUDA_LIBS) @@ -78,6 +84,6 @@ $(LIBCUDA_WRAPPER) : $(LIBCUDA_OBJ) clean : - rm -f *.elf *.hex *.o *.d .depend *~ $(BINARY) $(LIBCUDA_WRAPPER) $(TEST_CPU_BIN) $(TEST_API_BIN) $(TEST_KERNEL_BIN) $(TEST_KERNEL_LIB) $(TEST_KERNEL_LIB_CALL) + rm -f *.elf *.hex *.o *.d .depend *~ $(BINARY) $(LIBCUDA_WRAPPER) $(TEST_CPU_BIN) $(TEST_API_BIN) $(TEST_KERNEL_BIN) $(TEST_KERNEL_LIB) $(TEST_KERNEL_LIB_CALL) $(TEST_CUDNN_BACKEND_BIN) diff --git a/tests/test_apps/cudnn-backend-test.c b/tests/test_apps/cudnn-backend-test.c new file mode 100644 index 00000000..8e58d89b --- /dev/null +++ b/tests/test_apps/cudnn-backend-test.c @@ -0,0 +1,138 @@ +#include +#include +#include +#include + + +int main(int argc, char** argv) { + printf("Hello World\n"); + cudnnHandle_t cudnn; + cudnnCreate(&cudnn); + + printf("cudnn created\n"); + + cudnnBackendDescriptor_t xDesc; + cudnnBackendCreateDescriptor(CUDNN_BACKEND_TENSOR_DESCRIPTOR, &xDesc); + size_t xId = 'x'; + cudnnBackendSetAttribute(xDesc, CUDNN_ATTR_TENSOR_UNIQUE_ID, + CUDNN_TYPE_INT64, 1, &xId); + cudnnDataType_t dtype = CUDNN_DATA_FLOAT; + cudnnBackendSetAttribute(xDesc, CUDNN_ATTR_TENSOR_DATA_TYPE, + CUDNN_TYPE_DATA_TYPE, 1, &dtype); + size_t alignment = 4; + cudnnBackendSetAttribute(xDesc, CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT, + CUDNN_TYPE_INT64, 1, &alignment); + size_t tensor_dims = 3; + cudnnBackendSetAttribute(xDesc, CUDNN_ATTR_TENSOR_DIMENSIONS, + CUDNN_TYPE_INT64, 1, &tensor_dims); + size_t tensor_stride = 1; + cudnnBackendSetAttribute(xDesc, CUDNN_ATTR_TENSOR_STRIDES, + CUDNN_TYPE_INT64, 1, &tensor_stride); + cudnnBackendFinalize(xDesc); + + cudnnBackendDescriptor_t outDesc; + cudnnBackendCreateDescriptor(CUDNN_BACKEND_TENSOR_DESCRIPTOR, &outDesc); + size_t yId = 'y'; + cudnnBackendSetAttribute(outDesc, CUDNN_ATTR_TENSOR_UNIQUE_ID, + CUDNN_TYPE_INT64, 1, &id); + cudnnDataType_t dtype = CUDNN_DATA_FLOAT; + cudnnBackendSetAttribute(outDesc, CUDNN_ATTR_TENSOR_DATA_TYPE, + CUDNN_TYPE_DATA_TYPE, 1, &dtype); + cudnnBackendSetAttribute(outDesc, CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT, + CUDNN_TYPE_INT64, 1, &alignment); + cudnnBackendSetAttribute(outDesc, CUDNN_ATTR_TENSOR_DIMENSIONS, + CUDNN_TYPE_INT64, 1, &tensor_dims); + cudnnBackendSetAttribute(outDesc, CUDNN_ATTR_TENSOR_STRIDES, + CUDNN_TYPE_INT64, 1, &tensor_stride); + cudnnBackendFinalize(outDesc); + + printf("cudnn outDesc created\n"); + + cudnnBackendDescriptor_t concat; + cudnnBackendCreateDescriptor(CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR, &concat); + size_t concat_dim = 0; + cudnnBackendSetAttribute(concat, CUDNN_ATTR_OPERATION_CONCAT_AXIS, + CUDNN_TYPE_INT64, 1, &concat_dim); + cudnnBackendSetAttribute(concat, CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS, + CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &xDesc); + cudnnBackendSetAttribute(concat, CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC, + CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &outDesc); + cudnnBackendFinalize(concat); + + printf("cudnn concat created\n"); + + cudnnBackendDescriptor_t opset; + cudnnBackendCreateDescriptor(CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR, &opset); + cudnnBackendSetAttribute(opset, CUDNN_ATTR_OPERATIONGRAPH_HANDLE, + CUDNN_TYPE_HANDLE, 1, &cudnn); + cudnnBackendSetAttribute(opset, CUDNN_ATTR_OPERATIONGRAPH_OPS, + CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &concat); + cudnnBackendFinalize(opset); + + printf("cudnn opset created\n"); + + cudnnBackendDescriptor_t engine; + cudnnBackendCreateDescriptor(CUDNN_BACKEND_ENGINE_DESCRIPTOR, &engine); + cudnnBackendSetAttribute(engine, CUDNN_ATTR_ENGINE_OPERATION_GRAPH, + CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &opset); + int64_t gidx = 0; + cudnnBackendSetAttribute(engine, CUDNN_ATTR_ENGINE_GLOBAL_INDEX, + CUDNN_TYPE_INT64, 1, &gidx); + cudnnBackendFinalize(engine); + + printf("cudnn engine created\n"); + + cudnnBackendDescriptor_t engcfg; + cudnnBackendSetAttribute(engcfg, CUDNN_ATTR_ENGINECFG_ENGINE, + CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &engine); + cudnnBackendFinalize(engcfg); + + printf("cudnn engcfg created\n"); + + cudnnBackendDescriptor_t plan; + cudnnBackendCreateDescriptor(CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR, &plan); + cudnnBackendSetAttribute(plan, CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG, + CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &engcfg); + cudnnBackendFinalize(plan); + + printf("cudnn plan created\n"); + + int64_t workspaceSize; + cudnnBackendGetAttribute(plan, CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE, + CUDNN_TYPE_INT64, 1, NULL, &workspaceSize); + + float *xData = (float*)malloc(3 * sizeof(float)); + xData[0] = 1.0f; + xData[1] = 2.0f; + xData[2] = 3.0f; + void *xData_dev = NULL; + cudaMalloc(&xData_dev, 3 * sizeof(float)); + cudaMemcpy(xData_dev, xData, 3 * sizeof(float), cudaMemcpyHostToDevice); + void *yData_dev = NULL; + cudaMalloc(&yData_dev, 3 * sizeof(float)); + + void *dev_ptrs[1] = {xData_dev, yData_dev}; // device pointer + int64_t uids[1] = {'x', 'y'}; + void *workspace = NULL; + cudaMalloc(&workspace, workspaceSize); + + cudnnBackendDescriptor_t varpack; + cudnnBackendCreateDescriptor(CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR, &varpack); + cudnnBackendSetAttribute(varpack, CUDNN_ATTR_1ARIANT_PACK_DATA_POINTERS, + CUDNN_TYPE_VOID_PTR, 2, dev_ptrs); + cudnnBackendSetAttribute(varpack, CUDNN_AT1R_VARIANT_PACK_UNIQUE_IDS, + CUDNN_TYPE_INT64, 2, uids); + cudnnBackendSetAttribute(varpack, CUDNN_ATTR_VARIANT_PACK_WORKSPACE, + CUDNN_TYPE_VOID_PTR, 1, &workspace); + cudnnBackendFinalize(varpack); + + printf("cudnn varpack created\n"); + + cudnnBackendExecute(cudnn, plan, varpack); + + printf("cudnn executed\n"); + + cudnnDestroy(cudnn); + return 0; +} +