diff --git a/CudaRuntime1/CudaRuntime1.vcxproj b/CudaRuntime1/CudaRuntime1.vcxproj deleted file mode 100644 index 6b84f40..0000000 --- a/CudaRuntime1/CudaRuntime1.vcxproj +++ /dev/null @@ -1,86 +0,0 @@ - - - - - Debug - x64 - - - Release - x64 - - - - {001B89A2-9DAF-45E3-95EF-91A3C690096C} - CudaRuntime1 - - - - Application - true - MultiByte - v142 - - - Application - false - true - MultiByte - v142 - - - - - - - - - - - - - - true - - - - Level3 - Disabled - WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - true - Console - cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - 64 - - - - - Level3 - MaxSpeed - true - true - WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - - - true - true - true - Console - cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - - - 64 - - - - - - - - - - \ No newline at end of file diff --git a/CudaRuntime1/kernel.cu b/CudaRuntime1/kernel.cu deleted file mode 100644 index d2b1cf0..0000000 --- a/CudaRuntime1/kernel.cu +++ /dev/null @@ -1,121 +0,0 @@ - -#include "cuda_runtime.h" -#include "device_launch_parameters.h" - -#include - -cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size); - -__global__ void addKernel(int *c, const int *a, const int *b) -{ - int i = threadIdx.x; - c[i] = a[i] + b[i]; -} - -int main() -{ - const int arraySize = 5; - const int a[arraySize] = { 1, 2, 3, 4, 5 }; - const int b[arraySize] = { 10, 20, 30, 40, 50 }; - int c[arraySize] = { 0 }; - - // Add vectors in parallel. - cudaError_t cudaStatus = addWithCuda(c, a, b, arraySize); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "addWithCuda failed!"); - return 1; - } - - printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n", - c[0], c[1], c[2], c[3], c[4]); - - // cudaDeviceReset must be called before exiting in order for profiling and - // tracing tools such as Nsight and Visual Profiler to show complete traces. - cudaStatus = cudaDeviceReset(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceReset failed!"); - return 1; - } - - return 0; -} - -// Helper function for using CUDA to add vectors in parallel. -cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size) -{ - int *dev_a = 0; - int *dev_b = 0; - int *dev_c = 0; - cudaError_t cudaStatus; - - // Choose which GPU to run on, change this on a multi-GPU system. - cudaStatus = cudaSetDevice(0); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?"); - goto Error; - } - - // Allocate GPU buffers for three vectors (two input, one output) . - cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int)); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMalloc failed!"); - goto Error; - } - - cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int)); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMalloc failed!"); - goto Error; - } - - cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int)); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMalloc failed!"); - goto Error; - } - - // Copy input vectors from host memory to GPU buffers. - cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMemcpy failed!"); - goto Error; - } - - cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMemcpy failed!"); - goto Error; - } - - // Launch a kernel on the GPU with one thread for each element. - addKernel<<<1, size>>>(dev_c, dev_a, dev_b); - - // Check for any errors launching the kernel - cudaStatus = cudaGetLastError(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus)); - goto Error; - } - - // cudaDeviceSynchronize waits for the kernel to finish, and returns - // any errors encountered during the launch. - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus); - goto Error; - } - - // Copy output vector from GPU buffer to host memory. - cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMemcpy failed!"); - goto Error; - } - -Error: - cudaFree(dev_c); - cudaFree(dev_a); - cudaFree(dev_b); - - return cudaStatus; -}