Skip to content

Commit

Permalink
chore: cublas demo + client fix
Browse files Browse the repository at this point in the history
  • Loading branch information
brodeynewman committed Nov 14, 2024
1 parent c6b2291 commit 519f303
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 20 deletions.
15 changes: 13 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,25 @@
SCUDA is a GPU over IP bridge allowing GPUs on remote machines to be attached
to CPU-only machines.

## Demo
## Demos

### CUBLAS Matrix Multiplication

The demo below shows an NVIDIA GeForce RTX 4090 running on a remote machine (right pane).
The left pane is a Mac running a docker container with nvidia utils installed.

The docker container runs this [matrixMulCUBLAS](https://github.com/zchee/cuda-sample/blob/master/0_Simple/matrixMulCUBLAS/matrixMulCUBLAS.cpp) example.

You can view the docker image used [here](./deploy/Dockerfile.cublas-test).

### Simple torch example

The demo below shows an NVIDIA GeForce RTX 4090 running on a remote machine (right pane).
The left pane is a Mac running a docker container with nvidia utils installed.

The docker container runs `python3 -c "import torch; print(torch.cuda.is_available())"` to check if cuda is available.

You can view the docker image used [here](./Dockerfile.test).
You can view the docker image used [here](./deploy/Dockerfile.torch-test).

https://github.com/user-attachments/assets/035950bb-3cc1-4c73-9ad5-b00871a159ec

Expand Down
26 changes: 15 additions & 11 deletions client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,28 +86,31 @@ int rpc_open()
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET;
hints.ai_socktype = SOCK_STREAM;
if (getaddrinfo(server_ip, port, &hints, &res) != 0)
if (getaddrinfo(host, port, &hints, &res) != 0)
{
#ifdef VERBOSE
std::cout << "getaddrinfo of " << host << " port " << port << " failed" << std::endl;
#endif
return -1;
continue;
}

int flag = 1;
int sockfd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
if (sockfd == -1 ||
setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, (char *)&flag, sizeof(int)) < 0 ||
connect(sockfd, res->ai_addr, res->ai_addrlen) < 0)
if (sockfd == -1)
{
#ifdef VERBOSE
std::cout << "connect to " << host << " port " << port << " failed" << std::endl;
#endif
return -1;
printf("socket creation failed...\n");
exit(1);
}

int opts = setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, (char *)&flag, sizeof(int));
if (connect(sockfd, res->ai_addr, res->ai_addrlen) < 0)
{
std::cerr << "Connecting to " << host << " port " << port << " failed: "
<< strerror(errno) << std::endl;
exit(1);
}

conns[nconns++] = {sockfd, 0, 0, 0, 0, PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER};
}

if (pthread_mutex_unlock(&conn_mutex) < 0)
return -1;
if (nconns == 0)
Expand Down Expand Up @@ -290,6 +293,7 @@ CUresult cuGetProcAddress_v2(const char *symbol, void **pfn, int cudaVersion, cu

void *dlsym(void *handle, const char *name) __THROW
{
std::cerr << "starting libscuda..." << std::endl;
void *func = get_function_pointer(name);

/** proc address function calls are basically dlsym; we should handle this differently at the top level. */
Expand Down
41 changes: 41 additions & 0 deletions deploy/Dockerfile.cublas-test
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Test image: runs the CUDA matrixMulCUBLAS sample against a remote SCUDA
# server via the libscuda.so LD_PRELOAD shim.
FROM ubuntu:24.04

# Base build tooling. The jammy (22.04) 'universe' repo is added because
# 24.04 no longer ships libtinfo5 — presumably required by the CUDA 12.2
# tooling installed below; TODO confirm which binary links against it.
RUN apt-get update && apt-get install -y \
build-essential \
wget \
curl \
python3 \
python3-pip \
gnupg \
software-properties-common && \
add-apt-repository 'deb http://archive.ubuntu.com/ubuntu jammy main universe' && \
apt-get update && \
apt-get install -y libtinfo5 && \
rm -rf /var/lib/apt/lists/*

# Register NVIDIA's apt repository (ubuntu2204 keyring, used on this 24.04
# base) so cuda-toolkit packages resolve.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb && \
dpkg -i cuda-keyring_1.0-1_all.deb && \
rm cuda-keyring_1.0-1_all.deb && \
apt-get update

# Full CUDA 12.2 toolkit — needed to build the sample with make below.
RUN apt-get install -y cuda-toolkit-12-2

ENV PATH=/usr/local/cuda-12.2/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64

# NOTE(review): hard-coded demo server address — consider overriding at
# `docker run -e SCUDA_SERVER=...` rather than baking an IP into the image.
ENV SCUDA_SERVER=71.183.65.76
# Path read by start.sh and passed to LD_PRELOAD.
ENV libscuda_path=/usr/local/lib/libscuda.so

COPY ./libscuda.so /usr/local/lib/libscuda.so

# Expects an additional build context named 'samples' (e.g.
# `docker build --build-context samples=<cuda-sample checkout> ...`)
# containing the cuda-sample repository — TODO confirm against build script.
COPY --from=samples . /app/cuda-sample

RUN cd /app/cuda-sample/0_Simple/matrixMulCUBLAS && make build

RUN cp /app/cuda-sample/0_Simple/matrixMulCUBLAS/matrixMulCUBLAS /matrixMulCUBLAS

COPY start.sh /start.sh
RUN chmod +x /start.sh
RUN chmod +x /matrixMulCUBLAS

# "cublas" selects the matrixMulCUBLAS branch inside start.sh.
CMD ["/bin/bash", "/start.sh", "cublas"]
2 changes: 1 addition & 1 deletion Dockerfile.test → deploy/Dockerfile.torch-test
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ COPY start.sh /start.sh

RUN chmod +x /start.sh

CMD ["/bin/bash", "/start.sh"]
CMD ["/bin/bash", "/start.sh", "torch"]
15 changes: 15 additions & 0 deletions deploy/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
# Entry point for the SCUDA demo containers. Dispatches on the first
# argument ("torch" or "cublas") and runs the chosen example with
# libscuda.so preloaded so CUDA calls are forwarded to $SCUDA_SERVER.

echo "Connecting to SCUDA server at: $SCUDA_SERVER"
echo "Using scuda binary at path: $libscuda_path"

case "$1" in
  torch)
    echo "Running torch example..."
    LD_PRELOAD="$libscuda_path" python3 -c "import torch; print('CUDA Available:', torch.cuda.is_available())"
    ;;
  cublas)
    echo "Running cublas example..."

    LD_PRELOAD="$libscuda_path" /matrixMulCUBLAS
    ;;
  *)
    echo "Unknown option: $1. Please specify 'torch' or 'cublas'."
    ;;
esac
6 changes: 0 additions & 6 deletions start.sh

This file was deleted.

0 comments on commit 519f303

Please sign in to comment.