Skip to content

Commit

Permalink
[WIP]: late. 2 compilation errors Dense is somewhat solid. Sparse may…
Browse files Browse the repository at this point in the history
… need to use transposes. need to check values in unit tests, and transpose in dgemm call as needed
  • Loading branch information
andrewpalumbo committed May 8, 2017
1 parent 1674687 commit aa8fdcf
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 32 deletions.
29 changes: 20 additions & 9 deletions cuda/src/main/scala/org/apache/mahout/cuda/DenseRowMatrix.scala
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ final class DenseRowMatrix {

// create and setup matrix descriptor
// Todo: do we want these? for dense %*% sparse?
//JCuda.cublasCreateMatDescr(descr)
// cublasSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)
//cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)
// JCuda.cublasCreateMatDescr(descr)
// cublasSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)
// cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)

}

Expand All @@ -89,14 +89,13 @@ final class DenseRowMatrix {

// create and setup matrix descriptor
// Todo: do we want these? for dense %*% sparse?
//cusblasCreateMatDescr(descr)
//cusblasSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)
//cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)
// cusblasCreateMatDescr(descr)
// cusblasSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)
// cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)

cudaMemcpy(vals, jcuda.Pointer.to(data.toList.flatten.toArray),
(nrow) * (ncol) * jcuda.Sizeof.DOUBLE,
cudaMemcpyHostToDevice)

}

/** Constructor with values on the device already.
Expand All @@ -116,13 +115,17 @@ final class DenseRowMatrix {
vals = data

// create and setup matrix descriptor
// Todo: do we want these? for dense %*% sparse?
// Todo: do we need these? for dense %*% sparse?
//cusblasCreateMatDescr(descr)
//cusblasSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)
//cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)

}

/** Set values with a 2D array
*
* @param data
*/
def set (data: Array[Array[Double]]): Unit = {
// Allocate row-major
cublasAlloc(data.length * data(0).length * jcuda.Sizeof.DOUBLE,
Expand All @@ -132,7 +135,15 @@ final class DenseRowMatrix {
cudaMemcpyHostToDevice)
}

def flatten2dArray(arr2d: Array[Array[Double]]): Array[Double] = {
/** Set values with a device pointer that is already created.
  *
  * NOTE(review): this only swaps the pointer — the previous `vals` allocation is
  * neither copied nor freed here, so the caller owns both pointers' lifetimes.
  *
  * @param data device pointer to the matrix values (assumed already allocated on
  *             the device and sized nrows * ncols * Sizeof.DOUBLE — TODO confirm)
  */
def set (data: Pointer): Unit = {
vals = data
}

/** Flattens a row-major 2D array into a single contiguous 1D array.
  *
  * Rows are concatenated in order, matching the row-major layout expected by
  * the cudaMemcpy host-to-device copies in this class.
  *
  * @param arr2d 2D array of doubles; rows may be empty, result is then empty
  * @return a new Array[Double] of length arr2d.map(_.length).sum
  */
private[cuda] def flatten2dArray(arr2d: Array[Array[Double]]): Array[Double] = {
  // Array.flatten copies directly into one primitive array; the previous
  // toList.flatten.toArray round-trip boxed every element and copied twice.
  arr2d.flatten
}

Expand Down
46 changes: 26 additions & 20 deletions cuda/src/main/scala/org/apache/mahout/cuda/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -46,26 +46,35 @@ package object cuda {
* @param src a (flattened) 2D cuda array
* @return A Mahout DenseMatrix
*/
def fromVclDenseRM(src: DenseRowMatrix): Matrix = {

def fromCUDADenseRM(src: DenseRowMatrix): Matrix = {

val nrowIntern = src.nrows
val ncolIntern = src.ncols

var dbuff = new Pointer()

val dbuff = new Array.ofDim[Double](nrowIntern * ncolIntern)
// again this will be double copying; consider copying directly from cuda memory
// into each row..
val jvmData = Array.ofDim[Double](nrowIntern,ncolIntern) //Double](nrowIntern * ncolIntern)
val cudaData = new Array[Double](nrowIntern * ncolIntern)
cudaMemcpy(jcuda.Pointer.to(cudaData), src.vals, (nrowIntern * ncolIntern)*jcuda.Sizeof.DOUBLE, cudaMemcpyDeviceToHost)

//Functions.fastCopy(src, dbuff)
// We could speed this up by doing a transpose here
// assuming that the matrix is in columnMajor format
// TODO: consider this getting late so make it work now.
var srcOffset = 0
val ncol = src.ncols
val rows = for (irow <- 0 until src.nrow) yield {
val rows = for (irow <- 0 until src.nrows) yield {

val rowvec = new Array[Double](ncol)
dbuff.position(srcOffset).get(rowvec)

System.arraycopy(cudaData, srcOffset , rowvec , 0 , ncol)
srcOffset += ncolIntern
rowvec
}

// Always! use shallow = true to avoid yet another copying.
// even another from viennacl :)
new DenseMatrix(rows.toArray, true)
}

/**
Expand All @@ -84,7 +93,7 @@ package object cuda {
}


// TODO replace this with repackColumnMajor and use a different dgemm algorithm?
// TODO replace this with repackColumnMajor or use a different dgemm algorithm?
// Most Mahout in-core matrices are row-major and we're using CSR so we may need to see
// if JCuda is using an optimal csr/RowMajor DGEMM algorithm.
// TODO: check with NS on this
Expand Down Expand Up @@ -234,32 +243,29 @@ package object cuda {
val n = b.ncols
val k = b.nrows

val d_A = valuesF.get(a).asInstanceOf[Array[Array[Double]]]
// val d_A = valuesF.get(a).asInstanceOf[Array[Array[Double]]]


val c: DenseRowMatrix = new DenseRowMatrix(ctx, m, n)
val d_C: Pointer = new Pointer()
cudaMalloc(c.vals, m * n * jcuda.Sizeof.DOUBLE)

// cublasSgemm('n', 'n', N, N, N, alpha,
// d_A, N, d_B, N, beta, d_C, N);

// JCublas.cublasSgemm('n', 'n', N, N, N, alpha,
// d_A, N, d_B, N, beta, d_C, N);

//C = alpha * op(A) * op(B) + beta * C,
//where op(X) = X or op(X) = transpose(X),
JCublas.cublasDgemm(a.trans, b.trans, m, n, k,
// using transpose here because Mahout Matrices in general
// are row-major, hardcoding this for now..
JCublas.cublasDgemm('t', 't', m, n, k,
1.0d, // alpha
a.vals, m, // A, lda
b.vals, k, // B , ldb
0.0d, // beta
d_C, // pointer to results
n) // todo: check on this

//

n) // todo: check on this are we correct here?

// set the data of c to the results
// may need to allocate data here or the other side.
c.set(d_C)
c
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ import scalabindings.RLikeOps._

import scala.util.Random

/**
* Created by andy on 3/29/17.
*/

class CUDATestSuite extends FunSuite with Matchers {


Expand Down

0 comments on commit aa8fdcf

Please sign in to comment.