Skip to content

Commit

Permalink
[WIP]: late. 2 compilation errors Dense is somewhat solid. Sparse may…
Browse files Browse the repository at this point in the history
… need to use transposes. need to check values in unit tests, and transpose in dgemm call as needed
  • Loading branch information
andrewpalumbo committed May 8, 2017
1 parent 1674687 commit aa8fdcf
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 32 deletions.
29 changes: 20 additions & 9 deletions cuda/src/main/scala/org/apache/mahout/cuda/DenseRowMatrix.scala
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ final class DenseRowMatrix {

// create and setup matrix descriptor
// Todo: do we want these? for dense %*% sparse?
//JCuda.cublasCreateMatDescr(descr)
// cublasSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)
//cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)
// JCuda.cublasCreateMatDescr(descr)
// cublasSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)
// cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)

}

Expand All @@ -89,14 +89,13 @@ final class DenseRowMatrix {

// create and setup matrix descriptor
// Todo: do we want these? for dense %*% sparse?
//cusblasCreateMatDescr(descr)
//cusblasSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)
//cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)
// cusblasCreateMatDescr(descr)
// cusblasSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)
// cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)

cudaMemcpy(vals, jcuda.Pointer.to(data.toList.flatten.toArray),
(nrow) * (ncol) * jcuda.Sizeof.DOUBLE,
cudaMemcpyHostToDevice)

}

/** Constructor with values on the device already.
Expand All @@ -116,13 +115,17 @@ final class DenseRowMatrix {
vals = data

// create and setup matrix descriptor
// Todo: do we want these? for dense %*% sparse?
// Todo: do we need these? for dense %*% sparse?
//cusblasCreateMatDescr(descr)
//cusblasSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)
//cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)

}

/** Set values with a 2D array
*
* @param data
*/
def set (data: Array[Array[Double]]): Unit = {
// Allocate row-major
cublasAlloc(data.length * data(0).length * jcuda.Sizeof.DOUBLE,
Expand All @@ -132,7 +135,15 @@ final class DenseRowMatrix {
cudaMemcpyHostToDevice)
}

def flatten2dArray(arr2d: Array[Array[Double]]): Array[Double] = {
/** Set values with a device pointer that is already created.
  *
  * NOTE(review): this only swaps the pointer — the previous `vals` allocation is
  * neither copied nor freed here, so the caller owns both pointers' lifetimes.
  *
  * @param data device pointer to the matrix values (assumed already allocated on
  *             the device and sized nrows * ncols * Sizeof.DOUBLE — TODO confirm)
  */
def set (data: Pointer): Unit = {
vals = data
}

/** Flattens a row-major 2D array into a single contiguous 1D array.
  *
  * Rows are concatenated in order, matching the row-major layout expected by
  * the cudaMemcpy host-to-device copies in this class.
  *
  * @param arr2d 2D array of doubles; rows may be empty, result is then empty
  * @return a new Array[Double] of length arr2d.map(_.length).sum
  */
private[cuda] def flatten2dArray(arr2d: Array[Array[Double]]): Array[Double] = {
  // Array.flatten copies directly into one primitive array; the previous
  // toList.flatten.toArray round-trip boxed every element and copied twice.
  arr2d.flatten
}

Expand Down
46 changes: 26 additions & 20 deletions cuda/src/main/scala/org/apache/mahout/cuda/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -46,26 +46,35 @@ package object cuda {
* @param src a (flattened) 2D cuda array
* @return A Mahout DenseMatrix
*/
def fromVclDenseRM(src: DenseRowMatrix): Matrix = {

def fromCUDADenseRM(src: DenseRowMatrix): Matrix = {

val nrowIntern = src.nrows
val ncolIntern = src.ncols

var dbuff = new Pointer()

val dbuff = new Array.ofDim[Double](nrowIntern * ncolIntern)
// again this will be double copying; consider copying directly from cuda memory
// into each row..
val jvmData = Array.ofDim[Double](nrowIntern,ncolIntern) //Double](nrowIntern * ncolIntern)
val cudaData = new Array[Double](nrowIntern * ncolIntern)
cudaMemcpy(jcuda.Pointer.to(cudaData), src.vals, (nrowIntern * ncolIntern)*jcuda.Sizeof.DOUBLE, cudaMemcpyDeviceToHost)

//Functions.fastCopy(src, dbuff)
// We could speed this up by doing a transpose here
// assuming that the matrix is in columnMajor format
// TODO: consider this getting late so make it work now.
var srcOffset = 0
val ncol = src.ncols
val rows = for (irow <- 0 until src.nrow) yield {
val rows = for (irow <- 0 until src.nrows) yield {

val rowvec = new Array[Double](ncol)
dbuff.position(srcOffset).get(rowvec)

System.arraycopy(cudaData, srcOffset , rowvec , 0 , ncol)
srcOffset += ncolIntern
rowvec
}

// Always! use shallow = true to avoid yet another copying.
// even another from viennacl :)
new DenseMatrix(rows.toArray, true)
}

/**
Expand All @@ -84,7 +93,7 @@ package object cuda {
}


// TODO replace this with repackColumnMajor and use a different dgemm algorithm?
// TODO replace this with repackColumnMajor or use a different dgemm algorithm?
// Most Mahout in-core matrices are row-major and we're using CSR so we may need to see
// if JCuda is using an optimal csr/RowMajor DGEMM algorithm.
// TODO: check with NS on this
Expand Down Expand Up @@ -234,32 +243,29 @@ package object cuda {
val n = b.ncols
val k = b.nrows

val d_A = valuesF.get(a).asInstanceOf[Array[Array[Double]]]
// val d_A = valuesF.get(a).asInstanceOf[Array[Array[Double]]]


val c: DenseRowMatrix = new DenseRowMatrix(ctx, m, n)
val d_C: Pointer = new Pointer()
cudaMalloc(c.vals, m * n * jcuda.Sizeof.DOUBLE)

// cublasSgemm('n', 'n', N, N, N, alpha,
// d_A, N, d_B, N, beta, d_C, N);

// JCublas.cublasSgemm('n', 'n', N, N, N, alpha,
// d_A, N, d_B, N, beta, d_C, N);

//C = alpha * op(A) * op(B) + beta * C,
//where op(X) = X or op(X) = transpose(X),
JCublas.cublasDgemm(a.trans, b.trans, m, n, k,
// using transpose here because Mahout Matrices in general
// are row-major, hardcoding this for now..
JCublas.cublasDgemm('t', 't', m, n, k,
1.0d, // alpha
a.vals, m, // A, lda
b.vals, k, // B , ldb
0.0d, // beta
d_C, // pointer to results
n) // todo: check on this

//

n) // todo: check on this are we correct here?

// set the data of c to the results
// may need to allocate data here or the other side.
c.set(d_C)
c
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ import scalabindings.RLikeOps._

import scala.util.Random

/**
* Created by andy on 3/29/17.
*/

class CUDATestSuite extends FunSuite with Matchers {


Expand Down

0 comments on commit aa8fdcf

Please sign in to comment.