http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/scala-2.11/src/main/scala/org/apache/mahout/viennacl/openmp/package.scala
----------------------------------------------------------------------
diff --git a/viennacl-omp/scala-2.11/src/main/scala/org/apache/mahout/viennacl/openmp/package.scala b/viennacl-omp/scala-2.11/src/main/scala/org/apache/mahout/viennacl/openmp/package.scala
deleted file mode 100644
index 89af010..0000000
--- a/viennacl-omp/scala-2.11/src/main/scala/org/apache/mahout/viennacl/openmp/package.scala
+++ /dev/null
@@ -1,434 +0,0 @@
-package org.apache.mahout.viennacl
-
-import java.nio._
-
-import org.apache.mahout.math._
-import scalabindings._
-import RLikeOps._
-
-import scala.collection.JavaConversions._
-import org.apache.mahout.viennacl.openmp.javacpp.DenseRowMatrix
-import org.apache.mahout.viennacl.openmp.javacpp._
-import org.bytedeco.javacpp.{DoublePointer, IntPointer}
-
-
-
-package object openmp {
-
-  type IntConvertor = Int => Int
-
-  def toVclDenseRM(src: Matrix, vclCtx: Context = new 
Context(Context.MAIN_MEMORY)): DenseRowMatrix = {
-    vclCtx.memoryType match {
-      case Context.MAIN_MEMORY ⇒
-        val vclMx = new DenseRowMatrix(
-          data = repackRowMajor(src, src.nrow, src.ncol),
-          nrow = src.nrow,
-          ncol = src.ncol,
-          ctx = vclCtx
-        )
-        vclMx
-      case _ ⇒
-        val vclMx = new DenseRowMatrix(src.nrow, src.ncol, vclCtx)
-        fastCopy(src, vclMx)
-        vclMx
-    }
-  }
-
-
-  /**
-    * Convert a dense row VCL matrix to mahout matrix.
-    *
-    * @param src
-    * @return
-    */
-  def fromVclDenseRM(src: DenseRowMatrix): Matrix = {
-    val nrowIntern = src.internalnrow
-    val ncolIntern = src.internalncol
-
-    // A technical debt here:
-
-    // We do double copying here, this is obviously suboptimal, but hopefully 
we'll compensate
-    // this with gains from running superlinear algorithms in VCL.
-    val dbuff = new DoublePointer(nrowIntern * ncolIntern)
-    Functions.fastCopy(src, dbuff)
-    var srcOffset = 0
-    val ncol = src.ncol
-    val rows = for (irow ← 0 until src.nrow) yield {
-
-      val rowvec = new Array[Double](ncol)
-      dbuff.position(srcOffset).get(rowvec)
-
-      srcOffset += ncolIntern
-      rowvec
-    }
-
-    // Always! use shallow = true to avoid yet another copying.
-    new DenseMatrix(rows.toArray, true)
-  }
-
-  def fastCopy(mxSrc: Matrix, dst: DenseRowMatrix) = {
-    val nrowIntern = dst.internalnrow
-    val ncolIntern = dst.internalncol
-
-    assert(nrowIntern >= mxSrc.nrow && ncolIntern >= mxSrc.ncol)
-
-    val rmajorData = repackRowMajor(mxSrc, nrowIntern, ncolIntern)
-    Functions.fastCopy(rmajorData, new 
DoublePointer(rmajorData).position(rmajorData.limit()), dst)
-
-    rmajorData.close()
-  }
-
-  private def repackRowMajor(mx: Matrix, nrowIntern: Int, ncolIntern: Int): 
DoublePointer = {
-
-    assert(mx.nrow <= nrowIntern && mx.ncol <= ncolIntern)
-
-    val dbuff = new DoublePointer(nrowIntern * ncolIntern)
-
-    mx match {
-      case dm: DenseMatrix ⇒
-        val valuesF = classOf[DenseMatrix].getDeclaredField("values")
-        valuesF.setAccessible(true)
-        val values = valuesF.get(dm).asInstanceOf[Array[Array[Double]]]
-        var dstOffset = 0
-        for (irow ← 0 until mx.nrow) {
-          val rowarr = values(irow)
-          dbuff.position(dstOffset).put(rowarr, 0, rowarr.size min ncolIntern)
-          dstOffset += ncolIntern
-        }
-        dbuff.position(0)
-      case _ ⇒
-        // Naive copying. Could be sped up for a DenseMatrix. TODO.
-        for (row ← mx) {
-          val dstOffset = row.index * ncolIntern
-          for (el ← row.nonZeroes) dbuff.put(dstOffset + el.index, el)
-        }
-    }
-
-    dbuff
-  }
-
-  /**
-    *
-    * @param mxSrc
-    * @param ctx
-    * @return
-    */
-  def toVclCmpMatrixAlt(mxSrc: Matrix, ctx: Context): CompressedMatrix = {
-
-    // use repackCSR(matrix, ctx) to convert all ints to unsigned ints if 
Context is Ocl
-    // val (jumpers, colIdcs, els) = repackCSRAlt(mxSrc)
-    val (jumpers, colIdcs, els) = repackCSR(mxSrc, ctx)
-
-    val compMx = new CompressedMatrix(mxSrc.nrow, mxSrc.ncol, 
els.capacity().toInt, ctx)
-    compMx.set(jumpers, colIdcs, els, mxSrc.nrow, mxSrc.ncol, 
els.capacity().toInt)
-    compMx
-  }
-
-  private def repackCSRAlt(mx: Matrix): (IntPointer, IntPointer, 
DoublePointer) = {
-    val nzCnt = mx.map(_.getNumNonZeroElements).sum
-    val jumpers = new IntPointer(mx.nrow + 1L)
-    val colIdcs = new IntPointer(nzCnt + 0L)
-    val els = new DoublePointer(nzCnt)
-    var posIdx = 0
-
-    var sortCols = false
-
-    // Row-wise loop. Rows may not necessarily come in order. But we have to 
have them in-order.
-    for (irow ← 0 until mx.nrow) {
-
-      val row = mx(irow, ::)
-      jumpers.put(irow.toLong, posIdx)
-
-      // Remember row start index in case we need to restart conversion of 
this row if out-of-order
-      // column index is detected
-      val posIdxStart = posIdx
-
-      // Retry loop: normally we are done in one pass thru it unless we need 
to re-run it because
-      // out-of-order column was detected.
-      var done = false
-      while (!done) {
-
-        // Is the sorting mode on?
-        if (sortCols) {
-
-          // Sorting of column indices is on. So do it.
-          row.nonZeroes()
-            // Need to convert to a strict collection out of iterator
-            .map(el ⇒ el.index → el.get)
-            // Sorting requires Sequence api
-            .toSeq
-            // Sort by column index
-            .sortBy(_._1)
-            // Flush to the CSR buffers.
-            .foreach { case (index, v) ⇒
-              colIdcs.put(posIdx.toLong, index)
-              els.put(posIdx.toLong, v)
-              posIdx += 1
-            }
-
-          // Never need to retry if we are already in the sorting mode.
-          done = true
-
-        } else {
-
-          // Try to run unsorted conversion here, switch lazily to sorted if 
out-of-order column is
-          // detected.
-          var lastCol = 0
-          val nzIter = row.nonZeroes().iterator()
-          var abortNonSorted = false
-
-          while (nzIter.hasNext && !abortNonSorted) {
-
-            val el = nzIter.next()
-            val index = el.index
-
-            if (index < lastCol) {
-
-              // Out of order detected: abort inner loop, reset posIdx and 
retry with sorting on.
-              abortNonSorted = true
-              sortCols = true
-              posIdx = posIdxStart
-
-            } else {
-
-              // Still in-order: save element and column, continue.
-              els.put(posIdx, el)
-              colIdcs.put(posIdx.toLong, index)
-              posIdx += 1
-
-              // Remember last column seen.
-              lastCol = index
-            }
-          } // inner non-sorted
-
-          // Do we need to re-run this row with sorting?
-          done = !abortNonSorted
-
-        } // if (sortCols)
-
-      } // while (!done) retry loop
-
-    } // row-wise loop
-
-    // Make sure Mahout matrix did not cheat on non-zero estimate.
-    assert(posIdx == nzCnt)
-
-    jumpers.put(mx.nrow.toLong, nzCnt)
-
-    (jumpers, colIdcs, els)
-  }
-
-  // same as repackCSRAlt except converts to jumpers, colIdcs to unsigned ints 
before setting
-  private def repackCSR(mx: Matrix, context: Context): (IntPointer, 
IntPointer, DoublePointer) = {
-    val nzCnt = mx.map(_.getNumNonZeroElements).sum
-    val jumpers = new IntPointer(mx.nrow + 1L)
-    val colIdcs = new IntPointer(nzCnt + 0L)
-    val els = new DoublePointer(nzCnt)
-    var posIdx = 0
-
-    var sortCols = false
-
-    def convertInt: IntConvertor = if(context.memoryType == 
Context.OPENCL_MEMORY) {
-      int2cl_uint
-    } else {
-      i: Int => i: Int
-    }
-
-    // Row-wise loop. Rows may not necessarily come in order. But we have to 
have them in-order.
-    for (irow ← 0 until mx.nrow) {
-
-      val row = mx(irow, ::)
-      jumpers.put(irow.toLong, posIdx)
-
-      // Remember row start index in case we need to restart conversion of 
this row if out-of-order
-      // column index is detected
-      val posIdxStart = posIdx
-
-      // Retry loop: normally we are done in one pass thru it unless we need 
to re-run it because
-      // out-of-order column was detected.
-      var done = false
-      while (!done) {
-
-        // Is the sorting mode on?
-        if (sortCols) {
-
-          // Sorting of column indices is on. So do it.
-          row.nonZeroes()
-            // Need to convert to a strict collection out of iterator
-            .map(el ⇒ el.index → el.get)
-            // Sorting requires Sequence api
-            .toIndexedSeq
-            // Sort by column index
-            .sortBy(_._1)
-            // Flush to the CSR buffers.
-            .foreach { case (index, v) ⇒
-            // convert to cl_uint if context is OCL
-            colIdcs.put(posIdx.toLong, convertInt(index))
-            els.put(posIdx.toLong, v)
-            posIdx += 1
-          }
-
-          // Never need to retry if we are already in the sorting mode.
-          done = true
-
-        } else {
-
-          // Try to run unsorted conversion here, switch lazily to sorted if 
out-of-order column is
-          // detected.
-          var lastCol = 0
-          val nzIter = row.nonZeroes().iterator()
-          var abortNonSorted = false
-
-          while (nzIter.hasNext && !abortNonSorted) {
-
-            val el = nzIter.next()
-            val index = el.index
-
-            if (index < lastCol) {
-
-              // Out of order detected: abort inner loop, reset posIdx and 
retry with sorting on.
-              abortNonSorted = true
-              sortCols = true
-              posIdx = posIdxStart
-
-            } else {
-
-              // Still in-order: save element and column, continue.
-              els.put(posIdx, el)
-              // convert to cl_uint if context is OCL
-              colIdcs.put(posIdx.toLong, convertInt(index))
-              posIdx += 1
-
-              // Remember last column seen.
-              lastCol = index
-            }
-          } // inner non-sorted
-
-          // Do we need to re-run this row with sorting?
-          done = !abortNonSorted
-
-        } // if (sortCols)
-
-      } // while (!done) retry loop
-
-    } // row-wise loop
-
-    // Make sure Mahout matrix did not cheat on non-zero estimate.
-    assert(posIdx == nzCnt)
-
-    // convert to cl_uint if context is OCL
-    jumpers.put(mx.nrow.toLong, convertInt(nzCnt))
-
-    (jumpers, colIdcs, els)
-  }
-
-
-
-  def fromVclCompressedMatrix(src: CompressedMatrix): Matrix = {
-    val m = src.size1
-    val n = src.size2
-    val NNz = src.nnz
-
-    val row_ptr_handle = src.handle1
-    val col_idx_handle = src.handle2
-    val element_handle = src.handle
-
-    val row_ptr = new IntPointer((m + 1).toLong)
-    val col_idx = new IntPointer(NNz.toLong)
-    val values = new DoublePointer(NNz.toLong)
-
-    Functions.memoryReadInt(row_ptr_handle, 0, (m + 1) * 4, row_ptr, false)
-    Functions.memoryReadInt(col_idx_handle, 0, NNz * 4, col_idx, false)
-    Functions.memoryReadDouble(element_handle, 0, NNz * 8, values, false)
-
-    val rowPtr = row_ptr.asBuffer()
-    val colIdx = col_idx.asBuffer()
-    val vals = values.asBuffer()
-
-    rowPtr.rewind()
-    colIdx.rewind()
-    vals.rewind()
-
-
-    val srMx = new SparseRowMatrix(m, n)
-
-    // read the values back into the matrix
-    var j = 0
-    // row wise, copy any non-zero elements from row(i-1,::)
-    for (i <- 1 to m) {
-      // for each nonzero element, set column col(idx(j) value to vals(j)
-      while (j < rowPtr.get(i)) {
-        srMx(i - 1, colIdx.get(j)) = vals.get(j)
-        j += 1
-      }
-    }
-    srMx
-  }
-
-  def toVclVec(vec: Vector, ctx: Context): VCLVector = {
-
-    vec match {
-      case vec: DenseVector => {
-        val valuesF = classOf[DenseVector].getDeclaredField("values")
-        valuesF.setAccessible(true)
-        val values = valuesF.get(vec).asInstanceOf[Array[Double]]
-        val el_ptr = new DoublePointer(values.length.toLong)
-        el_ptr.put(values, 0, values.length)
-
-        new VCLVector(el_ptr, ctx.memoryType, values.length)
-      }
-
-      case vec: SequentialAccessSparseVector => {
-        val it = vec.iterateNonZero
-        val size = vec.size()
-        val el_ptr = new DoublePointer(size.toLong)
-        while (it.hasNext) {
-          val el: Vector.Element = it.next
-          el_ptr.put(el.index, el.get())
-        }
-        new VCLVector(el_ptr, ctx.memoryType, size)
-      }
-
-      case vec: RandomAccessSparseVector => {
-        val it = vec.iterateNonZero
-        val size = vec.size()
-        val el_ptr = new DoublePointer(size.toLong)
-        while (it.hasNext) {
-          val el: Vector.Element = it.next
-          el_ptr.put(el.index, el.get())
-        }
-        new VCLVector(el_ptr, ctx.memoryType, size)
-      }
-      case _ => throw new IllegalArgumentException("Vector sub-type not 
supported.")
-    }
-
-  }
-
-  def fromVClVec(vclVec: VCLVector): Vector = {
-    val size = vclVec.size
-    val element_handle = vclVec.handle
-    val ele_ptr = new DoublePointer(size)
-    Functions.memoryReadDouble(element_handle, 0, size * 8, ele_ptr, false)
-
-    // for now just assume its dense since we only have one flavor of
-    // VCLVector
-    val mVec = new DenseVector(size)
-    for (i <- 0 until size) {
-      mVec.setQuick(i, ele_ptr.get(i + 0L))
-    }
-
-    mVec
-  }
-
-
-  // TODO: Fix this?  cl_uint must be an unsigned int per each machine's 
representation of such.
-  // this is currently not working anyways.
-  // cl_uint is needed for OpenCl sparse Buffers
-  // per 
https://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/scalarDataTypes.html
-  // it is simply an unsigned int, so strip the sign.
-  def int2cl_uint(i: Int): Int = {
-    ((i >>> 1) << 1) + (i & 1)
-  }
-
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/scala-2.11/src/test/scala/org/apache/mahout/viennacl/omp/ViennaCLSuiteOMP.scala
----------------------------------------------------------------------
diff --git a/viennacl-omp/scala-2.11/src/test/scala/org/apache/mahout/viennacl/omp/ViennaCLSuiteOMP.scala b/viennacl-omp/scala-2.11/src/test/scala/org/apache/mahout/viennacl/omp/ViennaCLSuiteOMP.scala
deleted file mode 100644
index af29e3c..0000000
--- a/viennacl-omp/scala-2.11/src/test/scala/org/apache/mahout/viennacl/omp/ViennaCLSuiteOMP.scala
+++ /dev/null
@@ -1,249 +0,0 @@
-package org.apache.mahout.viennacl.openmp
-
-import org.apache.mahout.math._
-import scalabindings._
-import RLikeOps._
-import org.bytedeco.javacpp.DoublePointer
-import org.scalatest.{FunSuite, Matchers}
-import org.apache.mahout.viennacl.openmp.javacpp._
-import org.apache.mahout.viennacl.openmp.javacpp.Functions._
-import org.apache.mahout.viennacl.openmp.javacpp.LinalgFunctions._
-
-import scala.util.Random
-
-class ViennaCLSuiteOMP extends FunSuite with Matchers {
-
-  test("row-major viennacl::matrix") {
-
-    // Just to make sure the javacpp library is loaded:
-    Context.loadLib()
-
-    val m = 20
-    val n = 30
-    val data = new DoublePointer(m * n)
-    val buff = data.asBuffer()
-    // Fill with some noise
-    while (buff.remaining() > 0) buff.put(Random.nextDouble())
-
-    // Create row-major matrix with OpenCL
-    val hostClCtx = new Context(Context.MAIN_MEMORY)
-    val cpuMx = new DenseRowMatrix(data = data, nrow = m, ncol = n, hostClCtx)
-    // And free.
-    cpuMx.close()
-
-  }
-
-
-  test("mmul microbenchmark") {
-    val memCtx = new Context(Context.MAIN_MEMORY)
-
-    val m = 3000
-    val n = 3000
-    val s = 1000
-
-    val r = new Random(1234)
-
-    // Dense row-wise
-    val mxA = new DenseMatrix(m, s)
-    val mxB = new DenseMatrix(s, n)
-
-    // add some data
-    mxA := { (_, _, _) => r.nextDouble() }
-    mxB := { (_, _, _) => r.nextDouble() }
-
-    var ms = System.currentTimeMillis()
-    mxA %*% mxB
-    ms = System.currentTimeMillis() - ms
-    info(s"Mahout multiplication time: $ms ms.")
-
-    import LinalgFunctions._
-
-    // openMP/cpu time, including copying:
-    {
-      ms = System.currentTimeMillis()
-      val ompA = toVclDenseRM(mxA, memCtx)
-      val ompB = toVclDenseRM(mxB, memCtx)
-      val ompC = new DenseRowMatrix(prod(ompA, ompB))
-      val mxC = fromVclDenseRM(ompC)
-      ms = System.currentTimeMillis() - ms
-      info(s"ViennaCL/cpu/OpenMP multiplication time: $ms ms.")
-
-      ompA.close()
-      ompB.close()
-      ompC.close()
-    }
-
-  }
-
-  test("trans") {
-
-    val ompCtx = new Context(Context.MAIN_MEMORY)
-
-
-    val m = 20
-    val n = 30
-
-    val r = new Random(1234)
-
-    // Dense row-wise
-    val mxA = new DenseMatrix(m, n)
-
-    // add some data
-    mxA := { (_, _, _) => r.nextDouble() }
-
-
-    // Test transposition in OpenMP
-    {
-      val ompA = toVclDenseRM(src = mxA, ompCtx)
-      val ompAt = new DenseRowMatrix(trans(ompA))
-
-      val mxAt = fromVclDenseRM(ompAt)
-      ompA.close()
-      ompAt.close()
-
-      (mxAt - mxA.t).norm / m / n should be < 1e-16
-    }
-
-  }
-
-  test("sparse mmul microbenchmark") {
-
-    val ompCtx = new Context(Context.MAIN_MEMORY)
-
-    val m = 3000
-    val n = 3000
-    val s = 1000
-
-    val r = new Random(1234)
-
-    // sparse row-wise
-    val mxA = new SparseRowMatrix(m, s, false)
-    val mxB = new SparseRowMatrix(s, n, true)
-
-    // add some sparse data with 20% density
-    mxA := { (_, _, v) => if (r.nextDouble() < .20) r.nextDouble() else v }
-    mxB := { (_, _, v) => if (r.nextDouble() < .20) r.nextDouble() else v }
-
-    var ms = System.currentTimeMillis()
-    val mxC = mxA %*% mxB
-    ms = System.currentTimeMillis() - ms
-    info(s"Mahout Sparse multiplication time: $ms ms.")
-
-
-    // Test multiplication in OpenMP
-    {
-      ms = System.currentTimeMillis()
-      //      val ompA = toVclCompressedMatrix(src = mxA, ompCtx)
-      //      val ompB = toVclCompressedMatrix(src = mxB, ompCtx)
-
-      val ompA = toVclCmpMatrixAlt(mxA, ompCtx)
-      val ompB = toVclCmpMatrixAlt(mxB, ompCtx)
-
-      val ompC = new CompressedMatrix(prod(ompA, ompB))
-
-      ms = System.currentTimeMillis() - ms
-      info(s"ViennaCL/cpu/OpenMP Sparse multiplication time: $ms ms.")
-
-      val ompMxC = fromVclCompressedMatrix(ompC)
-      (mxC - ompMxC).norm / mxC.nrow / mxC.ncol should be < 1e-10
-
-      ompA.close()
-      ompB.close()
-      ompC.close()
-
-    }
-
-  }
-
-  test("VCL Dense Matrix %*% Dense vector - no OpenCl") {
-
-    val ompCtx = new Context(Context.MAIN_MEMORY)
-
-
-    val m = 3000
-    val s = 1000
-
-    val r = new Random(1234)
-
-    // Dense row-wise
-    val mxA = new DenseMatrix(m, s)
-    val dvecB = new DenseVector(s)
-
-    // add some random data
-    mxA := { (_,_,_) => r.nextDouble() }
-    dvecB := { (_,_) => r.nextDouble() }
-
-    //test in matrix %*% vec
-    var ms = System.currentTimeMillis()
-    val mDvecC = mxA %*% dvecB
-    ms = System.currentTimeMillis() - ms
-    info(s"Mahout dense matrix %*% dense vector multiplication time: $ms ms.")
-
-
-    //Test multiplication in OpenMP
-      {
-
-        ms = System.currentTimeMillis()
-        val ompMxA = toVclDenseRM(mxA, ompCtx)
-        val ompVecB = toVclVec(dvecB, ompCtx)
-
-        val ompVecC = new VCLVector(prod(ompMxA, ompVecB))
-        val ompDvecC = fromVClVec(ompVecC)
-
-        ms = System.currentTimeMillis() - ms
-        info(s"ViennaCL/cpu/OpenMP dense matrix %*% dense vector 
multiplication time: $ms ms.")
-        (ompDvecC.toColMatrix - mDvecC.toColMatrix).norm / s  should be < 1e-10
-
-        ompMxA.close()
-        ompVecB.close()
-        ompVecC.close()
-      }
-
-  }
-
-
-  test("Sparse %*% Dense mmul microbenchmark") {
-    val memCtx = new Context(Context.MAIN_MEMORY)
-
-    val m = 3000
-    val n = 3000
-    val s = 1000
-
-    val r = new Random(1234)
-
-    // Dense row-wise
-    val mxSr = new SparseMatrix(m, s)
-    val mxDn = new DenseMatrix(s, n)
-
-    // add some data
-    mxSr := { (_, _, v) => if (r.nextDouble() < .20) r.nextDouble() else v }
-    mxDn := { (_, _, _) => r.nextDouble() }
-
-    var ms = System.currentTimeMillis()
-    mxSr %*% mxDn
-    ms = System.currentTimeMillis() - ms
-    info(s"Mahout multiplication time: $ms ms.")
-
-    import LinalgFunctions._
-
-
-    // openMP/cpu time, including copying:
-    {
-      ms = System.currentTimeMillis()
-      val ompA = toVclCmpMatrixAlt(mxSr, memCtx)
-      val ompB = toVclDenseRM(mxDn, memCtx)
-      val ompC = new DenseRowMatrix(prod(ompA, ompB))
-      val mxC = fromVclDenseRM(ompC)
-      ms = System.currentTimeMillis() - ms
-      info(s"ViennaCL/cpu/OpenMP multiplication time: $ms ms.")
-
-      ompA.close()
-      ompB.close()
-      ompC.close()
-    }
-
-  }
-
-
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/Functions.java
----------------------------------------------------------------------
diff --git a/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/Functions.java b/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/Functions.java
new file mode 100644
index 0000000..c2bffe5
--- /dev/null
+++ b/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/Functions.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.viennacl.openmp.javacpp;
+
+import org.bytedeco.javacpp.BytePointer;
+import org.bytedeco.javacpp.DoublePointer;
+import org.bytedeco.javacpp.IntPointer;
+import org.bytedeco.javacpp.annotation.*;
+
+import java.nio.DoubleBuffer;
+import java.nio.IntBuffer;
+
+
+/** JavaCPP declarations of free functions from the {@code viennacl} C++ namespace. */
+@Properties(inherit = Context.class,
+        value = @Platform(
+                library = "jniViennaCL"
+        )
+)
+@Namespace("viennacl")
+public final class Functions {
+    private Functions() {
+    }
+
+    // This is (imo) an inconsistency in ViennaCL: almost all operations require MatrixBase, while
+    // fast_copy requires type `matrix`, i.e., one of DenseRowMatrix or DenseColumnMatrix.
+    @Name("fast_copy")
+    public static native void fastCopy(DoublePointer srcBegin, DoublePointer srcEnd, @ByRef DenseRowMatrix dst);
+
+    @Name("fast_copy")
+    public static native void fastCopy(DoublePointer srcBegin, DoublePointer srcEnd, @ByRef DenseColumnMatrix dst);
+
+    @Name("fast_copy")
+    public static native void fastCopy(@ByRef DenseRowMatrix src, DoublePointer dst);
+
+    @Name("fast_copy")
+    public static native void fastCopy(@ByRef DenseColumnMatrix src, DoublePointer dst);
+
+    @Name("fast_copy")
+    public static native void fastCopy(@Const @ByRef VectorBase dst, @Const @ByRef VCLVector src);
+
+    @Name("fast_copy")
+    public static native void fastCopy(@Const @ByRef VCLVector src, @Const @ByRef VectorBase dst);
+
+    @ByVal
+    public static native MatrixTransExpression trans(@ByRef MatrixBase src);
+
+    // Bindings for viennacl::backend::memory_read(src_buffer, src_offset, bytes_to_read, ptr, async).
+    // JNI passes arguments by position, so the parameter names below follow the native order: the
+    // source offset comes first and the byte count second.
+    @Name("backend::memory_read")
+    public static native void memoryReadInt(@Const @ByRef MemHandle src_buffer,
+                                            int src_offset,
+                                            int bytes_to_read,
+                                            IntPointer ptr,
+                                            boolean async);
+
+    @Name("backend::memory_read")
+    public static native void memoryReadDouble(@Const @ByRef MemHandle src_buffer,
+                                               int src_offset,
+                                               int bytes_to_read,
+                                               DoublePointer ptr,
+                                               boolean async);
+
+    @Name("backend::memory_read")
+    public static native void memoryReadInt(@Const @ByRef MemHandle src_buffer,
+                                            int src_offset,
+                                            int bytes_to_read,
+                                            IntBuffer ptr,
+                                            boolean async);
+
+    @Name("backend::memory_read")
+    public static native void memoryReadDouble(@Const @ByRef MemHandle src_buffer,
+                                               int src_offset,
+                                               int bytes_to_read,
+                                               DoubleBuffer ptr,
+                                               boolean async);
+
+    @Name("backend::memory_read")
+    public static native void memoryReadBytes(@Const @ByRef MemHandle src_buffer,
+                                              int src_offset,
+                                              int bytes_to_read,
+                                              BytePointer ptr,
+                                              boolean async);
+
+    static {
+        Context.loadLib();
+    }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/LinalgFunctions.java
----------------------------------------------------------------------
diff --git a/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/LinalgFunctions.java b/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/LinalgFunctions.java
new file mode 100644
index 0000000..c2a40d9
--- /dev/null
+++ b/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/LinalgFunctions.java
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.viennacl.openmp.javacpp;
+
+import org.apache.mahout.viennacl.openmp.javacpp.*;
+import org.bytedeco.javacpp.annotation.*;
+
+
+/** JavaCPP declarations of the {@code prod} overloads from the {@code viennacl::linalg} namespace. */
+@Properties(inherit = Context.class, value = @Platform(library = "jniViennaCL"))
+@Namespace("viennacl::linalg")
+public final class LinalgFunctions {
+
+    // Runs Context.loadLib() once, when this class is initialized.
+    static {
+        Context.loadLib();
+    }
+
+    // Not instantiable: the class only carries static native declarations.
+    private LinalgFunctions() {
+    }
+
+    // prod(...) overloads whose declared return type is an expression wrapper.
+
+    @ByVal
+    public static native MatMatProdExpression prod(
+            @Const @ByRef MatrixBase a, @Const @ByRef MatrixBase b);
+
+    @ByVal
+    public static native ProdExpression prod(
+            @Const @ByRef CompressedMatrix a, @Const @ByRef CompressedMatrix b);
+
+    @ByVal
+    public static native MatVecProdExpression prod(
+            @Const @ByRef MatrixBase a, @Const @ByRef VectorBase b);
+
+    @ByVal
+    public static native SrMatDnMatProdExpression prod(
+            @Const @ByRef CompressedMatrix spMx, @Const @ByRef MatrixBase dMx);
+
+    // The same native name `prod`, bound under distinct Java names so that the declared return
+    // type is a concrete matrix class rather than an expression wrapper.
+
+    @ByVal @Name("prod")
+    public static native DenseColumnMatrix prodCm(
+            @Const @ByRef MatrixBase a, @Const @ByRef MatrixBase b);
+
+    @ByVal @Name("prod")
+    public static native DenseRowMatrix prodRm(
+            @Const @ByRef MatrixBase a, @Const @ByRef MatrixBase b);
+
+    @ByVal @Name("prod")
+    public static native DenseRowMatrix prodRm(
+            @Const @ByRef CompressedMatrix spMx, @Const @ByRef MatrixBase dMx);
+
+    // Disabled overloads on the concrete dense matrix types, kept for reference:
+//    @ByVal
+//    public static native MatrixProdExpression prod(@Const @ByRef DenseRowMatrix a,
+//                                                   @Const @ByRef DenseRowMatrix b);
+//
+//    @ByVal
+//    public static native MatrixProdExpression prod(@Const @ByRef DenseRowMatrix a,
+//                                                   @Const @ByRef DenseColumnMatrix b);
+//
+//    @ByVal
+//    public static native MatrixProdExpression prod(@Const @ByRef DenseColumnMatrix a,
+//                                                   @Const @ByRef DenseRowMatrix b);
+//
+//    @ByVal
+//    public static native MatrixProdExpression prod(@Const @ByRef DenseColumnMatrix a,
+//                                                   @Const @ByRef DenseColumnMatrix b);
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/MatrixTransExpression.scala
----------------------------------------------------------------------
diff --git a/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/MatrixTransExpression.scala b/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/MatrixTransExpression.scala
new file mode 100644
index 0000000..82574b4
--- /dev/null
+++ b/viennacl-omp/src/main/java/org/apache/mahout/viennacl/openmp/javacpp/MatrixTransExpression.scala
@@ -0,0 +1,34 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp;
+
+import org.bytedeco.javacpp.Pointer
+import org.bytedeco.javacpp.annotation.{Name, Namespace, Platform, Properties}
+
+
+/**
+  * Opaque JavaCPP peer for ViennaCL's `op_trans` matrix expression over `matrix_base<double>`.
+  */
+@Properties(
+  inherit = Array(classOf[Context]),
+  value = Array(new Platform(include = Array("matrix.hpp"), library = "jniViennaCL")))
+@Namespace("viennacl")
+@Name(Array(
+  "matrix_expression<const viennacl::matrix_base<double>, " +
+    "const viennacl::matrix_base<double>, " +
+    "viennacl::op_trans>"))
+class MatrixTransExpression extends Pointer

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/OMPMMul.scala
----------------------------------------------------------------------
diff --git a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/OMPMMul.scala b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/OMPMMul.scala
new file mode 100644
index 0000000..9a59999
--- /dev/null
+++ b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/OMPMMul.scala
@@ -0,0 +1,449 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.viennacl.openmp
+
+import org.apache.mahout.logging._
+import org.apache.mahout.math
+import org.apache.mahout.math._
+import org.apache.mahout.math.backend.incore.MMulSolver
+import org.apache.mahout.math.flavor.{BackEnum, TraversingStructureEnum}
+import org.apache.mahout.math.function.Functions
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import org.apache.mahout.math.scalabindings._
+import org.apache.mahout.viennacl.openmp.javacpp.Functions._
+import org.apache.mahout.viennacl.openmp.javacpp.LinalgFunctions._
+import org.apache.mahout.viennacl.openmp.javacpp.{CompressedMatrix, Context, 
DenseRowMatrix}
+
+import scala.collection.JavaConversions._
+
+object OMPMMul extends MMBinaryFunc {
+
+  private implicit val log = getLog(OMPMMul.getClass)
+
+  /**
+    * Multiply `a %*% b`, picking an algorithm from the operands' backing,
+    * traversal structure and density.
+    *
+    * If selecting or running the algorithm throws an [[Exception]], the
+    * failure is logged and the product is recomputed with the plain JVM
+    * `MMul`.
+    *
+    * @param a left operand
+    * @param b right operand; `b.nrow` must equal `a.ncol`
+    * @param r optional result matrix handed on to the selected algorithm
+    * @return the product
+    */
+  override def apply(a: Matrix, b: Matrix, r: Option[Matrix]): Matrix = {
+
+    require(a.ncol == b.nrow,
+      "Incompatible matrix sizes in matrix multiplication.")
+
+    val (af, bf) = (a.getFlavor, b.getFlavor)
+    val backs = (af.getBacking, bf.getBacking)
+    val sd = (af.getStructure, math.scalabindings.densityAnalysis(a),
+      bf.getStructure, densityAnalysis(b))
+
+    try {
+
+      // NOTE: the order of the cases matters, earlier cases shadow later ones.
+      val alg: MMulAlg = backs match {
+
+        // Both operands are jvm memory backs.
+        case (BackEnum.JVMMEM, BackEnum.JVMMEM) ⇒
+
+          sd match {
+
+            // Multiplication cases by a diagonal matrix.
+            case (TraversingStructureEnum.VECTORBACKED, _,
+            TraversingStructureEnum.COLWISE, _)
+              if a.isInstanceOf[DiagonalMatrix] ⇒ jvmDiagCW
+            case (TraversingStructureEnum.VECTORBACKED, _,
+            TraversingStructureEnum.SPARSECOLWISE, _)
+              if a.isInstanceOf[DiagonalMatrix] ⇒ jvmDiagCW
+            case (TraversingStructureEnum.VECTORBACKED, _,
+            TraversingStructureEnum.ROWWISE, _)
+              if a.isInstanceOf[DiagonalMatrix] ⇒ jvmDiagRW
+            case (TraversingStructureEnum.VECTORBACKED, _,
+            TraversingStructureEnum.SPARSEROWWISE, _)
+              if a.isInstanceOf[DiagonalMatrix] ⇒ jvmDiagRW
+
+            case (TraversingStructureEnum.COLWISE, _,
+            TraversingStructureEnum.VECTORBACKED, _)
+              if b.isInstanceOf[DiagonalMatrix] ⇒ jvmCWDiag
+            case (TraversingStructureEnum.SPARSECOLWISE, _,
+            TraversingStructureEnum.VECTORBACKED, _)
+              if b.isInstanceOf[DiagonalMatrix] ⇒ jvmCWDiag
+            case (TraversingStructureEnum.ROWWISE, _,
+            TraversingStructureEnum.VECTORBACKED, _)
+              if b.isInstanceOf[DiagonalMatrix] ⇒ jvmRWDiag
+            case (TraversingStructureEnum.SPARSEROWWISE, _,
+            TraversingStructureEnum.VECTORBACKED, _)
+              if b.isInstanceOf[DiagonalMatrix] ⇒ jvmRWDiag
+
+            // Dense-dense cases
+            case (TraversingStructureEnum.ROWWISE, true,
+            TraversingStructureEnum.COLWISE, true) if a eq b.t ⇒ ompDRWAAt
+            case (TraversingStructureEnum.ROWWISE, true,
+            TraversingStructureEnum.COLWISE, true) if a.t eq b ⇒ ompDRWAAt
+            case (TraversingStructureEnum.ROWWISE, true,
+            TraversingStructureEnum.COLWISE, true) ⇒ ompRWCW
+            case (TraversingStructureEnum.ROWWISE, true,
+            TraversingStructureEnum.ROWWISE, true) ⇒ jvmRWRW
+            case (TraversingStructureEnum.COLWISE, true,
+            TraversingStructureEnum.COLWISE, true) ⇒ jvmCWCW
+            case (TraversingStructureEnum.COLWISE, true,
+            TraversingStructureEnum.ROWWISE, true) if a eq b.t ⇒ jvmDCWAAt
+            case (TraversingStructureEnum.COLWISE, true,
+            TraversingStructureEnum.ROWWISE, true) if a.t eq b ⇒ jvmDCWAAt
+            case (TraversingStructureEnum.COLWISE, true,
+            TraversingStructureEnum.ROWWISE, true) ⇒ jvmCWRW
+
+            // Sparse row matrix x sparse row matrix (array of vectors)
+            case (TraversingStructureEnum.ROWWISE, false,
+            TraversingStructureEnum.ROWWISE, false) ⇒ ompSparseRWRW
+            case (TraversingStructureEnum.ROWWISE, false,
+            TraversingStructureEnum.COLWISE, false) ⇒ jvmSparseRWCW
+            case (TraversingStructureEnum.COLWISE, false,
+            TraversingStructureEnum.ROWWISE, false) ⇒ jvmSparseCWRW
+            case (TraversingStructureEnum.COLWISE, false,
+            TraversingStructureEnum.COLWISE, false) ⇒ jvmSparseCWCW
+
+            // Sparse matrix x sparse matrix (hashtable of vectors)
+            case (TraversingStructureEnum.SPARSEROWWISE, false,
+            TraversingStructureEnum.SPARSEROWWISE, false) ⇒
+              ompSparseRowRWRW
+            case (TraversingStructureEnum.SPARSEROWWISE, false,
+            TraversingStructureEnum.SPARSECOLWISE, false) ⇒
+              jvmSparseRowRWCW
+            case (TraversingStructureEnum.SPARSECOLWISE, false,
+            TraversingStructureEnum.SPARSEROWWISE, false) ⇒
+              jvmSparseRowCWRW
+            case (TraversingStructureEnum.SPARSECOLWISE, false,
+            TraversingStructureEnum.SPARSECOLWISE, false) ⇒
+              jvmSparseRowCWCW
+
+            // Sparse matrix x non-like
+            case (TraversingStructureEnum.SPARSEROWWISE, false,
+            TraversingStructureEnum.ROWWISE, _) ⇒ ompSparseRowRWRW
+            case (TraversingStructureEnum.SPARSEROWWISE, false,
+            TraversingStructureEnum.COLWISE, _) ⇒ jvmSparseRowRWCW
+            case (TraversingStructureEnum.SPARSECOLWISE, false,
+            TraversingStructureEnum.ROWWISE, _) ⇒ jvmSparseRowCWRW
+            case (TraversingStructureEnum.SPARSECOLWISE, false,
+            TraversingStructureEnum.COLWISE, _) ⇒ jvmSparseCWCW
+            case (TraversingStructureEnum.ROWWISE, _,
+            TraversingStructureEnum.SPARSEROWWISE, false) ⇒ ompSparseRWRW
+            case (TraversingStructureEnum.ROWWISE, _,
+            TraversingStructureEnum.SPARSECOLWISE, false) ⇒ jvmSparseRWCW
+            case (TraversingStructureEnum.COLWISE, _,
+            TraversingStructureEnum.SPARSEROWWISE, false) ⇒ jvmSparseCWRW
+            case (TraversingStructureEnum.COLWISE, _,
+            TraversingStructureEnum.SPARSECOLWISE, false) ⇒ jvmSparseRowCWCW
+
+            // Everything else including at least one sparse LHS or RHS
+            // argument
+            case (TraversingStructureEnum.ROWWISE, false,
+            TraversingStructureEnum.ROWWISE, _) ⇒ ompSparseRWRW
+            case (TraversingStructureEnum.ROWWISE, false,
+            TraversingStructureEnum.COLWISE, _) ⇒ jvmSparseRWCW
+            case (TraversingStructureEnum.COLWISE, false,
+            TraversingStructureEnum.ROWWISE, _) ⇒ jvmSparseCWRW
+            case (TraversingStructureEnum.COLWISE, false,
+            TraversingStructureEnum.COLWISE, _) ⇒ jvmSparseCWCW2flips
+
+            // Sparse methods are only effective if the first argument is
+            // sparse, so we need to do a swap: a %*% b == (b' %*% a')'.
+            case (_, _, _, false) ⇒ (a, b, r) ⇒ apply(b.t, a.t, r.map {
+              _.t
+            }).t
+
+            // Default jvm-jvm case.
+            // For some reason a SparseRowMatrix DRM %*% SparseRowMatrix DRM
+            // was dumping off to here.
+            case _ ⇒ ompRWCW
+          }
+      }
+
+      alg(a, b, r)
+    } catch {
+      // TODO FASTHACK: just revert to JVM if there is an exception,
+      //  e.g. java.lang.NullPointerException if more OpenCL contexts
+      //  have been created than number of GPU cards.
+      //  A better option would be to fall back to OpenCL first.
+      case ex: Exception ⇒
+        // Report through the logger rather than stdout so the fallback shows
+        // up in the regular application logs.
+        log.warn(s"${ex.getMessage}; falling back to JVM MMUL")
+        MMul(a, b, r)
+    }
+  }
+
+  type MMulAlg = MMBinaryFunc
+
+  /**
+    * Multiply `a %*% b` natively through ViennaCL/OpenMP in host memory.
+    *
+    * `a` is always converted to a VCL compressed (CSR) matrix. `b` becomes a
+    * dense row-major VCL matrix when its flavor is dense and a CSR matrix
+    * otherwise. When a matrix that would have to become CSR looks empty
+    * (`zSum()` is not above zero) the plain JVM `MMul` is used instead.
+    *
+    * NOTE(review): on the native path the optional result `r` is not written
+    * to; a freshly converted matrix is returned. Confirm callers do not rely
+    * on `r` being filled.
+    *
+    * @param a left operand
+    * @param b right operand
+    * @param r optional result matrix; only handed to the JVM fallback
+    * @return the product
+    */
+  @inline
+  private def ompRWCW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = {
+    log.info("Using ompRWCW method")
+
+    // A has a sparse matrix structure of unknown size. We do not want to
+    // simply convert it to a dense matrix, which may result in an OOM error.
+    // If it is empty use JVM MMul, since we can not convert it to a VCL CSR
+    // matrix.
+    val hasElementsA = a.zSum() > 0.0
+
+    if (!hasElementsA) {
+      log.warn("Matrix a has zero elements can not convert to CSR")
+      MMul(a, b, r)
+    } else if (b.getFlavor.isDense) {
+      // CSR matrices are efficient up to 50% non-zero
+      val started = System.currentTimeMillis()
+      val hostClCtx = new Context(Context.MAIN_MEMORY)
+      val oclA = toVclCmpMatrixAlt(a, hostClCtx)
+      // Nested try/finally: every native matrix that got allocated is
+      // released even if a later conversion or the product itself throws.
+      try {
+        val oclB = toVclDenseRM(b, hostClCtx)
+        try {
+          val oclC = new DenseRowMatrix(prod(oclA, oclB))
+          try {
+            val mxC = fromVclDenseRM(oclC)
+            val ms = System.currentTimeMillis() - started
+            debug(s"ViennaCL/OpenMP multiplication time: $ms ms.")
+            mxC
+          } finally oclC.close()
+        } finally oclB.close()
+      } finally oclA.close()
+    } else if (!(b.zSum() > 0.0)) {
+      // b is sparse and looks empty: it can not be converted to CSR either.
+      log.warn("Matrix a or b has zero elements can not convert to CSR")
+      MMul(a, b, r)
+    } else {
+      val started = System.currentTimeMillis()
+      val hostClCtx = new Context(Context.MAIN_MEMORY)
+      val oclA = toVclCmpMatrixAlt(a, hostClCtx)
+      try {
+        val oclB = toVclCmpMatrixAlt(b, hostClCtx)
+        try {
+          val oclC = new CompressedMatrix(prod(oclA, oclB))
+          try {
+            val mxC = fromVclCompressedMatrix(oclC)
+            val ms = System.currentTimeMillis() - started
+            debug(s"ViennaCL/OpenMP multiplication time: $ms ms.")
+            mxC
+          } finally oclC.close()
+        } finally oclB.close()
+      } finally oclA.close()
+    }
+  }
+
+
+  /**
+    * Row-wise `a` times row-wise `b`: copies `b` into a column-wise matrix
+    * and delegates to `ompRWCW`.
+    */
+  @inline
+  private def jvmRWRW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = {
+    log.info("Using jvmRWRW method")
+
+    // A bit hackish: currently, this relies a bit on the fact that like
+    // produces RW(?), so the transposed view of it is column-wise.
+    val colWiseB = b.like(b.ncol, b.nrow).t
+    b.foreach { brow ⇒ colWiseB(brow.index(), ::) := brow }
+
+    val layout = colWiseB.getFlavor.getStructure
+    require(
+      layout == TraversingStructureEnum.COLWISE ||
+        layout == TraversingStructureEnum.SPARSECOLWISE,
+      "COL wise conversion assumption of RHS is wrong, do over this code.")
+
+    ompRWCW(a, colWiseB, r)
+  }
+
+  /**
+    * Column-wise times column-wise, computed as `(b' %*% a')'` so that the
+    * row-wise routine can be reused.
+    */
+  private def jvmCWCW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = {
+    log.info("Using jvmCWCW method")
+    val flipped = jvmRWRW(b.t, a.t, r.map(mxR ⇒ mxR.t))
+    flipped.t
+  }
+
+  /**
+    * Column-wise `a` times row-wise `b`: clones `a` (expected to come back
+    * row-wise) and delegates to `jvmRWRW`.
+    */
+  private def jvmCWRW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = {
+    log.info("Using jvmCWRW method")
+    // This is a primary contender with Outer Prod sum algo.
+    // Here, we force-reorient both matrices and run RWCW.
+    // A bit hackish: currently, this relies a bit on the fact that clone
+    // always produces RW(?)
+    val aclone = a.cloned
+
+    // The matrix being checked is the left-hand operand, so say so.
+    val layout = aclone.getFlavor.getStructure
+    require(
+      layout == TraversingStructureEnum.ROWWISE ||
+        layout == TraversingStructureEnum.SPARSEROWWISE,
+      "Row wise conversion assumption of LHS is wrong, do over this code.")
+
+    jvmRWRW(aclone, b, r)
+  }
+
+  /**
+    * Sparse left operand times any right operand, computed natively through
+    * ViennaCL/OpenMP in host memory.
+    *
+    * `a` is converted to a VCL compressed (CSR) matrix; `b` becomes a dense
+    * row-major VCL matrix when its flavor is dense and a CSR matrix
+    * otherwise. When a matrix that would have to become CSR looks empty the
+    * plain JVM `MMul` is used instead.
+    *
+    * NOTE(review): on the native path the optional result `r` is not written
+    * to; a freshly converted matrix is returned.
+    *
+    * @param a left (sparse) operand
+    * @param b right operand
+    * @param r optional result matrix; only handed to the JVM fallback
+    * @return the product
+    */
+  private def ompSparseRWRW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = {
+    log.info("Using ompSparseRWRW method")
+
+    /* Make sure that the matrix is not empty.  VCL {{compressed_matrix}}s must
+       have nnz > 0
+       N.B. This method is horribly inefficient. However there is a difference
+       between getNumNonDefaultElements() and getNumNonZeroElements() which we
+       do not always have access to. We created MAHOUT-1882 for this.
+    */
+    val hasElementsA = a.zSum() > 0.0
+
+    // A has a sparse matrix structure of unknown size.  We do not want to
+    // simply convert it to a Dense Matrix which may result in an OOM error.
+    // If it is empty use JVM MMul, since we can not convert it to a VCL CSR
+    // Matrix.
+    if (!hasElementsA) {
+      log.warn("Matrix a has zero elements can not convert to CSR")
+      MMul(a, b, r)
+    } else if (b.getFlavor.isDense) {
+      // CSR matrices are efficient up to 50% non-zero
+      val started = System.currentTimeMillis()
+      val hostClCtx = new Context(Context.MAIN_MEMORY)
+      val oclA = toVclCmpMatrixAlt(a, hostClCtx)
+      // Nested try/finally: every native matrix that got allocated is
+      // released even if a later conversion or the product itself throws.
+      try {
+        val oclB = toVclDenseRM(b, hostClCtx)
+        try {
+          val oclC = new DenseRowMatrix(prod(oclA, oclB))
+          try {
+            val mxC = fromVclDenseRM(oclC)
+            val ms = System.currentTimeMillis() - started
+            log.debug(s"ViennaCL/OpenMP multiplication time: $ms ms.")
+            mxC
+          } finally oclC.close()
+        } finally oclB.close()
+      } finally oclA.close()
+    } else if (!(b.zSum() > 0.0)) {
+      // Fall back to JVM based MMul if either matrix is sparse and empty
+      log.warn("Matrix a or b has zero elements can not convert to CSR")
+      MMul(a, b, r)
+    } else {
+      val started = System.currentTimeMillis()
+      val hostClCtx = new Context(Context.MAIN_MEMORY)
+      val oclA = toVclCmpMatrixAlt(a, hostClCtx)
+      try {
+        val oclB = toVclCmpMatrixAlt(b, hostClCtx)
+        try {
+          val oclC = new CompressedMatrix(prod(oclA, oclB))
+          try {
+            val mxC = fromVclCompressedMatrix(oclC)
+            val ms = System.currentTimeMillis() - started
+            log.debug(s"ViennaCL/OpenMP multiplication time: $ms ms.")
+            mxC
+          } finally oclC.close()
+        } finally oclB.close()
+      } finally oclA.close()
+    }
+  }
+
+  /**
+    * Sparse `a` times `b` through ViennaCL/OpenMP: `a` is converted to a VCL
+    * compressed (CSR) matrix and `b` is always converted to a dense
+    * row-major VCL matrix. Falls back to the JVM `MMul` when `a` looks empty
+    * (`zSum()` is not above zero).
+    *
+    * NOTE(review): on the native path the optional result `r` is not written
+    * to; a freshly converted matrix is returned.
+    *
+    * @param a left (sparse) operand
+    * @param b right operand, handled as dense
+    * @param r optional result matrix; only handed to the JVM fallback
+    * @return the product
+    */
+  private def ompSparseRowRWRW(a: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = {
+    log.info("Using ompSparseRowRWRW method")
+    val hasElementsA = a.zSum() > 0
+
+    // A has a sparse matrix structure of unknown size.  We do not want to
+    // simply convert it to a Dense Matrix which may result in an OOM error.
+    // If it is empty fall back to JVM MMul, since we can not convert it
+    // to a VCL CSR Matrix.
+    if (!hasElementsA) {
+      log.warn("Matrix a has zero elements can not convert to CSR")
+      MMul(a, b, r)
+    } else {
+      val started = System.currentTimeMillis()
+      val hostClCtx = new Context(Context.MAIN_MEMORY)
+      val oclA = toVclCmpMatrixAlt(a, hostClCtx)
+      // Nested try/finally: every native matrix that got allocated is
+      // released even if a later conversion or the product itself throws.
+      try {
+        val oclB = toVclDenseRM(b, hostClCtx)
+        try {
+          val oclC = new DenseRowMatrix(prod(oclA, oclB))
+          try {
+            val mxC = fromVclDenseRM(oclC)
+            val ms = System.currentTimeMillis() - started
+            log.debug(s"ViennaCL/OpenMP multiplication time: $ms ms.")
+            mxC
+          } finally oclC.close()
+        } finally oclB.close()
+      } finally oclA.close()
+    }
+  }
+
+  /** Column-wise times column-wise, computed as `(b' %*% a')'`. */
+  private def jvmSparseRowCWCW(a: Matrix, b: Matrix, r: Option[Matrix] = None) = {
+    val flipped = ompSparseRowRWRW(b.t, a.t, r.map(mxR ⇒ mxR.t))
+    flipped.t
+  }
+
+  /**
+    * Column-wise times column-wise: clones both operands and multiplies the
+    * clones with `ompSparseRowRWRW`.
+    */
+  private def jvmSparseRowCWCW2flips(a: Matrix, b: Matrix, r: Option[Matrix] = None) =
+    // Method-call syntax instead of the postfix operator form `a cloned`.
+    ompSparseRowRWRW(a.cloned, b.cloned, r)
+
+  /** Row-wise times column-wise: clones `b` and delegates to `ompSparseRowRWRW`. */
+  private def jvmSparseRowRWCW(a: Matrix, b: Matrix, r: Option[Matrix]) =
+    // Method-call syntax instead of the postfix operator form `b cloned`.
+    ompSparseRowRWRW(a, b.cloned, r)
+
+  /** Column-wise times row-wise: clones `a` and delegates to `ompSparseRowRWRW`. */
+  private def jvmSparseRowCWRW(a: Matrix, b: Matrix, r: Option[Matrix]) =
+    // Method-call syntax instead of the postfix operator form `a cloned`.
+    ompSparseRowRWRW(a.cloned, b, r)
+
+  /** Row-wise times column-wise: clones `b` and delegates to `ompSparseRWRW`. */
+  private def jvmSparseRWCW(a: Matrix, b: Matrix, r: Option[Matrix] = None) = {
+    val bClone = b.cloned
+    ompSparseRWRW(a, bClone, r)
+  }
+
+  /** Column-wise times row-wise: clones `a` and delegates to `ompSparseRWRW`. */
+  private def jvmSparseCWRW(a: Matrix, b: Matrix, r: Option[Matrix] = None) =
+    // Method-call syntax instead of the postfix operator form `a cloned`.
+    ompSparseRWRW(a.cloned, b, r)
+
+  /** Column-wise times column-wise, computed as `(b' %*% a')'`. */
+  private def jvmSparseCWCW(a: Matrix, b: Matrix, r: Option[Matrix] = None) = {
+    val flipped = ompSparseRWRW(b.t, a.t, r.map(mxR ⇒ mxR.t))
+    flipped.t
+  }
+
+  /**
+    * Column-wise times column-wise: clones both operands and multiplies the
+    * clones with `ompSparseRWRW`.
+    */
+  private def jvmSparseCWCW2flips(a: Matrix, b: Matrix, r: Option[Matrix] = None) =
+    // Method-call syntax instead of the postfix operator form `a cloned`.
+    ompSparseRWRW(a.cloned, b.cloned, r)
+
+  /**
+    * Diagonal matrix times row-wise `b`, computed on the JVM: for every
+    * non-zero diagonal element, the matching row of `b` scaled by that
+    * element is added into the matching row of the result.
+    */
+  private def jvmDiagRW(diagm: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = {
+    log.info("Using jvmDiagRW method")
+    val result = r.getOrElse(b.like(diagm.nrow, b.ncol))
+
+    diagm.diagv.nonZeroes().foreach { del ⇒
+      val target = result(del.index, ::)
+      target.assign(b(del.index, ::), Functions.plusMult(del))
+    }
+
+    result
+  }
+
+  /**
+    * Diagonal matrix times column-wise `b`, computed on the JVM: every column
+    * of `b` is multiplied element-wise by the diagonal and stored into the
+    * matching column of the result.
+    */
+  private def jvmDiagCW(diagm: Matrix, b: Matrix, r: Option[Matrix] = None): Matrix = {
+    log.info("Using jvmDiagCW method")
+    val result = r.getOrElse(b.like(diagm.nrow, b.ncol))
+    // Rows of the transposed view are the columns of b.
+    b.t.foreach { bcol ⇒
+      result(::, bcol.index()) := bcol * diagm.diagv
+    }
+    result
+  }
+
+  /** Column-wise `a` times diagonal, computed as `(diagm %*% a')'` via `jvmDiagRW`. */
+  private def jvmCWDiag(a: Matrix, diagm: Matrix, r: Option[Matrix] = None) = {
+    val flipped = jvmDiagRW(diagm, a.t, r.map(mxR ⇒ mxR.t))
+    flipped.t
+  }
+
+  /** Row-wise `a` times diagonal, computed as `(diagm %*% a')'` via `jvmDiagCW`. */
+  private def jvmRWDiag(a: Matrix, diagm: Matrix, r: Option[Matrix] = None) = {
+    val flipped = jvmDiagCW(diagm, a.t, r.map(mxR ⇒ mxR.t))
+    flipped.t
+  }
+
+  /**
+    * Dense column-wise AA': clones `a` and hands the clone to `ompDRWAAt`.
+    *
+    * @param a left operand; `a.t` must be equivalent to `b`
+    * @param b right operand, forwarded unchanged
+    * @param r optional result matrix, forwarded unchanged
+    */
+  private def jvmDCWAAt(a:Matrix, b:Matrix, r:Option[Matrix] = None) = {
+    // a.t must be equiv. to b. Cloning must rewrite to row-wise.
+    // Forward the real right operand instead of a literal null so that the
+    // callee never sees a null reference.
+    ompDRWAAt(a.cloned, b, r)
+  }
+
+  /**
+    * Dense row-wise AA', computed natively through ViennaCL/OpenMP in host
+    * memory: `a` is converted to a dense row-major VCL matrix, transposed
+    * natively and multiplied with its transpose.
+    *
+    * We probably will not want to use this for the actual release unless A
+    * is cached already, but adding for testing purposes.
+    *
+    * NOTE(review): the optional result `r` is only dimension-checked; it is
+    * not written to, a freshly converted matrix is returned.
+    *
+    * @param a the matrix A
+    * @param b not read here; `a.t` must be equivalent to it
+    * @param r optional result matrix; must be `a.nrow x a.nrow` if given
+    */
+  private def ompDRWAAt(a:Matrix, b:Matrix, r:Option[Matrix] = None) = {
+    // a.t must be equiv to b.
+    log.info("Executing on OMP")
+    log.debug("AAt computation detected; passing off to OMP")
+
+    // Check dimensions if result is supplied.
+    require(r.forall(mxR ⇒ mxR.nrow == a.nrow && mxR.ncol == a.nrow))
+
+    val started = System.currentTimeMillis()
+    val hostClCtx = new Context(Context.MAIN_MEMORY)
+    val oclA = toVclDenseRM(a, hostClCtx)
+    // Nested try/finally: every native matrix that got allocated is released
+    // even if the transpose or the product throws.
+    try {
+      val oclAt = new DenseRowMatrix(trans(oclA))
+      try {
+        val oclC = new DenseRowMatrix(prod(oclA, oclAt))
+        try {
+          val mxC = fromVclDenseRM(oclC)
+          val ms = System.currentTimeMillis() - started
+          log.debug(s"ViennaCL/OpenMP multiplication time: $ms ms.")
+          mxC
+        } finally oclC.close()
+      } finally oclAt.close()
+    } finally oclA.close()
+  }
+
+  /**
+    * Multiply `a %*% b` on the JVM as a sum of outer products of the columns
+    * of `a` with the rows of `b`, accumulating into the result with `+=`.
+    *
+    * @param a left operand
+    * @param b right operand
+    * @param r optional result matrix to accumulate into; when absent a new
+    *          matrix is created from `a` or `b` via `like`
+    * @return the matrix the outer products were accumulated into
+    */
+  private def jvmOuterProdSum(a: Matrix, b: Matrix, r: Option[Matrix] = None): 
Matrix = {
+    log.info("Using jvmOuterProdSum method")
+    // Need to check whether this is already laid out for outer product
+    // computation, which may be faster than reorienting both matrices.
+    val (m, n) = (a.nrow, b.ncol)
+
+    // Prefer col-wise result iff a is dense and b is sparse. In all other
+    // cases default to row-wise.
+    val preferColWiseR = a.getFlavor.isDense && !b.getFlavor.isDense
+
+    // The result takes after the sparse operand if a is sparse, otherwise
+    // after a; the column-wise variants are transposed views of an n x m
+    // matrix.
+    val mxR = r.getOrElse {
+      (a.getFlavor.isDense, preferColWiseR) match {
+        case (false, false) ⇒ b.like(m, n)
+        case (false, true) ⇒ b.like(n, m).t
+        case (true, false) ⇒ a.like(m, n)
+        case (true, true) ⇒ a.like(n, m).t
+      }
+    }
+
+    // Loop outer products
+    if (preferColWiseR) {
+      // This means B is sparse and A is not, so we need to iterate over b
+      // values and update R columns with += one at a time.
+      for ((acol, brow) ← a.t.zip(b); bel ← brow.nonZeroes) mxR(::, 
bel.index()) += bel * acol
+    } else {
+      // Otherwise iterate over the non-zeroes of each column of a and update
+      // R rows with += one at a time.
+      for ((acol, brow) ← a.t.zip(b); ael ← acol.nonZeroes()) 
mxR(ael.index(), ::) += ael * brow
+    }
+
+    mxR
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/CompressedMatrix.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/CompressedMatrix.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/CompressedMatrix.scala
new file mode 100644
index 0000000..3c5a6eb
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/CompressedMatrix.scala
@@ -0,0 +1,135 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import java.nio._
+
+import org.bytedeco.javacpp._
+import org.bytedeco.javacpp.annotation._
+
+import scala.collection.mutable.ArrayBuffer
+
+
+/**
+  * JavaCPP binding for the native type `viennacl::compressed_matrix<double>`.
+  *
+  * Besides the native handle, an instance keeps a list of additional
+  * pointers that are closed when the matrix itself is deallocated.
+  *
+  * @param defaultCtr when `true` the native default constructor is invoked;
+  *                   the auxiliary constructors pass `false` and call one of
+  *                   the other `allocate` overloads themselves
+  */
+@Properties(inherit = Array(classOf[Context]),
+  value = Array(new Platform(
+    include = Array("compressed_matrix.hpp"),
+    library="jniViennaCL"
+  )))
+@Name(Array("viennacl::compressed_matrix<double>"))
+final class CompressedMatrix(defaultCtr: Boolean = true) extends Pointer {
+
+  // Pointers to be closed together with this matrix, see deallocate().
+  protected val ptrs = new ArrayBuffer[Pointer]()
+
+  /**
+    * Remember `p` so that it is closed when this matrix is deallocated.
+    *
+    * Call this after set, or better yet TODO: wrap set() in a public method
+    * that will call this.
+    */
+  def registerPointersForDeallocation(p:Pointer): Unit = {
+    ptrs += p
+  }
+
+  /** Deallocate via the superclass first, then close every registered pointer. */
+  override def deallocate(deallocate: Boolean): Unit = {
+    super.deallocate(deallocate)
+     ptrs.foreach(_.close())
+  }
+
+  if (defaultCtr) allocate()
+
+  /** Allocate an `nrow` x `ncol` matrix with a default-constructed [[Context]]. */
+  def this(nrow: Int, ncol: Int) {
+    this(false)
+    allocate(nrow, ncol, new Context)
+  }
+
+  /** Allocate an `nrow` x `ncol` matrix in the given context. */
+  def this(nrow: Int, ncol: Int, ctx: Context) {
+    this(false)
+    allocate(nrow, ncol, ctx)
+  }
+
+  /**
+    * Allocate an `nrow` x `ncol` matrix for `nonzeros` entries with a
+    * default-constructed [[Context]].
+    */
+  def this(nrow: Int, ncol: Int, nonzeros: Int) {
+    this(false)
+    allocate(nrow, ncol, nonzeros, new Context)
+  }
+
+  /** Allocate an `nrow` x `ncol` matrix for `nonzeros` entries in the given context. */
+  def this(nrow: Int, ncol: Int, nonzeros: Int, ctx: Context) {
+    this(false)
+    allocate(nrow, ncol, nonzeros, ctx)
+  }
+
+  /** Construct the native matrix from a product expression. */
+  def this(pe: ProdExpression) {
+    this(false)
+    allocate(pe)
+  }
+
+  // Native constructors; each maps to one native constructor overload.
+  @native protected def allocate()
+
+  @native protected def allocate(nrow: Int, ncol: Int, nonzeros: Int, @ByVal 
ctx: Context)
+
+  @native protected def allocate(nrow: Int, ncol: Int, @ByVal ctx: Context)
+
+  @native protected def allocate(@Const @ByRef pe: ProdExpression)
+
+//  @native protected def allocate(db: DoubleBuffer)
+//
+//  @native protected def allocate(ib: IntBuffer)
+
+  // Warning: apparently there are differences in bit interpretation between
+  // OpenCL and everything else for unsigned int type. So, for OpenCL backend,
+  // rowJumper and colIndices have to be packed with reference to that cl_uint
+  // type that Vienna-CL defines.
+  /** Native `set` taking the three compressed-matrix arrays as NIO buffers. */
+  @native def set(@Cast(Array("const void*")) rowJumper: IntBuffer,
+                  @Cast(Array("const void*")) colIndices: IntBuffer,
+                  @Const elements: DoubleBuffer,
+                  nrow: Int,
+                  ncol: Int,
+                  nonzeros: Int
+                 )
+
+  /** With javacpp pointers. */
+  @native def set(@Cast(Array("const void*")) rowJumper: IntPointer,
+                  @Cast(Array("const void*")) colIndices: IntPointer,
+                  @Const elements: DoublePointer,
+                  nrow: Int,
+                  ncol: Int,
+                  nonzeros: Int
+                 )
+
+  /** Native `operator=` taking a product expression. */
+  @Name(Array("operator="))
+  @native def :=(@Const @ByRef pe: ProdExpression)
+
+  @native def generate_row_block_information()
+
+  /** getters for the compressed_matrix size */
+  //const vcl_size_t & size1() const { return rows_; }
+  @native def size1: Int
+  //const vcl_size_t & size2() const { return cols_; }
+  @native def size2: Int
+  //const vcl_size_t & nnz() const { return nonzeros_; }
+  @native def nnz: Int
+  //const vcl_size_t & blocks1() const { return row_block_num_; }
+ // @native def blocks1: Int
+
+  /** getters for the compressed_matrix buffers */
+  //const handle_type & handle1() const { return row_buffer_; }
+  @native @Const @ByRef def handle1: MemHandle
+  //const handle_type & handle2() const { return col_buffer_; }
+  @native @Const @ByRef def handle2: MemHandle
+  //const handle_type & handle3() const { return row_blocks_; }
+  @native @Const @ByRef def handle3: MemHandle
+  //const handle_type & handle() const { return elements_; }
+  @native @Const @ByRef def handle: MemHandle
+
+}
+
+/** Companion whose initializer loads the native library via `Context.loadLib()`. */
+object CompressedMatrix {
+  Context.loadLib()
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/Context.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/Context.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/Context.scala
new file mode 100644
index 0000000..ae1b782
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/Context.scala
@@ -0,0 +1,58 @@
+package org.apache.mahout.viennacl.openmp.javacpp
+
+
+import org.bytedeco.javacpp.annotation._
+import org.bytedeco.javacpp.{Loader, Pointer}
+
+/**
+  * JavaCPP binding for the native type `viennacl::context`.
+  *
+  * This assumes viennacl 1.7.1 is installed, which in ubuntu Xenial defaults
+  * to /usr/include/viennacl, and is installed via
+  * {{{
+  *   sudo apt-get install libviennacl-dev
+  * }}}
+  *
+  * NOTE(review): the platform below links against `OpenCL` while defining
+  * `VIENNACL_WITH_OPENMP` — confirm that this is intended for the OpenMP
+  * module.
+  *
+  * @param mtype memory type, one of the constants in the companion object;
+  *              with the default `MEMORY_NOT_INITIALIZED` the native default
+  *              constructor is used, otherwise the value is passed to the
+  *              native constructor as `viennacl::memory_types`
+  */
+@Properties(Array(
+  new Platform(
+    includepath = Array("/usr/include/viennacl"),
+    include = Array("matrix.hpp", "compressed_matrix.hpp"),
+    define = Array("VIENNACL_WITH_OPENMP"),
+    compiler = Array("fastfpu","viennacl"),
+    link = Array("OpenCL"),
+    library = "jniViennaCL"
+  )))
+@Namespace("viennacl")
+@Name(Array("context"))
+final class Context(mtype: Int = Context.MEMORY_NOT_INITIALIZED) extends 
Pointer {
+
+  import Context._
+
+  // Pick the native constructor depending on whether a memory type was given.
+  if (mtype == MEMORY_NOT_INITIALIZED)
+    allocate()
+  else
+    allocate(mtype)
+
+  @native protected def allocate()
+
+  @native protected def allocate(@Cast(Array("viennacl::memory_types")) mtype: 
Int)
+
+  /** Native `memory_type()`, cast to `int`. */
+  @Name(Array("memory_type"))
+  @Cast(Array("int"))
+  @native def memoryType: Int
+
+}
+
+/**
+  * Companion of [[Context]]: loads the native library when first initialized
+  * and holds the memory type constants.
+  */
+object Context {
+
+  /** Load the native library through JavaCPP's `Loader` for [[Context]]. */
+  def loadLib() = Loader.load(classOf[Context])
+
+  // Load eagerly as part of the companion's initialization.
+  loadLib()
+
+  /* Memory types. Ported from VCL header files. */
+  val MEMORY_NOT_INITIALIZED = 0
+  val MAIN_MEMORY = 1
+  val OPENCL_MEMORY = 2
+  val CUDA_MEMORY = 3
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/DenseColumnMatrix.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/DenseColumnMatrix.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/DenseColumnMatrix.scala
new file mode 100644
index 0000000..71bdab2
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/DenseColumnMatrix.scala
@@ -0,0 +1,95 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp.DoublePointer
+import org.bytedeco.javacpp.annotation._
+
+/**
+  * ViennaCL dense matrix, column-major. This is an exact duplication of
+  * [[DenseRowMatrix]], and is only different in the materialized C++
+  * template name. Unfortunately, a way to share one definition between the
+  * two has not been figured out so far.
+  *
+  * Also, the [[Platform.library]] does not get inherited for some reason, and
+  * we really want to collect all class mappings in the same one libjni.so, so
+  * we have to repeat this `library` definition in every mapped class in this
+  * package. (One .so per package convention).
+  *
+  * @param initDefault when `true` the native default constructor is invoked;
+  *                    the auxiliary constructors pass `false` and call one of
+  *                    the other `allocate` overloads themselves
+  */
+@Properties(inherit = Array(classOf[Context]),
+  value = Array(new Platform (
+    include=Array("matrix.hpp"),
+    library="jniViennaCL"
+  )))
+@Name(Array("viennacl::matrix<double,viennacl::column_major>"))
+final class DenseColumnMatrix(initDefault:Boolean = true) extends MatrixBase {
+
+  /** Allocate an `nrow` x `ncol` matrix with a default-constructed [[Context]]. */
+  def this(nrow: Int, ncol: Int) {
+    this(false)
+    allocate(nrow, ncol, new Context())
+  }
+
+  /** Allocate an `nrow` x `ncol` matrix in the given context. */
+  def this(nrow: Int, ncol: Int, ctx: Context) {
+    this(false)
+    allocate(nrow, ncol, ctx)
+  }
+
+  /**
+    * Construct from `data`, using the memory type of a new
+    * `Context.MAIN_MEMORY` context.
+    */
+  def this(data: DoublePointer, nrow: Int, ncol: Int) {
+    this(false)
+    allocate(data, new Context(Context.MAIN_MEMORY).memoryType, nrow, ncol)
+    // We save it to deallocate it at deallocation time.
+    ptrs += data
+  }
+
+  /** Construct from `data`, using the memory type of `ctx`. */
+  def this(data: DoublePointer, nrow: Int, ncol: Int, ctx: Context) {
+    this(false)
+    allocate(data, ctx.memoryType, nrow, ncol)
+    // We save it to deallocate it at deallocation time.
+    ptrs += data
+  }
+
+  /** Construct the native matrix from a matrix-matrix product expression. */
+  def this(me: MatMatProdExpression) {
+    this(false)
+    allocate(me)
+  }
+
+  /** Construct the native matrix from a matrix transposition expression. */
+  def this(me: MatrixTransExpression) {
+    this(false)
+    allocate(me)
+  }
+
+
+  if (initDefault) allocate()
+
+  // Native constructors; each maps to one native constructor overload.
+  @native protected def allocate()
+
+  @native protected def allocate(nrow: Int, ncol: Int, @ByVal ctx: Context)
+
+  @native protected def allocate(data: DoublePointer,
+                                 @Cast(Array("viennacl::memory_types"))
+                                 memType: Int,
+                                 nrow: Int,
+                                 ncol: Int
+                                )
+
+  @native protected def allocate(@Const @ByRef me: MatMatProdExpression)
+
+  @native protected def allocate(@Const @ByRef me: MatrixTransExpression)
+
+}
+
+/** Companion whose initializer loads the native library via `Context.loadLib()`. */
+object DenseColumnMatrix {
+  Context.loadLib()
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/DenseRowMatrix.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/DenseRowMatrix.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/DenseRowMatrix.scala
new file mode 100644
index 0000000..7bbd3a0
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/DenseRowMatrix.scala
@@ -0,0 +1,81 @@
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp.DoublePointer
+import org.bytedeco.javacpp.annotation._
+
+/**
+  * ViennaCL dense matrix, row-major; mapped by JavaCPP to
+  * `viennacl::matrix<double,viennacl::row_major>`.
+  *
+  * @param initDefault when true, the no-argument native `allocate()` runs during construction;
+  *                    every auxiliary constructor passes false and calls its own overload.
+  */
+@Properties(
+  inherit = Array(classOf[Context]),
+  value = Array(new Platform(library = "jniViennaCL"))
+)
+@Name(Array("viennacl::matrix<double,viennacl::row_major>"))
+class DenseRowMatrix(initDefault: Boolean = true) extends MatrixBase {
+
+  /** Allocates from `nrow` and `ncol` with a default-constructed `Context`. */
+  def this(nrow: Int, ncol: Int) = {
+    this(false)
+    allocate(nrow, ncol, new Context())
+  }
+
+  /** Allocates from `nrow` and `ncol` with the supplied `ctx`. */
+  def this(nrow: Int, ncol: Int, ctx: Context) = {
+    this(false)
+    allocate(nrow, ncol, ctx)
+  }
+
+  /**
+    * Allocates from a caller-supplied `data` pointer, using the memory type reported by a
+    * newly created `Context(Context.MAIN_MEMORY)`.
+    */
+  def this(data: DoublePointer, nrow: Int, ncol: Int) = {
+    this(false)
+    val mainMemType = new Context(Context.MAIN_MEMORY).memoryType
+    allocate(data, mainMemType, nrow, ncol)
+    // Keep hold of the buffer so that it can be released at deallocation time.
+    ptrs += data
+  }
+
+  /** Allocates from a caller-supplied `data` pointer, using the memory type of `ctx`. */
+  def this(data: DoublePointer, nrow: Int, ncol: Int, ctx: Context) = {
+    this(false)
+    val ctxMemType = ctx.memoryType
+    allocate(data, ctxMemType, nrow, ncol)
+    // Keep hold of the buffer so that it can be released at deallocation time.
+    ptrs += data
+  }
+
+  /** Allocates from a matrix-matrix product expression. */
+  def this(me: MatMatProdExpression) = {
+    this(false)
+    allocate(me)
+  }
+
+  /** Allocates from a matrix transpose expression. */
+  def this(me: MatrixTransExpression) = {
+    this(false)
+    allocate(me)
+  }
+
+  // TODO: getting compilation errors here
+  /** Allocates from a sparse-matrix times dense-matrix product expression. */
+  def this(sd: SrMatDnMatProdExpression) = {
+    this(false)
+    allocate(sd)
+  }
+
+  // Primary constructor body: default native allocation only when requested.
+  if (initDefault) {
+    allocate()
+  }
+
+  /** Native no-argument allocation. */
+  @native protected def allocate(): Unit
+
+  /** Native allocation from dimensions; `ctx` is passed by value. */
+  @native protected def allocate(nrow: Int, ncol: Int, @ByVal ctx: Context): Unit
+
+  /** Native allocation from a data pointer; `memType` is cast to `viennacl::memory_types`. */
+  @native protected def allocate(
+    data: DoublePointer,
+    @Cast(Array("viennacl::memory_types")) memType: Int,
+    nrow: Int,
+    ncol: Int
+  ): Unit
+
+  /** Native allocation from a matrix-matrix product expression, passed as a const reference. */
+  @native protected def allocate(@Const @ByRef me: MatMatProdExpression): Unit
+
+  /** Native allocation from a matrix transpose expression, passed as a const reference. */
+  @native protected def allocate(@Const @ByRef me: MatrixTransExpression): Unit
+
+  // TODO: Compilation errors here
+  /** Native allocation from a sparse-dense product expression, passed as a const reference. */
+  @native protected def allocate(@Const @ByRef me: SrMatDnMatProdExpression): Unit
+
+}
+
+
+/** Companion object; its initializer calls `Context.loadLib()`. */
+object DenseRowMatrix {
+
+  Context.loadLib()
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatMatProdExpression.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatMatProdExpression.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatMatProdExpression.scala
new file mode 100644
index 0000000..c15bbd9
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatMatProdExpression.scala
@@ -0,0 +1,33 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp.Pointer
+import org.bytedeco.javacpp.annotation.{Name, Namespace, Platform, Properties}
+
+
+/**
+  * JavaCPP pointer type mapped to the ViennaCL `matrix_expression` template over two
+  * `matrix_base<double>` operands tagged with `op_mat_mat_prod`. Declares no members of its own.
+  */
+@Properties(
+  inherit = Array(classOf[Context]),
+  value = Array(new Platform(library = "jniViennaCL"))
+)
+@Namespace("viennacl")
+@Name(Array(
+  "matrix_expression<const viennacl::matrix_base<double>, " +
+    "const viennacl::matrix_base<double>, " +
+    "viennacl::op_mat_mat_prod>"
+))
+class MatMatProdExpression extends Pointer

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatVecProdExpression.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatVecProdExpression.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatVecProdExpression.scala
new file mode 100644
index 0000000..4435232
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatVecProdExpression.scala
@@ -0,0 +1,33 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp.Pointer
+import org.bytedeco.javacpp.annotation.{Name, Namespace, Platform, Properties}
+
+
+/**
+  * JavaCPP pointer type mapped to the ViennaCL `vector_expression` template over a
+  * `matrix_base<double>` and a `vector_base<double>` operand tagged with `op_prod`.
+  * Declares no members of its own.
+  */
+@Properties(
+  inherit = Array(classOf[Context]),
+  value = Array(new Platform(library = "jniViennaCL"))
+)
+@Namespace("viennacl")
+@Name(Array(
+  "vector_expression<const viennacl::matrix_base<double>, " +
+    "const viennacl::vector_base<double>, " +
+    "viennacl::op_prod>"
+))
+class MatVecProdExpression extends Pointer

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatrixBase.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatrixBase.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatrixBase.scala
new file mode 100644
index 0000000..00823b6
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MatrixBase.scala
@@ -0,0 +1,75 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp.Pointer
+import org.bytedeco.javacpp.annotation._
+
+import scala.collection.mutable.ArrayBuffer
+
+
+/**
+  * JavaCPP binding for `viennacl::matrix_base<double>`; the dense matrix bindings extend it.
+  */
+@Properties(
+  inherit = Array(classOf[Context]),
+  value = Array(new Platform(library = "jniViennaCL"))
+)
+@Name(Array("viennacl::matrix_base<double>"))
+class MatrixBase extends Pointer {
+
+  /** Pointers registered by subclasses; each one is closed in `deallocate`. */
+  protected val ptrs = new ArrayBuffer[Pointer]()
+
+  /** Runs the inherited deallocation first, then closes every pointer held in `ptrs`. */
+  override def deallocate(deallocate: Boolean): Unit = {
+    super.deallocate(deallocate)
+    for (held <- ptrs) held.close()
+  }
+
+  /** Bound to the native `operator=` taking a row-major dense matrix by const reference. */
+  @Name(Array("operator="))
+  @native def :=(@Const @ByRef src: DenseRowMatrix): Unit
+
+  /** Bound to the native `operator=` taking a column-major dense matrix by const reference. */
+  @Name(Array("operator="))
+  @native def :=(@Const @ByRef src: DenseColumnMatrix): Unit
+
+  /** Bound to the native `size1`. */
+  @Name(Array("size1"))
+  @native def nrow: Int
+
+  /** Bound to the native `size2`. */
+  @Name(Array("size2"))
+  @native def ncol: Int
+
+  /** Bound to the native `row_major`. */
+  @Name(Array("row_major"))
+  @native def isRowMajor: Boolean
+
+  /** Bound to the native `internal_size1`. */
+  @Name(Array("internal_size1"))
+  @native def internalnrow: Int
+
+  /** Bound to the native `internal_size2`. */
+  @Name(Array("internal_size2"))
+  @native def internalncol: Int
+
+  /** Bound to the native `memory_domain`. */
+  @Name(Array("memory_domain"))
+  @native def memoryDomain: Int
+
+  /** Bound to the native `switch_memory_context`; `ctx` is passed by reference. */
+  @Name(Array("switch_memory_context"))
+  @native def switchMemoryContext(@ByRef ctx: Context): Unit
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MemHandle.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MemHandle.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MemHandle.scala
new file mode 100644
index 0000000..938a262
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/MemHandle.scala
@@ -0,0 +1,34 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp.Pointer
+import org.bytedeco.javacpp.annotation._
+
+
+/**
+  * JavaCPP binding for `viennacl::backend::mem_handle`; construction always invokes the
+  * native no-argument `allocate()`.
+  */
+@Properties(
+  inherit = Array(classOf[Context]),
+  value = Array(new Platform(library = "jniViennaCL"))
+)
+@Namespace("viennacl::backend")
+@Name(Array("mem_handle"))
+class MemHandle extends Pointer {
+
+  // Constructor body: allocate the native object right away.
+  allocate()
+
+  /** Native no-argument allocation. */
+  @native def allocate(): Unit
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/ProdExpression.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/ProdExpression.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/ProdExpression.scala
new file mode 100644
index 0000000..315a03c
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/ProdExpression.scala
@@ -0,0 +1,33 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp.Pointer
+import org.bytedeco.javacpp.annotation.{Name, Namespace, Platform, Properties}
+
+
+/**
+  * JavaCPP pointer type mapped to the ViennaCL `matrix_expression` template over two
+  * `compressed_matrix<double>` operands tagged with `op_prod`. Declares no members of its own.
+  */
+@Properties(
+  inherit = Array(classOf[Context]),
+  value = Array(new Platform(library = "jniViennaCL"))
+)
+@Namespace("viennacl")
+@Name(Array(
+  "matrix_expression<const viennacl::compressed_matrix<double>, " +
+    "const viennacl::compressed_matrix<double>, " +
+    "viennacl::op_prod>"
+))
+class ProdExpression extends Pointer

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/SrMatDnMatProdExpression.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/SrMatDnMatProdExpression.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/SrMatDnMatProdExpression.scala
new file mode 100644
index 0000000..e9c7bac
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/SrMatDnMatProdExpression.scala
@@ -0,0 +1,33 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp.Pointer
+import org.bytedeco.javacpp.annotation.{Name, Namespace, Platform, Properties}
+
+
+/**
+  * JavaCPP pointer type mapped to the ViennaCL `matrix_expression` template over a
+  * `compressed_matrix<double>` and a `matrix_base<double>` operand tagged with `op_prod`.
+  * Declares no members of its own.
+  */
+@Properties(
+  inherit = Array(classOf[Context]),
+  value = Array(new Platform(library = "jniViennaCL"))
+)
+@Namespace("viennacl")
+@Name(Array(
+  "matrix_expression<const viennacl::compressed_matrix<double>, " +
+    "const viennacl::matrix_base<double>, " +
+    "viennacl::op_prod>"
+))
+class SrMatDnMatProdExpression extends Pointer

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VCLVector.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VCLVector.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VCLVector.scala
new file mode 100644
index 0000000..33947ec
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VCLVector.scala
@@ -0,0 +1,124 @@
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp._
+import org.bytedeco.javacpp.annotation._
+
+
+/**
+  * JavaCPP binding for `viennacl::vector<double>`.
+  *
+  * @param defaultCtr when true, the no-argument native `allocate()` runs during construction;
+  *                   every auxiliary constructor passes false and calls its own overload.
+  */
+@Properties(inherit = Array(classOf[Context]),
+  value = Array(new Platform(
+    library="jniViennaCL"
+  )))
+@Name(Array("viennacl::vector<double>"))
+final class VCLVector(defaultCtr: Boolean = true) extends VectorBase {
+
+  // Primary constructor body: default native allocation only when requested.
+  if (defaultCtr) allocate()
+
+  /** Allocates through the no-argument native `allocate()`. */
+  def this(){
+    this(false)
+    allocate()
+  }
+
+  /** Allocates `size` elements with a newly created `Context(Context.MAIN_MEMORY)`. */
+  def this(size: Int) {
+    this(false)
+    allocate(size, new Context(Context.MAIN_MEMORY))
+  }
+
+  /** Allocates `size` elements with the supplied `ctx`. */
+  def this(size: Int, ctx: Context ) {
+    this(false)
+    allocate(size, ctx)
+  }
+
+  /** Allocates from a vector-times-scalar expression. */
+  def this(@Const @ByRef ve: VecMultExpression) {
+    this(false)
+    allocate(ve)
+  }
+
+  /** Allocates from a matrix-vector product expression. */
+  def this(@Const @ByRef vmp: MatVecProdExpression) {
+    this(false)
+    allocate(vmp)
+  }
+
+//   conflicting with the next signature as MemHandle is a pointer and so is a DoublePointer..
+//   leave out for now.
+//
+//   def this(h: MemHandle , vec_size: Int, vec_start: Int = 0, vec_stride: Int = 1) {
+//      this(false)
+//      allocate(h, vec_size, vec_start, vec_stride)
+//    }
+
+  /**
+    * Allocates from a caller-supplied buffer, passing start 0 and stride 1 to the native
+    * `allocate`. The buffer is registered in `ptrs`.
+    */
+  def this(ptr_to_mem: DoublePointer,
+           @Cast(Array("viennacl::memory_types"))mem_type : Int,
+           vec_size: Int) {
+
+    this(false)
+    allocate(ptr_to_mem, mem_type, vec_size, 0, 1)
+    ptrs += ptr_to_mem
+  }
+
+  /**
+    * Allocates from a caller-supplied buffer with explicit `start` and `stride`.
+    * The buffer is registered in `ptrs`.
+    */
+  def this(ptr_to_mem: DoublePointer,
+           @Cast(Array("viennacl::memory_types"))mem_type : Int,
+           vec_size: Int,
+           start: Int,
+           stride: Int) {
+
+    this(false)
+    allocate(ptr_to_mem, mem_type, vec_size, start, stride)
+    ptrs += ptr_to_mem
+  }
+
+  /** Allocates from another `VCLVector` via the matching native `allocate` overload. */
+  def this(@Const @ByRef vc: VCLVector) {
+    this(false)
+    allocate(vc)
+  }
+
+  /** Allocates from a `VectorBase` via the matching native `allocate` overload. */
+  def this(@Const @ByRef vb: VectorBase) {
+    this(false)
+    allocate(vb)
+  }
+
+  @native protected def allocate()
+
+  @native protected def allocate(size: Int)
+
+  @native protected def allocate(size: Int, @ByVal ctx: Context)
+
+  @native protected def allocate(@Const @ByRef ve: VecMultExpression)
+
+  @native protected def allocate(@Const @ByRef ve: MatVecProdExpression)
+
+  @native protected def allocate(@Const @ByRef vb: VCLVector)
+
+  @native protected def allocate(@Const @ByRef vb: VectorBase)
+
+
+//  @native protected def allocate(h: MemHandle , vec_size: Int,
+//                                 vec_start: Int,
+//                                 vec_stride: Int)
+
+  @native protected def allocate(ptr_to_mem: DoublePointer,
+                                 @Cast(Array("viennacl::memory_types"))mem_type : Int,
+                                 vec_size: Int,
+                                 start: Int,
+                                 stride: Int)
+
+  /** Returns this vector typed as its base class; no cast is needed since VCLVector extends VectorBase. */
+  @Name(Array("viennacl::vector<double>::self_type"))
+  def selfType:VectorBase = this
+
+
+  /** Bound to the native `switch_memory_context`; `context` is passed by value. */
+  @native def switch_memory_context(@ByVal context: Context): Unit
+
+//  Swaps the handles of two vectors by swapping the OpenCL handles only, no data copy.
+//  @native def fast_swap(@ByVal other: VCLVector): VectorBase
+
+// add this operator in for tests many more can be added
+//  @Name(Array("operator*"))
+//  @native @ByPtr def *(i: Int): VectorMultExpression
+
+
+
+}
+
+/** Companion object; its initializer calls `Context.loadLib()`. */
+object VCLVector {
+
+  Context.loadLib()
+
+}
+
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VecMultExpression.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VecMultExpression.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VecMultExpression.scala
new file mode 100644
index 0000000..7562de5
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VecMultExpression.scala
@@ -0,0 +1,32 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp.Pointer
+import org.bytedeco.javacpp.annotation.{Name, Namespace, Platform, Properties}
+
+
+/**
+  * JavaCPP pointer type mapped to the ViennaCL `vector_expression` template over a
+  * `vector_base<double>` and a `const double` operand tagged with `op_mult`.
+  * Declares no members of its own.
+  */
+@Properties(
+  inherit = Array(classOf[Context]),
+  value = Array(new Platform(library = "jniViennaCL"))
+)
+@Namespace("viennacl")
+@Name(Array(
+  "vector_expression<const viennacl::vector_base<double>," +
+    "const double, viennacl::op_mult >"
+))
+class VecMultExpression extends Pointer

http://git-wip-us.apache.org/repos/asf/mahout/blob/2f55adef/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VectorBase.scala
----------------------------------------------------------------------
diff --git 
a/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VectorBase.scala
 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VectorBase.scala
new file mode 100644
index 0000000..8efd377
--- /dev/null
+++ 
b/viennacl-omp/src/main/scala/org/apache/mahout/viennacl/openmp/javacpp/VectorBase.scala
@@ -0,0 +1,55 @@
+/**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements.  See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.  You may obtain a copy of the License at
+  *
+  * http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+package org.apache.mahout.viennacl.openmp.javacpp
+
+import org.bytedeco.javacpp._
+import org.bytedeco.javacpp.annotation._
+
+import scala.collection.mutable.ArrayBuffer
+
+
+/**
+  * JavaCPP binding for `viennacl::vector_base<double>`; `VCLVector` extends it.
+  */
+@Properties(
+  inherit = Array(classOf[Context]),
+  value = Array(new Platform(library = "jniViennaCL"))
+)
+@Name(Array("viennacl::vector_base<double>"))
+class VectorBase extends Pointer {
+
+  /** Pointers registered by subclasses; each one is closed in `deallocate`. */
+  protected val ptrs = new ArrayBuffer[Pointer]()
+
+  /** Runs the inherited deallocation first, then closes every pointer held in `ptrs`. */
+  override def deallocate(deallocate: Boolean): Unit = {
+    super.deallocate(deallocate)
+    for (held <- ptrs) held.close()
+  }
+
+  /** Size of the vec elements. */
+  @native @Const def size(): Int
+
+  /** Size of the vec elements + padding. */
+  @native @Const def internal_size(): Int
+
+  /** Handle to the vec element buffer, returned by const reference. */
+  @native @Const @ByRef def handle: MemHandle
+
+//  // add this operator in for tests many more can be added
+//  @Name(Array("operator* "))
+//  @native def *(i: Int): VectorMultExpression
+
+
+}
+
+

Reply via email to