http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOpsSuite.scala
----------------------------------------------------------------------
diff --git a/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOpsSuite.scala b/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOpsSuite.scala
new file mode 100644
index 0000000..a943c5f
--- /dev/null
+++ b/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOpsSuite.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.scalabindings
+
+import org.scalatest.FunSuite
+import RLikeOps._
+import org.apache.mahout.test.MahoutSuite
+
+class RLikeMatrixOpsSuite extends FunSuite with MahoutSuite {
+  // Exercises the matrix operators used with RLikeOps._ in scope: %*%, *, *= and left-scalar forms.
+  test("multiplication") {
+    // expected entries: 1*1 + 2*4 + 3*5 = 24 and 3*1 + 4*4 + 5*5 = 44 (each row of a against b)
+    val a = dense((1, 2, 3), (3, 4, 5))
+    val b = dense(1, 4, 5)
+    val m = a %*% b
+
+    assert(m(0, 0) == 24)
+    assert(m(1, 0) == 44)
+    println(m.toString)
+  }
+
+  test("Hadamard") {
+    val a = dense(
+      (1, 2, 3),
+      (3, 4, 5)
+    )
+    val b = dense(
+      (1, 1, 2),
+      (2, 1, 1)
+    )
+    // the assertions expect an element-wise product: 1*1 = 1 at (0, 0) and 5*1 = 5 at (1, 2)
+    val c = a * b
+
+    printf("C=\n%s\n", c)
+
+    assert(c(0, 0) == 1)
+    assert(c(1, 2) == 5)
+    println(c.toString)
+    // matrix * scalar: the checked elements are expected to be multiplied by 5
+    val d = a * 5.0
+    assert(d(0, 0) == 5)
+    assert(d(1, 1) == 20)
+    // after `a *= b` the same two element-wise products are expected on a itself
+    a *= b
+    assert(a(0, 0) == 1)
+    assert(a(1, 2) == 5)
+    println(a.toString)
+
+  }
+
+  /** Scalar-on-the-left DSL overloads must agree with the `*:`, `+:`, `-:` and `/:` operators. */
+  test ("scalarOps") {
+    val a = dense(
+      (1, 2, 3),
+      (3, 4, 5)
+    )
+    // each pair is compared by asserting that the norm of the difference is 0
+    (10 * a - (10 *: a)).norm shouldBe 0
+    (10 + a - (10 +: a)).norm shouldBe 0
+    (10 - a - (10 -: a)).norm shouldBe 0
+    (10 / a - (10 /: a)).norm shouldBe 0
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeVectorOpsSuite.scala
----------------------------------------------------------------------
diff --git a/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeVectorOpsSuite.scala b/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeVectorOpsSuite.scala
new file mode 100644
index 0000000..832937b
--- /dev/null
+++ b/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeVectorOpsSuite.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.scalabindings
+
+import org.scalatest.FunSuite
+import org.apache.mahout.math.Vector
+import RLikeOps._
+import org.apache.mahout.test.MahoutSuite
+
+class RLikeVectorOpsSuite extends FunSuite with MahoutSuite {
+  // Checks the `*` operator on vectors with RLikeOps._ in scope.
+  test("Hadamard") {
+    val a: Vector = (1, 2, 3)
+    val b = (3, 4, 5)
+    // only a is annotated as Vector; the assertion expects (1*3, 2*4, 3*5)
+    val c = a * b
+    println(c)
+    assert(c ===(3, 8, 15))
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/test/scala/org/apache/mahout/math/scalabindings/VectorOpsSuite.scala
----------------------------------------------------------------------
diff --git a/samsara/src/test/scala/org/apache/mahout/math/scalabindings/VectorOpsSuite.scala b/samsara/src/test/scala/org/apache/mahout/math/scalabindings/VectorOpsSuite.scala
new file mode 100644
index 0000000..037f562
--- /dev/null
+++ b/samsara/src/test/scala/org/apache/mahout/math/scalabindings/VectorOpsSuite.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.scalabindings
+
+import org.scalatest.FunSuite
+import org.apache.mahout.math.{RandomAccessSparseVector, Vector}
+import RLikeOps._
+import org.apache.mahout.test.MahoutSuite
+
+/** Tests vector construction and arithmetic written with the scalabindings DSL. */
+class VectorOpsSuite extends FunSuite with MahoutSuite {
+
+  test("inline create") {
+    // smoke test: each construction form only has to build and print; nothing is asserted
+    val sparseVec = svec((5 -> 1) :: (10 -> 2.0) :: Nil)
+    println(sparseVec)
+
+    val sparseVec2: Vector = (5 -> 1.0) :: (10 -> 2.0) :: Nil
+    println(sparseVec2)
+
+    val sparseVec3: Vector = new RandomAccessSparseVector(100) := (5 -> 1.0) :: Nil
+    println(sparseVec3)
+
+    val denseVec1: Vector = (1.0, 1.1, 1.2)
+    println(denseVec1)
+
+    val denseVec2 = dvec(1, 0, 1.1, 1.2) // NOTE(review): possibly meant 1.0, 1.1, 1.2 - confirm
+    println(denseVec2)
+  }
+
+  test("plus minus") {
+    // a is built from a tuple, b from (index -> value) pairs; results are compared to tuples
+    val a: Vector = (1, 2, 3)
+    val b: Vector = (0 -> 3) :: (1 -> 4) :: (2 -> 5) :: Nil
+
+    val c = a + b
+    val d = b - a
+    val e = -b - a
+
+    assert(c ===(4, 6, 8))
+    assert(d ===(2, 2, 2))
+    assert(e ===(-4, -6, -8))
+
+  }
+
+  test("dot") {
+    // expected value: 1*3 + 2*4 + 3*5 = 26
+    val a: Vector = (1, 2, 3)
+    val b = (3, 4, 5)
+
+    val c = a dot b
+    println(c)
+    assert(c == 26)
+
+  }
+
+  test ("scalarOps") {
+    val a = dvec(1 to 5):Vector
+    // scalar-on-the-left overloads must agree with the explicit `*:`, `+:`, `-:`, `/:` forms
+    10 * a shouldBe 10 *: a
+    10 + a shouldBe 10 +: a
+    10 - a shouldBe 10 -: a
+    10 / a shouldBe 10 /: a
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/test/scala/org/apache/mahout/nlp/tfidf/TFIDFtestBase.scala
----------------------------------------------------------------------
diff --git a/samsara/src/test/scala/org/apache/mahout/nlp/tfidf/TFIDFtestBase.scala b/samsara/src/test/scala/org/apache/mahout/nlp/tfidf/TFIDFtestBase.scala
new file mode 100644
index 0000000..3ec5ec1
--- /dev/null
+++ b/samsara/src/test/scala/org/apache/mahout/nlp/tfidf/TFIDFtestBase.scala
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.nlp.tfidf
+
+import org.apache.mahout.math._
+import org.apache.mahout.math.scalabindings._
+import org.apache.mahout.test.DistributedMahoutSuite
+import org.scalatest.{FunSuite, Matchers}
+import scala.collection._
+import RLikeOps._
+import scala.math._
+
+
+trait TFIDFtestBase extends DistributedMahoutSuite with Matchers {
+  this: FunSuite =>
+  // Shared TF / TF-IDF weighting tests, mixed into a concrete FunSuite (see the self-type).
+  val epsilon = 1E-6 // absolute tolerance used by the floating-point weight assertions
+
+  val documents: List[(Int, String)] = List(
+    (1, "the first document contains 5 terms"),
+    (2, "document two document contains 4 terms"),
+    (3, "document three three terms"),
+    (4, "each document including this document contain the term document"))
+
+  def createDictionaryAndDfMaps(documents: List[(Int, String)]): (Map[String, Int], Map[Int, Int]) = {
+    // returns (term -> column index, column index -> number of documents containing the term)
+    // get a tf count for the entire dictionary
+    val dictMap = documents.unzip._2.mkString(" ").toLowerCase.split(" ").groupBy(identity).mapValues(_.length)
+
+    // create a dictionary with an index for each term
+    val dictIndex = dictMap.zipWithIndex.map(x => x._1._1 -> x._2).toMap
+
+    val docFrequencyCount = new Array[Int](dictMap.size)
+
+    for (token <- dictMap) {
+      for (doc <- documents) {
+        // count the document once if any of its space-separated tokens equals this term
+        if (doc._2.toLowerCase.split(" ").contains(token._1)) {
+          docFrequencyCount(dictIndex(token._1)) += 1
+        }
+      }
+    }
+
+    val docFrequencyMap = docFrequencyCount.zipWithIndex.map(x => x._2 -> x._1).toMap
+
+    (dictIndex, docFrequencyMap)
+  }
+
+  def vectorizeDocument(document: String,
+                        dictionaryMap: Map[String, Int],
+                        dfMap: Map[Int, Int], weight: TermWeight = new TFIDF): Vector = {
+    // builds a sparse vector holding weight.calculate(...) at each known term's dictionary index
+    val wordCounts = document.toLowerCase.split(" ").groupBy(identity).mapValues(_.length)
+
+    val vec = new RandomAccessSparseVector(dictionaryMap.size)
+
+    val totalDFSize = dictionaryMap.size // NOTE(review): dictionary size is calculate's last argument - confirm intent
+    val docSize = wordCounts.size // number of distinct terms in this document, not its token count
+
+    for (word <- wordCounts) {
+      val term = word._1
+      if (dictionaryMap.contains(term)) {
+        val termFreq = word._2
+        val dictIndex = dictionaryMap(term)
+        val docFreq = dfMap(dictIndex)
+        val currentWeight = weight.calculate(termFreq, docFreq.toInt, docSize, totalDFSize.toInt)
+        vec(dictIndex) = currentWeight
+      }
+    }
+    vec
+  }
+
+  test("TF test") {
+
+    val (dictionary, dfMap) = createDictionaryAndDfMaps(documents)
+
+    val tf: TermWeight = new TF()
+
+    val vectorizedDocuments: Matrix = new SparseMatrix(documents.size, dictionary.size)
+
+    for (doc <- documents) {
+      vectorizedDocuments(doc._1 - 1, ::) := vectorizeDocument(doc._2, dictionary, dfMap, tf)
+    }
+
+    // corpus:
+    //  (1, "the first document contains 5 terms"),
+    //  (2, "document two document contains 4 terms"),
+    //  (3, "document three three terms"),
+    //  (4, "each document including this document contain the term document")
+
+    // dictionary as recorded by the original author (indices follow Map iteration order):
+    //  (this -> 0, 4 -> 1, three -> 2, document -> 3, two -> 4, term -> 5, 5 -> 6, contain -> 7,
+    //   each -> 8, first -> 9, terms -> 10, contains -> 11, including -> 12, the -> 13)
+
+    // dfMap:
+    //  (0 -> 1, 5 -> 1, 10 -> 3, 1 -> 1, 6 -> 1, 9 -> 1, 13 -> 2, 2 -> 1, 12 -> 1, 7 -> 1, 3 -> 4,
+    //   11 -> 2, 8 -> 1, 4 -> 1)
+
+    vectorizedDocuments(0, dictionary("this")).toInt should be (0)
+    vectorizedDocuments(0, dictionary("the")).toInt should be (1)
+    vectorizedDocuments(1, dictionary("document")).toInt should be (2)
+    vectorizedDocuments(3, dictionary("document")).toInt should be (3)
+
+  }
+
+
+  test("TFIDF test") {
+    val (dictionary, dfMap) = createDictionaryAndDfMaps(documents)
+
+    val tfidf: TermWeight = new TFIDF()
+
+    val vectorizedDocuments: Matrix = new SparseMatrix(documents.size, dictionary.size)
+
+    for (doc <- documents) {
+      vectorizedDocuments(doc._1 - 1, ::) := vectorizeDocument(doc._2, dictionary, dfMap, tfidf)
+    }
+
+    // corpus:
+    //  (1, "the first document contains 5 terms"),
+    //  (2, "document two document contains 4 terms"),
+    //  (3, "document three three terms"),
+    //  (4, "each document including this document contain the term document")
+
+    // dictionary as recorded by the original author (indices follow Map iteration order):
+    //  (this -> 0, 4 -> 1, three -> 2, document -> 3, two -> 4, term -> 5, 5 -> 6, contain -> 7,
+    //   each -> 8, first -> 9, terms -> 10, contains -> 11, including -> 12, the -> 13)
+
+    // dfMap:
+    //  (0 -> 1, 5 -> 1, 10 -> 3, 1 -> 1, 6 -> 1, 9 -> 1, 13 -> 2, 2 -> 1, 12 -> 1, 7 -> 1, 3 -> 4,
+    //   11 -> 2, 8 -> 1, 4 -> 1)
+
+    abs(vectorizedDocuments(0, dictionary("this")) -  0.0) should be < epsilon
+    abs(vectorizedDocuments(0, dictionary("the")) - 2.540445) should be < epsilon
+    abs(vectorizedDocuments(1, dictionary("document")) - 2.870315) should be < epsilon
+    abs(vectorizedDocuments(3, dictionary("document")) - 3.515403) should be < epsilon
+  }
+
+  test("MLlib TFIDF test") {
+    val (dictionary, dfMap) = createDictionaryAndDfMaps(documents)
+
+    val tfidf: TermWeight = new MLlibTFIDF()
+
+    val vectorizedDocuments: Matrix = new SparseMatrix(documents.size, dictionary.size)
+
+    for (doc <- documents) {
+      vectorizedDocuments(doc._1 - 1, ::) := vectorizeDocument(doc._2, dictionary, dfMap, tfidf)
+    }
+
+    // corpus:
+    //  (1, "the first document contains 5 terms"),
+    //  (2, "document two document contains 4 terms"),
+    //  (3, "document three three terms"),
+    //  (4, "each document including this document contain the term document")
+
+    // dictionary as recorded by the original author (indices follow Map iteration order):
+    //  (this -> 0, 4 -> 1, three -> 2, document -> 3, two -> 4, term -> 5, 5 -> 6, contain -> 7,
+    //   each -> 8, first -> 9, terms -> 10, contains -> 11, including -> 12, the -> 13)
+
+    // dfMap:
+    //  (0 -> 1, 5 -> 1, 10 -> 3, 1 -> 1, 6 -> 1, 9 -> 1, 13 -> 2, 2 -> 1, 12 -> 1, 7 -> 1, 3 -> 4,
+    //   11 -> 2, 8 -> 1, 4 -> 1)
+
+    abs(vectorizedDocuments(0, dictionary("this")) -  0.0) should be < epsilon
+    abs(vectorizedDocuments(0, dictionary("the")) - 1.609437) should be < epsilon
+    abs(vectorizedDocuments(1, dictionary("document")) - 2.197224) should be < epsilon
+    abs(vectorizedDocuments(3, dictionary("document")) - 3.295836) should be < epsilon
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/test/scala/org/apache/mahout/test/DistributedMahoutSuite.scala
----------------------------------------------------------------------
diff --git a/samsara/src/test/scala/org/apache/mahout/test/DistributedMahoutSuite.scala b/samsara/src/test/scala/org/apache/mahout/test/DistributedMahoutSuite.scala
new file mode 100644
index 0000000..3538991
--- /dev/null
+++ b/samsara/src/test/scala/org/apache/mahout/test/DistributedMahoutSuite.scala
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.test
+
+import org.apache.mahout.math.drm.DistributedContext
+import org.scalatest.{Suite, FunSuite, Matchers}
+
+/**
+ * Base trait for suites that run against a distributed context; implementors supply `mahoutCtx`.
+ */
+trait DistributedMahoutSuite extends MahoutSuite  { this: Suite =>
+  protected implicit var mahoutCtx: DistributedContext
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/test/scala/org/apache/mahout/test/LoggerConfiguration.scala
----------------------------------------------------------------------
diff --git a/samsara/src/test/scala/org/apache/mahout/test/LoggerConfiguration.scala b/samsara/src/test/scala/org/apache/mahout/test/LoggerConfiguration.scala
new file mode 100644
index 0000000..7a34aa2
--- /dev/null
+++ b/samsara/src/test/scala/org/apache/mahout/test/LoggerConfiguration.scala
@@ -0,0 +1,16 @@
+package org.apache.mahout.test
+
+import org.scalatest._
+import org.apache.log4j.{Level, Logger, BasicConfigurator}
+
+trait LoggerConfiguration extends BeforeAndAfterAllConfigMap {
+  this: Suite =>
+  // beforeAll resets log4j: root logger at ERROR, the scalabindings package at DEBUG.
+  override protected def beforeAll(configMap: ConfigMap): Unit = {
+    super.beforeAll(configMap)
+    BasicConfigurator.resetConfiguration()
+    BasicConfigurator.configure()
+    Logger.getRootLogger.setLevel(Level.ERROR)
+    Logger.getLogger("org.apache.mahout.math.scalabindings").setLevel(Level.DEBUG)
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/test/scala/org/apache/mahout/test/MahoutSuite.scala
----------------------------------------------------------------------
diff --git a/samsara/src/test/scala/org/apache/mahout/test/MahoutSuite.scala b/samsara/src/test/scala/org/apache/mahout/test/MahoutSuite.scala
new file mode 100644
index 0000000..d3b8a38
--- /dev/null
+++ b/samsara/src/test/scala/org/apache/mahout/test/MahoutSuite.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.test
+
+import java.io.File
+import org.scalatest._
+import org.apache.mahout.common.RandomUtils
+
+trait MahoutSuite extends BeforeAndAfterEach with LoggerConfiguration with Matchers {
+  this: Suite =>
+  /** Relative path of the scratch directory removed in beforeAll and in afterEach. */
+  final val TmpDir = "tmp/"
+
+  override protected def beforeEach(): Unit = {
+    super.beforeEach()
+    RandomUtils.useTestSeed()
+  }
+
+  override protected def beforeAll(configMap: ConfigMap): Unit = {
+    super.beforeAll(configMap)
+
+    // just in case there is an existing tmp dir clean it before every suite
+    deleteDirectory(new File(TmpDir))
+  }
+
+  override protected def afterEach(): Unit = {
+
+    // clean the tmp dir after every test
+    deleteDirectory(new File(TmpDir))
+
+    super.afterEach()
+  }
+
+  /** Recursively delete `path`; symlinks are not checked and exceptions are not caught. */
+  private def deleteDirectory(path: File): Unit = {
+    // listFiles yields null (not an empty array) for a non-directory or on an I/O error
+    Option(path.listFiles).foreach(_.foreach(deleteDirectory))
+    path.delete()
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/spark-shell/pom.xml
----------------------------------------------------------------------
diff --git a/spark-shell/pom.xml b/spark-shell/pom.xml
index 0903534..87fb187 100644
--- a/spark-shell/pom.xml
+++ b/spark-shell/pom.xml
@@ -112,7 +112,7 @@
 
     <dependency>
       <groupId>org.apache.mahout</groupId>
-      <artifactId>mahout-math-scala_${scala.compat.version}</artifactId>
+      <artifactId>mahout-samsara_${scala.compat.version}</artifactId>
       <classifier>tests</classifier>
       <scope>test</scope>
     </dependency>

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/spark/pom.xml
----------------------------------------------------------------------
diff --git a/spark/pom.xml b/spark/pom.xml
index 5646c25..885d5f2 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -134,7 +134,7 @@
 
     <dependency>
       <groupId>org.apache.mahout</groupId>
-      <artifactId>mahout-math-scala_${scala.compat.version}</artifactId>
+      <artifactId>mahout-samsara_${scala.compat.version}</artifactId>
     </dependency>
 
     <dependency>
@@ -150,7 +150,7 @@
 
     <dependency>
       <groupId>org.apache.mahout</groupId>
-      <artifactId>mahout-math-scala_${scala.compat.version}</artifactId>
+      <artifactId>mahout-samsara_${scala.compat.version}</artifactId>
       <classifier>tests</classifier>
       <scope>test</scope>
     </dependency>

Reply via email to