http://git-wip-us.apache.org/repos/asf/mahout/blob/ef6d93a3/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOpsSuite.scala ---------------------------------------------------------------------- diff --git a/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOpsSuite.scala b/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOpsSuite.scala deleted file mode 100644 index a943c5f..0000000 --- a/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOpsSuite.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.scalatest.FunSuite -import RLikeOps._ -import org.apache.mahout.test.MahoutSuite - -class RLikeMatrixOpsSuite extends FunSuite with MahoutSuite { - - test("multiplication") { - - val a = dense((1, 2, 3), (3, 4, 5)) - val b = dense(1, 4, 5) - val m = a %*% b - - assert(m(0, 0) == 24) - assert(m(1, 0) == 44) - println(m.toString) - } - - test("Hadamard") { - val a = dense( - (1, 2, 3), - (3, 4, 5) - ) - val b = dense( - (1, 1, 2), - (2, 1, 1) - ) - - val c = a * b - - printf("C=\n%s\n", c) - - assert(c(0, 0) == 1) - assert(c(1, 2) == 5) - println(c.toString) - - val d = a * 5.0 - assert(d(0, 0) == 5) - assert(d(1, 1) == 20) - - a *= b - assert(a(0, 0) == 1) - assert(a(1, 2) == 5) - println(a.toString) - - } - - /** Test dsl overloads over scala operations over matrices */ - test ("scalarOps") { - val a = dense( - (1, 2, 3), - (3, 4, 5) - ) - - (10 * a - (10 *: a)).norm shouldBe 0 - (10 + a - (10 +: a)).norm shouldBe 0 - (10 - a - (10 -: a)).norm shouldBe 0 - (10 / a - (10 /: a)).norm shouldBe 0 - - } - -}
http://git-wip-us.apache.org/repos/asf/mahout/blob/ef6d93a3/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeVectorOpsSuite.scala ---------------------------------------------------------------------- diff --git a/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeVectorOpsSuite.scala b/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeVectorOpsSuite.scala deleted file mode 100644 index 832937b..0000000 --- a/samsara/src/test/scala/org/apache/mahout/math/scalabindings/RLikeVectorOpsSuite.scala +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.scalatest.FunSuite -import org.apache.mahout.math.Vector -import RLikeOps._ -import org.apache.mahout.test.MahoutSuite - -class RLikeVectorOpsSuite extends FunSuite with MahoutSuite { - - test("Hadamard") { - val a: Vector = (1, 2, 3) - val b = (3, 4, 5) - - val c = a * b - println(c) - assert(c ===(3, 8, 15)) - } - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/ef6d93a3/samsara/src/test/scala/org/apache/mahout/math/scalabindings/VectorOpsSuite.scala ---------------------------------------------------------------------- diff --git a/samsara/src/test/scala/org/apache/mahout/math/scalabindings/VectorOpsSuite.scala b/samsara/src/test/scala/org/apache/mahout/math/scalabindings/VectorOpsSuite.scala deleted file mode 100644 index 037f562..0000000 --- a/samsara/src/test/scala/org/apache/mahout/math/scalabindings/VectorOpsSuite.scala +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.scalabindings - -import org.scalatest.FunSuite -import org.apache.mahout.math.{RandomAccessSparseVector, Vector} -import RLikeOps._ -import org.apache.mahout.test.MahoutSuite - -/** VectorOps Suite */ -class VectorOpsSuite extends FunSuite with MahoutSuite { - - test("inline create") { - - val sparseVec = svec((5 -> 1) :: (10 -> 2.0) :: Nil) - println(sparseVec) - - val sparseVec2: Vector = (5 -> 1.0) :: (10 -> 2.0) :: Nil - println(sparseVec2) - - val sparseVec3: Vector = new RandomAccessSparseVector(100) := (5 -> 1.0) :: Nil - println(sparseVec3) - - val denseVec1: Vector = (1.0, 1.1, 1.2) - println(denseVec1) - - val denseVec2 = dvec(1, 0, 1.1, 1.2) - println(denseVec2) - } - - test("plus minus") { - - val a: Vector = (1, 2, 3) - val b: Vector = (0 -> 3) :: (1 -> 4) :: (2 -> 5) :: Nil - - val c = a + b - val d = b - a - val e = -b - a - - assert(c ===(4, 6, 8)) - assert(d ===(2, 2, 2)) - assert(e ===(-4, -6, -8)) - - } - - test("dot") { - - val a: Vector = (1, 2, 3) - val b = (3, 4, 5) - - val c = a dot b - println(c) - assert(c == 26) - - } - - test ("scalarOps") { - val a = dvec(1 to 5):Vector - - 10 * a shouldBe 10 *: a - 10 + a shouldBe 10 +: a - 10 - a shouldBe 10 -: a - 10 / a shouldBe 10 /: a - - } - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/ef6d93a3/samsara/src/test/scala/org/apache/mahout/nlp/tfidf/TFIDFtestBase.scala ---------------------------------------------------------------------- diff --git a/samsara/src/test/scala/org/apache/mahout/nlp/tfidf/TFIDFtestBase.scala b/samsara/src/test/scala/org/apache/mahout/nlp/tfidf/TFIDFtestBase.scala deleted file mode 100644 index 3ec5ec1..0000000 --- a/samsara/src/test/scala/org/apache/mahout/nlp/tfidf/TFIDFtestBase.scala +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.nlp.tfidf - -import org.apache.mahout.math._ -import org.apache.mahout.math.scalabindings._ -import org.apache.mahout.test.DistributedMahoutSuite -import org.scalatest.{FunSuite, Matchers} -import scala.collection._ -import RLikeOps._ -import scala.math._ - - -trait TFIDFtestBase extends DistributedMahoutSuite with Matchers { - this: FunSuite => - - val epsilon = 1E-6 - - val documents: List[(Int, String)] = List( - (1, "the first document contains 5 terms"), - (2, "document two document contains 4 terms"), - (3, "document three three terms"), - (4, "each document including this document contain the term document")) - - def createDictionaryAndDfMaps(documents: List[(Int, String)]): (Map[String, Int], Map[Int, Int]) = { - - // get a tf count for the entire dictionary - val dictMap = documents.unzip._2.mkString(" ").toLowerCase.split(" ").groupBy(identity).mapValues(_.length) - - // create a dictionary with an index for each term - val dictIndex = dictMap.zipWithIndex.map(x => x._1._1 -> x._2).toMap - - val docFrequencyCount = new Array[Int](dictMap.size) - - for (token <- dictMap) { - for (doc <- documents) { - // parse the string and get a word then increment the df count for that word - if (doc._2.toLowerCase.split(" ").contains(token._1)) { - docFrequencyCount(dictIndex(token._1)) += 1 - } - } - } - - val docFrequencyMap = docFrequencyCount.zipWithIndex.map(x => x._2 -> x._1).toMap - - (dictIndex, docFrequencyMap) - } - - def vectorizeDocument(document: String, - dictionaryMap: Map[String, Int], - dfMap: Map[Int, Int], weight: TermWeight = new TFIDF): Vector = { - - val wordCounts = document.toLowerCase.split(" ").groupBy(identity).mapValues(_.length) - - val vec = new RandomAccessSparseVector(dictionaryMap.size) - - val totalDFSize = dictionaryMap.size - val docSize = wordCounts.size - - for (word <- wordCounts) { - val term = word._1 - if (dictionaryMap.contains(term)) { - val termFreq = word._2 - val dictIndex = dictionaryMap(term) - val docFreq = dfMap(dictIndex) - val currentWeight = weight.calculate(termFreq, docFreq.toInt, docSize, totalDFSize.toInt) - vec(dictIndex)= currentWeight - } - } - vec - } - - test("TF test") { - - val (dictionary, dfMap) = createDictionaryAndDfMaps(documents) - - val tf: TermWeight = new TF() - - val vectorizedDocuments: Matrix = new SparseMatrix(documents.size, dictionary.size) - - for (doc <- documents) { - vectorizedDocuments(doc._1 - 1, ::) := vectorizeDocument(doc._2, dictionary, dfMap, tf) - } - - // corpus: - // (1, "the first document contains 5 terms"), - // (2, "document two document contains 4 terms"), - // (3, "document three three terms"), - // (4, "each document including this document contain the term document") - - // dictonary: - // (this -> 0, 4 -> 1, three -> 2, document -> 3, two -> 4, term -> 5, 5 -> 6, contain -> 7, - // each -> 8, first -> 9, terms -> 10, contains -> 11, including -> 12, the -> 13) - - // dfMap: - // (0 -> 1, 5 -> 1, 10 -> 3, 1 -> 1, 6 -> 1, 9 -> 1, 13 -> 2, 2 -> 1, 12 -> 1, 7 -> 1, 3 -> 4, - // 11 -> 2, 8 -> 1, 4 -> 1) - - vectorizedDocuments(0, 0).toInt should be (0) - vectorizedDocuments(0, 13).toInt should be (1) - vectorizedDocuments(1, 3).toInt should be (2) - vectorizedDocuments(3, 3).toInt should be (3) - - } - - - test("TFIDF test") { - val (dictionary, dfMap) = createDictionaryAndDfMaps(documents) - - val tfidf: TermWeight = new TFIDF() - - val vectorizedDocuments: Matrix = new SparseMatrix(documents.size, dictionary.size) - - for (doc <- documents) { - vectorizedDocuments(doc._1 - 1, ::) := vectorizeDocument(doc._2, dictionary, dfMap, tfidf) - } - - // corpus: - // (1, "the first document contains 5 terms"), - // (2, "document two document contains 4 terms"), - // (3, "document three three terms"), - // (4, "each document including this document contain the term document") - - // dictonary: - // (this -> 0, 4 -> 1, three -> 2, document -> 3, two -> 4, term -> 5, 5 -> 6, contain -> 7, - // each -> 8, first -> 9, terms -> 10, contains -> 11, including -> 12, the -> 13) - - // dfMap: - // (0 -> 1, 5 -> 1, 10 -> 3, 1 -> 1, 6 -> 1, 9 -> 1, 13 -> 2, 2 -> 1, 12 -> 1, 7 -> 1, 3 -> 4, - // 11 -> 2, 8 -> 1, 4 -> 1) - - abs(vectorizedDocuments(0, 0) - 0.0) should be < epsilon - abs(vectorizedDocuments(0, 13) - 2.540445) should be < epsilon - abs(vectorizedDocuments(1, 3) - 2.870315) should be < epsilon - abs(vectorizedDocuments(3, 3) - 3.515403) should be < epsilon - } - - test("MLlib TFIDF test") { - val (dictionary, dfMap) = createDictionaryAndDfMaps(documents) - - val tfidf: TermWeight = new MLlibTFIDF() - - val vectorizedDocuments: Matrix = new SparseMatrix(documents.size, dictionary.size) - - for (doc <- documents) { - vectorizedDocuments(doc._1 - 1, ::) := vectorizeDocument(doc._2, dictionary, dfMap, tfidf) - } - - // corpus: - // (1, "the first document contains 5 terms"), - // (2, "document two document contains 4 terms"), - // (3, "document three three terms"), - // (4, "each document including this document contain the term document") - - // dictonary: - // (this -> 0, 4 -> 1, three -> 2, document -> 3, two -> 4, term -> 5, 5 -> 6, contain -> 7, - // each -> 8, first -> 9, terms -> 10, contains -> 11, including -> 12, the -> 13) - - // dfMap: - // (0 -> 1, 5 -> 1, 10 -> 3, 1 -> 1, 6 -> 1, 9 -> 1, 13 -> 2, 2 -> 1, 12 -> 1, 7 -> 1, 3 -> 4, - // 11 -> 2, 8 -> 1, 4 -> 1) - - abs(vectorizedDocuments(0, 0) - 0.0) should be < epsilon - abs(vectorizedDocuments(0, 13) - 1.609437) should be < epsilon - abs(vectorizedDocuments(1, 3) - 2.197224) should be < epsilon - abs(vectorizedDocuments(3, 3) - 3.295836) should be < epsilon - } - -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/ef6d93a3/samsara/src/test/scala/org/apache/mahout/test/DistributedMahoutSuite.scala ---------------------------------------------------------------------- diff --git a/samsara/src/test/scala/org/apache/mahout/test/DistributedMahoutSuite.scala b/samsara/src/test/scala/org/apache/mahout/test/DistributedMahoutSuite.scala deleted file mode 100644 index 3538991..0000000 --- a/samsara/src/test/scala/org/apache/mahout/test/DistributedMahoutSuite.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.test - -import org.apache.mahout.math.drm.DistributedContext -import org.scalatest.{Suite, FunSuite, Matchers} - -/** - * Unit tests that use a distributed context to run - */ -trait DistributedMahoutSuite extends MahoutSuite { this: Suite => - protected implicit var mahoutCtx: DistributedContext -} http://git-wip-us.apache.org/repos/asf/mahout/blob/ef6d93a3/samsara/src/test/scala/org/apache/mahout/test/LoggerConfiguration.scala ---------------------------------------------------------------------- diff --git a/samsara/src/test/scala/org/apache/mahout/test/LoggerConfiguration.scala b/samsara/src/test/scala/org/apache/mahout/test/LoggerConfiguration.scala deleted file mode 100644 index 7a34aa2..0000000 --- a/samsara/src/test/scala/org/apache/mahout/test/LoggerConfiguration.scala +++ /dev/null @@ -1,16 +0,0 @@ -package org.apache.mahout.test - -import org.scalatest._ -import org.apache.log4j.{Level, Logger, BasicConfigurator} - -trait LoggerConfiguration extends BeforeAndAfterAllConfigMap { - this: Suite => - - override protected def beforeAll(configMap: ConfigMap): Unit = { - super.beforeAll(configMap) - BasicConfigurator.resetConfiguration() - BasicConfigurator.configure() - Logger.getRootLogger.setLevel(Level.ERROR) - Logger.getLogger("org.apache.mahout.math.scalabindings").setLevel(Level.DEBUG) - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/ef6d93a3/samsara/src/test/scala/org/apache/mahout/test/MahoutSuite.scala ---------------------------------------------------------------------- diff --git a/samsara/src/test/scala/org/apache/mahout/test/MahoutSuite.scala b/samsara/src/test/scala/org/apache/mahout/test/MahoutSuite.scala deleted file mode 100644 index d3b8a38..0000000 --- a/samsara/src/test/scala/org/apache/mahout/test/MahoutSuite.scala +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.mahout.test - -import java.io.File -import org.scalatest._ -import org.apache.mahout.common.RandomUtils - -trait MahoutSuite extends BeforeAndAfterEach with LoggerConfiguration with Matchers { - this: Suite => - - final val TmpDir = "tmp/" - - override protected def beforeEach() { - super.beforeEach() - RandomUtils.useTestSeed() - } - - override protected def beforeAll(configMap: ConfigMap) { - super.beforeAll(configMap) - - // just in case there is an existing tmp dir clean it before every suite - deleteDirectory(new File(TmpDir)) - } - - override protected def afterEach() { - - // clean the tmp dir after every test - deleteDirectory(new File(TmpDir)) - - super.afterEach() - } - - /** Delete directory no symlink checking and exceptions are not caught */ - private def deleteDirectory(path: File): Unit = { - if (path.isDirectory) - for (files <- path.listFiles) deleteDirectory(files) - path.delete - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/ef6d93a3/spark-shell/pom.xml ---------------------------------------------------------------------- diff --git a/spark-shell/pom.xml b/spark-shell/pom.xml index 87fb187..0903534 100644 --- a/spark-shell/pom.xml +++ b/spark-shell/pom.xml @@ -112,7 +112,7 @@ <dependency> <groupId>org.apache.mahout</groupId> - <artifactId>mahout-samsara_${scala.compat.version}</artifactId> + <artifactId>mahout-math-scala_${scala.compat.version}</artifactId> <classifier>tests</classifier> <scope>test</scope> </dependency> http://git-wip-us.apache.org/repos/asf/mahout/blob/ef6d93a3/spark/pom.xml ---------------------------------------------------------------------- diff --git a/spark/pom.xml b/spark/pom.xml index 885d5f2..5646c25 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -134,7 +134,7 @@ <dependency> <groupId>org.apache.mahout</groupId> - <artifactId>mahout-samsara_${scala.compat.version}</artifactId> + <artifactId>mahout-math-scala_${scala.compat.version}</artifactId> </dependency> <dependency> @@ -150,7 +150,7 @@ <dependency> <groupId>org.apache.mahout</groupId> - <artifactId>mahout-samsara_${scala.compat.version}</artifactId> + <artifactId>mahout-math-scala_${scala.compat.version}</artifactId> <classifier>tests</classifier> <scope>test</scope> </dependency>
