MAHOUT-1681: Renamed mahout-math-scala to mahout-samsara
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/f7b69fab Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/f7b69fab Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/f7b69fab Branch: refs/heads/mahout-0.10.x Commit: f7b69fabf1253b5e735e269c9410459d91816cdd Parents: 63e82ea Author: Stevo Slavic <[email protected]> Authored: Tue Apr 14 08:28:15 2015 +0200 Committer: Stevo Slavic <[email protected]> Committed: Tue Apr 14 08:28:15 2015 +0200 ---------------------------------------------------------------------- CHANGELOG | 2 + distribution/pom.xml | 2 +- distribution/src/main/assembly/bin.xml | 8 +- h2o/pom.xml | 4 +- math-scala/pom.xml | 197 ------- .../classifier/naivebayes/NBClassifier.scala | 119 ---- .../mahout/classifier/naivebayes/NBModel.scala | 217 -------- .../classifier/naivebayes/NaiveBayes.scala | 380 ------------- .../classifier/stats/ClassifierStats.scala | 467 ---------------- .../classifier/stats/ConfusionMatrix.scala | 460 ---------------- .../apache/mahout/drivers/MahoutDriver.scala | 44 -- .../mahout/drivers/MahoutOptionParser.scala | 220 -------- .../mahout/math/cf/SimilarityAnalysis.scala | 308 ----------- .../apache/mahout/math/decompositions/ALS.scala | 140 ----- .../apache/mahout/math/decompositions/DQR.scala | 74 --- .../mahout/math/decompositions/DSPCA.scala | 153 ------ .../mahout/math/decompositions/DSSVD.scala | 82 --- .../mahout/math/decompositions/SSVD.scala | 165 ------ .../mahout/math/decompositions/package.scala | 141 ----- .../org/apache/mahout/math/drm/BCast.scala | 23 - .../org/apache/mahout/math/drm/CacheHint.scala | 19 - .../mahout/math/drm/CheckpointedDrm.scala | 47 -- .../mahout/math/drm/CheckpointedOps.scala | 43 -- .../mahout/math/drm/DistributedContext.scala | 27 - .../mahout/math/drm/DistributedEngine.scala | 215 -------- .../mahout/math/drm/DrmDoubleScalarOps.scala | 33 -- .../org/apache/mahout/math/drm/DrmLike.scala | 55 -- 
.../org/apache/mahout/math/drm/DrmLikeOps.scala | 118 ---- .../apache/mahout/math/drm/RLikeDrmOps.scala | 146 ----- .../math/drm/logical/AbstractBinaryOp.scala | 54 -- .../math/drm/logical/AbstractUnaryOp.scala | 37 -- .../math/drm/logical/CheckpointAction.scala | 47 -- .../apache/mahout/math/drm/logical/OpAB.scala | 41 -- .../mahout/math/drm/logical/OpABAnyKey.scala | 41 -- .../apache/mahout/math/drm/logical/OpABt.scala | 42 -- .../apache/mahout/math/drm/logical/OpAewB.scala | 46 -- .../mahout/math/drm/logical/OpAewScalar.scala | 45 -- .../apache/mahout/math/drm/logical/OpAt.scala | 35 -- .../apache/mahout/math/drm/logical/OpAtA.scala | 36 -- .../mahout/math/drm/logical/OpAtAnyKey.scala | 34 -- .../apache/mahout/math/drm/logical/OpAtB.scala | 42 -- .../apache/mahout/math/drm/logical/OpAtx.scala | 41 -- .../apache/mahout/math/drm/logical/OpAx.scala | 42 -- .../mahout/math/drm/logical/OpCbind.scala | 42 -- .../mahout/math/drm/logical/OpMapBlock.scala | 43 -- .../apache/mahout/math/drm/logical/OpPar.scala | 18 - .../mahout/math/drm/logical/OpRbind.scala | 40 -- .../mahout/math/drm/logical/OpRowRange.scala | 36 -- .../math/drm/logical/OpTimesLeftMatrix.scala | 43 -- .../math/drm/logical/OpTimesRightMatrix.scala | 46 -- .../org/apache/mahout/math/drm/package.scala | 136 ----- .../math/indexeddataset/IndexedDataset.scala | 63 --- .../math/indexeddataset/ReaderWriter.scala | 117 ---- .../mahout/math/indexeddataset/Schema.scala | 104 ---- .../math/scalabindings/DoubleScalarOps.scala | 42 -- .../scalabindings/MatlabLikeMatrixOps.scala | 66 --- .../math/scalabindings/MatlabLikeOps.scala | 35 -- .../math/scalabindings/MatlabLikeTimesOps.scala | 28 - .../scalabindings/MatlabLikeVectorOps.scala | 73 --- .../mahout/math/scalabindings/MatrixOps.scala | 215 -------- .../math/scalabindings/RLikeMatrixOps.scala | 94 ---- .../mahout/math/scalabindings/RLikeOps.scala | 38 -- .../math/scalabindings/RLikeTimesOps.scala | 28 - .../math/scalabindings/RLikeVectorOps.scala | 71 --- 
.../mahout/math/scalabindings/VectorOps.scala | 141 ----- .../mahout/math/scalabindings/package.scala | 297 ---------- .../org/apache/mahout/nlp/tfidf/TFIDF.scala | 112 ---- .../classifier/naivebayes/NBTestBase.scala | 291 ---------- .../stats/ClassifierStatsTestBase.scala | 257 --------- .../decompositions/DecompositionsSuite.scala | 113 ---- .../DistributedDecompositionsSuiteBase.scala | 219 -------- .../mahout/math/drm/DrmLikeOpsSuiteBase.scala | 93 ---- .../mahout/math/drm/DrmLikeSuiteBase.scala | 76 --- .../mahout/math/drm/RLikeDrmOpsSuiteBase.scala | 550 ------------------- .../mahout/math/scalabindings/MathSuite.scala | 214 -------- .../MatlabLikeMatrixOpsSuite.scala | 67 --- .../math/scalabindings/MatrixOpsSuite.scala | 185 ------- .../scalabindings/RLikeMatrixOpsSuite.scala | 80 --- .../scalabindings/RLikeVectorOpsSuite.scala | 36 -- .../math/scalabindings/VectorOpsSuite.scala | 82 --- .../apache/mahout/nlp/tfidf/TFIDFtestBase.scala | 184 ------- .../mahout/test/DistributedMahoutSuite.scala | 28 - .../mahout/test/LoggerConfiguration.scala | 16 - .../org/apache/mahout/test/MahoutSuite.scala | 54 -- pom.xml | 6 +- samsara/pom.xml | 194 +++++++ .../classifier/naivebayes/NBClassifier.scala | 119 ++++ .../mahout/classifier/naivebayes/NBModel.scala | 217 ++++++++ .../classifier/naivebayes/NaiveBayes.scala | 380 +++++++++++++ .../classifier/stats/ClassifierStats.scala | 467 ++++++++++++++++ .../classifier/stats/ConfusionMatrix.scala | 460 ++++++++++++++++ .../apache/mahout/drivers/MahoutDriver.scala | 44 ++ .../mahout/drivers/MahoutOptionParser.scala | 220 ++++++++ .../mahout/math/cf/SimilarityAnalysis.scala | 308 +++++++++++ .../apache/mahout/math/decompositions/ALS.scala | 140 +++++ .../apache/mahout/math/decompositions/DQR.scala | 74 +++ .../mahout/math/decompositions/DSPCA.scala | 153 ++++++ .../mahout/math/decompositions/DSSVD.scala | 82 +++ .../mahout/math/decompositions/SSVD.scala | 165 ++++++ .../mahout/math/decompositions/package.scala | 141 +++++ 
.../org/apache/mahout/math/drm/BCast.scala | 23 + .../org/apache/mahout/math/drm/CacheHint.scala | 19 + .../mahout/math/drm/CheckpointedDrm.scala | 47 ++ .../mahout/math/drm/CheckpointedOps.scala | 43 ++ .../mahout/math/drm/DistributedContext.scala | 27 + .../mahout/math/drm/DistributedEngine.scala | 215 ++++++++ .../mahout/math/drm/DrmDoubleScalarOps.scala | 33 ++ .../org/apache/mahout/math/drm/DrmLike.scala | 55 ++ .../org/apache/mahout/math/drm/DrmLikeOps.scala | 118 ++++ .../apache/mahout/math/drm/RLikeDrmOps.scala | 146 +++++ .../math/drm/logical/AbstractBinaryOp.scala | 54 ++ .../math/drm/logical/AbstractUnaryOp.scala | 37 ++ .../math/drm/logical/CheckpointAction.scala | 47 ++ .../apache/mahout/math/drm/logical/OpAB.scala | 41 ++ .../mahout/math/drm/logical/OpABAnyKey.scala | 41 ++ .../apache/mahout/math/drm/logical/OpABt.scala | 42 ++ .../apache/mahout/math/drm/logical/OpAewB.scala | 46 ++ .../mahout/math/drm/logical/OpAewScalar.scala | 45 ++ .../apache/mahout/math/drm/logical/OpAt.scala | 35 ++ .../apache/mahout/math/drm/logical/OpAtA.scala | 36 ++ .../mahout/math/drm/logical/OpAtAnyKey.scala | 34 ++ .../apache/mahout/math/drm/logical/OpAtB.scala | 42 ++ .../apache/mahout/math/drm/logical/OpAtx.scala | 41 ++ .../apache/mahout/math/drm/logical/OpAx.scala | 42 ++ .../mahout/math/drm/logical/OpCbind.scala | 42 ++ .../mahout/math/drm/logical/OpMapBlock.scala | 43 ++ .../apache/mahout/math/drm/logical/OpPar.scala | 18 + .../mahout/math/drm/logical/OpRbind.scala | 40 ++ .../mahout/math/drm/logical/OpRowRange.scala | 36 ++ .../math/drm/logical/OpTimesLeftMatrix.scala | 43 ++ .../math/drm/logical/OpTimesRightMatrix.scala | 46 ++ .../org/apache/mahout/math/drm/package.scala | 136 +++++ .../math/indexeddataset/IndexedDataset.scala | 63 +++ .../math/indexeddataset/ReaderWriter.scala | 117 ++++ .../mahout/math/indexeddataset/Schema.scala | 104 ++++ .../math/scalabindings/DoubleScalarOps.scala | 42 ++ .../scalabindings/MatlabLikeMatrixOps.scala | 66 +++ 
.../math/scalabindings/MatlabLikeOps.scala | 35 ++ .../math/scalabindings/MatlabLikeTimesOps.scala | 28 + .../scalabindings/MatlabLikeVectorOps.scala | 73 +++ .../mahout/math/scalabindings/MatrixOps.scala | 215 ++++++++ .../math/scalabindings/RLikeMatrixOps.scala | 94 ++++ .../mahout/math/scalabindings/RLikeOps.scala | 38 ++ .../math/scalabindings/RLikeTimesOps.scala | 28 + .../math/scalabindings/RLikeVectorOps.scala | 71 +++ .../mahout/math/scalabindings/VectorOps.scala | 141 +++++ .../mahout/math/scalabindings/package.scala | 297 ++++++++++ .../org/apache/mahout/nlp/tfidf/TFIDF.scala | 112 ++++ .../classifier/naivebayes/NBTestBase.scala | 291 ++++++++++ .../stats/ClassifierStatsTestBase.scala | 257 +++++++++ .../decompositions/DecompositionsSuite.scala | 113 ++++ .../DistributedDecompositionsSuiteBase.scala | 219 ++++++++ .../mahout/math/drm/DrmLikeOpsSuiteBase.scala | 93 ++++ .../mahout/math/drm/DrmLikeSuiteBase.scala | 76 +++ .../mahout/math/drm/RLikeDrmOpsSuiteBase.scala | 550 +++++++++++++++++++ .../mahout/math/scalabindings/MathSuite.scala | 214 ++++++++ .../MatlabLikeMatrixOpsSuite.scala | 67 +++ .../math/scalabindings/MatrixOpsSuite.scala | 185 +++++++ .../scalabindings/RLikeMatrixOpsSuite.scala | 80 +++ .../scalabindings/RLikeVectorOpsSuite.scala | 36 ++ .../math/scalabindings/VectorOpsSuite.scala | 82 +++ .../apache/mahout/nlp/tfidf/TFIDFtestBase.scala | 184 +++++++ .../mahout/test/DistributedMahoutSuite.scala | 28 + .../mahout/test/LoggerConfiguration.scala | 16 + .../org/apache/mahout/test/MahoutSuite.scala | 54 ++ spark-shell/pom.xml | 2 +- spark/pom.xml | 4 +- 167 files changed, 8961 insertions(+), 8962 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index 777963a..a3e39ac 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,8 @@ Mahout Change 
Log Release 0.11.0 - unreleased + MAHOUT-1681: Renamed mahout-math-scala to mahout-samsara + MAHOUT-1680: Renamed mahout-distribution to apache-mahout-distribution Release 0.10.0 - 2015-04-11 http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/distribution/pom.xml ---------------------------------------------------------------------- diff --git a/distribution/pom.xml b/distribution/pom.xml index bc17a08..3a47e08 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -115,7 +115,7 @@ </dependency> <dependency> <groupId>org.apache.mahout</groupId> - <artifactId>mahout-math-scala_${scala.compat.version}</artifactId> + <artifactId>mahout-samsara_${scala.compat.version}</artifactId> </dependency> </dependencies> </project> http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/distribution/src/main/assembly/bin.xml ---------------------------------------------------------------------- diff --git a/distribution/src/main/assembly/bin.xml b/distribution/src/main/assembly/bin.xml index c49ddc2..5dd014c 100644 --- a/distribution/src/main/assembly/bin.xml +++ b/distribution/src/main/assembly/bin.xml @@ -117,7 +117,7 @@ <outputDirectory/> </fileSet> <fileSet> - <directory>${project.basedir}/../math-scala/target</directory> + <directory>${project.basedir}/../samsara/target</directory> <includes> <include>mahout-*.jar</include> <include>mahout-*.job</include> @@ -193,12 +193,12 @@ <outputDirectory>docs/mahout-examples</outputDirectory> </fileSet> <fileSet> - <directory>${project.basedir}/../math-scala/target/site/scaladocs</directory> - <outputDirectory>docs/mahout-examples</outputDirectory> + <directory>${project.basedir}/../samsara/target/site/scaladocs</directory> + <outputDirectory>docs/mahout-samsara</outputDirectory> </fileSet> <fileSet> <directory>${project.basedir}/../spark/target/site/scaladocs</directory> - <outputDirectory>docs/mahout-examples</outputDirectory> + <outputDirectory>docs/mahout-spark</outputDirectory> </fileSet> <fileSet> 
<directory>${project.basedir}/../spark-shell/target/site/scaladocs</directory> http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/h2o/pom.xml ---------------------------------------------------------------------- diff --git a/h2o/pom.xml b/h2o/pom.xml index b9d101a..c0ccdcc 100644 --- a/h2o/pom.xml +++ b/h2o/pom.xml @@ -127,7 +127,7 @@ <dependency> <groupId>org.apache.mahout</groupId> - <artifactId>mahout-math-scala_${scala.compat.version}</artifactId> + <artifactId>mahout-samsara_${scala.compat.version}</artifactId> <version>${project.version}</version> </dependency> @@ -140,7 +140,7 @@ <dependency> <groupId>org.apache.mahout</groupId> - <artifactId>mahout-math-scala_${scala.compat.version}</artifactId> + <artifactId>mahout-samsara_${scala.compat.version}</artifactId> <classifier>tests</classifier> <scope>test</scope> </dependency> http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/math-scala/pom.xml ---------------------------------------------------------------------- diff --git a/math-scala/pom.xml b/math-scala/pom.xml deleted file mode 100644 index 78331dd..0000000 --- a/math-scala/pom.xml +++ /dev/null @@ -1,197 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <parent> - <groupId>org.apache.mahout</groupId> - <artifactId>mahout</artifactId> - <version>0.11.0-SNAPSHOT</version> - <relativePath>../pom.xml</relativePath> - </parent> - - <artifactId>mahout-math-scala_${scala.compat.version}</artifactId> - <name>Mahout Math Scala bindings</name> - <description>High performance scientific and technical computing data structures and methods, - mostly based on CERN's - Colt Java API - </description> - - <packaging>jar</packaging> - - <build> - <plugins> - <!-- create test jar so other modules can reuse the math-scala test utility classes. --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <executions> - <execution> - <goals> - <goal>test-jar</goal> - </goals> - <phase>package</phase> - </execution> - </executions> - </plugin> - - <plugin> - <artifactId>maven-javadoc-plugin</artifactId> - </plugin> - - <plugin> - <artifactId>maven-source-plugin</artifactId> - </plugin> - - <plugin> - <groupId>net.alchim31.maven</groupId> - <artifactId>scala-maven-plugin</artifactId> - <executions> - <execution> - <id>add-scala-sources</id> - <phase>initialize</phase> - <goals> - <goal>add-source</goal> - </goals> - </execution> - <execution> - <id>scala-compile</id> - <phase>process-resources</phase> - <goals> - <goal>compile</goal> - </goals> - </execution> - <execution> - <id>scala-test-compile</id> - <phase>process-test-resources</phase> - <goals> - <goal>testCompile</goal> - </goals> - </execution> - </executions> - </plugin> - - <!--this is what scalatest recommends to do to enable scala tests --> - - <!-- disable surefire --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-surefire-plugin</artifactId> - 
<configuration> - <skipTests>true</skipTests> - </configuration> - </plugin> - <!-- enable scalatest --> - <plugin> - <groupId>org.scalatest</groupId> - <artifactId>scalatest-maven-plugin</artifactId> - <executions> - <execution> - <id>test</id> - <goals> - <goal>test</goal> - </goals> - </execution> - </executions> - </plugin> - - </plugins> - </build> - - <dependencies> - - <dependency> - <groupId>org.apache.mahout</groupId> - <artifactId>mahout-math</artifactId> - </dependency> - - <!-- 3rd-party --> - <dependency> - <groupId>log4j</groupId> - <artifactId>log4j</artifactId> - </dependency> - - <dependency> - <groupId>com.github.scopt</groupId> - <artifactId>scopt_${scala.compat.version}</artifactId> - <version>3.3.0</version> - </dependency> - - <!-- scala stuff --> - <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scala-compiler</artifactId> - <version>${scala.version}</version> - </dependency> - <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scala-reflect</artifactId> - <version>${scala.version}</version> - </dependency> - <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scala-library</artifactId> - <version>${scala.version}</version> - </dependency> - <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scala-actors</artifactId> - <version>${scala.version}</version> - </dependency> - <dependency> - <groupId>org.scala-lang</groupId> - <artifactId>scalap</artifactId> - <version>${scala.version}</version> - </dependency> - <dependency> - <groupId>org.scalatest</groupId> - <artifactId>scalatest_${scala.compat.version}</artifactId> - </dependency> - - </dependencies> - - <profiles> - <profile> - <id>mahout-release</id> - <build> - <plugins> - <plugin> - <groupId>net.alchim31.maven</groupId> - <artifactId>scala-maven-plugin</artifactId> - <executions> - <execution> - <id>generate-scaladoc</id> - <goals> - <goal>doc</goal> - </goals> - </execution> - <execution> - <id>attach-scaladoc-jar</id> - <goals> - 
<goal>doc-jar</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - </build> - </profile> - </profiles> -</project> http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NBClassifier.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NBClassifier.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NBClassifier.scala deleted file mode 100644 index 5de0733..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NBClassifier.scala +++ /dev/null @@ -1,119 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ -package org.apache.mahout.classifier.naivebayes - -import org.apache.mahout.math.Vector -import scala.collection.JavaConversions._ - -/** - * Abstract Classifier base for Complentary and Standard Classifiers - * @param nbModel a trained NBModel - */ -abstract class AbstractNBClassifier(nbModel: NBModel) extends java.io.Serializable { - - // Trained Naive Bayes Model - val model = nbModel - - /** scoring method for standard and complementary classifiers */ - protected def getScoreForLabelFeature(label: Int, feature: Int): Double - - /** getter for model */ - protected def getModel: NBModel= { - model - } - - /** - * Compute the score for a Vector of weighted TF-IDF featured - * @param label Label to be scored - * @param instance Vector of weights to be calculate score - * @return score for this Label - */ - protected def getScoreForLabelInstance(label: Int, instance: Vector): Double = { - var result: Double = 0.0 - for (e <- instance.nonZeroes) { - result += e.get * getScoreForLabelFeature(label, e.index) - } - result - } - - /** number of categories the model has been trained on */ - def numCategories: Int = { - model.numLabels - } - - /** - * get a scoring vector for a vector of TF of TF-IDF weights - * @param instance vector of TF of TF-IDF weights to be classified - * @return a vector of scores. 
- */ - def classifyFull(instance: Vector): Vector = { - classifyFull(model.createScoringVector, instance) - } - - /** helper method for classifyFull(Vector) */ - def classifyFull(r: Vector, instance: Vector): Vector = { - var label: Int = 0 - for (label <- 0 until model.numLabels) { - r.setQuick(label, getScoreForLabelInstance(label, instance)) - } - r - } -} - -/** - * Standard Multinomial Naive Bayes Classifier - * @param nbModel a trained NBModel - */ -class StandardNBClassifier(nbModel: NBModel) extends AbstractNBClassifier(nbModel: NBModel) with java.io.Serializable{ - override def getScoreForLabelFeature(label: Int, feature: Int): Double = { - val model: NBModel = getModel - StandardNBClassifier.computeWeight(model.weight(label, feature), model.labelWeight(label), model.alphaI, model.numFeatures) - } -} - -/** helper object for StandardNBClassifier */ -object StandardNBClassifier extends java.io.Serializable { - /** Compute Standard Multinomial Naive Bayes Weights See Rennie et. al. 
Section 2.1 */ - def computeWeight(featureLabelWeight: Double, labelWeight: Double, alphaI: Double, numFeatures: Double): Double = { - val numerator: Double = featureLabelWeight + alphaI - val denominator: Double = labelWeight + alphaI * numFeatures - return Math.log(numerator / denominator) - } -} - -/** - * Complementary Naive Bayes Classifier - * @param nbModel a trained NBModel - */ -class ComplementaryNBClassifier(nbModel: NBModel) extends AbstractNBClassifier(nbModel: NBModel) with java.io.Serializable { - override def getScoreForLabelFeature(label: Int, feature: Int): Double = { - val model: NBModel = getModel - val weight: Double = ComplementaryNBClassifier.computeWeight(model.featureWeight(feature), model.weight(label, feature), model.totalWeightSum, model.labelWeight(label), model.alphaI, model.numFeatures) - return weight / model.thetaNormalizer(label) - } -} - -/** helper object for ComplementaryNBClassifier */ -object ComplementaryNBClassifier extends java.io.Serializable { - - /** Compute Complementary weights See Rennie et. al. 
Section 3.1 */ - def computeWeight(featureWeight: Double, featureLabelWeight: Double, totalWeight: Double, labelWeight: Double, alphaI: Double, numFeatures: Double): Double = { - val numerator: Double = featureWeight - featureLabelWeight + alphaI - val denominator: Double = totalWeight - labelWeight + alphaI * numFeatures - return -Math.log(numerator / denominator) - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NBModel.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NBModel.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NBModel.scala deleted file mode 100644 index 3ceae96..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NBModel.scala +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.mahout.classifier.naivebayes - -import org.apache.mahout.math._ - -import org.apache.mahout.math.{drm, scalabindings} - -import scalabindings._ -import scalabindings.RLikeOps._ -import drm.RLikeDrmOps._ -import drm._ -import scala.collection.JavaConverters._ -import scala.language.asInstanceOf -import scala.collection._ -import JavaConversions._ - -/** - * - * @param weightsPerLabelAndFeature Aggregated matrix of weights of labels x features - * @param weightsPerFeature Vector of summation of all feature weights. - * @param weightsPerLabel Vector of summation of all label weights. - * @param perlabelThetaNormalizer Vector of weight normalizers per label (used only for complemtary models) - * @param labelIndex HashMap of labels and their corresponding row in the weightMatrix - * @param alphaI Laplace smoothing factor. - * @param isComplementary Whether or not this is a complementary model. - */ -class NBModel(val weightsPerLabelAndFeature: Matrix = null, - val weightsPerFeature: Vector = null, - val weightsPerLabel: Vector = null, - val perlabelThetaNormalizer: Vector = null, - val labelIndex: Map[String, Integer] = null, - val alphaI: Float = 1.0f, - val isComplementary: Boolean= false) extends java.io.Serializable { - - - val numFeatures: Double = weightsPerFeature.getNumNondefaultElements - val totalWeightSum: Double = weightsPerLabel.zSum - val alphaVector: Vector = null - - validate() - - // todo: Maybe it is a good idea to move the dfsWrite and dfsRead out - // todo: of the model and into a helper - - // TODO: weightsPerLabelAndFeature, a sparse (numFeatures x numLabels) matrix should fit - // TODO: upfront in memory and should not require a DRM decide if we want this to scale out. - - - /** getter for summed label weights. Used by legacy classifier */ - def labelWeight(label: Int): Double = { - weightsPerLabel.getQuick(label) - } - - /** getter for weight normalizers. 
Used by legacy classifier */ - def thetaNormalizer(label: Int): Double = { - perlabelThetaNormalizer.get(label) - } - - /** getter for summed feature weights. Used by legacy classifier */ - def featureWeight(feature: Int): Double = { - weightsPerFeature.getQuick(feature) - } - - /** getter for individual aggregated weights. Used by legacy classifier */ - def weight(label: Int, feature: Int): Double = { - weightsPerLabelAndFeature.getQuick(label, feature) - } - - /** getter for a single empty vector of weights */ - def createScoringVector: Vector = { - weightsPerLabel.like - } - - /** getter for a the number of labels to consider */ - def numLabels: Int = { - weightsPerLabel.size - } - - /** - * Write a trained model to the filesystem as a series of DRMs - * @param pathToModel Directory to which the model will be written - */ - def dfsWrite(pathToModel: String)(implicit ctx: DistributedContext): Unit = { - //todo: write out as smaller partitions or possibly use reader and writers to - //todo: write something other than a DRM for label Index, is Complementary, alphaI. 
- - // add a directory to put all of the DRMs in - val fullPathToModel = pathToModel + NBModel.modelBaseDirectory - - drmParallelize(weightsPerLabelAndFeature).dfsWrite(fullPathToModel + "/weightsPerLabelAndFeatureDrm.drm") - drmParallelize(sparse(weightsPerFeature)).dfsWrite(fullPathToModel + "/weightsPerFeatureDrm.drm") - drmParallelize(sparse(weightsPerLabel)).dfsWrite(fullPathToModel + "/weightsPerLabelDrm.drm") - drmParallelize(sparse(perlabelThetaNormalizer)).dfsWrite(fullPathToModel + "/perlabelThetaNormalizerDrm.drm") - drmParallelize(sparse(svec((0,alphaI)::Nil))).dfsWrite(fullPathToModel + "/alphaIDrm.drm") - - // isComplementry is true if isComplementaryDrm(0,0) == 1 else false - val isComplementaryDrm = sparse(0 to 1, 0 to 1) - if(isComplementary){ - isComplementaryDrm(0,0) = 1.0 - } else { - isComplementaryDrm(0,0) = 0.0 - } - drmParallelize(isComplementaryDrm).dfsWrite(fullPathToModel + "/isComplementaryDrm.drm") - - // write the label index as a String-Keyed DRM. - val labelIndexDummyDrm = weightsPerLabelAndFeature.like() - labelIndexDummyDrm.setRowLabelBindings(labelIndex) - // get a reverse map of [Integer, String] and set the value of firsr column of the drm - // to the corresponding row number for it's Label (the rows may not be read back in the same order) - val revMap = labelIndex.map(x => x._2 -> x._1) - for(i <- 0 until labelIndexDummyDrm.numRows() ){ - labelIndexDummyDrm.set(labelIndex(revMap(i)), 0, i.toDouble) - } - - drmParallelizeWithRowLabels(labelIndexDummyDrm).dfsWrite(fullPathToModel + "/labelIndex.drm") - } - - /** Model Validation */ - def validate() { - assert(alphaI > 0, "alphaI has to be greater than 0!") - assert(numFeatures > 0, "the vocab count has to be greater than 0!") - assert(totalWeightSum > 0, "the totalWeightSum has to be greater than 0!") - assert(weightsPerLabel != null, "the number of labels has to be defined!") - assert(weightsPerLabel.getNumNondefaultElements > 0, "the number of labels has to be greater than 0!") 
- assert(weightsPerFeature != null, "the feature sums have to be defined") - assert(weightsPerFeature.getNumNondefaultElements > 0, "the feature sums have to be greater than 0!") - if (isComplementary) { - assert(perlabelThetaNormalizer != null, "the theta normalizers have to be defined") - assert(perlabelThetaNormalizer.getNumNondefaultElements > 0, "the number of theta normalizers has to be greater than 0!") - assert(Math.signum(perlabelThetaNormalizer.minValue) == Math.signum(perlabelThetaNormalizer.maxValue), "Theta normalizers do not all have the same sign") - assert(perlabelThetaNormalizer.getNumNonZeroElements == perlabelThetaNormalizer.size, "Weight normalizers can not have zero value.") - } - assert(labelIndex.size == weightsPerLabel.getNumNondefaultElements, "label index must have entries for all labels") - } -} - -object NBModel extends java.io.Serializable { - - val modelBaseDirectory = "/naiveBayesModel" - - /** - * Read a trained model in from from the filesystem. - * @param pathToModel directory from which to read individual model components - * @return a valid NBModel - */ - def dfsRead(pathToModel: String)(implicit ctx: DistributedContext): NBModel = { - //todo: Takes forever to read we need a more practical method of writing models. Readers/Writers? 
- - // read from a base directory for all drms - val fullPathToModel = pathToModel + modelBaseDirectory - - val weightsPerFeatureDrm = drmDfsRead(fullPathToModel + "/weightsPerFeatureDrm.drm").checkpoint(CacheHint.MEMORY_ONLY) - val weightsPerFeature = weightsPerFeatureDrm.collect(0, ::) - weightsPerFeatureDrm.uncache() - - val weightsPerLabelDrm = drmDfsRead(fullPathToModel + "/weightsPerLabelDrm.drm").checkpoint(CacheHint.MEMORY_ONLY) - val weightsPerLabel = weightsPerLabelDrm.collect(0, ::) - weightsPerLabelDrm.uncache() - - val alphaIDrm = drmDfsRead(fullPathToModel + "/alphaIDrm.drm").checkpoint(CacheHint.MEMORY_ONLY) - val alphaI: Float = alphaIDrm.collect(0, 0).toFloat - alphaIDrm.uncache() - - // isComplementry is true if isComplementaryDrm(0,0) == 1 else false - val isComplementaryDrm = drmDfsRead(fullPathToModel + "/isComplementaryDrm.drm").checkpoint(CacheHint.MEMORY_ONLY) - val isComplementary = isComplementaryDrm.collect(0, 0).toInt == 1 - isComplementaryDrm.uncache() - - var perLabelThetaNormalizer= weightsPerFeature.like() - if (isComplementary) { - val perLabelThetaNormalizerDrm = drm.drmDfsRead(fullPathToModel + "/perlabelThetaNormalizerDrm.drm") - .checkpoint(CacheHint.MEMORY_ONLY) - perLabelThetaNormalizer = perLabelThetaNormalizerDrm.collect(0, ::) - } - - val dummyLabelDrm= drmDfsRead(fullPathToModel + "/labelIndex.drm") - .checkpoint(CacheHint.MEMORY_ONLY) - val labelIndexMap:java.util.Map[String, Integer] = dummyLabelDrm.getRowLabelBindings - dummyLabelDrm.uncache() - - // map the labels to the corresponding row numbers of weightsPerFeatureDrm (values in dummyLabelDrm) - val scalaLabelIndexMap: mutable.Map[String, Integer] = - labelIndexMap.map(x => x._1 -> dummyLabelDrm.get(labelIndexMap(x._1), 0) - .toInt - .asInstanceOf[Integer]) - - val weightsPerLabelAndFeatureDrm = drmDfsRead(fullPathToModel + "/weightsPerLabelAndFeatureDrm.drm").checkpoint(CacheHint.MEMORY_ONLY) - val weightsPerLabelAndFeature = weightsPerLabelAndFeatureDrm.collect - 
weightsPerLabelAndFeatureDrm.uncache() - - // model validation is triggered automatically by constructor - val model: NBModel = new NBModel(weightsPerLabelAndFeature, - weightsPerFeature, - weightsPerLabel, - perLabelThetaNormalizer, - scalaLabelIndexMap, - alphaI, - isComplementary) - - model - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala deleted file mode 100644 index a15ca09..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/classifier/naivebayes/NaiveBayes.scala +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.classifier.naivebayes - -import org.apache.mahout.classifier.stats.{ResultAnalyzer, ClassifierResult} -import org.apache.mahout.math._ -import scalabindings._ -import scalabindings.RLikeOps._ -import drm.RLikeDrmOps._ -import drm._ -import scala.reflect.ClassTag -import scala.language.asInstanceOf -import collection._ -import scala.collection.JavaConversions._ - -/** - * Distributed training of a Naive Bayes model. Follows the approach presented in Rennie et.al.: Tackling the poor - * assumptions of Naive Bayes Text classifiers, ICML 2003, http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf - */ -trait NaiveBayes extends java.io.Serializable{ - - /** default value for the Laplacian smoothing parameter */ - def defaultAlphaI = 1.0f - - // function to extract categories from string keys - type CategoryParser = String => String - - /** Default: seqdirectory/seq2Sparse Categories are Stored in Drm Keys as: /Category/document_id */ - def seq2SparseCategoryParser: CategoryParser = x => x.split("/")(1) - - - /** - * Distributed training of a Naive Bayes model. Follows the approach presented in Rennie et.al.: Tackling the poor - * assumptions of Naive Bayes Text classifiers, ICML 2003, http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf - * - * @param observationsPerLabel a DrmLike[Int] matrix containing term frequency counts for each label. 
- * @param trainComplementary whether or not to train a complementary Naive Bayes model - * @param alphaI Laplace smoothing parameter - * @return trained naive bayes model - */ - def train(observationsPerLabel: DrmLike[Int], - labelIndex: Map[String, Integer], - trainComplementary: Boolean = true, - alphaI: Float = defaultAlphaI): NBModel = { - - // Summation of all weights per feature - val weightsPerFeature = observationsPerLabel.colSums - - // Distributed summation of all weights per label - val weightsPerLabel = observationsPerLabel.rowSums - - // Collect a matrix to pass to the NaiveBayesModel - val inCoreTFIDF = observationsPerLabel.collect - - // perLabelThetaNormalizer Vector is expected by NaiveBayesModel. We can pass a null value - // or Vector of zeroes in the case of a standard NB model. - var thetaNormalizer = weightsPerFeature.like() - - // Instantiate a trainer and retrieve the perLabelThetaNormalizer Vector from it in the case of - // a complementary NB model - if (trainComplementary) { - val thetaTrainer = new ComplementaryNBThetaTrainer(weightsPerFeature, - weightsPerLabel, - alphaI) - // local training of the theta normalization - for (labelIndex <- 0 until inCoreTFIDF.nrow) { - thetaTrainer.train(labelIndex, inCoreTFIDF(labelIndex, ::)) - } - thetaNormalizer = thetaTrainer.retrievePerLabelThetaNormalizer - } - - new NBModel(inCoreTFIDF, - weightsPerFeature, - weightsPerLabel, - thetaNormalizer, - labelIndex, - alphaI, - trainComplementary) - } - - /** - * Extract label Keys from raw TF or TF-IDF Matrix generated by seqdirectory/seq2sparse - * and aggregate TF or TF-IDF values by their label - * Override this method in engine specific modules to optimize - * - * @param stringKeyedObservations DrmLike matrix; Output from seq2sparse - * in form K = eg./Category/document_title - * V = TF or TF-IDF values per term - * @param cParser a String => String function used to extract categories from - * Keys of the stringKeyedObservations DRM. 
The default - * CategoryParser will extract "Category" from: '/Category/document_id' - * @return (labelIndexMap,aggregatedByLabelObservationDrm) - * labelIndexMap is a HashMap [String, Integer] K = label row index - * V = label - * aggregatedByLabelObservationDrm is a DrmLike[Int] of aggregated - * TF or TF-IDF counts per label - */ - def extractLabelsAndAggregateObservations[K: ClassTag](stringKeyedObservations: DrmLike[K], - cParser: CategoryParser = seq2SparseCategoryParser) - (implicit ctx: DistributedContext): - (mutable.HashMap[String, Integer], DrmLike[Int])= { - - stringKeyedObservations.checkpoint() - - val numDocs=stringKeyedObservations.nrow - val numFeatures=stringKeyedObservations.ncol - - // Extract categories from labels assigned by seq2sparse - // Categories are Stored in Drm Keys as eg.: /Category/document_id - - // Get a new DRM with a single column so that we don't have to collect the - // DRM into memory upfront. - val strippedObeservations= stringKeyedObservations.mapBlock(ncol=1){ - case(keys, block) => - val blockB = block.like(keys.size, 1) - keys -> blockB - } - - // Extract the row label bindings (the String keys) from the slim Drm - // strip the document_id from the row keys keeping only the category. - // Sort the bindings alphabetically into a Vector - val labelVectorByRowIndex = strippedObeservations - .getRowLabelBindings - .map(x => x._2 -> cParser(x._1)) - .toVector.sortWith(_._1 < _._1) - - //TODO: add a .toIntKeyed(...) method to DrmLike? 
- - // Copy stringKeyedObservations to an Int-Keyed Drm so that we can compute transpose - // Copy the Collected Matrices up front for now until we hav a distributed way of converting - val inCoreStringKeyedObservations = stringKeyedObservations.collect - val inCoreIntKeyedObservations = new SparseMatrix( - stringKeyedObservations.nrow.toInt, - stringKeyedObservations.ncol) - for (i <- 0 until inCoreStringKeyedObservations.nrow.toInt) { - inCoreIntKeyedObservations(i, ::) = inCoreStringKeyedObservations(i, ::) - } - - val intKeyedObservations= drmParallelize(inCoreIntKeyedObservations) - - stringKeyedObservations.uncache() - - var labelIndex = 0 - val labelIndexMap = new mutable.HashMap[String, Integer] - val encodedLabelByRowIndexVector = new DenseVector(labelVectorByRowIndex.size) - - // Encode Categories as an Integer (Double) so we can broadcast as a vector - // where each element is an Int-encoded category whose index corresponds - // to its row in the Drm - for (i <- 0 until labelVectorByRowIndex.size) { - if (!(labelIndexMap.contains(labelVectorByRowIndex(i)._2))) { - encodedLabelByRowIndexVector(i) = labelIndex.toDouble - labelIndexMap.put(labelVectorByRowIndex(i)._2, labelIndex) - labelIndex += 1 - } - // don't like this casting but need to use a java.lang.Integer when setting rowLabelBindings - encodedLabelByRowIndexVector(i) = labelIndexMap - .getOrElse(labelVectorByRowIndex(i)._2, -1) - .asInstanceOf[Int].toDouble - } - - // "Combiner": Map and aggregate by Category. Do this by broadcasting the encoded - // category vector and mapping a transposed IntKeyed Drm out so that all categories - // will be present on all nodes as columns and can be referenced by - // BCastEncodedCategoryByRowVector. Iteratively sum all categories. 
- val nLabels = labelIndex - - val bcastEncodedCategoryByRowVector = drmBroadcast(encodedLabelByRowIndexVector) - - val aggregetedObservationByLabelDrm = intKeyedObservations.t.mapBlock(ncol = nLabels) { - case (keys, blockA) => - val blockB = blockA.like(keys.size, nLabels) - var label : Int = 0 - for (i <- 0 until keys.size) { - blockA(i, ::).nonZeroes().foreach { elem => - label = bcastEncodedCategoryByRowVector.get(elem.index).toInt - blockB(i, label) = blockB(i, label) + blockA(i, elem.index) - } - } - keys -> blockB - }.t - - (labelIndexMap, aggregetedObservationByLabelDrm) - } - - /** - * Test a trained model with a labeled dataset sequentially - * @param model a trained NBModel - * @param testSet a labeled testing set - * @param testComplementary test using a complementary or a standard NB classifier - * @param cParser a String => String function used to extract categories from - * Keys of the testing set DRM. The default - * CategoryParser will extract "Category" from: '/Category/document_id' - * - * *Note*: this method brings the entire test set into upfront memory, - * This method is optimized and parallelized in SparkNaiveBayes - * - * @tparam K implicitly determined Key type of test set DRM: String - * @return a result analyzer with confusion matrix and accuracy statistics - */ - def test[K: ClassTag](model: NBModel, - testSet: DrmLike[K], - testComplementary: Boolean = false, - cParser: CategoryParser = seq2SparseCategoryParser) - (implicit ctx: DistributedContext): ResultAnalyzer = { - - val labelMap = model.labelIndex - - val numLabels = model.numLabels - - testSet.checkpoint() - - val numTestInstances = testSet.nrow.toInt - - // instantiate the correct type of classifier - val classifier = testComplementary match { - case true => new ComplementaryNBClassifier(model) with Serializable - case _ => new StandardNBClassifier(model) with Serializable - } - - if (testComplementary) { - assert(testComplementary == model.isComplementary, - "Complementary 
Label Assignment requires Complementary Training") - } - - - // Sequentially assign labels to the test set: - // *Note* this brings the entire test set into memory upfront: - - // Since we cant broadcast the model as is do it sequentially up front for now - val inCoreTestSet = testSet.collect - - // get the labels of the test set and extract the keys - val testSetLabelMap = testSet.getRowLabelBindings - - // empty Matrix in which we'll set the classification scores - val inCoreScoredTestSet = testSet.like(numTestInstances, numLabels) - - testSet.uncache() - - for (i <- 0 until numTestInstances) { - inCoreScoredTestSet(i, ::) := classifier.classifyFull(inCoreTestSet(i, ::)) - } - - // todo: reverse the labelMaps in training and through the model? - - // reverse the label map and extract the labels - val reverseTestSetLabelMap = testSetLabelMap.map(x => x._2 -> cParser(x._1)) - - val reverseLabelMap = labelMap.map(x => x._2 -> x._1) - - val analyzer = new ResultAnalyzer(labelMap.keys.toList.sorted, "DEFAULT") - - // assign labels- winner takes all - for (i <- 0 until numTestInstances) { - val (bestIdx, bestScore) = argmax(inCoreScoredTestSet(i, ::)) - val classifierResult = new ClassifierResult(reverseLabelMap(bestIdx), bestScore) - analyzer.addInstance(reverseTestSetLabelMap(i), classifierResult) - } - - analyzer - } - - /** - * argmax with values as well - * returns a tuple of index of the max score and the score itself. - * @param v Vector of of scores - * @return (bestIndex, bestScore) - */ - def argmax(v: Vector): (Int, Double) = { - var bestIdx: Int = Integer.MIN_VALUE - var bestScore: Double = Integer.MIN_VALUE.asInstanceOf[Int].toDouble - for(i <- 0 until v.size) { - if(v(i) > bestScore){ - bestScore = v(i) - bestIdx = i - } - } - (bestIdx, bestScore) - } - -} - -object NaiveBayes extends NaiveBayes with java.io.Serializable - -/** - * Trainer for the weight normalization vector used by Transform Weight Normalized Complement - * Naive Bayes. 
See: Rennie et.al.: Tackling the poor assumptions of Naive Bayes Text classifiers, - * ICML 2003, http://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf Sec. 3.2. - * - * @param weightsPerFeature a Vector of summed TF or TF-IDF weights for each word in dictionary. - * @param weightsPerLabel a Vector of summed TF or TF-IDF weights for each label. - * @param alphaI Laplace smoothing factor. Defaut value of 1. - */ -class ComplementaryNBThetaTrainer(private val weightsPerFeature: Vector, - private val weightsPerLabel: Vector, - private val alphaI: Double = 1.0) { - - private val perLabelThetaNormalizer: Vector = weightsPerLabel.like() - private val totalWeightSum: Double = weightsPerLabel.zSum - private var numFeatures: Double = weightsPerFeature.getNumNondefaultElements - - assert(weightsPerFeature != null, "weightsPerFeature vector can not be null") - assert(weightsPerLabel != null, "weightsPerLabel vector can not be null") - - /** - * Train the weight normalization vector for each label - * @param label - * @param featurePerLabelWeight - */ - def train(label: Int, featurePerLabelWeight: Vector) { - val currentLabelWeight = labelWeight(label) - // sum weights for each label including those with zero word counts - for (i <- 0 until featurePerLabelWeight.size) { - val currentFeaturePerLabelWeight = featurePerLabelWeight(i) - updatePerLabelThetaNormalizer(label, - ComplementaryNBClassifier.computeWeight(featureWeight(i), - currentFeaturePerLabelWeight, - totalWeightSum, - currentLabelWeight, - alphaI, - numFeatures) - ) - } - } - - /** - * getter for summed TF or TF-IDF weights by label - * @param label index of label - * @return sum of word TF or TF-IDF weights for label - */ - def labelWeight(label: Int): Double = { - weightsPerLabel(label) - } - - /** - * getter for summed TF or TF-IDF weights by word. - * @param feature index of word. - * @return sum of TF or TF-IDF weights for word. 
- */ - def featureWeight(feature: Int): Double = { - weightsPerFeature(feature) - } - - /** - * add the magnitude of the current weight to the current - * label's corresponding Vector element. - * @param label index of label to update. - * @param weight weight to add. - */ - def updatePerLabelThetaNormalizer(label: Int, weight: Double) { - perLabelThetaNormalizer(label) = perLabelThetaNormalizer(label) + Math.abs(weight) - } - - /** - * Getter for the weight normalizer vector as indexed by label - * @return a copy of the weight normalizer vector. - */ - def retrievePerLabelThetaNormalizer: Vector = { - perLabelThetaNormalizer.cloned - } - - - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala deleted file mode 100644 index 8f1413a..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ClassifierStats.scala +++ /dev/null @@ -1,467 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -package org.apache.mahout.classifier.stats - -import java.text.{DecimalFormat, NumberFormat} -import java.util -import org.apache.mahout.math.stats.OnlineSummarizer - - -/** - * Result of a document classification. The label and the associated score (usually probabilty) - */ -class ClassifierResult (private var label: String = null, - private var score: Double = 0.0, - private var logLikelihood: Double = Integer.MAX_VALUE.toDouble) { - - def getLogLikelihood: Double = logLikelihood - - def setLogLikelihood(llh: Double) { - logLikelihood = llh - } - - def getLabel: String = label - - def getScore: Double = score - - def setLabel(lbl: String) { - label = lbl - } - - def setScore(sc: Double) { - score = sc - } - - override def toString: String = { - "ClassifierResult{" + "category='" + label + '\'' + ", score=" + score + '}' - } - -} - -/** - * ResultAnalyzer captures the classification statistics and displays in a tabular manner - * @param labelSet Set of labels to be considered in classification - * @param defaultLabel the default label for an unknown classification - */ -class ResultAnalyzer(private val labelSet: util.Collection[String], defaultLabel: String) { - - val confusionMatrix = new ConfusionMatrix(labelSet, defaultLabel) - val summarizer = new OnlineSummarizer - - private var hasLL: Boolean = false - private var correctlyClassified: Int = 0 - private var incorrectlyClassified: Int = 0 - - - def getConfusionMatrix: ConfusionMatrix = confusionMatrix - - /** - * - * @param correctLabel - * The correct label - * @param classifiedResult - * The classified result - * @return whether the instance was correct or not - */ - def addInstance(correctLabel: String, classifiedResult: ClassifierResult): Boolean = { - val result: Boolean = correctLabel == classifiedResult.getLabel - if (result) { - correctlyClassified += 1 - } - else { - incorrectlyClassified += 1 - } - confusionMatrix.addInstance(correctLabel, classifiedResult) - if 
(classifiedResult.getLogLikelihood != Integer.MAX_VALUE.toDouble) { - summarizer.add(classifiedResult.getLogLikelihood) - hasLL = true - } - - result - } - - /** Dump the resulting statistics to a string */ - override def toString: String = { - val returnString: StringBuilder = new StringBuilder - returnString.append('\n') - returnString.append("=======================================================\n") - returnString.append("Summary\n") - returnString.append("-------------------------------------------------------\n") - val totalClassified: Int = correctlyClassified + incorrectlyClassified - val percentageCorrect: Double = 100.asInstanceOf[Double] * correctlyClassified / totalClassified - val percentageIncorrect: Double = 100.asInstanceOf[Double] * incorrectlyClassified / totalClassified - val decimalFormatter: NumberFormat = new DecimalFormat("0.####") - returnString.append("Correctly Classified Instances") - .append(": ") - .append(Integer.toString(correctlyClassified)) - .append('\t') - .append(decimalFormatter.format(percentageCorrect)) - .append("%\n") - returnString.append("Incorrectly Classified Instances") - .append(": ") - .append(Integer.toString(incorrectlyClassified)) - .append('\t') - .append(decimalFormatter.format(percentageIncorrect)) - .append("%\n") - returnString.append("Total Classified Instances") - .append(": ") - .append(Integer.toString(totalClassified)) - .append('\n') - returnString.append('\n') - returnString.append(confusionMatrix) - returnString.append("=======================================================\n") - returnString.append("Statistics\n") - returnString.append("-------------------------------------------------------\n") - val normStats: RunningAverageAndStdDev = confusionMatrix.getNormalizedStats - returnString.append("Kappa: \t") - .append(decimalFormatter.format(confusionMatrix.getKappa)) - .append('\n') - returnString.append("Accuracy: \t") - .append(decimalFormatter.format(confusionMatrix.getAccuracy)) - .append("%\n") 
- returnString.append("Reliability: \t") - .append(decimalFormatter.format(normStats.getAverage * 100.00000001)) - .append("%\n") - returnString.append("Reliability (std dev): \t") - .append(decimalFormatter.format(normStats.getStandardDeviation)) - .append('\n') - returnString.append("Weighted precision: \t") - .append(decimalFormatter.format(confusionMatrix.getWeightedPrecision)) - .append('\n') - returnString.append("Weighted recall: \t") - .append(decimalFormatter.format(confusionMatrix.getWeightedRecall)) - .append('\n') - returnString.append("Weighted F1 score: \t") - .append(decimalFormatter.format(confusionMatrix.getWeightedF1score)) - .append('\n') - if (hasLL) { - returnString.append("Log-likelihood: \t") - .append("mean : \t") - .append(decimalFormatter.format(summarizer.getMean)) - .append('\n') - returnString.append("25%-ile : \t") - .append(decimalFormatter.format(summarizer.getQuartile(1))) - .append('\n') - returnString.append("75%-ile : \t") - .append(decimalFormatter.format(summarizer.getQuartile(3))) - .append('\n') - } - - returnString.toString() - } - - -} - -/** - * - * Interface for classes that can keep track of a running average of a series of numbers. One can add to or - * remove from the series, as well as update a datum in the series. The class does not actually keep track of - * the series of values, just its running average, so it doesn't even matter if you remove/change a value that - * wasn't added. 
- * - * Ported from org.apache.mahout.cf.taste.impl.common.RunningAverage.java - */ -trait RunningAverage { - - /** - * @param datum - * new item to add to the running average - * @throws IllegalArgumentException - * if datum is { @link Double#NaN} - */ - def addDatum(datum: Double) - - /** - * @param datum - * item to remove to the running average - * @throws IllegalArgumentException - * if datum is { @link Double#NaN} - * @throws IllegalStateException - * if count is 0 - */ - def removeDatum(datum: Double) - - /** - * @param delta - * amount by which to change a datum in the running average - * @throws IllegalArgumentException - * if delta is { @link Double#NaN} - * @throws IllegalStateException - * if count is 0 - */ - def changeDatum(delta: Double) - - def getCount: Int - - def getAverage: Double - - /** - * @return a (possibly immutable) object whose average is the negative of this object's - */ - def inverse: RunningAverage -} - -/** - * - * Extends {@link RunningAverage} by adding standard deviation too. 
- * - * Ported from org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev.java - */ -trait RunningAverageAndStdDev extends RunningAverage { - - /** @return standard deviation of data */ - def getStandardDeviation: Double - - /** - * @return a (possibly immutable) object whose average is the negative of this object's - */ - def inverse: RunningAverageAndStdDev -} - - -class InvertedRunningAverage(private val delegate: RunningAverage) extends RunningAverage { - - override def addDatum(datum: Double) { - throw new UnsupportedOperationException - } - - override def removeDatum(datum: Double) { - throw new UnsupportedOperationException - } - - override def changeDatum(delta: Double) { - throw new UnsupportedOperationException - } - - override def getCount: Int = { - delegate.getCount - } - - override def getAverage: Double = { - -delegate.getAverage - } - - override def inverse: RunningAverage = { - delegate - } -} - - -/** - * - * A simple class that can keep track of a running average of a series of numbers. One can add to or remove - * from the series, as well as update a datum in the series. The class does not actually keep track of the - * series of values, just its running average, so it doesn't even matter if you remove/change a value that - * wasn't added. 
- * - * Ported from org.apache.mahout.cf.taste.impl.common.FullRunningAverage.java - */ -class FullRunningAverage(private var count: Int = 0, - private var average: Double = Double.NaN ) extends RunningAverage { - - /** - * @param datum - * new item to add to the running average - */ - override def addDatum(datum: Double) { - count += 1 - if (count == 1) { - average = datum - } - else { - average = average * (count - 1) / count + datum / count - } - } - - /** - * @param datum - * item to remove from the running average - * @throws IllegalStateException - * if count is 0 - */ - override def removeDatum(datum: Double) { - if (count == 0) { - throw new IllegalStateException - } - count -= 1 - if (count == 0) { - average = Double.NaN - } - else { - average = average * (count + 1) / count - datum / count - } - } - - /** - * @param delta - * amount by which to change a datum in the running average - * @throws IllegalStateException - * if count is 0 - */ - override def changeDatum(delta: Double) { - if (count == 0) { - throw new IllegalStateException - } - average += delta / count - } - - override def getCount: Int = { - count - } - - override def getAverage: Double = { - average - } - - override def inverse: RunningAverage = { - new InvertedRunningAverage(this) - } - - override def toString: String = { - String.valueOf(average) - } -} - - -/** - * - * Extends {@link FullRunningAverage} to add a running standard deviation computation. 
- * Uses Welford's method, as described at http://www.johndcook.com/standard_deviation.html - * - * Ported from org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev.java - */ -class FullRunningAverageAndStdDev(private var count: Int = 0, - private var average: Double = 0.0, - private var mk: Double = 0.0, - private var sk: Double = 0.0) extends FullRunningAverage with RunningAverageAndStdDev { - - var stdDev: Double = 0.0 - - recomputeStdDev - - def getMk: Double = { - mk - } - - def getSk: Double = { - sk - } - - override def getStandardDeviation: Double = { - stdDev - } - - override def addDatum(datum: Double) { - super.addDatum(datum) - val count: Int = getCount - if (count == 1) { - mk = datum - sk = 0.0 - } - else { - val oldmk: Double = mk - val diff: Double = datum - oldmk - mk += diff / count - sk += diff * (datum - mk) - } - recomputeStdDev - } - - override def removeDatum(datum: Double) { - val oldCount: Int = getCount - super.removeDatum(datum) - val oldmk: Double = mk - mk = (oldCount * oldmk - datum) / (oldCount - 1) - sk -= (datum - mk) * (datum - oldmk) - recomputeStdDev - } - - /** - * @throws UnsupportedOperationException - */ - override def changeDatum(delta: Double) { - throw new UnsupportedOperationException - } - - private def recomputeStdDev { - val count: Int = getCount - stdDev = if (count > 1) Math.sqrt(sk / (count - 1)) else Double.NaN - } - - override def inverse: RunningAverageAndStdDev = { - new InvertedRunningAverageAndStdDev(this) - } - - override def toString: String = { - String.valueOf(String.valueOf(getAverage) + ',' + stdDev) - } - -} - - -/** - * - * @param delegate RunningAverageAndStdDev instance - * - * Ported from org.apache.mahout.cf.taste.impl.common.InvertedRunningAverageAndStdDev.java - */ -class InvertedRunningAverageAndStdDev(private val delegate: RunningAverageAndStdDev) extends RunningAverageAndStdDev { - - /** - * @throws UnsupportedOperationException - */ - override def addDatum(datum: Double) { - 
throw new UnsupportedOperationException - } - - /** - * @throws UnsupportedOperationException - */ - - override def removeDatum(datum: Double) { - throw new UnsupportedOperationException - } - - /** - * @throws UnsupportedOperationException - */ - override def changeDatum(delta: Double) { - throw new UnsupportedOperationException - } - - override def getCount: Int = { - delegate.getCount - } - - override def getAverage: Double = { - -delegate.getAverage - } - - override def getStandardDeviation: Double = { - delegate.getStandardDeviation - } - - override def inverse: RunningAverageAndStdDev = { - delegate - } -} - - - - http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala b/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala deleted file mode 100644 index 328d27b..0000000 --- a/math-scala/src/main/scala/org/apache/mahout/classifier/stats/ConfusionMatrix.scala +++ /dev/null @@ -1,460 +0,0 @@ -/* - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/

package org.apache.mahout.classifier.stats

import java.util
import org.apache.commons.math3.stat.descriptive.moment.Mean // This is brought in by mahout-math
import org.apache.mahout.math.{DenseMatrix, Matrix}
import scala.collection.mutable
import scala.collection.JavaConversions._

/**
 *
 * Ported from org.apache.mahout.classifier.ConfusionMatrix.java
 *
 * The ConfusionMatrix Class stores the result of Classification of a Test Dataset.
 *
 * The fact of whether there is a default is not stored. A row of zeros is the only indicator that there is no default.
 *
 * See http://en.wikipedia.org/wiki/Confusion_matrix for background
 *
 * Layout (as established by getCount/putCount below): rows index the correct (actual)
 * label, columns index the classified (predicted) label, so cell (r, c) counts instances
 * of actual label r that were classified as label c.
 *
 * NOTE(review): the default `labels = null` will NPE immediately below at `labels.size`;
 * callers are evidently expected to always supply a non-null collection — verify against
 * call sites before relying on the default.
 *
 * @param labels The labels to consider for classification
 * @param defaultLabel default unknown label
 */
class ConfusionMatrix(private var labels: util.Collection[String] = null,
                      private var defaultLabel: String = "unknown") {
  /**
   * Matrix Constructor
   * @param m a DenseMatrix with RowLabelBindings
   */
//  def this(m: Matrix) {
//    this()
//    confusionMatrix = Array.ofDim[Int](m.numRows, m.numRows)
//    setMatrix(m)
//  }

  // val LOG: Logger = LoggerFactory.getLogger(classOf[ConfusionMatrix])

  // One extra row/column beyond the supplied labels to hold the defaultLabel counts.
  var confusionMatrix = Array.ofDim[Int](labels.size + 1, labels.size + 1)

  // label -> matrix row/column index. Java-interop calls on this map (containsKey,
  // entrySet, keySet) compile via the scala.collection.JavaConversions._ implicits
  // imported above.
  val labelMap = new mutable.HashMap[String,Integer]()

  // Number of instances recorded; incremented by addInstance and (when a zero cell
  // becomes non-zero) by putCount.
  var samples: Int = 0

  // Assign indices 0..n-1 to the supplied labels in iteration order; the defaultLabel
  // always gets the last index, n.
  var i: Integer = 0
  for (label <- labels) {
    labelMap.put(label, i)
    i+=1
  }
  labelMap.put(defaultLabel, i)


  /** Raw counts: confusionMatrix(actual)(classified). */
  def getConfusionMatrix: Array[Array[Int]] = confusionMatrix

  /** All known labels, including the defaultLabel (order is HashMap iteration order, not insertion order). */
  def getLabels = labelMap.keys.toList

  def numLabels: Int = labelMap.size

  /**
   * Per-class accuracy as a percentage: correct / (row total for this label) * 100.
   * NOTE(review): yields NaN (0/0) when no instance of `label` has been recorded.
   */
  def getAccuracy(label: String): Double = {
    val labelId: Int = labelMap(label)
    var labelTotal: Int = 0
    var correct: Int = 0
    for (i <- 0 until numLabels) {
      labelTotal += confusionMatrix(labelId)(i)
      if (i == labelId) {
        correct += confusionMatrix(labelId)(i)
      }
    }

    100.0 * correct / labelTotal
  }

  /**
   * Overall accuracy as a percentage: diagonal sum / total count * 100.
   * NOTE(review): NaN when the matrix is empty (total == 0).
   */
  def getAccuracy: Double = {
    var total: Int = 0
    var correct: Int = 0
    for (i <- 0 until numLabels) {
      for (j <- 0 until numLabels) {
        total += confusionMatrix(i)(j)
        if (i == j) {
          correct += confusionMatrix(i)(j)
        }
      }
    }

    100.0 * correct / total
  }

  /** Sum of true positives and false negatives */
  private def getActualNumberOfTestExamplesForClass(label: String): Int = {
    val labelId: Int = labelMap(label)
    var sum: Int = 0
    for (i <- 0 until numLabels) {
      sum += confusionMatrix(labelId)(i)
    }
    sum
  }

  /** Precision for `label`: TP / (TP + FP), where FP is summed down this label's column; 0 when the denominator is 0. */
  def getPrecision(label: String): Double = {
    val labelId: Int = labelMap(label)
    val truePositives: Int = confusionMatrix(labelId)(labelId)
    var falsePositives: Int = 0

    for (i <- 0 until numLabels) {
      if (i != labelId) {
        falsePositives += confusionMatrix(i)(labelId)
      }
    }

    if (truePositives + falsePositives == 0) {
      0
    } else {
      (truePositives.asInstanceOf[Double]) / (truePositives + falsePositives)
    }
  }

  /** Precision averaged over classes, weighted by each class's actual example count (commons-math3 weighted Mean). */
  def getWeightedPrecision: Double = {
    val precisions: Array[Double] = new Array[Double](numLabels)
    val weights: Array[Double] = new Array[Double](numLabels)
    var index: Int = 0
    for (label <- labelMap.keys) {
      precisions(index) = getPrecision(label)
      weights(index) = getActualNumberOfTestExamplesForClass(label)
      index += 1
    }
    new Mean().evaluate(precisions, weights)
  }

  /** Recall for `label`: TP / (TP + FN), where FN is summed along this label's row; 0 when the denominator is 0. */
  def getRecall(label: String): Double = {
    val labelId: Int = labelMap(label)
    val truePositives: Int = confusionMatrix(labelId)(labelId)
    var falseNegatives: Int = 0
    for (i <- 0 until numLabels) {
      if (i != labelId) {
        falseNegatives += confusionMatrix(labelId)(i)
      }
    }

    if (truePositives + falseNegatives == 0) {
      0
    } else {
      (truePositives.asInstanceOf[Double]) / (truePositives + falseNegatives)
    }
  }

  /** Recall averaged over classes, weighted by each class's actual example count. */
  def getWeightedRecall: Double = {
    val recalls: Array[Double] = new Array[Double](numLabels)
    val weights: Array[Double] = new Array[Double](numLabels)
    var index: Int = 0
    for (label <- labelMap.keys) {
      recalls(index) = getRecall(label)
      weights(index) = getActualNumberOfTestExamplesForClass(label)
      index += 1
    }
    new Mean().evaluate(recalls, weights)
  }

  /** F1 for `label`: harmonic mean of precision and recall; 0 when both are 0. */
  def getF1score(label: String): Double = {
    val precision: Double = getPrecision(label)
    val recall: Double = getRecall(label)
    if (precision + recall == 0) {
      0
    } else {
      2 * precision * recall / (precision + recall)
    }
  }

  /** F1 averaged over classes, weighted by each class's actual example count. */
  def getWeightedF1score: Double = {
    val f1Scores: Array[Double] = new Array[Double](numLabels)
    val weights: Array[Double] = new Array[Double](numLabels)
    var index: Int = 0
    for (label <- labelMap.keys) {
      f1Scores(index) = getF1score(label)
      weights(index) = getActualNumberOfTestExamplesForClass(label)
      index += 1
    }
    new Mean().evaluate(f1Scores, weights)
  }

  /**
   * Mean per-class accuracy excluding the defaultLabel's accuracy from the numerator.
   * NOTE(review): `count` still includes the defaultLabel (it is incremented outside the
   * `if`), so the divisor is numLabels, not numLabels - 1 — this mirrors the Java port
   * but looks intentional-vs-accidental; confirm against the original.
   */
  def getReliability: Double = {
    var count: Int = 0
    var accuracy: Double = 0
    for (label <- labelMap.keys) {
      if (!(label == defaultLabel)) {
        accuracy += getAccuracy(label)
      }
      count += 1
    }
    accuracy / count
  }

  /**
   * Accuracy v.s. randomly classifying all samples.
   * kappa() = (totalAccuracy() - randomAccuracy()) / (1 - randomAccuracy())
   * Cohen, Jacob. 1960. A coefficient of agreement for nominal scales.
   * Educational And Psychological Measurement 20:37-46.
   *
   * Formula and variable names from:
   * http://www.yale.edu/ceo/OEFS/Accuracy.pdf
   *
   * @return double
   */
  def getKappa: Double = {
    var a: Double = 0.0
    var b: Double = 0.0
    for (i <- 0 until confusionMatrix.length) {
      a += confusionMatrix(i)(i)   // diagonal: observed agreement (unnormalized)
      var br: Int = 0
      for (j <- 0 until confusionMatrix.length) {
        br += confusionMatrix(i)(j)  // row total for label i
      }
      var bc: Int = 0
      //TODO: verify this as an iterator
      for (vec <- confusionMatrix) {
        bc += vec(i)   // column total for label i
      }
      b += br * bc     // expected agreement term: sum of row-total * column-total
    }
    (samples * a - b) / (samples * samples - b)
  }

  /** Diagonal count for `label` (true positives). */
  def getCorrect(label: String): Int = {
    val labelId: Int = labelMap(label)
    confusionMatrix(labelId)(labelId)
  }

  /** Row total for `label` (all instances whose actual label is `label`). */
  def getTotal(label: String): Int = {
    val labelId: Int = labelMap(label)
    var labelTotal: Int = 0
    for (i <- 0 until numLabels) {
      labelTotal += confusionMatrix(labelId)(i)
    }
    labelTotal
  }

  /**
   * Standard deviation of normalized producer accuracy
   * Not a standard score
   * @return double
   */
  def getNormalizedStats: RunningAverageAndStdDev = {
    val summer = new FullRunningAverageAndStdDev()
    for (d <- 0 until confusionMatrix.length) {
      var total: Double = 0.0
      for (j <- 0 until confusionMatrix.length) {
        total += confusionMatrix(d)(j)
      }
      // epsilon guards against 0/0 for labels with no recorded instances
      summer.addDatum(confusionMatrix(d)(d) / (total + 0.000001))
    }
    summer
  }

  /** Record one classified instance (label taken from the classifier result). */
  def addInstance(correctLabel: String, classifiedResult: ClassifierResult): Unit = {
    samples += 1
    incrementCount(correctLabel, classifiedResult.getLabel)
  }

  /** Record one classified instance. */
  def addInstance(correctLabel: String, classifiedLabel: String): Unit = {
    samples += 1
    incrementCount(correctLabel, classifiedLabel)
  }

  /**
   * Count of instances with actual label `correctLabel` classified as `classifiedLabel`.
   * Returns 0 (with no error) for a correct label never seen in training; asserts on an
   * unknown classified label.
   */
  def getCount(correctLabel: String, classifiedLabel: String): Int = {
    if (!labelMap.containsKey(correctLabel)) {
      // LOG.warn("Label {} did not appear in the training examples", correctLabel)
      return 0
    }
    assert(labelMap.containsKey(classifiedLabel), "Label not found: " + classifiedLabel)
    val correctId: Int = labelMap(correctLabel)
    val classifiedId: Int = labelMap(classifiedLabel)
    confusionMatrix(correctId)(classifiedId)
  }

  /**
   * Overwrite one cell. Silently ignores an unknown correct label; asserts on an unknown
   * classified label. Bumps `samples` when a previously-zero cell becomes non-zero.
   */
  def putCount(correctLabel: String, classifiedLabel: String, count: Int): Unit = {
    if (!labelMap.containsKey(correctLabel)) {
      // LOG.warn("Label {} did not appear in the training examples", correctLabel)
      return
    }
    assert(labelMap.containsKey(classifiedLabel), "Label not found: " + classifiedLabel)
    val correctId: Int = labelMap(correctLabel)
    val classifiedId: Int = labelMap(classifiedLabel)
    if (confusionMatrix(correctId)(classifiedId) == 0.0 && count != 0) {
      samples += 1
    }
    confusionMatrix(correctId)(classifiedId) = count
  }

  /** Add `count` to one cell (read-modify-write through getCount/putCount). */
  def incrementCount(correctLabel: String, classifiedLabel: String, count: Int): Unit = {
    putCount(correctLabel, classifiedLabel, count + getCount(correctLabel, classifiedLabel))
  }

  def incrementCount(correctLabel: String, classifiedLabel: String): Unit = {
    incrementCount(correctLabel, classifiedLabel, 1)
  }

  def getDefaultLabel: String = {
    defaultLabel
  }

  /**
   * Add another matrix's counts into this one, cell by cell over this matrix's labels.
   * Only checks that label-set sizes match, not that the label sets are equal.
   * @return this, mutated
   */
  def merge(b: ConfusionMatrix): ConfusionMatrix = {
    assert(labelMap.size == b.getLabels.size, "The label sizes do not match")
    for (correctLabel <- this.labelMap.keys) {
      for (classifiedLabel <- this.labelMap.keys) {
        incrementCount(correctLabel, classifiedLabel, b.getCount(correctLabel, classifiedLabel))
      }
    }
    this
  }

  /** Export the counts as a DenseMatrix with identical row and column label bindings. */
  def getMatrix: Matrix = {
    val length: Int = confusionMatrix.length
    val m: Matrix = new DenseMatrix(length, length)

    val labels: java.util.HashMap[String, Integer] = new java.util.HashMap()

    for (r <- 0 until length) {
      for (c <- 0 until length) {
        m.set(r, c, confusionMatrix(r)(c))
      }
    }

    for (entry <- labelMap.entrySet) {
      labels.put(entry.getKey, entry.getValue)
    }
    m.setRowLabelBindings(labels)
    m.setColumnLabelBindings(labels)

    m
  }

  /**
   * Load counts (rounded to Int) and label bindings from a square Matrix.
   * Only the top-left `confusionMatrix.length` x `confusionMatrix.length` region of `m`
   * is copied. Row bindings are preferred; column bindings are the fallback.
   */
  def setMatrix(m: Matrix) : Unit = {
    val length: Int = confusionMatrix.length
    if (m.numRows != m.numCols) {
      throw new IllegalArgumentException("ConfusionMatrix: matrix(" + m.numRows + ',' + m.numCols + ") must be square")
    }

    for (r <- 0 until length) {
      for (c <- 0 until length) {
        confusionMatrix(r)(c) = Math.round(m.get(r, c)).toInt
      }
    }

    var labels = m.getRowLabelBindings
    if (labels == null) {
      labels = m.getColumnLabelBindings
    }

    if (labels != null) {
      val sorted: Array[String] = sortLabels(labels)
      verifyLabels(length, sorted)
      labelMap.clear
      for (i <- 0 until length) {
        labelMap.put(sorted(i), i)
      }
    }
  }

  /** Assert that there is exactly one non-null label per row. */
  def verifyLabels(length: Int, sorted: Array[String]): Unit = {
    assert(sorted.length == length, "One label, one row")
    for (i <- 0 until length) {
      if (sorted(i) == null) {
        assert(false, "One label, one row")
      }
    }
  }

  /** Invert a label->index binding map into an index-ordered array of labels. */
  def sortLabels(labels: java.util.Map[String, Integer]): Array[String] = {
    val sorted: Array[String] = new Array[String](labels.size)
    for (entry <- labels.entrySet) {
      sorted(entry.getValue) = entry.getKey
    }

    sorted
  }

  /**
   * This is overloaded. toString() is not a formatted report you print for a manager :)
   * Assume that if there are no default assignments, the default feature was not used
   */
  override def toString: String = {

    val returnString: StringBuilder = new StringBuilder(200)

    returnString.append("=======================================================").append('\n')
    returnString.append("Confusion Matrix\n")
    returnString.append("-------------------------------------------------------").append('\n')

    // Row total for the default label; when 0, the default column/row is omitted entirely.
    val unclassified: Int = getTotal(defaultLabel)

    // Header row: short alphabetic aliases for each (displayed) label.
    for (entry <- this.labelMap.entrySet) {
      if (!((entry.getKey == defaultLabel) && unclassified == 0)) {
        returnString.append(getSmallLabel(entry.getValue) + " ").append('\t')
      }
    }

    returnString.append("<--Classified as").append('\n')

    // One row per actual label: counts per classified label, then the row total and alias.
    for (entry <- this.labelMap.entrySet) {
      if (!((entry.getKey == defaultLabel) && unclassified == 0)) {
        val correctLabel: String = entry.getKey
        var labelTotal: Int = 0

        for (classifiedLabel <- this.labelMap.keySet) {
          if (!((classifiedLabel == defaultLabel) && unclassified == 0)) {
            returnString.append(Integer.toString(getCount(correctLabel, classifiedLabel)) + " ")
              .append('\t')
            labelTotal += getCount(correctLabel, classifiedLabel)
          }
        }
        returnString.append(" | ").append(String.valueOf(labelTotal) + " ")
          .append('\t')
          .append(getSmallLabel(entry.getValue) + " ")
          .append(" = ")
          .append(correctLabel)
          .append('\n')
      }
    }

    if (unclassified > 0) {
      returnString.append("Default Category: ")
        .append(defaultLabel)
        .append(": ")
        .append(unclassified)
        .append('\n')
    }
    returnString.append('\n')

    returnString.toString()
  }


  /**
   * Base-26 alphabetic alias for a label index: 0->"a", 25->"z", 26->"ba", ...
   * (digits are letters 'a'..'z', most significant first).
   */
  def getSmallLabel(i: Int): String = {
    var value: Int = i
    val returnString: StringBuilder = new StringBuilder
    do {
      val n: Int = value % 26
      returnString.insert(0, ('a' + n).asInstanceOf[Char])
      value /= 26
    } while (value > 0)

    returnString.toString()
  }


}

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala b/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala
deleted file mode 100644
index 32515f1..0000000
--- a/math-scala/src/main/scala/org/apache/mahout/drivers/MahoutDriver.scala
+++ /dev/null
@@ -1,44 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. 
You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.drivers

import org.apache.mahout.math.drm.DistributedContext

/**
 * Extended by a platform specific version of this class to create a Mahout CLI driver.
 *
 * Lifecycle (by convention, see process()): a subclass's main() parses arguments,
 * calls start() to initialize `mc`, does its work in process(), and calls stop()
 * before exiting.
 */
abstract class MahoutDriver {

  // Engine context; assigned by the platform-specific start() implementation and
  // closed by stop() unless an existing context is being reused.
  implicit protected var mc: DistributedContext = _
  // Option parser; presumably assigned by the subclass's main() — confirm in subclasses.
  implicit protected var parser: MahoutOptionParser = _

  var _useExistingContext: Boolean = false // used in the test suite to reuse one context per suite

  /** must be overriden to setup the DistributedContext mc*/
  protected def start() : Unit

  /** Override (optionally) for special cleanup */
  protected def stop(): Unit = {
    // Only close the context we own; a shared test-suite context is left open.
    if (!_useExistingContext) mc.close
  }

  /** This is where you do the work, call start first, then before exiting call stop */
  protected def process(): Unit

  /** Parse command line and call process */
  def main(args: Array[String]): Unit

}
