http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/MapBlock.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/MapBlock.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/MapBlock.java deleted file mode 100644 index 5a50f03..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/MapBlock.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.math.Matrix; -import org.apache.mahout.h2obindings.H2OBlockMatrix; -import org.apache.mahout.h2obindings.drm.H2ODrm; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; -import water.fvec.NewChunk; - -import java.io.Serializable; -import java.util.Arrays; - -import scala.reflect.ClassTag; - -/** - * MapBlock operator. - */ -public class MapBlock { - /** - * Execute a BlockMapFunction on DRM partitions to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @param ncol Number of columns output by BMF. - * @param bmf BlockMapFunction which maps input DRM partition to output. - * @param isRstr flag indicating if key type of output DRM is a String. - * @param k ClassTag of intput DRM key type. - * @param r ClassTag of output DRM key type. - * @return new DRM constructed from mapped blocks of drmA through bmf. - */ - public static <K,R> H2ODrm exec(H2ODrm drmA, int ncol, Object bmf, final boolean isRstr, - final ClassTag<K> k, final ClassTag<R> r) { - Frame A = drmA.frame; - Vec keys = drmA.keys; - - /** - * MRTask to execute bmf on partitions. Partitions are - * made accessible to bmf in the form of H2OBlockMatrix. - */ - class MRTaskBMF extends MRTask<MRTaskBMF> { - Serializable bmf; - Vec labels; - MRTaskBMF(Object _bmf, Vec _labels) { - // BlockMapFun does not implement Serializable, - // but Scala closures are _always_ Serializable. - // - // So receive the object as a plain Object (else - // compilation fails) and typcast it with conviction, - // that Scala always tags the actually generated - // closure functions with Serializable. - bmf = (Serializable)_bmf; - labels = _labels; - } - - /** Create H2OBlockMatrix from the partition */ - private Matrix blockify(Chunk chks[]) { - return new H2OBlockMatrix(chks); - } - - /** Ingest the output of bmf into the output partition */ - private void deblockify(Matrix out, NewChunk ncs[]) { - // assert (out.colSize() == ncs.length) - for (int c = 0; c < out.columnSize(); c++) { - for (int r = 0; r < out.rowSize(); r++) { - ncs[c].addNum(out.getQuick(r, c)); - } - } - } - - // Input: - // chks.length == A.numCols() - // - // Output: - // ncs.length == (A.numCols() + 1) if String keyed - // (A.numCols() + 0) if Int or Long keyed - // - // First A.numCols() ncs[] elements are fed back the output - // of bmf() output's _2 in deblockify() - // - // If String keyed, then MapBlockHelper.exec() would have - // filled in the Strings into ncs[ncol] already - // - public void map(Chunk chks[], NewChunk ncs[]) { - long start = chks[0].start(); - NewChunk nclabel = isRstr ? ncs[ncs.length - 1] : null; - deblockify(MapBlockHelper.exec(bmf, blockify(chks), start, labels, nclabel, k, r), ncs); - // assert chks[i]._len == ncs[j]._len - } - } - - int ncolRes = ncol + (isRstr ? 1 : 0); - Frame fmap = new MRTaskBMF(bmf, keys).doAll(ncolRes, A).outputFrame(null, null); - Vec vmap = null; - if (isRstr) { - // If output was String keyed, then the last Vec in fmap is the String vec. - // If so, peel it out into a separate Vec (vmap) and set fmap to be the - // Frame with just the first ncol Vecs - vmap = fmap.vecs()[ncol]; - fmap = new Frame(Arrays.copyOfRange(fmap.vecs(), 0, ncol)); - } - return new H2ODrm(fmap, vmap); - } -}
http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Par.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Par.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Par.java deleted file mode 100644 index 57eee22..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Par.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; -import water.fvec.NewChunk; -import water.parser.ValueString; - -import org.apache.mahout.h2obindings.H2OHelper; -import org.apache.mahout.h2obindings.drm.H2ODrm; - -/** - * Parallelize operator. - */ -public class Par { - /** - * (re)Parallelize DRM data according to new partitioning hints. - * - * @param drmA Input DRM containing data. - * @param min Hint of minimum number of partitions to parallelize, if not -1. - * @param exact Hint of exact number of partitions to parallelize, if not -1. - * @return new DRM holding the same data but parallelized according to new hints. - */ - public static H2ODrm exec(H2ODrm drmA, int min, int exact) { - final Frame frin = drmA.frame; - final Vec vin = drmA.keys; - - // First create a new empty Frame with the required partitioning - Frame frout = H2OHelper.emptyFrame(frin.numRows(), frin.numCols(), min, exact); - Vec vout = null; - - if (vin != null) { - // If String keyed, then run an MRTask on the new frame, and also - // creat yet another 1-column newer frame for the re-orged String keys. - // The new String Vec will therefore be similarly partitioned as the - // new Frame. - // - // vout is finally collected by calling anyVec() on outputFrame(), - // as it is the only column in the output frame. - vout = new MRTask() { - public void map(Chunk chks[], NewChunk nc) { - int chunkSize = chks[0].len(); - Vec vins[] = frin.vecs(); - long start = chks[0].start(); - ValueString vstr = new ValueString(); - - for (int r = 0; r < chunkSize; r++) { - for (int c = 0; c < chks.length; c++) { - chks[c].set(r, vins[c].at(start + r)); - } - nc.addStr(vin.atStr(vstr, start + r)); - } - } - }.doAll(1, frout).outputFrame(null, null).anyVec(); - } else { - // If not String keyed, then run and MRTask on the new frame, and - // just pull in right elements from frin - new MRTask() { - public void map(Chunk chks[]) { - int chunkSize = chks[0].len(); - Vec vins[] = frin.vecs(); - long start = chks[0].start(); - - for (int r = 0; r < chunkSize; r++) { - for (int c = 0; c < chks.length; c++) { - chks[c].set(r, vins[c].at(start + r)); - } - } - } - }.doAll(frout); - } - - return new H2ODrm(frout, vout); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Rbind.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Rbind.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Rbind.java deleted file mode 100644 index c4f9490..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/Rbind.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; -import water.fvec.NewChunk; -import water.parser.ValueString; - -import org.apache.mahout.h2obindings.H2OHelper; -import org.apache.mahout.h2obindings.drm.H2ODrm; - -/** - * R-like rbind like operator, on two DRMs - */ -public class Rbind { - /** - * Combine the rows of two DRMs A and B to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @param drmB DRM representing matrix B. - * @return new DRM containing rows of B below A. - */ - public static H2ODrm exec(H2ODrm drmA, H2ODrm drmB) { - final Frame fra = drmA.frame; - final Vec keysa = drmA.keys; - final Frame frb = drmB.frame; - final Vec keysb = drmB.keys; - - // Create new frame and copy A's data at the top, and B's data below. - // Create the frame in the same VectorGroup as A, so A's data does not - // cross the wire during copy. B's data could potentially cross the wire. - Frame frbind = H2OHelper.emptyFrame(fra.numRows() + frb.numRows(), fra.numCols(), - -1, -1, fra.anyVec().group()); - Vec keys = null; - - MRTask task = new MRTask() { - public void map(Chunk chks[], NewChunk nc) { - Vec A_vecs[] = fra.vecs(); - Vec B_vecs[] = frb.vecs(); - long A_rows = fra.numRows(); - long B_rows = frb.numRows(); - long start = chks[0].start(); - int chunkSize = chks[0].len(); - ValueString vstr = new ValueString(); - - for (int r = 0; r < chunkSize; r++) { - for (int c = 0; c < chks.length; c++) { - if (r + start < A_rows) { - chks[c].set(r, A_vecs[c].at(r + start)); - if (keysa != null) { - nc.addStr(keysa.atStr(vstr, r + start)); - } - } else { - chks[c].set(r, B_vecs[c].at(r + start - A_rows)); - if (keysb != null) { - nc.addStr(keysb.atStr(vstr, r + start - A_rows)); - } - } - } - } - } - }; - - if (keysa == null) { - keys = task.doAll(1, frbind).outputFrame(null, null).anyVec(); - } else { - task.doAll(frbind); - } - - return new H2ODrm(frbind, keys); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/RowRange.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/RowRange.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/RowRange.java deleted file mode 100644 index 43cc024..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/RowRange.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import scala.collection.immutable.Range; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; -import water.fvec.NewChunk; -import water.parser.ValueString; - -import org.apache.mahout.h2obindings.drm.H2ODrm; - -/** - * Filter operation - */ -public class RowRange { - /** - * Filter rows from intput DRM, to include only row indiced included in R. - * - * @param drmA Input DRM. - * @param R Range object specifying the start and end row numbers to filter. - * @return new DRM with just the filtered rows. - */ - public static H2ODrm exec(H2ODrm drmA, final Range R) { - Frame A = drmA.frame; - Vec keys = drmA.keys; - - // Run a filtering MRTask on A. If row number falls within R.start() and - // R.end(), then the row makes it into the output - Frame Arr = new MRTask() { - public void map(Chunk chks[], NewChunk ncs[]) { - int chunkSize = chks[0].len(); - long chunkStart = chks[0].start(); - - // First check if the entire chunk even overlaps with R - if (chunkStart > R.end() || (chunkStart + chunkSize) < R.start()) { - return; - } - - // This chunk overlaps, filter out just the overlapping rows - for (int r = 0; r < chunkSize; r++) { - if (!R.contains(chunkStart + r)) { - continue; - } - - for (int c = 0; c < chks.length; c++) { - ncs[c].addNum(chks[c].atd(r)); - } - } - } - }.doAll(A.numCols(), A).outputFrame(null, null); - - Vec Vrr = (keys == null) ? null : new MRTask() { - // This is a String keyed DRM. Do the same thing as above, - // but this time just one column of Strings. - public void map(Chunk chk, NewChunk nc) { - int chunkSize = chk.len(); - long chunkStart = chk.start(); - ValueString vstr = new ValueString(); - - if (chunkStart > R.end() || (chunkStart + chunkSize) < R.start()) { - return; - } - - for (int r = 0; r < chunkSize; r++) { - if (!R.contains(chunkStart + r)) { - continue; - } - - nc.addStr(chk.atStr(vstr, r)); - } - } - }.doAll(1, keys).outputFrame(null, null).anyVec(); - - return new H2ODrm(Arr, Vrr); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/java/org/apache/mahout/h2obindings/ops/TimesRightMatrix.java ---------------------------------------------------------------------- diff --git a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/TimesRightMatrix.java b/h2o/src/main/java/org/apache/mahout/h2obindings/ops/TimesRightMatrix.java deleted file mode 100644 index aefb514..0000000 --- a/h2o/src/main/java/org/apache/mahout/h2obindings/ops/TimesRightMatrix.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops; - -import org.apache.mahout.math.Matrix; -import org.apache.mahout.math.Vector; -import org.apache.mahout.math.DiagonalMatrix; -import org.apache.mahout.h2obindings.drm.H2OBCast; -import org.apache.mahout.h2obindings.drm.H2ODrm; - -import water.MRTask; -import water.fvec.Frame; -import water.fvec.Vec; -import water.fvec.Chunk; -import water.fvec.NewChunk; - -/** - * Multiple DRM with in-core Matrix - */ -public class TimesRightMatrix { - /** - * Multiply a DRM with an in-core Matrix to create a new DRM. - * - * @param drmA DRM representing matrix A. - * @param B in-core Mahout Matrix. - * @return new DRM containing drmA times B. - */ - public static H2ODrm exec(H2ODrm drmA, Matrix B) { - Frame A = drmA.frame; - Vec keys = drmA.keys; - Frame AinCoreB = null; - - if (B instanceof DiagonalMatrix) { - AinCoreB = execDiagonal(A, B.viewDiagonal()); - } else { - AinCoreB = execCommon(A, B); - } - - return new H2ODrm(AinCoreB, keys); - } - - /** - * Multiply Frame A with in-core diagonal Matrix (whose diagonal Vector is d) - * - * A.numCols() == d.size() - */ - private static Frame execDiagonal(final Frame A, Vector d) { - final H2OBCast<Vector> bd = new H2OBCast<>(d); - - return new MRTask() { - public void map(Chunk chks[], NewChunk ncs[]) { - Vector D = bd.value(); - int chunkSize = chks[0].len(); - - for (int c = 0; c < ncs.length; c++) { - for (int r = 0; r < chunkSize; r++) { - double v = (chks[c].atd(r) * D.getQuick(c)); - ncs[c].addNum(v); - } - } - } - }.doAll(d.size(), A).outputFrame(null, null); - } - - /** - * Multiply Frame A with in-core Matrix b - * - * A.numCols() == b.rowSize() - */ - private static Frame execCommon(final Frame A, Matrix b) { - final H2OBCast<Matrix> bb = new H2OBCast<>(b); - - return new MRTask() { - public void map(Chunk chks[], NewChunk ncs[]) { - Matrix B = bb.value(); - int chunkSize = chks[0].len(); - - for (int c = 0; c < ncs.length; c++) { - for (int r = 0; r < chunkSize; r++) { - double v = 0; - for (int i = 0; i < chks.length; i++) { - v += (chks[i].atd(r) * B.getQuick(i, c)); - } - ncs[c].addNum(v); - } - } - } - }.doAll(b.columnSize(), A).outputFrame(null, null); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/scala/org/apache/mahout/h2o/common/DrmMetadata.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2o/common/DrmMetadata.scala b/h2o/src/main/scala/org/apache/mahout/h2o/common/DrmMetadata.scala deleted file mode 100644 index 33aafef..0000000 --- a/h2o/src/main/scala/org/apache/mahout/h2o/common/DrmMetadata.scala +++ /dev/null @@ -1,56 +0,0 @@ -package org.apache.mahout.h2o.common - -import scala.reflect.ClassTag -import org.apache.hadoop.io._ -import java.util.Arrays - -class DrmMetadata( - - /** Writable key type as a sub-type of Writable */ - val keyTypeWritable: Class[_], - - /** Value writable type, as a sub-type of Writable */ - val valueTypeWritable: Class[_] - - ) { - - import DrmMetadata._ - - val ( - - /** Actual drm key class tag once converted out of writable */ - keyClassTag: ClassTag[_], - - /** Conversion from Writable to value type of the DRM key */ - keyW2ValFunc: ((Writable) => Any) - - ) = keyTypeWritable match { - case cz if cz == classOf[IntWritable] => ClassTag.Int -> w2int _ - case cz if cz == classOf[LongWritable] => ClassTag.Long -> w2long _ - case cz if cz == classOf[DoubleWritable] => ClassTag.Double -> w2double _ - case cz if cz == classOf[FloatWritable] => ClassTag.Float -> w2float _ - case cz if cz == classOf[Text] => ClassTag(classOf[String]) -> w2string _ - case cz if cz == classOf[BooleanWritable] => ClassTag(classOf[Boolean]) -> w2bool _ - case cz if cz == classOf[BytesWritable] => ClassTag(classOf[Array[Byte]]) -> w2bytes _ - case _ => throw new IllegalArgumentException(s"Unsupported DRM key type:${keyTypeWritable.getName}") - } - -} - -object DrmMetadata { - - private[common] def w2int(w: Writable) = w.asInstanceOf[IntWritable].get() - - private[common] def w2long(w: Writable) = w.asInstanceOf[LongWritable].get() - - private[common] def w2double(w: Writable) = w.asInstanceOf[DoubleWritable].get() - - private[common] def w2float(w: Writable) = w.asInstanceOf[FloatWritable].get() - - private[common] def w2string(w: Writable) = w.asInstanceOf[Text].toString() - - private[common] def w2bool(w: Writable) = w.asInstanceOf[BooleanWritable].get() - - private[common] def w2bytes(w: Writable) = Arrays.copyOf(w.asInstanceOf[BytesWritable].getBytes(), - w.asInstanceOf[BytesWritable].getLength()) -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/scala/org/apache/mahout/h2o/common/HDFSUtil.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2o/common/HDFSUtil.scala b/h2o/src/main/scala/org/apache/mahout/h2o/common/HDFSUtil.scala deleted file mode 100644 index 266b6aa..0000000 --- a/h2o/src/main/scala/org/apache/mahout/h2o/common/HDFSUtil.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2o.common - -/** High level Hadoop version-specific hdfs manipulations we need in context of our operations. */ -trait HDFSUtil { - - /** Read DRM header information off (H)DFS. */ - def readDrmHeader(path:String):DrmMetadata -} - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/scala/org/apache/mahout/h2o/common/Hadoop2HDFSUtil.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2o/common/Hadoop2HDFSUtil.scala b/h2o/src/main/scala/org/apache/mahout/h2o/common/Hadoop2HDFSUtil.scala deleted file mode 100644 index 4053d09..0000000 --- a/h2o/src/main/scala/org/apache/mahout/h2o/common/Hadoop2HDFSUtil.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2o.common - -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.hadoop.io.{SequenceFile, Writable} - -/** - * Deprecated Hadoop 1 api which we currently explicitly import via Mahout dependencies. May not work - * with Hadoop 2.0 - */ -object Hadoop2HDFSUtil extends HDFSUtil { - - - def readDrmHeader(path: String): DrmMetadata = { - val dfsPath = new Path(path) - val fs = dfsPath.getFileSystem(new Configuration()) - - val partFilePath:Path = fs.listStatus(dfsPath) - - // Filter out anything starting with . - .filter { s => !s.getPath.getName.startsWith("\\.") && !s.getPath.getName.startsWith("_") && !s.isDirectory } - - // Take path - .map(_.getPath) - - // Take only one, if any - .headOption - - // Require there's at least one partition file found. - .getOrElse { - throw new IllegalArgumentException(s"No partition files found in ${dfsPath.toString}.") - } - - val reader = new SequenceFile.Reader(fs.getConf, SequenceFile.Reader.file(partFilePath)) - try { - new DrmMetadata( - keyTypeWritable = reader.getKeyClass.asSubclass(classOf[Writable]), - valueTypeWritable = reader.getValueClass.asSubclass(classOf[Writable]) - ) - } finally { - reader.close() - } - - } - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/scala/org/apache/mahout/h2obindings/H2ODistributedContext.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/H2ODistributedContext.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/H2ODistributedContext.scala deleted file mode 100644 index 7290f2b..0000000 --- a/h2o/src/main/scala/org/apache/mahout/h2obindings/H2ODistributedContext.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings - -import org.apache.mahout.math.drm._ - -class H2ODistributedContext(val masterUrl: String) extends DistributedContext { - val h2octx = new H2OContext(masterUrl) - - def close(): Unit = return - - val engine: DistributedEngine = H2OEngine -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/scala/org/apache/mahout/h2obindings/H2OEngine.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/H2OEngine.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/H2OEngine.scala deleted file mode 100644 index 494e8a8..0000000 --- a/h2o/src/main/scala/org/apache/mahout/h2obindings/H2OEngine.scala +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings - -import org.apache.mahout.math.indexeddataset.{BiDictionary, IndexedDataset, Schema, DefaultIndexedDatasetReadSchema} - -import scala.reflect._ -import org.apache.mahout.math._ -import org.apache.mahout.math.drm._ -import org.apache.mahout.math.drm.logical._ -import org.apache.mahout.h2obindings.ops._ -import org.apache.mahout.h2obindings.drm._ -import org.apache.mahout.h2o.common.{Hadoop2HDFSUtil, HDFSUtil} -import org.apache.mahout.logging._ - -/** H2O specific non-DRM operations */ -object H2OEngine extends DistributedEngine { - - private final implicit val log = getLog(H2OEngine.getClass) - - // By default, use Hadoop 1 utils - var hdfsUtils: HDFSUtil = Hadoop2HDFSUtil - - def colMeans[K](drm: CheckpointedDrm[K]): Vector = - H2OHelper.colMeans(drm.h2odrm.frame) - - def colSums[K](drm: CheckpointedDrm[K]): Vector = - H2OHelper.colSums(drm.h2odrm.frame) - - def norm[K](drm: CheckpointedDrm[K]): Double = - H2OHelper.sumSqr(drm.h2odrm.frame) - - def numNonZeroElementsPerColumn[K](drm: CheckpointedDrm[K]): Vector = - H2OHelper.nonZeroCnt(drm.h2odrm.frame) - - /** Broadcast support */ - def drmBroadcast(m: Matrix)(implicit dc: DistributedContext): BCast[Matrix] = - new H2OBCast(m) - - /** Broadcast support */ - def drmBroadcast(v: Vector)(implicit dc: DistributedContext): BCast[Vector] = - new H2OBCast(v) - - /** - * Load DRM from hdfs (as in Mahout DRM format) - * - * @param path Path to DRM file - * @param parMin Hint of minimum number of partitions to split while distributing - * - * @return DRM[Any] where Any is automatically translated to value type - */ - def drmDfsRead(path: String, parMin: Int = 0)(implicit dc: DistributedContext): CheckpointedDrm[_] = { - val drmMetadata = hdfsUtils.readDrmHeader(path) - - new CheckpointedDrmH2O(H2OHdfs.drmFromFile(path, parMin), dc, CacheHint.NONE)(drmMetadata.keyClassTag. - asInstanceOf[ClassTag[Any]]) - } - - /** This creates an empty DRM with specified number of partitions and cardinality. */ - def drmParallelizeEmpty(nrow: Int, ncol: Int, numPartitions: Int)(implicit dc: DistributedContext): CheckpointedDrm[Int] = - new CheckpointedDrmH2O[Int](H2OHelper.emptyDrm(nrow, ncol, numPartitions, -1), dc, CacheHint.NONE) - - def drmParallelizeEmptyLong(nrow: Long, ncol: Int, numPartitions: Int)(implicit dc: DistributedContext): CheckpointedDrm[Long] = - new CheckpointedDrmH2O[Long](H2OHelper.emptyDrm(nrow, ncol, numPartitions, -1), dc, CacheHint.NONE) - - /** Parallelize in-core matrix as H2O distributed matrix, using row ordinal indices as data set keys. */ - def drmParallelizeWithRowIndices(m: Matrix, numPartitions: Int)(implicit dc: DistributedContext): CheckpointedDrm[Int] = - new CheckpointedDrmH2O[Int](H2OHelper.drmFromMatrix(m, numPartitions, -1), dc, CacheHint.NONE) - - /** Parallelize in-core matrix as H2O distributed matrix, using row labels as a data set keys. */ - def drmParallelizeWithRowLabels(m: Matrix, numPartitions: Int)(implicit dc: DistributedContext): CheckpointedDrm[String] = - new CheckpointedDrmH2O[String](H2OHelper.drmFromMatrix(m, numPartitions, -1), dc, CacheHint.NONE) - - def toPhysical[K:ClassTag](plan: DrmLike[K], ch: CacheHint.CacheHint): CheckpointedDrm[K] = - new CheckpointedDrmH2O[K](tr2phys(plan), plan.context, ch) - - /** Eagerly evaluate operator graph into an H2O DRM */ - private def tr2phys[K: ClassTag](oper: DrmLike[K]): H2ODrm = { - oper match { - case OpAtAnyKey(_) => - throw new IllegalArgumentException("\"A\" must be Int-keyed in this A.t expression.") - // Linear algebra operators - case op@OpAt(a) => At.exec(tr2phys(a)(a.keyClassTag)) - case op@OpABt(a, b) => ABt.exec(tr2phys(a)(a.keyClassTag), tr2phys(b)(b.keyClassTag)) - case op@OpAtB(a, b) => AtB.exec(tr2phys(a)(a.keyClassTag), tr2phys(b)(b.keyClassTag)) - case op@OpAtA(a) => AtA.exec(tr2phys(a)(a.keyClassTag)) - case op@OpAx(a, v) => Ax.exec(tr2phys(a)(a.keyClassTag), v) - case op@OpAtx(a, v) => Atx.exec(tr2phys(a)(a.keyClassTag), v) - case op@OpAewUnaryFunc(a, f, z) => AewUnary.exec(tr2phys(a)(a.keyClassTag), op.f, z) - case op@OpAewUnaryFuncFusion(a, f) => AewUnary.exec(tr2phys(a)(a.keyClassTag), op.f, op.evalZeros) - case op@OpAewB(a, b, opId) => AewB.exec(tr2phys(a)(a.keyClassTag), tr2phys(b)(b.keyClassTag), opId) - case op@OpAewScalar(a, s, opId) => AewScalar.exec(tr2phys(a)(a.keyClassTag), s, opId) - case op@OpTimesRightMatrix(a, m) => TimesRightMatrix.exec(tr2phys(a)(a.keyClassTag), m) - // Non arithmetic - case op@OpCbind(a, b) => Cbind.exec(tr2phys(a)(a.keyClassTag), tr2phys(b)(b.keyClassTag)) - case op@OpCbindScalar(a, d, left) => CbindScalar.exec(tr2phys(a)(a.keyClassTag), d, left) - case op@OpRbind(a, b) => Rbind.exec(tr2phys(a)(a.keyClassTag), tr2phys(b)(b.keyClassTag)) - case op@OpRowRange(a, r) => RowRange.exec(tr2phys(a)(a.keyClassTag), r) - // Custom operators - case blockOp: OpMapBlock[K, _] => MapBlock.exec(tr2phys(blockOp.A)(blockOp.A.keyClassTag), blockOp.ncol, blockOp.bmf, - (blockOp.keyClassTag == classTag[String]), blockOp.A.keyClassTag, blockOp.keyClassTag) - case op@OpPar(a, m, e) => Par.exec(tr2phys(a)(a.keyClassTag), m, e) - case cp: CheckpointedDrm[K] => cp.h2odrm - case _ => throw new IllegalArgumentException("Internal:Optimizer has no exec policy for operator %s." - .format(oper)) - } - } - - implicit def cp2cph2o[K](drm: CheckpointedDrm[K]): CheckpointedDrmH2O[K] = drm.asInstanceOf[CheckpointedDrmH2O[K]] - - /** stub class not implemented in H2O */ - abstract class IndexedDatasetH2O(val matrix: CheckpointedDrm[Int], val rowIDs: BiDictionary, val columnIDs: BiDictionary) - extends IndexedDataset {} - - /** - * Reads an IndexedDatasetH2O from default text delimited files - * @todo unimplemented - * @param src a comma separated list of URIs to read from - * @param schema how the text file is formatted - * @return - */ - def indexedDatasetDFSRead(src: String, - schema: Schema = DefaultIndexedDatasetReadSchema, - existingRowIDs: Option[BiDictionary] = None) - (implicit sc: DistributedContext): - IndexedDatasetH2O = { - - error("Unimplemented indexedDatasetDFSReadElements.") - - ??? - } - - /** - * Reads an IndexedDatasetH2O from default text delimited files - * @todo unimplemented - * @param src a comma separated list of URIs to read from - * @param schema how the text file is formatted - * @return - */ - def indexedDatasetDFSReadElements(src: String, - schema: Schema = DefaultIndexedDatasetReadSchema, - existingRowIDs: Option[BiDictionary] = None) - (implicit sc: DistributedContext): IndexedDatasetH2O = { - - error("Unimplemented indexedDatasetDFSReadElements.") - - ??? - } - - /** - * Optional engine-specific all reduce tensor operation. - * - * TODO: implement this please. - * - */ - override def allreduceBlock[K](drm: CheckpointedDrm[K], bmf: BlockMapFunc2[K], rf: BlockReduceFunc) - : Matrix = H2OHelper.allreduceBlock(drm.h2odrm, bmf, rf) - - /** - * TODO: implement this please. - */ - override def drmSampleKRows[K](drmX: DrmLike[K], numSamples: Int, replacement: Boolean): Matrix = ??? - - /** - * (Optional) Sampling operation. Consistent with Spark semantics of the same. - * TODO: implement this please. - */ - override def drmSampleRows[K](drmX: DrmLike[K], fraction: Double, replacement: Boolean): DrmLike[K] = ??? - - /** - * TODO: implement this please. - */ - override def drm2IntKeyed[K](drmX: DrmLike[K], computeMap: Boolean) - : (DrmLike[Int], Option[DrmLike[K]]) = ??? -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/scala/org/apache/mahout/h2obindings/drm/CheckpointedDrmH2O.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/drm/CheckpointedDrmH2O.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/drm/CheckpointedDrmH2O.scala deleted file mode 100644 index faf584a..0000000 --- a/h2o/src/main/scala/org/apache/mahout/h2obindings/drm/CheckpointedDrmH2O.scala +++ /dev/null @@ -1,68 +0,0 @@ -package org.apache.mahout.h2obindings.drm - -import org.apache.mahout.h2obindings._ -import org.apache.mahout.math.Matrix -import org.apache.mahout.math.drm.CacheHint.CacheHint -import org.apache.mahout.math.drm._ - -import scala.reflect._ - -/** - * H2O-specific optimizer-checkpointed DRM. - * - * @param h2odrm Underlying Frame and optional label Vec to wrap around - * @param context Distributed context to the H2O Cloud - * @tparam K Matrix key type - */ -class CheckpointedDrmH2O[K: ClassTag]( - val h2odrm: H2ODrm, - val context: DistributedContext, - override val cacheHint: CacheHint -) extends CheckpointedDrm[K] { - - override val keyClassTag: ClassTag[K] = classTag[K] - - /** - * Collecting DRM to in-core Matrix - * - * If key in DRM is Int, then matrix is collected using key as row index. - * Otherwise, order of rows in result is undefined but key.toString is applied - * as rowLabelBindings of the in-core matrix. - */ - def collect: Matrix = H2OHelper.matrixFromDrm(h2odrm) - - /* XXX: call frame.remove */ - def uncache(): this.type = this - - /** - * Persist DRM to on-disk over HDFS in Mahout DRM format. - */ - def dfsWrite(path: String): Unit = H2OHdfs.drmToFile(path, h2odrm) - - /** - * Action operator - Eagerly evaluate the lazily built operator graph to create - * a CheckpointedDrm - */ - def checkpoint(cacheHint: CacheHint.CacheHint): CheckpointedDrm[K] = this - - def ncol: Int = h2odrm.frame.numCols - - def nrow: Long = h2odrm.frame.numRows - - def canHaveMissingRows: Boolean = false - - protected[mahout] def partitioningTag: Long = h2odrm.frame.anyVec.group.hashCode - - /** stub need to make IndexedDataset core but since drmWrap is not in H2O left for someone else */ - override def newRowCardinality(n: Int): CheckpointedDrm[K] = { - throw new UnsupportedOperationException("CheckpointedDrmH2O#newRowCardinality is not implemented.") - /* this is the Spark impl - assert(n > -1) - assert( n >= nrow) - val newCheckpointedDrm = drmWrap[K](rdd, n, ncol) - newCheckpointedDrm - */ - this - } - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala deleted file mode 100644 index 552fd9e..0000000 --- a/h2o/src/main/scala/org/apache/mahout/h2obindings/ops/MapBlockHelper.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops - -import org.apache.mahout.math.Matrix -import org.apache.mahout.math.drm.BlockMapFunc -import scala.reflect.ClassTag - -import water.fvec.{Vec,NewChunk} -import water.parser.ValueString - -object MapBlockHelper { - def exec[K: ClassTag, R: ClassTag](bmf: Object, in: Matrix, startlong: Long, labels: Vec, nclabel: NewChunk): Matrix = { - val i = implicitly[ClassTag[Int]] - val l = implicitly[ClassTag[Long]] - val s = implicitly[ClassTag[String]] - - val inarray = implicitly[ClassTag[K]] match { - case `i` => val startint: Int = startlong.asInstanceOf[Int] - startint until (startint + in.rowSize) toArray - case `l` => startlong until (startlong + in.rowSize) toArray - case `s` => - val arr = new Array[String](in.rowSize) - val vstr = new ValueString - for (i <- 0 until in.rowSize) { - arr(i) = labels.atStr(vstr, i + startlong).toString - } - arr - } - - val _bmf = bmf.asInstanceOf[BlockMapFunc[K,R]] - val out = _bmf((inarray.asInstanceOf[Array[K]], in)) - - implicitly[ClassTag[R]] match { - case `s` => - val vstr = new ValueString - for (str <- out._1) { - nclabel.addStr(vstr.setTo(str.asInstanceOf[String])) - } - case _ => - } - out._2 - } -} - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/main/scala/org/apache/mahout/h2obindings/package.scala ---------------------------------------------------------------------- diff --git a/h2o/src/main/scala/org/apache/mahout/h2obindings/package.scala b/h2o/src/main/scala/org/apache/mahout/h2obindings/package.scala deleted file mode 100644 index 79d9c5b..0000000 --- a/h2o/src/main/scala/org/apache/mahout/h2obindings/package.scala +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout - -package object h2obindings { - def mahoutH2OContext(masterURL: String): H2ODistributedContext = { - new H2ODistributedContext(masterURL) - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/classifier/naivebayes/NBH2OTestSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/classifier/naivebayes/NBH2OTestSuite.scala b/h2o/src/test/scala/org/apache/mahout/classifier/naivebayes/NBH2OTestSuite.scala deleted file mode 100644 index 7a0a3f4..0000000 --- a/h2o/src/test/scala/org/apache/mahout/classifier/naivebayes/NBH2OTestSuite.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.mahout.classifier.naivebayes - -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.apache.mahout.test.MahoutSuite -import org.scalatest.FunSuite - -class NBH2OTestSuite extends FunSuite with MahoutSuite with DistributedH2OSuite with NBTestBase http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsH2OTestSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsH2OTestSuite.scala b/h2o/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsH2OTestSuite.scala deleted file mode 100644 index 909a8fa..0000000 --- a/h2o/src/test/scala/org/apache/mahout/classifier/stats/ClassifierStatsH2OTestSuite.scala +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.classifier.stats - -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.apache.mahout.test.MahoutSuite -import org.scalatest.FunSuite - -class ClassifierStatsH2OTestSuite extends FunSuite with MahoutSuite with DistributedH2OSuite with ClassifierStatsTestBase - - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeOpsSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeOpsSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeOpsSuite.scala deleted file mode 100644 index 7b199df..0000000 --- a/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeOpsSuite.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.drm - -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.apache.mahout.math.drm._ -import org.scalatest.FunSuite - -/** Tests for DrmLikeOps */ -class DrmLikeOpsSuite extends FunSuite with DistributedH2OSuite with DrmLikeOpsSuiteBase http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeSuite.scala deleted file mode 100644 index ea7056e..0000000 --- a/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/DrmLikeSuite.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.drm - -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.apache.mahout.math.drm._ -import org.scalatest.FunSuite - - -class DrmLikeSuite extends FunSuite with DistributedH2OSuite with DrmLikeSuiteBase http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/RLikeDrmOpsSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/RLikeDrmOpsSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/RLikeDrmOpsSuite.scala deleted file mode 100644 index 5cbe47c..0000000 --- a/h2o/src/test/scala/org/apache/mahout/h2obindings/drm/RLikeDrmOpsSuite.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.drm - -import org.scalatest.FunSuite -import org.apache.mahout.math._ -import drm._ -import org.apache.mahout.h2obindings._ -import test.DistributedH2OSuite - -/** ==R-like DRM DSL operation tests -- H2O== - * */ -class RLikeDrmOpsSuite extends FunSuite with DistributedH2OSuite with RLikeDrmOpsSuiteBase http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/ABtSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/ABtSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/ABtSuite.scala deleted file mode 100644 index 29ae2fd..0000000 --- a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/ABtSuite.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops - -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.scalatest.FunSuite -import org.apache.mahout.math.scalabindings._ -import org.apache.mahout.math.drm._ -import RLikeOps._ -import RLikeDrmOps._ -import org.apache.mahout.math.drm._ - -/** Tests for AB' operator algorithms */ -class ABtSuite extends FunSuite with DistributedH2OSuite { - test("ABt") { - val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 5)) - val inCoreB = dense((3, 4, 5), (5, 6, 7)) - val A = drmParallelize(m = inCoreA, numPartitions = 3) - val B = drmParallelize(m = inCoreB, numPartitions = 2) - - val ABt = A %*% B.t - - val inCoreMControl = inCoreA %*% inCoreB.t - val inCoreM = ABt.collect - - assert((inCoreM - inCoreMControl).norm < 1E-5) - - println(inCoreM) - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AewBSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AewBSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AewBSuite.scala deleted file mode 100644 index 8843c08..0000000 --- a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AewBSuite.scala +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops - -import org.scalatest.FunSuite -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.apache.mahout.math.scalabindings._ -import RLikeOps._ -import org.apache.mahout.math.drm._ -import RLikeDrmOps._ - -/** Elementwise matrix operation tests */ -class AewBSuite extends FunSuite with DistributedH2OSuite { - test("A * B Hadamard") { - val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 5), (7, 8, 9)) - val inCoreB = dense((3, 4, 5), (5, 6, 7), (0, 0, 0), (9, 8, 7)) - val A = drmParallelize(m = inCoreA, numPartitions = 2) - val B = drmParallelize(m = inCoreB) - - val M = A * B - - val inCoreM = M.collect - val inCoreMControl = inCoreA * inCoreB - - assert((inCoreM - inCoreMControl).norm < 1E-10) - } - - test("A + B Elementwise") { - val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 5), (7, 8, 9)) - val inCoreB = dense((3, 4, 5), (5, 6, 7), (0, 0, 0), (9, 8, 7)) - val A = drmParallelize(m = inCoreA, numPartitions = 2) - val B = drmParallelize(m = inCoreB) - - val M = A + B - - val inCoreM = M.collect - val inCoreMControl = inCoreA + inCoreB - - assert((inCoreM - inCoreMControl).norm < 1E-10) - } - - test("A - B Elementwise") { - val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 5), (7, 8, 9)) - val inCoreB = dense((3, 4, 5), (5, 6, 7), (0, 0, 0), (9, 8, 7)) - val A = drmParallelize(m = inCoreA, numPartitions = 2) - val B = drmParallelize(m = inCoreB) - - val M = A - B - - val inCoreM = M.collect - val inCoreMControl = inCoreA - inCoreB - - assert((inCoreM - inCoreMControl).norm < 1E-10) - } - - test("A / B Elementwise") { - val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 0), (7, 8, 9)) - val inCoreB = dense((3, 4, 5), (5, 6, 7), (10, 20, 30), (9, 8, 7)) - val A = drmParallelize(m = inCoreA, numPartitions = 2) - val B = drmParallelize(m = inCoreB) - - val M = A / B - - val inCoreM = M.collect - val inCoreMControl = inCoreA / inCoreB - - assert((inCoreM - inCoreMControl).norm < 1E-10) - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtASuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtASuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtASuite.scala deleted file mode 100644 index 514d5b9..0000000 --- a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtASuite.scala +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops - -import org.scalatest.FunSuite -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.apache.mahout.math.scalabindings._ -import org.apache.mahout.math.drm._ -import RLikeOps._ -import RLikeDrmOps._ -import org.apache.mahout.math.drm._ - -/** Tests for @link XtX */ -class AtASuite extends FunSuite with DistributedH2OSuite { - test("AtA slim") { - val inCoreA = dense((1, 2), (2, 3)) - val drmA = drmParallelize(inCoreA) - - val M = drmA.t %*% drmA - val inCoreAtA = M.collect - println(inCoreAtA) - - val expectedAtA = inCoreA.t %*% inCoreA - println(expectedAtA) - - assert(expectedAtA === inCoreAtA) - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtSuite.scala deleted file mode 100644 index 91eaa84..0000000 --- a/h2o/src/test/scala/org/apache/mahout/h2obindings/ops/AtSuite.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.ops - -import org.scalatest.FunSuite -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.apache.mahout.math.scalabindings._ -import org.apache.mahout.math.drm._ -import RLikeOps._ -import RLikeDrmOps._ -import org.apache.mahout.math.drm._ - -/** Tests for A' algorithms */ -class AtSuite extends FunSuite with DistributedH2OSuite { - test("At") { - val inCoreA = dense((1, 2, 3), (2, 3, 4), (3, 4, 5)) - val A = drmParallelize(m = inCoreA, numPartitions = 2) - - val AtDrm = A.t - val inCoreAt = AtDrm.collect - val inCoreControlAt = inCoreA.t - - println(inCoreAt) - assert((inCoreAt - inCoreControlAt).norm < 1E-5) - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/h2obindings/test/DistributedH2OSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/test/DistributedH2OSuite.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/test/DistributedH2OSuite.scala deleted file mode 100644 index 26182b4..0000000 --- a/h2o/src/test/scala/org/apache/mahout/h2obindings/test/DistributedH2OSuite.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.h2obindings.test - -import org.scalatest.Suite -import org.apache.mahout.h2obindings._ -import org.apache.mahout.test.DistributedMahoutSuite -import org.apache.mahout.math.drm.DistributedContext - -trait DistributedH2OSuite extends DistributedMahoutSuite with LoggerConfiguration { - this: Suite => - - protected implicit var mahoutCtx: DistributedContext = _ - - override protected def beforeEach() { - super.beforeEach() - mahoutCtx = mahoutH2OContext("mah2out" + System.currentTimeMillis()) - } - - override protected def afterEach() { - if (mahoutCtx != null) { - try { - mahoutCtx.close() - } finally { - mahoutCtx = null - } - } - super.afterEach() - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/h2obindings/test/LoggerConfiguration.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/h2obindings/test/LoggerConfiguration.scala b/h2o/src/test/scala/org/apache/mahout/h2obindings/test/LoggerConfiguration.scala deleted file mode 100644 index 98e5a42..0000000 --- a/h2o/src/test/scala/org/apache/mahout/h2obindings/test/LoggerConfiguration.scala +++ /dev/null @@ -1,13 +0,0 @@ -package org.apache.mahout.h2obindings.test - -import org.scalatest.{Suite, ConfigMap} -import org.apache.log4j.{Level, Logger} - -trait LoggerConfiguration extends org.apache.mahout.test.LoggerConfiguration { - this: Suite => - - override protected def beforeAll(configMap: ConfigMap): Unit = { - super.beforeAll(configMap) - Logger.getLogger("org.apache.mahout.h2obindings").setLevel(Level.DEBUG) - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/math/algorithms/ClusteringSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/math/algorithms/ClusteringSuite.scala b/h2o/src/test/scala/org/apache/mahout/math/algorithms/ClusteringSuite.scala deleted file mode 100644 index 61d54ac..0000000 --- a/h2o/src/test/scala/org/apache/mahout/math/algorithms/ClusteringSuite.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.algorithms - -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.scalatest.FunSuite - -class ClusteringSuite extends FunSuite - with DistributedH2OSuite with ClusteringSuiteBase - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala b/h2o/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala deleted file mode 100644 index e777f8b..0000000 --- a/h2o/src/test/scala/org/apache/mahout/math/algorithms/PreprocessorSuite.scala +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.algorithms - -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.scalatest.FunSuite - -class PreprocessorSuite extends FunSuite - with DistributedH2OSuite with PreprocessorSuiteBase \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala b/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala deleted file mode 100644 index 503eb06..0000000 --- a/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionSuite.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.algorithms - -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.scalatest.FunSuite - -class RegressionSuite extends FunSuite - with DistributedH2OSuite with RegressionSuiteBase - http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala b/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala deleted file mode 100644 index 864b045..0000000 --- a/h2o/src/test/scala/org/apache/mahout/math/algorithms/RegressionTestsSuite.scala +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.algorithms - -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.scalatest.FunSuite - -class RegressionTestsSuite extends FunSuite - with DistributedH2OSuite with RegressionTestsSuiteBase http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/h2o/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuite.scala ---------------------------------------------------------------------- diff --git a/h2o/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuite.scala b/h2o/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuite.scala deleted file mode 100644 index 20bc0f7..0000000 --- a/h2o/src/test/scala/org/apache/mahout/math/decompositions/DistributedDecompositionsSuite.scala +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math.decompositions - -import org.apache.mahout.h2obindings.test.DistributedH2OSuite -import org.scalatest.FunSuite - -class DistributedDecompositionsSuite extends FunSuite with DistributedH2OSuite with DistributedDecompositionsSuiteBase http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/hdfs/pom.xml ---------------------------------------------------------------------- diff --git a/hdfs/pom.xml b/hdfs/pom.xml deleted file mode 100644 index 2d909a2..0000000 --- a/hdfs/pom.xml +++ /dev/null @@ -1,246 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <parent> - <groupId>org.apache.mahout</groupId> - <artifactId>mahout</artifactId> - <version>0.13.1-SNAPSHOT</version> - <relativePath>../pom.xml</relativePath> - </parent> - - <!-- modules inherit parent's group id and version. --> - <artifactId>mahout-hdfs</artifactId> - <name>Mahout HDFS</name> - <description>Scalable machine learning libraries</description> - - <packaging>jar</packaging> - - <build> - <resources> - <resource> - <directory>src/main/resources</directory> - </resource> - <resource> - <directory>../src/conf</directory> - <includes> - <include>driver.classes.default.props</include> - </includes> - </resource> - </resources> - <plugins> - <!-- copy jars to top directory, which is MAHOUT_HOME --> - <plugin> - <artifactId>maven-antrun-plugin</artifactId> - <version>1.4</version> - <executions> - <execution> - <id>copy</id> - <phase>package</phase> - <configuration> - <tasks> - <copy file="target/mahout-hdfs-${version}.jar" tofile="../mahout-hdfs-${version}.jar" /> - </tasks> - </configuration> - <goals> - <goal>run</goal> - </goals> - </execution> - </executions> - </plugin> - <!-- create test jar so other modules can reuse the core test utility classes. --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <executions> - <execution> - <goals> - <goal>test-jar</goal> - </goals> - </execution> - </executions> - </plugin> - - <plugin> - <artifactId>maven-javadoc-plugin</artifactId> - </plugin> - - <plugin> - <artifactId>maven-source-plugin</artifactId> - </plugin> - - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-remote-resources-plugin</artifactId> - <configuration> - <appendedResourcesDirectory>../src/main/appended-resources</appendedResourcesDirectory> - <resourceBundles> - <resourceBundle>org.apache:apache-jar-resource-bundle:1.4</resourceBundle> - </resourceBundles> - <supplementalModels> - <supplementalModel>supplemental-models.xml</supplementalModel> - </supplementalModels> - </configuration> - </plugin> - <!-- remove jars from top directory on clean --> - <plugin> - <artifactId>maven-clean-plugin</artifactId> - <version>3.0.0</version> - <configuration> - <filesets> - <fileset> - <directory>../</directory> - <includes> - <include>mahout-hdfs*.jar</include> - </includes> - <followSymlinks>false</followSymlinks> - </fileset> - </filesets> - </configuration> - </plugin> - </plugins> - </build> - - <dependencies> - - <!-- our modules --> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>mahout-math</artifactId> - </dependency> - - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>mahout-math</artifactId> - <type>test-jar</type> - <scope>test</scope> - </dependency> - - <!-- Third Party --> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - </dependency> - - <dependency> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-core</artifactId> - </dependency> - - <dependency> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-api</artifactId> - </dependency> - - <dependency> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-jcl</artifactId> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.apache.commons</groupId> - <artifactId>commons-lang3</artifactId> - </dependency> - - <dependency> - <groupId>commons-cli</groupId> - <artifactId>commons-cli</artifactId> - </dependency> - - <dependency> - <groupId>com.thoughtworks.xstream</groupId> - <artifactId>xstream</artifactId> - </dependency> - - <dependency> - <groupId>org.apache.lucene</groupId> - <artifactId>lucene-core</artifactId> - </dependency> - - <dependency> - <groupId>org.apache.lucene</groupId> - <artifactId>lucene-analyzers-common</artifactId> - </dependency> - - <dependency> - <groupId>org.apache.mahout.commons</groupId> - <artifactId>commons-cli</artifactId> - </dependency> - - <dependency> - <groupId>org.apache.commons</groupId> - <artifactId>commons-math3</artifactId> - </dependency> - - <dependency> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> - </dependency> - - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.hamcrest</groupId> - <artifactId>hamcrest-all</artifactId> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>com.carrotsearch.randomizedtesting</groupId> - <artifactId>randomizedtesting-runner</artifactId> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.easymock</groupId> - <artifactId>easymock</artifactId> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.apache.mrunit</groupId> - <artifactId>mrunit</artifactId> - <version>1.0.0</version> - <classifier>${hadoop.classifier}</classifier> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>commons-httpclient</groupId> - <artifactId>commons-httpclient</artifactId> - <version>3.0.1</version> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.apache.solr</groupId> - <artifactId>solr-commons-csv</artifactId> - <version>3.5.0</version> - </dependency> - - </dependencies> - -</project> http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/hdfs/src/main/java/org/apache/mahout/common/IOUtils.java ---------------------------------------------------------------------- diff --git a/hdfs/src/main/java/org/apache/mahout/common/IOUtils.java b/hdfs/src/main/java/org/apache/mahout/common/IOUtils.java deleted file mode 100644 index 0372ed4..0000000 --- a/hdfs/src/main/java/org/apache/mahout/common/IOUtils.java +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.common; - -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.Collection; - -import org.apache.hadoop.mapred.lib.MultipleOutputs; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * <p> - * I/O-related utility methods that don't have a better home. - * </p> - */ -public final class IOUtils { - - private static final Logger log = LoggerFactory.getLogger(IOUtils.class); - - private IOUtils() { } - - // Sheez, why can't ResultSet, Statement and Connection implement Closeable? - - public static void quietClose(ResultSet closeable) { - if (closeable != null) { - try { - closeable.close(); - } catch (SQLException sqle) { - log.warn("Unexpected exception while closing; continuing", sqle); - } - } - } - - public static void quietClose(Statement closeable) { - if (closeable != null) { - try { - closeable.close(); - } catch (SQLException sqle) { - log.warn("Unexpected exception while closing; continuing", sqle); - } - } - } - - public static void quietClose(Connection closeable) { - if (closeable != null) { - try { - closeable.close(); - } catch (SQLException sqle) { - log.warn("Unexpected exception while closing; continuing", sqle); - } - } - } - - /** - * Closes a {@link ResultSet}, {@link Statement} and {@link Connection} (if not null) and logs (but does not - * rethrow) any resulting {@link SQLException}. This is useful for cleaning up after a database query. - * - * @param resultSet - * {@link ResultSet} to close - * @param statement - * {@link Statement} to close - * @param connection - * {@link Connection} to close - */ - public static void quietClose(ResultSet resultSet, Statement statement, Connection connection) { - quietClose(resultSet); - quietClose(statement); - quietClose(connection); - } - - /** - * make sure to close all sources, log all of the problems occurred, clear - * {@code closeables} (to prevent repeating close attempts), re-throw the - * last one at the end. Helps resource scope management (e.g. compositions of - * {@link Closeable}s objects) - * <P> - * <p/> - * Typical pattern: - * <p/> - * - * <pre> - * LinkedList<Closeable> closeables = new LinkedList<Closeable>(); - * try { - * InputStream stream1 = new FileInputStream(...); - * closeables.addFirst(stream1); - * ... - * InputStream streamN = new FileInputStream(...); - * closeables.addFirst(streamN); - * ... - * } finally { - * IOUtils.close(closeables); - * } - * </pre> - * - * @param closeables - * must be a modifiable collection of {@link Closeable}s - * @throws IOException - * the last exception (if any) of all closed resources - */ - public static void close(Collection<? extends Closeable> closeables) - throws IOException { - Throwable lastThr = null; - - for (Closeable closeable : closeables) { - try { - closeable.close(); - } catch (Throwable thr) { - log.error(thr.getMessage(), thr); - lastThr = thr; - } - } - - // make sure we don't double-close - // but that has to be modifiable collection - closeables.clear(); - - if (lastThr != null) { - if (lastThr instanceof IOException) { - throw (IOException) lastThr; - } else if (lastThr instanceof RuntimeException) { - throw (RuntimeException) lastThr; - } else { - throw (Error) lastThr; - } - } - - } - - - /** - * for temporary files, a file may be considered as a {@link Closeable} too, - * where file is wiped on close and thus the disk resource is released - * ('closed'). - * - * - */ - public static class DeleteFileOnClose implements Closeable { - - private final File file; - - public DeleteFileOnClose(File file) { - this.file = file; - } - - @Override - public void close() throws IOException { - if (file.isFile()) { - file.delete(); - } - } - } - - /** - * MultipleOutputs to closeable adapter. - * - */ - public static class MultipleOutputsCloseableAdapter implements Closeable { - private final MultipleOutputs mo; - - public MultipleOutputsCloseableAdapter(MultipleOutputs mo) { - this.mo = mo; - } - - @Override - public void close() throws IOException { - if (mo != null) { - mo.close(); - } - } - } - -}
