http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/AbstractBinaryOp.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/AbstractBinaryOp.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/AbstractBinaryOp.scala new file mode 100644 index 0000000..3b6b8bf --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/AbstractBinaryOp.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm.{DistributedContext, DrmLike} + +/** + * Any logical binary operator (such as A + B). + * <P/> + * + * Any logical operator derived from this is also capable of triggering an optimizer checkpoint, hence, + * it also inherits CheckpointAction.
+ * <P/> + * + * @param evidence$1 LHS key type tag + * @param evidence$2 RHS key type tag + * @param evidence$3 expression key type tag + * @tparam A LHS key type + * @tparam B RHS key type + * @tparam K result key type + */ +abstract class AbstractBinaryOp[A: ClassTag, B: ClassTag, K: ClassTag] + extends CheckpointAction[K] with DrmLike[K] { + + protected[drm] var A: DrmLike[A] + protected[drm] var B: DrmLike[B] + lazy val context: DistributedContext = A.context + + protected[mahout] def canHaveMissingRows: Boolean = false + + // These are explicit evidence exports. Sometimes Scala fails to figure them out on its own. + def classTagA: ClassTag[A] = implicitly[ClassTag[A]] + + def classTagB: ClassTag[B] = implicitly[ClassTag[B]] + + def classTagK: ClassTag[K] = implicitly[ClassTag[K]] + +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/AbstractUnaryOp.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/AbstractUnaryOp.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/AbstractUnaryOp.scala new file mode 100644 index 0000000..a445f21 --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/AbstractUnaryOp.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm.{DistributedContext, DrmLike} + +/** Abstract unary operator */ +abstract class AbstractUnaryOp[A: ClassTag, K: ClassTag] + extends CheckpointAction[K] with DrmLike[K] { + + protected[drm] var A: DrmLike[A] + + lazy val context: DistributedContext = A.context + + def classTagA: ClassTag[A] = implicitly[ClassTag[A]] + + def classTagK: ClassTag[K] = implicitly[ClassTag[K]] + + override protected[mahout] lazy val canHaveMissingRows: Boolean = A.canHaveMissingRows + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/CheckpointAction.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/CheckpointAction.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/CheckpointAction.scala new file mode 100644 index 0000000..aa3a3b9 --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/CheckpointAction.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import scala.util.Random +import org.apache.mahout.math.drm._ + +/** Implementation of distributed expression checkpoint and optimizer. */ +abstract class CheckpointAction[K: ClassTag] extends DrmLike[K] { + + protected[mahout] lazy val partitioningTag: Long = Random.nextLong() + + private[mahout] var cp:Option[CheckpointedDrm[K]] = None + + def isIdenticallyPartitioned(other:DrmLike[_]) = + partitioningTag!= 0L && partitioningTag == other.partitioningTag + + /** + * Action operator -- does not necessarily mean a Spark action, but does mean running the BLAS optimizer + * and writing down the Spark graph lineage since the last checkpointed DRM. + */ + def checkpoint(cacheHint: CacheHint.CacheHint): CheckpointedDrm[K] = cp match { + case None => + val physPlan = context.toPhysical(context.optimizerRewrite(this), cacheHint) + cp = Some(physPlan) + physPlan + case Some(cp) => cp + } + +} + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAB.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAB.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAB.scala new file mode 100644 index 0000000..804a00e --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAB.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm.DrmLike + +/** Logical AB */ +case class OpAB[K: ClassTag ]( + override var A: DrmLike[K], + override var B: DrmLike[Int]) + extends AbstractBinaryOp[K, Int, K] { + + assert(A.ncol == B.nrow, "Incompatible operand geometry") + + /** R-like syntax for number of rows. */ + def nrow: Long = A.nrow + + /** R-like syntax for number of columns */ + def ncol: Int = B.ncol + + /** Non-zero element count */ + def nNonZero: Long = + // TODO: for purposes of cost calculation, approximate based on operands + throw new UnsupportedOperationException +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpABAnyKey.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpABAnyKey.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpABAnyKey.scala new file mode 100644 index 0000000..f131f3f --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpABAnyKey.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm.DrmLike + +/** Logical AB */ +case class OpABAnyKey[B:ClassTag, K: ClassTag ]( + override var A: DrmLike[K], + override var B: DrmLike[B]) + extends AbstractBinaryOp[K, B, K] { + + assert(A.ncol == B.nrow, "Incompatible operand geometry") + + /** R-like syntax for number of rows. */ + def nrow: Long = A.nrow + + /** R-like syntax for number of columns */ + def ncol: Int = B.ncol + + /** Non-zero element count */ + def nNonZero: Long = + // TODO: for purposes of cost calculation, approximate based on operands + throw new UnsupportedOperationException +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpABt.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpABt.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpABt.scala new file mode 100644 index 0000000..f6503ed --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpABt.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm._ + +/** Logical AB' */ +case class OpABt[K: ClassTag]( + override var A: DrmLike[K], + override var B: DrmLike[Int]) + extends AbstractBinaryOp[K,Int,K] { + + assert(A.ncol == B.ncol, "Incompatible operand geometry") + + /** R-like syntax for number of rows. */ + def nrow: Long = A.nrow + + /** R-like syntax for number of columns */ + def ncol: Int = safeToNonNegInt(B.nrow) + + /** Non-zero element count */ + def nNonZero: Long = + // TODO: for purposes of cost calculation, approximate based on operands + throw new UnsupportedOperationException + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAewB.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAewB.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAewB.scala new file mode 100644 index 0000000..da7c0d5 --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAewB.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm.DrmLike +import scala.util.Random + +/** DRM elementwise operator */ +case class OpAewB[K: ClassTag]( + override var A: DrmLike[K], + override var B: DrmLike[K], + val op: String + ) extends AbstractBinaryOp[K, K, K] { + + + + assert(A.ncol == B.ncol, "arguments must have same number of columns") + assert(A.nrow == B.nrow, "arguments must have same number of rows") + + override protected[mahout] lazy val partitioningTag: Long = + if (A.partitioningTag == B.partitioningTag) A.partitioningTag + else Random.nextLong() + + /** R-like syntax for number of rows. */ + def nrow: Long = A.nrow + + /** R-like syntax for number of columns */ + def ncol: Int = A.ncol + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAewScalar.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAewScalar.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAewScalar.scala new file mode 100644 index 0000000..19a910c --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAewScalar.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm.DrmLike +import scala.util.Random + +/** Operator denoting expressions like 5.0 - A or A * 5.6 */ +case class OpAewScalar[K: ClassTag]( + override var A: DrmLike[K], + val scalar: Double, + val op: String + ) extends AbstractUnaryOp[K,K] { + + override protected[mahout] lazy val partitioningTag: Long = + if (A.canHaveMissingRows) + Random.nextLong() + else A.partitioningTag + + /** Stuff like `A +1` is always supposed to fix this */ + override protected[mahout] lazy val canHaveMissingRows: Boolean = false + + /** R-like syntax for number of rows. 
*/ + def nrow: Long = A.nrow + + /** R-like syntax for number of columns */ + def ncol: Int = A.ncol + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAt.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAt.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAt.scala new file mode 100644 index 0000000..4791301 --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAt.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import org.apache.mahout.math.drm._ + +/** Logical A' */ +case class OpAt( + override var A: DrmLike[Int]) + extends AbstractUnaryOp[Int, Int] { + + /** R-like syntax for number of rows. */ + def nrow: Long = A.ncol + + /** R-like syntax for number of columns */ + def ncol: Int = safeToNonNegInt(A.nrow) + + /** A' after simplifications cannot produce missing rows, ever. 
*/ + override protected[mahout] lazy val canHaveMissingRows: Boolean = false +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtA.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtA.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtA.scala new file mode 100644 index 0000000..ad2a5d8 --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtA.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm.DrmLike + +/** A'A */ +case class OpAtA[K: ClassTag]( + override var A: DrmLike[K] + ) extends AbstractUnaryOp[K, Int] { + + /** R-like syntax for number of rows. 
*/ + def nrow: Long = A.ncol + + /** R-like syntax for number of columns */ + def ncol: Int = A.ncol + + override protected[mahout] lazy val canHaveMissingRows: Boolean = false + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtAnyKey.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtAnyKey.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtAnyKey.scala new file mode 100644 index 0000000..4e1dd5c --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtAnyKey.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm._ + +/** Logical A' for any row key to support A'A optimizations */ +case class OpAtAnyKey[A: ClassTag]( + override var A: DrmLike[A]) + extends AbstractUnaryOp[A, Int] { + + /** R-like syntax for number of rows. 
*/ + def nrow: Long = A.ncol + + /** R-like syntax for number of columns */ + def ncol: Int = safeToNonNegInt(A.nrow) + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtB.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtB.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtB.scala new file mode 100644 index 0000000..ef3ae6b --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtB.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm.DrmLike + +/** Logical A'B */ +case class OpAtB[A: ClassTag]( + override var A: DrmLike[A], + override var B: DrmLike[A]) + extends AbstractBinaryOp[A, A, Int] { + + assert(A.nrow == B.nrow, "Incompatible operand geometry") + + /** R-like syntax for number of rows. 
*/ + def nrow: Long = A.ncol + + /** R-like syntax for number of columns */ + def ncol: Int = B.ncol + + /** Non-zero element count */ + def nNonZero: Long = + // TODO: for purposes of cost calculation, approximate based on operands + throw new UnsupportedOperationException + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtx.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtx.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtx.scala new file mode 100644 index 0000000..36769c7 --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAtx.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import org.apache.mahout.math.Vector +import org.apache.mahout.math.scalabindings._ +import RLikeOps._ +import org.apache.mahout.math.drm._ + +/** Logical A'x. 
*/ +case class OpAtx( + override var A: DrmLike[Int], + val x: Vector + ) extends AbstractUnaryOp[Int, Int] { + + override protected[mahout] lazy val partitioningTag: Long = A.partitioningTag + + assert(A.nrow == x.length, "Incompatible operand geometry") + + /** R-like syntax for number of rows. */ + def nrow: Long = safeToNonNegInt(A.ncol) + + /** R-like syntax for number of columns */ + def ncol: Int = 1 + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAx.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAx.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAx.scala new file mode 100644 index 0000000..a726989 --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpAx.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.Vector +import org.apache.mahout.math.scalabindings._ +import RLikeOps._ +import org.apache.mahout.math.drm.DrmLike + +/** Logical Ax. 
*/ +case class OpAx[K: ClassTag]( + override var A: DrmLike[K], + val x: Vector + ) extends AbstractUnaryOp[K, K] { + + override protected[mahout] lazy val partitioningTag: Long = A.partitioningTag + + assert(A.ncol == x.length, "Incompatible operand geometry") + + /** R-like syntax for number of rows. */ + def nrow: Long = A.nrow + + /** R-like syntax for number of columns */ + def ncol: Int = 1 + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpCbind.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpCbind.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpCbind.scala new file mode 100644 index 0000000..1425264 --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpCbind.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.math.drm.logical + +import scala.reflect.ClassTag +import org.apache.mahout.math.drm.DrmLike +import scala.util.Random + +/** cbind() logical operator */ +case class OpCbind[K: ClassTag]( + override var A: DrmLike[K], + override var B: DrmLike[K] + ) extends AbstractBinaryOp[K, K, K] { + + assert(A.nrow == B.nrow, "arguments must have same number of rows") + + override protected[mahout] lazy val partitioningTag: Long = + if (A.partitioningTag == B.partitioningTag) A.partitioningTag + else Random.nextLong() + + /** R-like syntax for number of rows. */ + def nrow: Long = A.nrow + + /** R-like syntax for number of columns */ + def ncol: Int = A.ncol + B.ncol + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpMapBlock.scala ---------------------------------------------------------------------- diff --git a/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpMapBlock.scala b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpMapBlock.scala new file mode 100644 index 0000000..7299d9e --- /dev/null +++ b/samsara/src/main/scala/org/apache/mahout/math/drm/logical/OpMapBlock.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Logical operator that carries a block-map function `bmf` over the operand `A`.
 *
 * @param A operand
 * @param bmf block-map function stored on the operator (presumably applied by the physical
 *            plan -- confirm against the engine implementation)
 * @param _ncol column count of the result; any negative value means "same as `A.ncol`"
 * @param _nrow row count of the result; any negative value means "same as `A.nrow`"
 * @param identicallyPartitioned when true the result reuses the partitioning tag of `A`,
 *                               otherwise a random tag is drawn
 * @tparam S key type of the operand
 * @tparam R key type of the result
 */
class OpMapBlock[S: ClassTag, R: ClassTag](
    override var A: DrmLike[S],
    val bmf: BlockMapFunc[S, R],
    val _ncol: Int = -1,
    val _nrow: Long = -1,
    identicallyPartitioned: Boolean
    ) extends AbstractUnaryOp[S, R] {

  override protected[mahout] lazy val partitioningTag: Long =
    if (identicallyPartitioned) A.partitioningTag else Random.nextLong()

  /** R-like syntax for number of rows. */
  def nrow: Long = if (_nrow >= 0) _nrow else A.nrow

  /** R-like syntax for number of columns */
  def ncol: Int = if (_ncol >= 0) _ncol else A.ncol

}

/**
 * Parallelism operator. Geometry is passed through from `A` unchanged.
 *
 * @param A operand
 * @param minSplits defaults to -1 (presumably "not specified" -- confirm against the physical operator)
 * @param exactSplits defaults to -1 (presumably "not specified" -- confirm against the physical operator)
 * @tparam K row key type
 */
case class OpPar[K: ClassTag](
    override var A: DrmLike[K],
    val minSplits: Int = -1,
    val exactSplits: Int = -1)
    extends AbstractUnaryOp[K, K] {

  /** R-like syntax for number of rows. */
  def nrow: Long = A.nrow

  /** R-like syntax for number of columns */
  def ncol: Int = A.ncol
}

/**
 * rbind() logical operator: the result reports `A.nrow + B.nrow` rows and the column count of `A`.
 * Construction asserts that both operands report the same number of columns.
 *
 * @param A first operand
 * @param B second operand
 * @tparam K row key type shared by both operands and by the result
 */
case class OpRbind[K: ClassTag](
    override var A: DrmLike[K],
    override var B: DrmLike[K]
    ) extends AbstractBinaryOp[K, K, K] {

  assert(A.ncol == B.ncol, "arguments must have same number of columns")

  /** Always a freshly drawn random tag. */
  override protected[mahout] lazy val partitioningTag: Long = Random.nextLong()

  /** R-like syntax for number of rows. */
  def nrow: Long = A.nrow + B.nrow

  /** R-like syntax for number of columns */
  def ncol: Int = A.ncol

}

/**
 * Logical row-range slicing of an `Int`-keyed operand.
 *
 * Construction asserts that the range is non-empty and that both its smallest and its largest
 * element are valid row indices of `A`. The bounds are checked through `min`/`max` rather than
 * `head`/`last`, so a descending range (negative step) is validated at both ends, and an empty
 * range is reported by the assertion instead of surfacing as a `NoSuchElementException` raised
 * by `Range.head`.
 *
 * @param A operand
 * @param rowRange row indices to keep
 */
case class OpRowRange(
    override var A: DrmLike[Int],
    val rowRange: Range
    ) extends AbstractUnaryOp[Int, Int] {

  // Checked first: min/max (like head/last) throw on an empty range.
  assert(rowRange.nonEmpty, "row range out of range: empty range")
  assert(rowRange.min >= 0 && rowRange.max < A.nrow, "row range out of range")

  /** R-like syntax for number of rows. */
  def nrow: Long = rowRange.length

  /** R-like syntax for number of columns */
  def ncol: Int = A.ncol

}

/**
 * Logical Times-left over in-core matrix operand: the result reports `left.nrow` rows and
 * `A.ncol` columns. Construction asserts that `left.ncol` equals `A.nrow`.
 *
 * @param left in-core left-hand matrix
 * @param A distributed right-hand operand
 */
case class OpTimesLeftMatrix(
    val left: Matrix,
    override var A: DrmLike[Int]
    ) extends AbstractUnaryOp[Int, Int] {

  assert(left.ncol == A.nrow, "Incompatible operand geometry")

  /** R-like syntax for number of rows. */
  def nrow: Long = left.nrow

  /** R-like syntax for number of columns */
  def ncol: Int = A.ncol

  /** Non-zero element count. Not implemented yet: always throws `UnsupportedOperationException`. */
  // TODO
  def nNonZero: Long = throw new UnsupportedOperationException

}
/**
 * Logical times-right over in-core matrix operand.
 *
 * The result reports the row count of `A` and the column count of `right`, and it reuses the
 * partitioning tag of `A`. Construction asserts that `A.ncol` equals `right.nrow`.
 *
 * @param A distributed left-hand operand
 * @param right in-core right-hand matrix
 * @tparam K row key type of the operand and of the result
 */
case class OpTimesRightMatrix[K: ClassTag](
    override var A: DrmLike[K],
    val right: Matrix
    ) extends AbstractUnaryOp[K, K] {

  assert(A.ncol == right.nrow, "Incompatible operand geometry")

  /** Same tag as the operand. */
  override protected[mahout] lazy val partitioningTag: Long = A.partitioningTag

  /** R-like syntax for number of rows: taken from `A`. */
  def nrow: Long = A.nrow

  /** R-like syntax for number of columns: taken from `right`. */
  def ncol: Int = right.ncol

  /** Non-zero element count. Not implemented yet: always throws `UnsupportedOperationException`. */
  // TODO
  def nNonZero: Long = throw new UnsupportedOperationException

}
package object drm {

  /** Drm row-wise tuple */
  type DrmTuple[K] = (K, Vector)

  /** Drm block-wise tuple: Array of row keys and the matrix block. */
  type BlockifiedDrmTuple[K] = (Array[K], _ <: Matrix)

  /** Block-map func */
  type BlockMapFunc[S, R] = BlockifiedDrmTuple[S] => BlockifiedDrmTuple[R]

  /** CacheHint type */
  //  type CacheHint = CacheHint.CacheHint

  /**
   * Narrows a `Long` to an `Int`, asserting that the value lies in `[0, Int.MaxValue]`.
   *
   * @param x value to narrow
   * @return `x` as an `Int`
   */
  def safeToNonNegInt(x: Long): Int = {
    // Equivalent to the former `x == x << -31 >>> -31` check, which kept only the low 31 bits.
    assert(x >= 0 && x <= Int.MaxValue,
      "transformation from long to Int is losing signficant bits, or is a negative number")
    x.toInt
  }

  /** Broadcast support API */
  def drmBroadcast(m: Matrix)(implicit ctx: DistributedContext): BCast[Matrix] = ctx.drmBroadcast(m)

  /** Broadcast support API */
  def drmBroadcast(v: Vector)(implicit ctx: DistributedContext): BCast[Vector] = ctx.drmBroadcast(v)

  /** Load DRM from hdfs (as in Mahout DRM format) */
  def drmDfsRead(path: String)(implicit ctx: DistributedContext): CheckpointedDrm[_] = ctx.drmDfsRead(path)

  /** Shortcut to parallelizing matrices with indices, ignore row labels. */
  def drmParallelize(m: Matrix, numPartitions: Int = 1)
      (implicit sc: DistributedContext): CheckpointedDrm[Int] =
    drmParallelizeWithRowIndices(m, numPartitions)(sc)

  /** Parallelize in-core matrix as a distributed matrix, using row ordinal indices as data set keys. */
  def drmParallelizeWithRowIndices(m: Matrix, numPartitions: Int = 1)
      (implicit ctx: DistributedContext): CheckpointedDrm[Int] =
    ctx.drmParallelizeWithRowIndices(m, numPartitions)

  /** Parallelize in-core matrix as a distributed matrix, using row labels as data set keys. */
  def drmParallelizeWithRowLabels(m: Matrix, numPartitions: Int = 1)
      (implicit ctx: DistributedContext): CheckpointedDrm[String] =
    ctx.drmParallelizeWithRowLabels(m, numPartitions)

  /** This creates an empty DRM with specified number of partitions and cardinality. */
  def drmParallelizeEmpty(nrow: Int, ncol: Int, numPartitions: Int = 10)
      (implicit ctx: DistributedContext): CheckpointedDrm[Int] =
    ctx.drmParallelizeEmpty(nrow, ncol, numPartitions)

  /** Creates empty DRM with non-trivial height */
  def drmParallelizeEmptyLong(nrow: Long, ncol: Int, numPartitions: Int = 10)
      (implicit ctx: DistributedContext): CheckpointedDrm[Long] =
    ctx.drmParallelizeEmptyLong(nrow, ncol, numPartitions)

  /** Implicit broadcast -> value conversion. */
  implicit def bcast2val[T](bcast: BCast[T]): T = bcast.value

  /** Just throw all engine operations into context as well. */
  implicit def ctx2engine(ctx: DistributedContext): DistributedEngine = ctx.engine

  implicit def drm2drmCpOps[K: ClassTag](drm: CheckpointedDrm[K]): CheckpointedOps[K] =
    new CheckpointedOps[K](drm)

  /**
   * We assume that whenever computational action is invoked without explicit checkpoint, the user
   * doesn't imply caching
   */
  implicit def drm2Checkpointed[K: ClassTag](drm: DrmLike[K]): CheckpointedDrm[K] = drm.checkpoint(CacheHint.NONE)

  /** Implicit conversion to in-core with NONE caching of the result. */
  implicit def drm2InCore[K: ClassTag](drm: DrmLike[K]): Matrix = drm.collect

  /**
   * Do vertical concatenation of collection of blockified tuples.
   *
   * A single block is returned as is. Otherwise a new block is created with `like` on the first
   * block, sized to the total row count and the first block's column count, and every block and
   * its keys are copied in at consecutive row offsets.
   *
   * @param blocks blocks to stack; asserted to be non-empty
   * @return the coalesced keys paired with the coalesced block
   */
  def rbind[K: ClassTag](blocks: Iterable[BlockifiedDrmTuple[K]]): BlockifiedDrmTuple[K] = {
    assert(blocks.nonEmpty, "rbind: 0 blocks passed in")
    if (blocks.size == 1) {
      // No coalescing required.
      blocks.head
    } else {
      // compute total number of rows in a new block
      val m = blocks.view.map(_._2.nrow).sum
      val n = blocks.head._2.ncol
      val coalescedBlock = blocks.head._2.like(m, n)
      val coalescedKeys = new Array[K](m)
      var row = 0
      for (elem <- blocks.view) {
        val block = elem._2
        val rowEnd = row + block.nrow
        coalescedBlock(row until rowEnd, ::) := block
        elem._1.copyToArray(coalescedKeys, row)
        row = rowEnd
      }
      coalescedKeys -> coalescedBlock
    }
  }

}

package object indexeddataset {

  /**
   * Load IndexedDataset from text delimited files, one row per line.
   *
   * @param src source location(s), handed to the context as is
   * @param schema read schema; defaults to the row-wise `DefaultIndexedDatasetReadSchema`
   * @param existingRowIDs row ID dictionary to start from; defaults to a new empty one
   */
  def indexedDatasetDFSRead(src: String,
      schema: Schema = DefaultIndexedDatasetReadSchema,
      existingRowIDs: BiMap[String, Int] = HashBiMap.create())
      (implicit ctx: DistributedContext): IndexedDataset =
    ctx.indexedDatasetDFSRead(src, schema, existingRowIDs)

  /**
   * Load IndexedDataset from text delimited files, one element per line.
   *
   * @param src source location(s), handed to the context as is
   * @param schema read schema; defaults to the element-wise `DefaultIndexedDatasetElementReadSchema`
   *               (the row-wise default used previously carries none of the element keys such
   *               as "delim" or "rowIDColumn")
   * @param existingRowIDs row ID dictionary to start from; defaults to a new empty one
   */
  def indexedDatasetDFSReadElements(src: String,
      schema: Schema = org.apache.mahout.math.indexeddataset.DefaultIndexedDatasetElementReadSchema,
      existingRowIDs: BiMap[String, Int] = HashBiMap.create())
      (implicit ctx: DistributedContext): IndexedDataset =
    ctx.indexedDatasetDFSReadElements(src, schema, existingRowIDs)

}
/**
 * A [[org.apache.mahout.math.drm.CheckpointedDrm]] keyed by `Int`, bundled with two
 * [[com.google.common.collect.BiMap]] dictionaries that translate user-supplied string IDs for
 * rows and columns to and from `Int` IDs.
 * @todo Often no need for both or perhaps either dictionary, so save resources by allowing to be not created
 *       when not needed.
 */
trait IndexedDataset {

  /** The wrapped distributed matrix. */
  val matrix: CheckpointedDrm[Int]

  /** Row label <-> Int ID dictionary. */
  val rowIDs: BiMap[String, Int]

  /** Column label <-> Int ID dictionary. */
  val columnIDs: BiMap[String, Int]

  /**
   * Write a text delimited file(s) with the row and column IDs from dictionaries.
   * @param dest write location, usually a directory
   * @param schema params to control writing
   * @param sc the [[org.apache.mahout.math.drm.DistributedContext]] used to do a distributed write
   */
  def dfsWrite(dest: String, schema: Schema)(implicit sc: DistributedContext): Unit

  /** Factory method: implemented by the extending class to build a new instance of itself. */
  def create(
      matrix: CheckpointedDrm[Int],
      rowIDs: BiMap[String, Int],
      columnIDs: BiMap[String, Int]): IndexedDataset

  /**
   * Builds a new dataset through [[create]] from `matrix.newRowCardinality(n)` and the same two
   * dictionaries.
   * @param n number to use for new row cardinality, should be larger than current
   * @return a new IndexedDataset or extending class with new cardinality
   * @note should be done before any optimizer actions are performed on the matrix or you'll get unpredictable
   *       results.
   */
  def newRowCardinality(n: Int): IndexedDataset = {
    // n is not checked here; it is handed to the matrix as is
    val resized = matrix.newRowCardinality(n)
    create(resized, rowIDs, columnIDs)
  }

}
/**
 * Reader of collections of type `T`. The two protected hooks, `elementReader` and `rowReader`,
 * must be supplied by an extending trait, which thereby also fixes `T`; the public entry points
 * forward to them together with this reader's own `mc` and `readSchema`.
 * @tparam T type of object to read.
 */
trait Reader[T] {

  /** Context passed to the hooks. */
  val mc: DistributedContext

  /** Schema passed to the hooks. */
  val readSchema: Schema

  /**
   * Override in extending trait to supply T and perform a parallel read of collection elements
   * @param mc a [[org.apache.mahout.math.drm.DistributedContext]] to read from
   * @param readSchema map of parameters controlling formatting and how the read is executed
   * @param source list of comma delimited files to read from
   * @param existingRowIDs [[com.google.common.collect.BiMap]] containing row IDs that have already
   *                       been applied to this collection--used to synchronize row IDs between several
   *                       collections
   * @return a new collection of type T
   */
  protected def elementReader(
      mc: DistributedContext,
      readSchema: Schema,
      source: String,
      existingRowIDs: BiMap[String, Int]): T

  /**
   * Override in extending trait to supply T and perform a parallel read of collection rows
   * @param mc a [[org.apache.mahout.math.drm.DistributedContext]] to read from
   * @param readSchema map of parameters controlling formatting and how the read is executed
   * @param source list of comma delimited files to read from
   * @param existingRowIDs [[com.google.common.collect.BiMap]] containing row IDs that have already
   *                       been applied to this collection--used to synchronize row IDs between several
   *                       collections
   * @return a new collection of type T
   */
  protected def rowReader(
      mc: DistributedContext,
      readSchema: Schema,
      source: String,
      existingRowIDs: BiMap[String, Int]): T

  /**
   * Public method called to perform the element-wise read; forwards to `elementReader`.
   * Usually no need to override.
   * @param source comma delimited URIs to read from
   * @param existingRowIDs a [[com.google.common.collect.BiMap]] containing previously used id mappings--used
   *                       to synchronize all row ids in several collections; a new empty map by default
   * @return a new collection of type T
   */
  def readElementsFrom(
      source: String,
      existingRowIDs: BiMap[String, Int] = HashBiMap.create()): T = {
    elementReader(mc, readSchema, source, existingRowIDs)
  }

  /**
   * Public method called to perform the row-wise read; forwards to `rowReader`.
   * Usually no need to override.
   * @param source comma delimited URIs to read from
   * @param existingRowIDs a [[com.google.common.collect.BiMap]] containing previously used id mappings--used
   *                       to synchronize all row ids in several collections; a new empty map by default
   * @return a new collection of type T
   */
  def readRowsFrom(
      source: String,
      existingRowIDs: BiMap[String, Int] = HashBiMap.create()): T = {
    rowReader(mc, readSchema, source, existingRowIDs)
  }
}
/**
 * Writer of collections of type `T`. The protected `writer` hook must be supplied by an extending
 * trait, which thereby also fixes `T`; `writeTo` forwards to it together with this writer's own
 * `mc`, `writeSchema` and `sort`.
 * @tparam T type of object to write, usually a matrix type thing.
 */
trait Writer[T] {

  /** Context passed to the hook. */
  val mc: DistributedContext

  /** Sort flag passed to the hook. */
  val sort: Boolean

  /** Schema passed to the hook. */
  val writeSchema: Schema

  /**
   * Override to provide writer method
   * @param mc context used to do distributed write
   * @param writeSchema map with params to control format and execution of the write
   * @param dest root directory to write to
   * @param collection usually a matrix like collection to write
   * @param sort flags whether to sort the rows by value descending
   */
  protected def writer(
      mc: DistributedContext,
      writeSchema: Schema,
      dest: String,
      collection: T,
      sort: Boolean): Unit

  /**
   * Call this method to perform the write, usually no need to override.
   * @param collection what to write
   * @param dest root directory to write to
   */
  def writeTo(collection: T, dest: String): Unit = {
    writer(mc, writeSchema, dest, collection, sort)
  }
}
/**
 * Syntactic sugar for mutable.HashMap[String, Any]
 * @param params list of mappings for instantiation {{{val mySchema = new Schema("one" -> 1, "two" -> "2", ...)}}}
 */
class Schema(params: (String, Any)*) extends HashMap[String, Any] {
  // note: this requires a mutable HashMap, do we care?
  this ++= params

  /**
   * Constructor for copying an existing Schema
   * @param schemaToClone Schema whose current entries seed the new instance
   */
  def this(schemaToClone: Schema) = this(schemaToClone.toSeq: _*)
}

// These can be used to keep the text in and out fairly standard to Mahout, where an application specific
// format is not required. These apply to formatting of [[org.apache.mahout.math.indexeddataset.IndexedDataset]]
// which can be used to create a Mahout DRM for DSL ops.

/**
 * Simple default Schema for typical text delimited element file input.
 * This tells the reader to input elements of the default (rowID<comma, tab, or space>columnID
 * <comma, tab, or space>here may be other ignored text...)
 */
final object DefaultIndexedDatasetElementReadSchema extends Schema(
  "delim" -> "[,\t ]", // comma, tab or space
  "filter" -> "",
  "rowIDColumn" -> 0,
  "columnIDPosition" -> 1,
  "filterColumn" -> -1
)
/**
 * Default Schema for text delimited [[org.apache.mahout.math.indexeddataset.IndexedDataset]] file output with
 * one row per line.
 * The default form:
 * (rowID<tab>columnID1:score1<space>columnID2:score2...)
 */
final object DefaultIndexedDatasetWriteSchema extends Schema(
  "rowKeyDelim" -> "\t",
  "columnIdStrengthDelim" -> ":",
  "elementDelim" -> " ",
  "omitScore" -> false
)

/**
 * Default Schema for typical text delimited [[org.apache.mahout.math.indexeddataset.IndexedDataset]] file
 * row-wise input. With the delimiters configured here, text lines have the form:
 * (rowID<tab>columnID1:score1<space>columnID2:score2...)
 */
final object DefaultIndexedDatasetReadSchema extends Schema(
  "rowKeyDelim" -> "\t",
  "columnIdStrengthDelim" -> ":",
  "elementDelim" -> " "
)

/**
 * Default Schema for reading a text delimited [[org.apache.mahout.math.indexeddataset.IndexedDataset]] file where
 * the score of any element is ignored.
 * This tells the reader to input DRM lines of the form
 * (rowID<tab>columnID1:score1<space>columnID2:score2...) remember the score is ignored.
 * Alternatively the format can be
 * (rowID<tab>columnID1<space>columnID2 ...) where presence indicates a score of 1. This is the default
 * output format for [[IndexedDatasetWriteBooleanSchema]]
 */
final object IndexedDatasetReadBooleanSchema extends Schema(
  "rowKeyDelim" -> "\t",
  "columnIdStrengthDelim" -> ":",
  "elementDelim" -> " ",
  "omitScore" -> true
)
/**
 * Default Schema for typical text delimited [[org.apache.mahout.math.indexeddataset.IndexedDataset]] file output
 * where the score of an element is omitted. This tells the writer to output
 * [[org.apache.mahout.math.indexeddataset.IndexedDataset]] rows of the form
 * (rowID<tab>columnID1<space>columnID2...)
 */
final object IndexedDatasetWriteBooleanSchema extends Schema(
  "rowKeyDelim" -> "\t",
  "columnIdStrengthDelim" -> ":",
  "elementDelim" -> " ",
  "omitScore" -> true
)
/**
 * Value class that lets a `Double` stand on the left-hand side of an arithmetic operator whose
 * right-hand side is a `Matrix` or a `Vector`. Every operator delegates to the corresponding
 * operator on the matrix/vector operand brought in by `RLikeOps`.
 *
 * @param x the scalar left-hand operand
 */
class DoubleScalarOps(val x: Double) extends AnyVal {

  import RLikeOps._

  // Matrix right-hand operands. `+` and `*` delegate with the operands swapped; `-` and `/`
  // go through the right-associative `-:` and `/:` operators of the matrix.

  def +(that: Matrix) = that + x

  def *(that: Matrix) = that * x

  def -(that: Matrix) = x -: that

  def /(that: Matrix) = x /: that

  // Vector right-hand operands, same scheme as above.

  def +(that: Vector) = that + x

  def *(that: Vector) = that * x

  def -(that: Vector) = x -: that

  def /(that: Vector) = x /: that

}
/**
 * Matlab-flavoured operators for an in-core `Matrix`, layered on top of `MatrixOps`.
 *
 * @param _m the matrix being decorated
 */
class MatlabLikeMatrixOps(_m: Matrix) extends MatrixOps(_m) {

  /**
   * matrix-matrix multiplication, delegating to `times`
   * @param that right-hand matrix
   * @return result of `m.times(that)`
   */
  def *(that: Matrix) = m.times(that)

  /**
   * matrix-vector multiplication, delegating to `times`
   * @param that right-hand vector
   * @return result of `m.times(that)`
   */
  def *(that: Vector) = m.times(that)

  /**
   * Hadamard product, applied to `cloned` with `*=`.
   * @param that right-hand matrix
   */
  private[math] def *@(that: Matrix) = cloned *= that

  /**
   * Scaling by a scalar, applied to `cloned` with `*=`.
   * @param that scalar factor
   */
  private[math] def *@(that: Double) = cloned *= that

  /**
   * in-place Hadamard product, carried out slice by slice on the slices' vectors.
   * NOTE(review): the slices are paired with `zip`, so extra slices on either side are skipped
   * without an error -- confirm callers always pass operands of identical geometry.
   * @param that right-hand matrix
   * @return this matrix after the update
   */
  private[math] def *@=(that: Matrix) = {
    m.zip(that).foreach { case (lhsSlice, rhsSlice) =>
      lhsSlice.vector *= rhsSlice.vector
    }
    m
  }

  /**
   * in-place scaling of every slice's vector by a scalar.
   * @param that scalar factor
   * @return this matrix after the update
   */
  private[math] def *@=(that: Double) = {
    m.foreach(slice => slice.vector() *= that)
    m
  }
}
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.scalabindings
+
+import org.apache.mahout.math.{Vector, MatrixTimesOps, Matrix}
+
+/**
+ * Matlab-like operators. Declare <code>import MatlabLikeOps._</code> to enable.
+ *
+ * (This option is mutually exclusive to other translations such as RLikeOps).
+ */
+object MatlabLikeOps {
+
+  /** Decorates a matrix with the Matlab-like matrix operators. */
+  implicit def m2mOps(m: Matrix): MatlabLikeMatrixOps =
+    new MatlabLikeMatrixOps(m)
+
+  /** Decorates a vector with the Matlab-like vector operators. */
+  implicit def v2vOps(v: Vector): MatlabLikeVectorOps =
+    new MatlabLikeVectorOps(v)
+
+  /** Decorates a `MatrixTimesOps` with the Matlab-like `:*` and `*:` operators. */
+  implicit def times2timesOps(m: MatrixTimesOps): MatlabLikeTimesOps =
+    new MatlabLikeTimesOps(m)
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeTimesOps.scala
----------------------------------------------------------------------
diff --git a/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeTimesOps.scala b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeTimesOps.scala
new file mode 100644
index 0000000..9af179a
--- /dev/null
+++ b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeTimesOps.scala
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.scalabindings
+
+import org.apache.mahout.math.{Matrix, MatrixTimesOps}
+
+/**
+ * Matlab-like operator spellings for the two products offered by
+ * `MatrixTimesOps`.
+ *
+ * @param m the object whose `timesLeft` / `timesRight` are invoked
+ */
+class MatlabLikeTimesOps(m: MatrixTimesOps) {
+
+  /**
+   * `A *: ops` -- the operator name ends in a colon, so it binds to its
+   * right-hand operand and the expression calls `m.timesLeft(A)`.
+   */
+  def *:(that: Matrix) = m.timesLeft(that)
+
+  /** `ops :* B` calls `m.timesRight(B)`. */
+  def :*(that: Matrix) = m.timesRight(that)
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeVectorOps.scala
----------------------------------------------------------------------
diff --git a/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeVectorOps.scala b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeVectorOps.scala
new file mode 100644
index 0000000..ca3573f
--- /dev/null
+++ b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatlabLikeVectorOps.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.scalabindings
+
+import org.apache.mahout.math.Vector
+import org.apache.mahout.math.function.Functions
+import RLikeOps._
+
+/**
+ * Matlab-like vector operators.
+ *
+ * For now, all element-wise operators are declared private to the math
+ * package, since the best replacement for Matlab's element-wise '.*' syntax
+ * (which is not directly available in a Scala DSL) is still under discussion.
+ *
+ * @param _v the vector being decorated
+ */
+class MatlabLikeVectorOps(_v: Vector) extends VectorOps(_v) {
+
+  // ---- in-place multiplication ----
+
+  /** Elementwise `*=` with another vector. */
+  private[math] def *@=(that: Vector) = v.assign(that, Functions.MULT)
+
+  /** Elementwise `*=` with a scalar. */
+  private[math] def *@=(that: Double) = v.assign(Functions.MULT, that)
+
+  // ---- in-place division ----
+
+  /** Elementwise `/=` by another vector. */
+  private[math] def /@=(that: Vector) = v.assign(that, Functions.DIV)
+
+  /** Elementwise `/=` by a scalar. */
+  private[math] def /@=(that: Double) = v.assign(Functions.DIV, that)
+
+  /**
+   * Elementwise right-associative `/=` with a vector on the left: applies
+   * `Functions.INV` to this vector, then combines it with `that` through
+   * `Functions.MULT`.
+   */
+  private[math] def /@=:(that: Vector) = v.assign(Functions.INV).assign(that, Functions.MULT)
+
+  /**
+   * Elementwise right-associative `/=` with a scalar on the left: applies
+   * `Functions.INV` to this vector, then `Functions.MULT` with `that`.
+   */
+  private[math] def /@=:(that: Double) = v.assign(Functions.INV).assign(Functions.MULT, that)
+
+  // ---- multiplication on a clone ----
+
+  /** Elementwise `*` with another vector; works on a clone. */
+  private[math] def *@(that: Vector) = cloned *= that
+
+  /** Elementwise `*` with a scalar; works on a clone. */
+  private[math] def *@(that: Double) = cloned *= that
+
+  // ---- division on a clone ----
+
+  /** Elementwise `/` by another vector; works on a clone. */
+  private[math] def /@(that: Vector) = cloned /= that
+
+  /** Elementwise `/` by a scalar; works on a clone. */
+  private[math] def /@(that: Double) = cloned /= that
+
+  /** Elementwise right-associative `/`: `that /=:` applied to a clone of this vector. */
+  private[math] def /@:(that: Double) = that /=: v.cloned
+
+  /** Elementwise right-associative `/`: a clone of `that` divided in place by this vector. */
+  private[math] def /@:(that: Vector) = that.cloned /= v
+
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
----------------------------------------------------------------------
diff --git a/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
new file mode 100644
index 0000000..910035f
--- /dev/null
+++ b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/MatrixOps.scala
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.scalabindings
+
+import org.apache.mahout.math.{Matrices, QRDecomposition, Vector, Matrix}
+import scala.collection.JavaConversions._
+import org.apache.mahout.math.function.{DoubleDoubleFunction, VectorFunction, DoubleFunction, Functions}
+import scala.math._
+
+/**
+ * Syntax-neutral operators on a `Matrix`: additive arithmetic, norms,
+ * element and block access, assignment, comparison and row/column
+ * aggregates. The R-like and Matlab-like decorators extend this class and
+ * add their own multiplicative operators.
+ *
+ * Operators ending in `=` modify `m` in place; the plain binary operators
+ * work on a clone and leave `m` untouched.
+ *
+ * @param m the matrix being decorated
+ */
+class MatrixOps(val m: Matrix) {
+
+  import MatrixOps._
+
+  // We need this for some functions below (but it would screw some functions above)
+  import RLikeOps.v2vOps
+
+  /** Number of rows of `m`. */
+  def nrow = m.rowSize()
+
+  /** Number of columns of `m`. */
+  def ncol = m.columnSize()
+
+  /**
+   * Warning: this creates a clone (as in mx * -1), in many applications inplace inversion `mx *= -1`
+   * might be an infinitely better choice.
+   */
+  def unary_- = cloned.assign(Functions.NEGATE)
+
+  /** In-place `A := A + B`. */
+  def +=(that: Matrix) = m.assign(that, Functions.PLUS)
+
+  /** In-place `A := A - B`. */
+  def -=(that: Matrix) = m.assign(that, Functions.MINUS)
+
+  /** In-place addition of a scalar to every element. */
+  def +=(that: Double) = m.assign(new DoubleFunction {
+    def apply(x: Double): Double = x + that
+  })
+
+  /** In-place subtraction of a scalar, expressed as addition of `-that`. */
+  def -=(that: Double) = +=(-that)
+
+  /**
+   * In-place right-associative subtraction with a scalar on the left
+   * (`that -=: A`).
+   *
+   * NOTE(review): `Functions.minus` is defined outside this file. If
+   * `Functions.minus(b)` evaluates to `x - b`, this yields `A - that` rather
+   * than `that - A`, which would disagree with the matrix overload below
+   * (documented as `A := B - A`) -- TODO confirm.
+   */
+  def -=:(that: Double) = m.assign(Functions.minus(that))
+
+  /** A := B - A which is -(A - B) */
+  def -=:(that: Matrix) = m.assign(that, Functions.chain(Functions.NEGATE, Functions.MINUS))
+
+  /** `A + B`, computed on a clone of `m`. */
+  def +(that: Matrix) = cloned += that
+
+  /** `A - B`, computed on a clone of `m`. */
+  def -(that: Matrix) = cloned -= that
+
+  /** Right-associative subtraction: `B -: A` evaluates `B - A`. */
+  def -:(that: Matrix) = that - m
+
+  // m.plus(that)?
+
+  /** `A + x`, computed on a clone of `m`. */
+  def +(that: Double) = cloned += that
+
+  /** `x +: A`, computed on a clone of `m`. */
+  def +:(that: Double) = cloned += that
+
+  /** `A - x`, computed on a clone of `m`. */
+  def -(that: Double) = cloned -= that
+
+  /** `x -: A`: applies `that -=:` to a clone of `m`. */
+  def -:(that: Double) = that -=: cloned
+
+
+  /** Square root of the `PLUS`-aggregate of the squared elements. */
+  def norm = sqrt(m.aggregate(Functions.PLUS, Functions.SQUARE))
+
+  /**
+   * p-norm: the `PLUS`-aggregate of the elements passed through `ABS` and
+   * `pow(p)`, raised to the power `1/p`.
+   *
+   * @param p the order of the norm
+   */
+  def pnorm(p: Int) = pow(m.aggregate(Functions.PLUS, Functions.chain(Functions.ABS, Functions.pow(p))), 1.0 / p)
+
+  /** Element read, `m(row, col)`. */
+  def apply(row: Int, col: Int) = m.get(row, col)
+
+  /**
+   * Element write, `m(row, col) = v`; goes through `setQuick`.
+   *
+   * @return the receiver matrix
+   */
+  def update(row: Int, col: Int, v: Double): Matrix = {
+    m.setQuick(row, col, v)
+    m
+  }
+
+  /** Block assignment, `m(rowRange, colRange) = that`. */
+  def update(rowRange: Range, colRange: Range, that: Matrix) = apply(rowRange, colRange) := that
+
+  /** Partial-row assignment, `m(row, colRange) = that`. */
+  def update(row: Int, colRange: Range, that: Vector) = apply(row, colRange) := that
+
+  /** Partial-column assignment, `m(rowRange, col) = that`. */
+  def update(rowRange: Range, col: Int, that: Vector) = apply(rowRange, col) := that
+
+  /**
+   * Block view. A range equal to `::` selects the whole dimension; when both
+   * ranges equal `::` the matrix itself is returned instead of a view.
+   *
+   * @param rowRange rows to select, or `::`
+   * @param colRange columns to select, or `::`
+   * @return `m` itself or a `viewPart` of it
+   */
+  def apply(rowRange: Range, colRange: Range): Matrix =
+    if (rowRange == :: && colRange == ::) m
+    else {
+      val rr = if (rowRange == ::) 0 until m.nrow else rowRange
+      val cr = if (colRange == ::) 0 until m.ncol else colRange
+      m.viewPart(rr.start, rr.length, cr.start, cr.length)
+    }
+
+  /**
+   * Row view, narrowed to `colRange` unless that range equals `::`.
+   *
+   * @param row      index of the row
+   * @param colRange columns to select, or `::`
+   */
+  def apply(row: Int, colRange: Range): Vector = {
+    val rowView = m.viewRow(row)
+    if (colRange == ::) rowView
+    else rowView.viewPart(colRange.start, colRange.length)
+  }
+
+  /**
+   * Column view, narrowed to `rowRange` unless that range equals `::`.
+   *
+   * @param rowRange rows to select, or `::`
+   * @param col      index of the column
+   */
+  def apply(rowRange: Range, col: Int): Vector = {
+    val colView = m.viewColumn(col)
+    if (rowRange == ::) colView
+    else colView.viewPart(rowRange.start, rowRange.length)
+  }
+
+  /**
+   * Warning: This provides read-only view only.
+   * In most cases that's what one wants. To get a copy,
+   * use <code>m.t cloned</code>
+   * @return transposed view
+   */
+  def t = Matrices.transposedView(m)
+
+  /** Determinant, as computed by `m.determinant()`. */
+  def det = m.determinant()
+
+  /** Result of `m.zSum()`. */
+  def sum = m.zSum()
+
+  /** In-place assignment from another matrix. */
+  def :=(that: Matrix) = m.assign(that)
+
+  /**
+   * Assigning from a row-wise collection of vectors: the i-th vector
+   * produced by `that` is assigned to row i.
+   *
+   * @param that the rows, in order
+   */
+  def :=(that: TraversableOnce[Vector]): Unit =
+    that.toIterator.zipWithIndex.foreach { case (v, row) => m.assignRow(row, v) }
+
+  /**
+   * In-place functional assignment: every element is replaced by
+   * `f(row, col, currentValue)`, visiting rows in order and columns within
+   * each row.
+   *
+   * @return the receiver matrix
+   */
+  def :=(f: (Int, Int, Double) => Double): Matrix = {
+    for (r <- 0 until nrow; c <- 0 until ncol) m(r, c) = f(r, c, m(r, c))
+    m
+  }
+
+  /** A copy of `m`: a matrix obtained from `m.like` with `m` assigned into it. */
+  def cloned: Matrix = m.like := m
+
+  /**
+   * Ideally, we would probably want to override equals(). But that is not
+   * possible without modifying AbstractMatrix implementation in Mahout
+   * which would require discussion at Mahout team.
+   *
+   * NOTE(review): only the row counts are compared directly here; whether a
+   * column-count mismatch is detected depends on the row-level `equiv`,
+   * which is defined outside this file -- TODO confirm.
+   *
+   * @param that matrix to compare with; `null` compares as not equivalent
+   * @return true if the row counts match and all paired rows are equivalent
+   */
+  def equiv(that: Matrix) =
+    that != null &&
+      nrow == that.nrow &&
+      m.view.zip(that).forall(t => {
+        t._1.equiv(t._2)
+      })
+
+  /** Negation of [[equiv]]. */
+  def nequiv(that: Matrix) = !equiv(that)
+
+  /** Operator alias of [[equiv]]. */
+  def ===(that: Matrix) = equiv(that)
+
+  /** Operator alias of [[nequiv]]. */
+  def !==(that: Matrix) = nequiv(that)
+
+  /**
+   * test if rank == min(nrow,ncol).
+   *
+   * Runs a QR decomposition on the transposed view when there are fewer rows
+   * than columns, and on a clone otherwise.
+   *
+   * @return the `hasFullRank` verdict of that decomposition
+   */
+  def isFullRank: Boolean =
+    new QRDecomposition(if (nrow < ncol) m.t else m.cloned).hasFullRank
+
+  /** Column aggregate, using the sum of each aggregated vector. */
+  def colSums() = m.aggregateColumns(vectorSumFunc)
+
+  /** Row aggregate, using the sum of each aggregated vector. */
+  def rowSums() = m.aggregateRows(vectorSumFunc)
+
+  /** Column sums divided by the row count; the plain sums when there are no rows. */
+  def colMeans() = if (m.nrow == 0) colSums() else colSums() /= m.nrow
+
+  /** Row sums divided by the column count; the plain sums when there are no columns. */
+  def rowMeans() = if (m.ncol == 0) rowSums() else rowSums() /= m.ncol
+
+  /* Diagonal */
+  def diagv: Vector = m.viewDiagonal()
+
+  /* Diagonal assignment */
+  def diagv_=(that: Vector) = diagv := that
+
+  /* Diagonal assignment */
+  def diagv_=(that: Double) = diagv := that
+
+  /* Row and Column non-zero element counts */
+  def numNonZeroElementsPerColumn() = m.aggregateColumns(vectorCountNonZeroElementsFunc)
+
+  def numNonZeroElementsPerRow() = m.aggregateRows(vectorCountNonZeroElementsFunc)
+}
+
+/** Implicit decoration of `Matrix` plus the aggregation functions used by [[MatrixOps]]. */
+object MatrixOps {
+
+  import RLikeOps.v2vOps
+
+  /** Decorates a matrix with the syntax-neutral operators. */
+  implicit def m2ops(m: Matrix): MatrixOps = new MatrixOps(m)
+
+  /** Maps a vector to its `sum`. */
+  private def vectorSumFunc = new VectorFunction {
+    def apply(f: Vector): Double = f.sum
+  }
+
+  /** Maps a vector to its number of non-zero elements, as a `Double`. */
+  private def vectorCountNonZeroElementsFunc = new VectorFunction {
+    //def apply(f: Vector): Double = f.aggregate(Functions.PLUS, Functions.notEqual(0))
+    def apply(f: Vector): Double = f.getNumNonZeroElements().toDouble
+  }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
----------------------------------------------------------------------
diff --git a/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
new file mode 100644
index 0000000..97e06cf
--- /dev/null
+++ b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeMatrixOps.scala
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.math.scalabindings
+
+import org.apache.mahout.math.{Vector, Matrix}
+import scala.collection.JavaConversions._
+import RLikeOps._
+
+/**
+ * R-flavoured matrix operators: `%*%` is the linear-algebra product, while
+ * `*` and `/` (and their in-place forms `*=`, `/=`) work element by element.
+ *
+ * @param m the matrix being decorated
+ */
+class RLikeMatrixOps(m: Matrix) extends MatrixOps(m) {
+
+  /**
+   * Matrix-matrix multiplication.
+   *
+   * @param that right-hand matrix
+   * @return the result of `m.times(that)`
+   */
+  def %*%(that: Matrix) = m.times(that)
+
+  /**
+   * Matrix-vector multiplication.
+   *
+   * @param that right-hand vector
+   * @return the result of `m.times(that)`
+   */
+  def %*%(that: Vector) = m.times(that)
+
+  /**
+   * Hadamard product, computed on a clone of the receiver.
+   *
+   * @param that matrix to multiply with, element by element
+   * @return the modified clone
+   */
+  def *(that: Matrix) = cloned *= that
+
+  /** Product with a scalar, computed on a clone of the receiver. */
+  def *(that: Double) = cloned *= that
+
+  /** Right-associative product with a scalar (`x *: A`), computed on a clone. */
+  def *:(that: Double) = cloned *= that
+
+  /** Elementwise division by a matrix, computed on a clone of the receiver. */
+  def /(that: Matrix) = cloned /= that
+
+  /** Right-associative elementwise division: `B /: A` evaluates `B / A`. */
+  def /:(that: Matrix) = that / m
+
+  /** Division by a scalar, computed on a clone of the receiver. */
+  def /(that: Double) = cloned /= that
+
+  /** 1.0 /: A is equivalent to R's 1.0/A */
+  def /:(that: Double) = that /=: cloned
+
+  /**
+   * In-place Hadamard product. We probably don't want to use assign here:
+   * going row by row leaves room to optimize for sparse operations, which
+   * in the case of a Hadamard product really can be done.
+   *
+   * @param that matrix to multiply with, element by element
+   * @return the receiver matrix, after modification
+   */
+  def *=(that: Matrix) = {
+    // Pair up the rows of both matrices and multiply each pair in place.
+    m.zip(that).foreach { case (lhsRow, rhsRow) =>
+      lhsRow.vector *= rhsRow.vector
+    }
+    m
+  }
+
+  /**
+   * Elementwise division, in place.
+   *
+   * @param that matrix to divide by, element by element
+   * @return the receiver matrix, after modification
+   */
+  def /=(that: Matrix) = {
+    m.zip(that).foreach { case (lhsRow, rhsRow) =>
+      lhsRow.vector() /= rhsRow.vector
+    }
+    m
+  }
+
+  /** In-place product with a scalar, applied row by row; returns the receiver matrix. */
+  def *=(that: Double) = {
+    m.foreach(row => row.vector() *= that)
+    m
+  }
+
+  /** In-place division by a scalar, applied row by row; returns the receiver matrix. */
+  def /=(that: Double) = {
+    m.foreach(row => row.vector() /= that)
+    m
+  }
+
+  /** 1.0 /=: A is equivalent to A = 1.0/A in R */
+  def /=:(that: Double) = {
+    m.foreach(row => that /=: row.vector())
+    m
+  }
+}
+
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
----------------------------------------------------------------------
diff --git a/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
new file mode 100644
index 0000000..ba32304
--- /dev/null
+++ b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeOps.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.scalabindings
+
+import org.apache.mahout.math.{Vector, MatrixTimesOps, Matrix}
+
+/**
+ * R-like operators. Declare <code>import RLikeOps._</code> to enable.
+ */
+object RLikeOps {
+
+  /** Lets a `Double` appear on the left of a matrix or vector operator. */
+  implicit def double2Scalar(x: Double): DoubleScalarOps =
+    new DoubleScalarOps(x)
+
+  /** Decorates a matrix with the R-like matrix operators. */
+  implicit def m2mOps(m: Matrix): RLikeMatrixOps =
+    new RLikeMatrixOps(m)
+
+  /** Decorates a vector with the R-like vector operators. */
+  implicit def v2vOps(v: Vector): RLikeVectorOps =
+    new RLikeVectorOps(v)
+
+  /** Decorates a single vector element with `ElementOps`. */
+  implicit def el2elOps(el: Vector.Element): ElementOps =
+    new ElementOps(el)
+
+  /** Decorates a `MatrixTimesOps` with the R-like `:%*%` and `%*%:` operators. */
+  implicit def times2timesOps(m: MatrixTimesOps): RLikeTimesOps =
+    new RLikeTimesOps(m)
+
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeTimesOps.scala
----------------------------------------------------------------------
diff --git a/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeTimesOps.scala b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeTimesOps.scala
new file mode 100644
index 0000000..51f0f63
--- /dev/null
+++ b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeTimesOps.scala
@@ -0,0 +1,28 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.scalabindings
+
+import org.apache.mahout.math.{Matrix, MatrixTimesOps}
+
+/**
+ * R-like operator spellings for the two products offered by
+ * `MatrixTimesOps`.
+ *
+ * @param m the object whose `timesLeft` / `timesRight` are invoked
+ */
+class RLikeTimesOps(m: MatrixTimesOps) {
+
+  /**
+   * `A %*%: ops` -- the operator name ends in a colon, so it binds to its
+   * right-hand operand and the expression calls `m.timesLeft(A)`.
+   */
+  def %*%:(that: Matrix) = m.timesLeft(that)
+
+  /** `ops :%*% B` calls `m.timesRight(B)`. */
+  def :%*%(that: Matrix) = m.timesRight(that)
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7b69fab/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
----------------------------------------------------------------------
diff --git a/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
new file mode 100644
index 0000000..d2198bd
--- /dev/null
+++ b/samsara/src/main/scala/org/apache/mahout/math/scalabindings/RLikeVectorOps.scala
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.scalabindings
+
+import org.apache.mahout.math.Vector
+import org.apache.mahout.math.function.Functions
+import RLikeOps._
+
+/**
+ * R-like vector operators: `*` and `/` work element by element. The forms
+ * ending in `=` modify the decorated vector in place, the others work on a
+ * clone.
+ *
+ * @param _v the vector being decorated
+ */
+class RLikeVectorOps(_v: Vector) extends VectorOps(_v) {
+
+  // ---- in-place multiplication ----
+
+  /** Elementwise `*=` with another vector. */
+  def *=(that: Vector) = v.assign(that, Functions.MULT)
+
+  /** Elementwise `*=` with a scalar. */
+  def *=(that: Double) = v.assign(Functions.MULT, that)
+
+  // ---- in-place division ----
+
+  /** Elementwise `/=` by another vector. */
+  def /=(that: Vector) = v.assign(that, Functions.DIV)
+
+  /** Elementwise `/=` by a scalar. */
+  def /=(that: Double) = v.assign(Functions.DIV, that)
+
+  /**
+   * Elementwise right-associative `/=` with a vector on the left: applies
+   * `Functions.INV` to this vector, then combines it with `that` through
+   * `Functions.MULT`.
+   */
+  def /=:(that: Vector) = v.assign(Functions.INV).assign(that, Functions.MULT)
+
+  /**
+   * Elementwise right-associative `/=` with a scalar on the left: applies
+   * `Functions.INV` to this vector, then `Functions.MULT` with `that`.
+   */
+  def /=:(that: Double) = v.assign(Functions.INV).assign(Functions.MULT, that)
+
+  // ---- multiplication on a clone ----
+
+  /** Elementwise `*` with another vector; works on a clone. */
+  def *(that: Vector) = cloned *= that
+
+  /** Elementwise `*` with a scalar; works on a clone. */
+  def *(that: Double) = cloned *= that
+
+  /** Elementwise right-associative `*` with a scalar on the left; works on a clone. */
+  def *:(that: Double) = cloned *= that
+
+  // ---- division on a clone ----
+
+  /** Elementwise `/` by another vector; works on a clone. */
+  def /(that: Vector) = cloned /= that
+
+  /** Elementwise `/` by a scalar; works on a clone. */
+  def /(that: Double) = cloned /= that
+
+  /** Elementwise right-associative `/`: `that /=:` applied to a clone of this vector. */
+  def /:(that: Double) = that /=: v.cloned
+
+  /** Elementwise right-associative `/`: a clone of `that` divided in place by this vector. */
+  def /:(that: Vector) = that.cloned /= v
+
+
+}
\ No newline at end of file
