[GitHub] incubator-hivemall pull request #111: [HIVEMALL-17] Support SLIM

2017-09-27 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/incubator-hivemall/pull/111


---


[GitHub] incubator-hivemall pull request #111: [HIVEMALL-17] Support SLIM

2017-09-21 Thread myui
Github user myui commented on a diff in the pull request:

https://github.com/apache/incubator-hivemall/pull/111#discussion_r140207562
  
--- Diff: core/src/test/java/hivemall/recommend/SlimUDTFTest.java ---
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.recommend;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.*;
+
+public class SlimUDTFTest {
+@Test
+public void testAllSamples() throws HiveException {
+SlimUDTF slim = new SlimUDTF();
+ObjectInspector itemIOI = 
PrimitiveObjectInspectorFactory.javaIntObjectInspector;
+ObjectInspector itemJOI = 
PrimitiveObjectInspectorFactory.javaIntObjectInspector;
+
+ObjectInspector itemIRatesOI = 
ObjectInspectorFactory.getStandardMapObjectInspector(
+PrimitiveObjectInspectorFactory.javaIntObjectInspector,
+PrimitiveObjectInspectorFactory.javaFloatObjectInspector);
+ObjectInspector itemJRatesOI = 
ObjectInspectorFactory.getStandardMapObjectInspector(
+PrimitiveObjectInspectorFactory.javaIntObjectInspector,
+PrimitiveObjectInspectorFactory.javaFloatObjectInspector);
+ObjectInspector topKRatesOfIOI = 
ObjectInspectorFactory.getStandardMapObjectInspector(
+PrimitiveObjectInspectorFactory.javaIntObjectInspector,
+ObjectInspectorFactory.getStandardMapObjectInspector(
+PrimitiveObjectInspectorFactory.javaIntObjectInspector,
+PrimitiveObjectInspectorFactory.javaFloatObjectInspector));
+ObjectInspector optionArgumentOI = 
ObjectInspectorUtils.getConstantObjectInspector(
+PrimitiveObjectInspectorFactory.javaStringObjectInspector, 
"-l2 0.01 -l1 0.01");
+
+ObjectInspector[] argOIs = {itemIOI, itemIRatesOI, topKRatesOfIOI, 
itemJOI, itemJRatesOI,
+optionArgumentOI};
+
+slim.initialize(argOIs);
+int numUser = 4;
+int numItem = 5;
+
+float[][] data = { {1.f, 4.f, 0.f, 0.f, 0.f}, {0.f, 3.f, 0.f, 1.f, 
2.f},
+{2.f, 2.f, 0.f, 0.f, 3.f}, {0.f, 1.f, 1.f, 0.f, 0.f}};
+
+for (int i = 0; i < numItem; i++) {
+Map Ri = new HashMap<>();
+for (int u = 0; u < numUser; u++) {
+if (data[u][i] != 0.) {
+Ri.put(u, data[u][i]);
+}
+}
+
+// most similar data
+Map> knnRatesOfI = new 
HashMap<>();
+for (int u = 0; u < numUser; u++) {
+Map Ru = new HashMap<>();
+for (int k = 0; k < numItem; k++) {
+if (k == i)
+continue;
+Ru.put(k, data[u][k]);
+}
+knnRatesOfI.put(u, Ru);
+}
+
+for (int j = 0; j < numItem; j++) {
+if (i == j)
+continue;
+Map Rj = new HashMap<>();
+for (int u = 0; u < numUser; u++) {
+if (data[u][j] != 0.) {
+Rj.put(u, data[u][j]);
+}
+}
+
+Object[] args = {i, Ri, knnRatesOfI, j, Rj};
+slim.process(args);
+}
+}
+slim.finalizeTraining();
+}
+
+@Test(expected = HiveException.class)
+public void test

[GitHub] incubator-hivemall pull request #111: [HIVEMALL-17] Support SLIM

2017-09-20 Thread myui
Github user myui commented on a diff in the pull request:

https://github.com/apache/incubator-hivemall/pull/111#discussion_r139901202
  
--- Diff: core/src/main/java/hivemall/evaluation/HitRateUDAF.java ---
@@ -0,0 +1,261 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+package hivemall.evaluation;
+
+import hivemall.utils.hadoop.HiveUtils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import javax.annotation.Nonnull;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+
+@Description(
+name = "hitrate",
+value = "_FUNC_(array rankItems, array correctItems [, const int 
recommendSize = rankItems.size])"
++ " - Returns HitRate")
+public final class HitRateUDAF extends AbstractGenericUDAFResolver {
+
+// prevent instantiation
+private HitRateUDAF() {}
+
+@Override
+public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) 
throws SemanticException {
+if (typeInfo.length != 2 && typeInfo.length != 3) {
+throw new UDFArgumentTypeException(typeInfo.length - 1,
+"_FUNC_ takes two or three arguments");
+}
+
+ListTypeInfo arg1type = HiveUtils.asListTypeInfo(typeInfo[0]);
+if 
(!HiveUtils.isPrimitiveTypeInfo(arg1type.getListElementTypeInfo())) {
+throw new UDFArgumentTypeException(0,
+"The first argument `array rankItems` is invalid form: " + 
typeInfo[0]);
+}
+ListTypeInfo arg2type = HiveUtils.asListTypeInfo(typeInfo[1]);
+if 
(!HiveUtils.isPrimitiveTypeInfo(arg2type.getListElementTypeInfo())) {
+throw new UDFArgumentTypeException(1,
+"The second argument `array correctItems` is invalid form: 
" + typeInfo[1]);
+}
+
+return new HitRateUDAF.Evaluator();
+}
+

[GitHub] incubator-hivemall pull request #111: [HIVEMALL-17] Support SLIM

2017-09-20 Thread myui
Github user myui commented on a diff in the pull request:

https://github.com/apache/incubator-hivemall/pull/111#discussion_r139900716
  
--- Diff: 
core/src/main/java/hivemall/evaluation/GradedResponsesMeasures.java ---
@@ -54,4 +54,23 @@ public static double DCG(final List 
topRelScoreList, final int recommend
 return dcg;
 }
 
+
+/**
+ * Computes Reciprocal HitRank (RHR)
+ *
+ * @param recommendList predicted item list order by score
+ * @param truthList ground truth item list order by rank
+ * @param recommendSize the number of positive items
+ * @return RHR
+ */
+public static double RHR(final List recommendList, final List 
truthList,
--- End diff --

@nzw0301 (cc: @takuti )

This is not a graded response but a binary response.

`ReciprocalRank` is already implemented in `BinaryResponseMeasures` and  
`MRRUDAF` implements [Mean reciprocal 
rank](https://en.wikipedia.org/wiki/Mean_reciprocal_rank).

ARHRUDAF duplicates MRRUDAF, and MRR is the more generic name for ARHR.

[Average reciprocal hit-rate is also referred to as the mean reciprocal 
rank 
(MRR)](https://books.google.co.jp/books?id=GKjWCwAAQBAJ&pg=PA246&lpg=PA246&dq=%22average+reciprocal+hit-rate%22+MRR&source=bl&ots=Tq_wVfI-QU&sig=Hbyh-R1Sm7M26pZbHPjuTfyXXqI&hl=ja&sa=X&ved=0ahUKEwie1MD0prPWAhWMW7wKHVHzD88Q6AEIJzAA#v=onepage&q=%22average%20reciprocal%20hit-rate%22%20MRR&f=false)


---


[GitHub] incubator-hivemall pull request #111: [HIVEMALL-17] Support SLIM

2017-09-12 Thread nzw0301
Github user nzw0301 commented on a diff in the pull request:

https://github.com/apache/incubator-hivemall/pull/111#discussion_r138523391
  
--- Diff: core/src/main/java/hivemall/recommend/SlimUDTF.java ---
@@ -0,0 +1,636 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.recommend;
+
+
+import hivemall.UDTFWithOptions;
+import hivemall.annotations.VisibleForTesting;
+import hivemall.common.ConversionState;
+import hivemall.math.matrix.sparse.DoKMatrix;
+import hivemall.math.vector.VectorProcedure;
+import hivemall.utils.collections.maps.Int2FloatOpenHashTable;
+import hivemall.utils.hadoop.HiveUtils;
+import hivemall.utils.io.FileUtils;
+import hivemall.utils.io.NioStatefullSegment;
+import hivemall.utils.lang.NumberUtils;
+import hivemall.utils.lang.Primitives;
+import hivemall.utils.lang.SizeOf;
+import hivemall.utils.lang.mutable.MutableDouble;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.*;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.*;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.Reporter;
+
+import javax.annotation.Nonnull;
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.*;
+
+
+@Description(
+name = "train_slim",
+value = "_FUNC_( int i, map r_i, map> topKRatesOfI, int j, map r_j [, constant string options]) 
" +
+"- Returns row index, column index and non-zero weight 
value of prediction model")
+public class SlimUDTF extends UDTFWithOptions {
+private static final Log logger = LogFactory.getLog(SlimUDTF.class);
+
+private double l1;
+private double l2;
+private int numIterations;
+private int previousItemId;
+
+private transient DoKMatrix weightMatrix; // item-item weight matrix
+private transient DoKMatrix dataMatrix; // item-user matrix to get the 
number of nnz values in column
+
+private PrimitiveObjectInspector itemIOI;
+private PrimitiveObjectInspector itemJOI;
+private MapObjectInspector riOI;
+private MapObjectInspector rjOI;
+
+private MapObjectInspector knnItemsOI;
+private PrimitiveObjectInspector knnItemsKeyOI;
+private MapObjectInspector knnItemsValueOI;
+private PrimitiveObjectInspector knnItemsValueKeyOI;
+private PrimitiveObjectInspector knnItemsValueValueOI;
+
+private PrimitiveObjectInspector riKeyOI;
+private PrimitiveObjectInspector riValueOI;
+
+private PrimitiveObjectInspector rjKeyOI;
+private PrimitiveObjectInspector rjValueOI;
+
+// used to store KNN data into temporary file for iterative training
+private NioStatefullSegment fileIO;
+private ByteBuffer inputBuf;
+
+private ConversionState cvState;
+private long observedTrainingExamples;
+
+public SlimUDTF() {}
+
+@Override
+public StructObjectInspector initialize(ObjectInspector[] argOIs) 
throws UDFArgumentException {
+final int numArgs = argOIs.length;
+
+if (numArgs == 1 && HiveUtils.isStringOI(argOIs[0])) {
--- End diff --

I added these few lines to show the SLIM explanation when no other arguments are given. 
But it may not be the conventional way to show `help` in Hivemall.


---