[GitHub] incubator-hivemall pull request #111: [HIVEMALL-17] Support SLIM
Github user asfgit closed the pull request at: https://github.com/apache/incubator-hivemall/pull/111 ---
[GitHub] incubator-hivemall pull request #111: [HIVEMALL-17] Support SLIM
Github user myui commented on a diff in the pull request: https://github.com/apache/incubator-hivemall/pull/111#discussion_r140207562 --- Diff: core/src/test/java/hivemall/recommend/SlimUDTFTest.java --- @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.recommend; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.*; + +public class SlimUDTFTest { +@Test +public void testAllSamples() throws HiveException { +SlimUDTF slim = new SlimUDTF(); +ObjectInspector itemIOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector; +ObjectInspector itemJOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector; + +ObjectInspector itemIRatesOI = ObjectInspectorFactory.getStandardMapObjectInspector( +PrimitiveObjectInspectorFactory.javaIntObjectInspector, +PrimitiveObjectInspectorFactory.javaFloatObjectInspector); +ObjectInspector itemJRatesOI = ObjectInspectorFactory.getStandardMapObjectInspector( +PrimitiveObjectInspectorFactory.javaIntObjectInspector, +PrimitiveObjectInspectorFactory.javaFloatObjectInspector); +ObjectInspector topKRatesOfIOI = ObjectInspectorFactory.getStandardMapObjectInspector( +PrimitiveObjectInspectorFactory.javaIntObjectInspector, +ObjectInspectorFactory.getStandardMapObjectInspector( +PrimitiveObjectInspectorFactory.javaIntObjectInspector, +PrimitiveObjectInspectorFactory.javaFloatObjectInspector)); +ObjectInspector optionArgumentOI = ObjectInspectorUtils.getConstantObjectInspector( +PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-l2 0.01 -l1 0.01"); + +ObjectInspector[] argOIs = {itemIOI, itemIRatesOI, topKRatesOfIOI, itemJOI, itemJRatesOI, +optionArgumentOI}; + +slim.initialize(argOIs); +int numUser = 4; +int numItem = 5; + +float[][] data = { {1.f, 4.f, 0.f, 0.f, 0.f}, {0.f, 3.f, 0.f, 1.f, 2.f}, +{2.f, 2.f, 0.f, 0.f, 3.f}, {0.f, 
1.f, 1.f, 0.f, 0.f}}; + +for (int i = 0; i < numItem; i++) { +Map Ri = new HashMap<>(); +for (int u = 0; u < numUser; u++) { +if (data[u][i] != 0.) { +Ri.put(u, data[u][i]); +} +} + +// most similar data +Map> knnRatesOfI = new HashMap<>(); +for (int u = 0; u < numUser; u++) { +Map Ru = new HashMap<>(); +for (int k = 0; k < numItem; k++) { +if (k == i) +continue; +Ru.put(k, data[u][k]); +} +knnRatesOfI.put(u, Ru); +} + +for (int j = 0; j < numItem; j++) { +if (i == j) +continue; +Map Rj = new HashMap<>(); +for (int u = 0; u < numUser; u++) { +if (data[u][j] != 0.) { +Rj.put(u, data[u][j]); +} +} + +Object[] args = {i, Ri, knnRatesOfI, j, Rj}; +slim.process(args); +} +} +slim.finalizeTraining(); +} + +@Test(expected = HiveException.class) +public void test
[GitHub] incubator-hivemall pull request #111: [HIVEMALL-17] Support SLIM
Github user myui commented on a diff in the pull request: https://github.com/apache/incubator-hivemall/pull/111#discussion_r139901202 --- Diff: core/src/main/java/hivemall/evaluation/HitRateUDAF.java --- @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+*/ +package hivemall.evaluation; + +import hivemall.utils.hadoop.HiveUtils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import javax.annotation.Nonnull; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.LongWritable; + + +@Description( +name = "hitrate", +value = "_FUNC_(array rankItems, array correctItems [, const int recommendSize = rankItems.size])" ++ " - Returns HitRate") +public final class HitRateUDAF extends AbstractGenericUDAFResolver { + +// prevent instantiation +private HitRateUDAF() {} + +@Override +public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo) throws SemanticException { +if (typeInfo.length != 2 && typeInfo.length != 3) { +throw new UDFArgumentTypeException(typeInfo.length - 1, +"_FUNC_ takes two or three arguments"); +} + +ListTypeInfo arg1type = 
HiveUtils.asListTypeInfo(typeInfo[0]); +if (!HiveUtils.isPrimitiveTypeInfo(arg1type.getListElementTypeInfo())) { +throw new UDFArgumentTypeException(0, +"The first argument `array rankItems` is invalid form: " + typeInfo[0]); +} +ListTypeInfo arg2type = HiveUtils.asListTypeInfo(typeInfo[1]); +if (!HiveUtils.isPrimitiveTypeInfo(arg2type.getListElementTypeInfo())) { +throw new UDFArgumentTypeException(1, +"The second argument `array correctItems` is invalid form: " + typeInfo[1]); +} + +return new HitRateUDAF.Evaluator(); +} +
[GitHub] incubator-hivemall pull request #111: [HIVEMALL-17] Support SLIM
Github user myui commented on a diff in the pull request: https://github.com/apache/incubator-hivemall/pull/111#discussion_r139900716 --- Diff: core/src/main/java/hivemall/evaluation/GradedResponsesMeasures.java --- @@ -54,4 +54,23 @@ public static double DCG(final List topRelScoreList, final int recommend return dcg; } + +/** + * Computes Reciprocal HitRank (RHR) + * + * @param recommendList predicted item list order by score + * @param truthList ground truth item list order by rank + * @param recommendSize the number of positive items + * @return RHR + */ +public static double RHR(final List recommendList, final List truthList, --- End diff -- @nzw0301 (cc: @takuti ) This is not graded response but binary response. `ReciprocalRank` is already implemented in `BinaryResponseMeasures` and `MRRUDAF` implements [Mean reciprocal rank](https://en.wikipedia.org/wiki/Mean_reciprocal_rank). ARHRUDAF is a duplicate of MRRUDAF, and MRR is the more generic name for ARHR. [Average reciprocal hit-rate is also referred to as the mean reciprocal rank (MRR)](https://books.google.co.jp/books?id=GKjWCwAAQBAJ&pg=PA246&lpg=PA246&dq=%22average+reciprocal+hit-rate%22+MRR&source=bl&ots=Tq_wVfI-QU&sig=Hbyh-R1Sm7M26pZbHPjuTfyXXqI&hl=ja&sa=X&ved=0ahUKEwie1MD0prPWAhWMW7wKHVHzD88Q6AEIJzAA#v=onepage&q=%22average%20reciprocal%20hit-rate%22%20MRR&f=false) ---
[GitHub] incubator-hivemall pull request #111: [HIVEMALL-17] Support SLIM
Github user nzw0301 commented on a diff in the pull request: https://github.com/apache/incubator-hivemall/pull/111#discussion_r138523391 --- Diff: core/src/main/java/hivemall/recommend/SlimUDTF.java --- @@ -0,0 +1,636 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.recommend; + + +import hivemall.UDTFWithOptions; +import hivemall.annotations.VisibleForTesting; +import hivemall.common.ConversionState; +import hivemall.math.matrix.sparse.DoKMatrix; +import hivemall.math.vector.VectorProcedure; +import hivemall.utils.collections.maps.Int2FloatOpenHashTable; +import hivemall.utils.hadoop.HiveUtils; +import hivemall.utils.io.FileUtils; +import hivemall.utils.io.NioStatefullSegment; +import hivemall.utils.lang.NumberUtils; +import hivemall.utils.lang.Primitives; +import hivemall.utils.lang.SizeOf; +import hivemall.utils.lang.mutable.MutableDouble; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.*; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.*; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.mapred.Counters; +import org.apache.hadoop.mapred.Reporter; + +import javax.annotation.Nonnull; +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.*; + + +@Description( +name = "train_slim", +value = "_FUNC_( int i, map r_i, map> topKRatesOfI, int j, map r_j [, constant string options]) " + +"- Returns row index, column index and non-zero weight value of prediction model") +public class SlimUDTF extends UDTFWithOptions { +private static final Log logger = LogFactory.getLog(SlimUDTF.class); + +private double l1; +private double l2; +private int numIterations; +private int previousItemId; + +private transient DoKMatrix weightMatrix; // item-item weight matrix +private transient DoKMatrix dataMatrix; // 
item-user matrix to get the number of nnz values in column + +private PrimitiveObjectInspector itemIOI; +private PrimitiveObjectInspector itemJOI; +private MapObjectInspector riOI; +private MapObjectInspector rjOI; + +private MapObjectInspector knnItemsOI; +private PrimitiveObjectInspector knnItemsKeyOI; +private MapObjectInspector knnItemsValueOI; +private PrimitiveObjectInspector knnItemsValueKeyOI; +private PrimitiveObjectInspector knnItemsValueValueOI; + +private PrimitiveObjectInspector riKeyOI; +private PrimitiveObjectInspector riValueOI; + +private PrimitiveObjectInspector rjKeyOI; +private PrimitiveObjectInspector rjValueOI; + +// used to store KNN data into temporary file for iterative training +private NioStatefullSegment fileIO; +private ByteBuffer inputBuf; + +private ConversionState cvState; +private long observedTrainingExamples; + +public SlimUDTF() {} + +@Override +public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException { +final int numArgs = argOIs.length; + +if (numArgs == 1 && HiveUtils.isStringOI(argOIs[0])) { --- End diff -- I added these few lines to show the SLIM usage explanation when no other arguments are given. But it may not be the conventional way to show `help` in Hivemall. ---