[1/2] hive git commit: HIVE-19126: CachedStore: Use memory estimation to limit cache size during prewarm (Vaibhav Gumashta reviewed by Thejas Nair)

2018-04-17 Thread vgumashta
Repository: hive
Updated Branches:
  refs/heads/branch-3 9db29e9d4 -> 624e464a2


http://git-wip-us.apache.org/repos/asf/hive/blob/624e464a/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java
--
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java
 
b/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java
new file mode 100644
index 0000000..9421691
--- /dev/null
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java
@@ -0,0 +1,640 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.util;
+
+import java.lang.reflect.AccessibleObject;
+import java.lang.reflect.Array;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Creates size estimators for java objects. The estimators attempt to do most 
of the reflection
+ * work at initialization time, and also take some shortcuts, to minimize the 
amount of work done
+ * during the actual estimation.
+ * TODO: clean up
+ */
+public class IncrementalObjectSizeEstimator {
+  public static final JavaDataModel memoryModel = JavaDataModel.get();
+  static final private Logger LOG =
+  LoggerFactory.getLogger(IncrementalObjectSizeEstimator.class.getName());
+
+  private enum FieldType {
+PRIMITIVE_ARRAY, OBJECT_ARRAY, COLLECTION, MAP, OTHER
+  };
+
+  public static HashMap, ObjectEstimator> createEstimators(Object 
rootObj) {
+HashMap, ObjectEstimator> byType = new HashMap<>();
+addHardcodedEstimators(byType);
+createEstimators(rootObj, byType);
+return byType;
+  }
+
+  public static void createEstimators(Object rootObj, HashMap, 
ObjectEstimator> byType) {
+// Code initially inspired by Google ObjectExplorer.
+// TODO: roll in the direct-only estimators from fields. Various other 
optimizations possible.
+Deque stack = createWorkStack(rootObj, byType);
+
+while (!stack.isEmpty()) {
+  Object obj = stack.pop();
+  Class clazz;
+  if (obj instanceof Class) {
+clazz = (Class) obj;
+obj = null;
+  } else {
+clazz = obj.getClass();
+  }
+  ObjectEstimator estimator = byType.get(clazz);
+  assert estimator != null;
+  if (!estimator.isFromClass && obj == null) {
+// The object was added later for the same class; see addToProcessing.
+continue;
+  }
+  if (estimator.isProcessed())
+continue;
+  estimator.init();
+  for (Field field : getAllFields(clazz)) {
+Class fieldClass = field.getType();
+if (Modifier.isStatic(field.getModifiers())) {
+  continue;
+}
+if (Class.class.isAssignableFrom(fieldClass)) {
+  continue;
+}
+if (fieldClass.isPrimitive()) {
+  estimator.addPrimitive(fieldClass);
+  continue;
+}
+if (Enum.class.isAssignableFrom(fieldClass)) {
+  estimator.addEnum();
+  continue;
+}
+boolean isArray = fieldClass.isArray();
+if (isArray && fieldClass.getComponentType().isPrimitive()) {
+  estimator.addField(FieldType.PRIMITIVE_ARRAY, field);
+  continue;
+}
+Object fieldObj = null;
+if (obj != null) {
+  fieldObj = extractFieldObj(obj, field);
+  fieldClass = determineRealClass(byType, stack, field, fieldClass, 
fieldObj);
+}
+if (isArray) {
+  estimator.addField(FieldType.OBJECT_ARRAY, field);
+  addArrayEstimator(byType, stack, field, fieldObj);
+} else if (Collection.class.isAssignableFrom(fieldClass)) {
+  estimator.ad

[1/2] hive git commit: HIVE-19126: CachedStore: Use memory estimation to limit cache size during prewarm (Vaibhav Gumashta reviewed by Thejas Nair)

2018-04-17 Thread vgumashta
Repository: hive
Updated Branches:
  refs/heads/master 3d1bf34b1 -> 4cfec3eb9


http://git-wip-us.apache.org/repos/asf/hive/blob/4cfec3eb/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java
--
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java
 
b/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java
new file mode 100644
index 0000000..9421691
--- /dev/null
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java
@@ -0,0 +1,640 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.util;
+
+import java.lang.reflect.AccessibleObject;
+import java.lang.reflect.Array;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Creates size estimators for java objects. The estimators attempt to do most 
of the reflection
+ * work at initialization time, and also take some shortcuts, to minimize the 
amount of work done
+ * during the actual estimation.
+ * TODO: clean up
+ */
+public class IncrementalObjectSizeEstimator {
+  public static final JavaDataModel memoryModel = JavaDataModel.get();
+  static final private Logger LOG =
+  LoggerFactory.getLogger(IncrementalObjectSizeEstimator.class.getName());
+
+  private enum FieldType {
+PRIMITIVE_ARRAY, OBJECT_ARRAY, COLLECTION, MAP, OTHER
+  };
+
+  public static HashMap, ObjectEstimator> createEstimators(Object 
rootObj) {
+HashMap, ObjectEstimator> byType = new HashMap<>();
+addHardcodedEstimators(byType);
+createEstimators(rootObj, byType);
+return byType;
+  }
+
+  public static void createEstimators(Object rootObj, HashMap, 
ObjectEstimator> byType) {
+// Code initially inspired by Google ObjectExplorer.
+// TODO: roll in the direct-only estimators from fields. Various other 
optimizations possible.
+Deque stack = createWorkStack(rootObj, byType);
+
+while (!stack.isEmpty()) {
+  Object obj = stack.pop();
+  Class clazz;
+  if (obj instanceof Class) {
+clazz = (Class) obj;
+obj = null;
+  } else {
+clazz = obj.getClass();
+  }
+  ObjectEstimator estimator = byType.get(clazz);
+  assert estimator != null;
+  if (!estimator.isFromClass && obj == null) {
+// The object was added later for the same class; see addToProcessing.
+continue;
+  }
+  if (estimator.isProcessed())
+continue;
+  estimator.init();
+  for (Field field : getAllFields(clazz)) {
+Class fieldClass = field.getType();
+if (Modifier.isStatic(field.getModifiers())) {
+  continue;
+}
+if (Class.class.isAssignableFrom(fieldClass)) {
+  continue;
+}
+if (fieldClass.isPrimitive()) {
+  estimator.addPrimitive(fieldClass);
+  continue;
+}
+if (Enum.class.isAssignableFrom(fieldClass)) {
+  estimator.addEnum();
+  continue;
+}
+boolean isArray = fieldClass.isArray();
+if (isArray && fieldClass.getComponentType().isPrimitive()) {
+  estimator.addField(FieldType.PRIMITIVE_ARRAY, field);
+  continue;
+}
+Object fieldObj = null;
+if (obj != null) {
+  fieldObj = extractFieldObj(obj, field);
+  fieldClass = determineRealClass(byType, stack, field, fieldClass, 
fieldObj);
+}
+if (isArray) {
+  estimator.addField(FieldType.OBJECT_ARRAY, field);
+  addArrayEstimator(byType, stack, field, fieldObj);
+} else if (Collection.class.isAssignableFrom(fieldClass)) {
+  estimator.addF