[jira] [Commented] (GIRAPH-1125) Add memory estimation mechanism to out-of-core
[ https://issues.apache.org/jira/browse/GIRAPH-1125?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15764788#comment-15764788 ] ASF GitHub Bot commented on GIRAPH-1125: Github user heslami commented on a diff in the pull request: https://github.com/apache/giraph/pull/12#discussion_r93289032 --- Diff: giraph-core/src/main/java/org/apache/giraph/ooc/policy/MemoryEstimatorOracle.java --- @@ -0,0 +1,851 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.ooc.policy; + +import com.sun.management.GarbageCollectionNotificationInfo; +import org.apache.commons.math.stat.regression.OLSMultipleLinearRegression; +import org.apache.giraph.comm.NetworkMetrics; +import org.apache.giraph.conf.FloatConfOption; +import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; +import org.apache.giraph.conf.LongConfOption; +import org.apache.giraph.edge.AbstractEdgeStore; +import org.apache.giraph.ooc.OutOfCoreEngine; +import org.apache.giraph.ooc.command.IOCommand; +import org.apache.giraph.ooc.command.LoadPartitionIOCommand; +import org.apache.giraph.ooc.command.WaitIOCommand; +import org.apache.giraph.worker.EdgeInputSplitsCallable; +import org.apache.giraph.worker.VertexInputSplitsCallable; +import org.apache.giraph.worker.WorkerProgress; +import org.apache.log4j.Logger; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryPoolMXBean; +import java.lang.management.MemoryUsage; +import java.util.List; +import java.util.Map; +import java.util.Vector; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import static com.google.common.base.Preconditions.checkState; + +/** + * Implementation of {@link OutOfCoreOracle} that uses a linear regression model + * to estimate actual memory usage based on the current state of computation. + * The model takes into consideration 5 parameters: + * + * y = c0 + c1*x1 + c2*x2 + c3*x3 + c4*x4 + c5*x5 + * + * y: memory usage + * x1: edges loaded + * x2: vertices loaded + * x3: vertices processed + * x4: bytes received due to messages + * x5: bytes loaded/stored from/to disk due to OOC. 
+ * + */ +public class MemoryEstimatorOracle implements OutOfCoreOracle { + /** Memory check interval in msec */ + public static final LongConfOption CHECK_MEMORY_INTERVAL = +new LongConfOption("giraph.garbageEstimator.checkMemoryInterval", 1000, +"The interval where memory checker thread wakes up and " + +"monitors memory footprint (in milliseconds)"); + /** + * If mem-usage is above this threshold and no Full GC has been called, + * we call it manually + */ + public static final FloatConfOption MANUAL_GC_MEMORY_PRESSURE = +new FloatConfOption("giraph.garbageEstimator.manualGCPressure", 0.95f, +"The threshold above which GC is called manually if Full GC has not " + +"happened in a while"); + /** Used to detect a high memory pressure situation */ + public static final FloatConfOption GC_MINIMUM_RECLAIM_FRACTION = +new FloatConfOption("giraph.garbageEstimator.gcReclaimFraction", 0.05f, +"Minimum percentage of memory we expect to be reclaimed after a Full " + +"GC. If less than this amount is reclaimed, it is safe to say " + +"we are in a high memory situation and the estimation mechanism " + +"has not recognized it yet!"); + /** If mem-usage is above this threshold, active threads are set to 0 */ + public static final FloatConfOption AM_HIGH_THRESHOLD = +new FloatConfOption("giraph.amHighThreshold", 0.95f, +"If mem-usage is above this threshold, all active threads " + +"(compute/input) are paused."); + /** If mem-usage is
[jira] [Commented] (GIRAPH-1125) Add memory estimation mechanism to out-of-core
[ https://issues.apache.org/jira/browse/GIRAPH-1125?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15764777#comment-15764777 ] ASF GitHub Bot commented on GIRAPH-1125: Github user heslami commented on a diff in the pull request: https://github.com/apache/giraph/pull/12#discussion_r93288153 --- Diff: giraph-core/src/main/java/org/apache/giraph/ooc/policy/MemoryEstimatorOracle.java --- @@ -0,0 +1,851 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.ooc.policy; + +import com.sun.management.GarbageCollectionNotificationInfo; +import org.apache.commons.math.stat.regression.OLSMultipleLinearRegression; +import org.apache.giraph.comm.NetworkMetrics; +import org.apache.giraph.conf.FloatConfOption; +import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; +import org.apache.giraph.conf.LongConfOption; +import org.apache.giraph.edge.AbstractEdgeStore; +import org.apache.giraph.ooc.OutOfCoreEngine; +import org.apache.giraph.ooc.command.IOCommand; +import org.apache.giraph.ooc.command.LoadPartitionIOCommand; +import org.apache.giraph.ooc.command.WaitIOCommand; +import org.apache.giraph.worker.EdgeInputSplitsCallable; +import org.apache.giraph.worker.VertexInputSplitsCallable; +import org.apache.giraph.worker.WorkerProgress; +import org.apache.log4j.Logger; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryPoolMXBean; +import java.lang.management.MemoryUsage; +import java.util.List; +import java.util.Map; +import java.util.Vector; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import static com.google.common.base.Preconditions.checkState; + +/** + * Implementation of {@link OutOfCoreOracle} that uses a linear regression model + * to estimate actual memory usage based on the current state of computation. + * The model takes into consideration 5 parameters: + * + * y = c0 + c1*x1 + c2*x2 + c3*x3 + c4*x4 + c5*x5 + * + * y: memory usage + * x1: edges loaded + * x2: vertices loaded + * x3: vertices processed + * x4: bytes received due to messages + * x5: bytes loaded/stored from/to disk due to OOC. 
+ * + */ +public class MemoryEstimatorOracle implements OutOfCoreOracle { + /** Memory check interval in msec */ + public static final LongConfOption CHECK_MEMORY_INTERVAL = +new LongConfOption("giraph.garbageEstimator.checkMemoryInterval", 1000, +"The interval where memory checker thread wakes up and " + +"monitors memory footprint (in milliseconds)"); + /** + * If mem-usage is above this threshold and no Full GC has been called, + * we call it manually + */ + public static final FloatConfOption MANUAL_GC_MEMORY_PRESSURE = +new FloatConfOption("giraph.garbageEstimator.manualGCPressure", 0.95f, +"The threshold above which GC is called manually if Full GC has not " + +"happened in a while"); + /** Used to detect a high memory pressure situation */ + public static final FloatConfOption GC_MINIMUM_RECLAIM_FRACTION = +new FloatConfOption("giraph.garbageEstimator.gcReclaimFraction", 0.05f, +"Minimum percentage of memory we expect to be reclaimed after a Full " + +"GC. If less than this amount is reclaimed, it is safe to say " + +"we are in a high memory situation and the estimation mechanism " + +"has not recognized it yet!"); + /** If mem-usage is above this threshold, active threads are set to 0 */ + public static final FloatConfOption AM_HIGH_THRESHOLD = +new FloatConfOption("giraph.amHighThreshold", 0.95f, +"If mem-usage is above this threshold, all active threads " + +"(compute/input) are paused."); + /** If mem-usage is