[jira] [Commented] (GIRAPH-1125) Add memory estimation mechanism to out-of-core

2016-12-19 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/GIRAPH-1125?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15762795#comment-15762795
 ] 

ASF GitHub Bot commented on GIRAPH-1125:


Github user heslami commented on a diff in the pull request:

https://github.com/apache/giraph/pull/12#discussion_r93152281
  
--- Diff: 
giraph-core/src/main/java/org/apache/giraph/ooc/policy/MemoryEstimatorOracle.java
 ---
@@ -0,0 +1,851 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.ooc.policy;
+
+import com.sun.management.GarbageCollectionNotificationInfo;
+import org.apache.commons.math.stat.regression.OLSMultipleLinearRegression;
+import org.apache.giraph.comm.NetworkMetrics;
+import org.apache.giraph.conf.FloatConfOption;
+import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
+import org.apache.giraph.conf.LongConfOption;
+import org.apache.giraph.edge.AbstractEdgeStore;
+import org.apache.giraph.ooc.OutOfCoreEngine;
+import org.apache.giraph.ooc.command.IOCommand;
+import org.apache.giraph.ooc.command.LoadPartitionIOCommand;
+import org.apache.giraph.ooc.command.WaitIOCommand;
+import org.apache.giraph.worker.EdgeInputSplitsCallable;
+import org.apache.giraph.worker.VertexInputSplitsCallable;
+import org.apache.giraph.worker.WorkerProgress;
+import org.apache.log4j.Logger;
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryPoolMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.List;
+import java.util.Map;
+import java.util.Vector;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import static com.google.common.base.Preconditions.checkState;
+
+/**
+ * Implementation of {@link OutOfCoreOracle} that uses a linear regression model
+ * to estimate actual memory usage based on the current state of computation.
+ * The model takes into consideration 5 parameters:
+ *
+ * y = c0 + c1*x1 + c2*x2 + c3*x3 + c4*x4 + c5*x5
+ *
+ * y: memory usage
+ * x1: edges loaded
+ * x2: vertices loaded
+ * x3: vertices processed
+ * x4: bytes received due to messages
+ * x5: bytes loaded/stored from/to disk due to OOC.
+ *
+ */
+public class MemoryEstimatorOracle implements OutOfCoreOracle {
+  /** Memory check interval in msec */
+  public static final LongConfOption CHECK_MEMORY_INTERVAL =
+new LongConfOption("giraph.garbageEstimator.checkMemoryInterval", 1000,
+"The interval where memory checker thread wakes up and " +
+"monitors memory footprint (in milliseconds)");
+  /**
+   * If mem-usage is above this threshold and no Full GC has been called,
+   * we call it manually
+   */
+  public static final FloatConfOption MANUAL_GC_MEMORY_PRESSURE =
+new FloatConfOption("giraph.garbageEstimator.manualGCPressure", 0.95f,
+"The threshold above which GC is called manually if Full GC has not " +
+"happened in a while");
+  /** Used to detect a high memory pressure situation */
+  public static final FloatConfOption GC_MINIMUM_RECLAIM_FRACTION =
+new FloatConfOption("giraph.garbageEstimator.gcReclaimFraction", 0.05f,
+"Minimum percentage of memory we expect to be reclaimed after a Full " +
+"GC. If less than this amount is reclaimed, it is sage to say " +
+"we are in a high memory situation and the estimation mechanism " +
+"has not recognized it yet!");
+  /** If mem-usage is above this threshold, active threads are set to 0 */
+  public static final FloatConfOption AM_HIGH_THRESHOLD =
+new FloatConfOption("giraph.amHighThreshold", 0.95f,
+"If mem-usage is above this threshold, all active threads " +
+"(compute/input) are paused.");
+  /** If mem-usage is 

[jira] [Commented] (GIRAPH-1125) Add memory estimation mechanism to out-of-core

2016-12-19 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/GIRAPH-1125?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15762637#comment-15762637
 ] 

ASF GitHub Bot commented on GIRAPH-1125:


Github user heslami commented on a diff in the pull request:

https://github.com/apache/giraph/pull/12#discussion_r93143346
  
--- Diff: 
giraph-core/src/main/java/org/apache/giraph/ooc/policy/MemoryEstimatorOracle.java
 ---
@@ -0,0 +1,851 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.ooc.policy;
+
+import com.sun.management.GarbageCollectionNotificationInfo;
+import org.apache.commons.math.stat.regression.OLSMultipleLinearRegression;
+import org.apache.giraph.comm.NetworkMetrics;
+import org.apache.giraph.conf.FloatConfOption;
+import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
+import org.apache.giraph.conf.LongConfOption;
+import org.apache.giraph.edge.AbstractEdgeStore;
+import org.apache.giraph.ooc.OutOfCoreEngine;
+import org.apache.giraph.ooc.command.IOCommand;
+import org.apache.giraph.ooc.command.LoadPartitionIOCommand;
+import org.apache.giraph.ooc.command.WaitIOCommand;
+import org.apache.giraph.worker.EdgeInputSplitsCallable;
+import org.apache.giraph.worker.VertexInputSplitsCallable;
+import org.apache.giraph.worker.WorkerProgress;
+import org.apache.log4j.Logger;
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryPoolMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.List;
+import java.util.Map;
+import java.util.Vector;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import static com.google.common.base.Preconditions.checkState;
+
+/**
+ * Implementation of {@link OutOfCoreOracle} that uses a linear regression model
+ * to estimate actual memory usage based on the current state of computation.
+ * The model takes into consideration 5 parameters:
+ *
+ * y = c0 + c1*x1 + c2*x2 + c3*x3 + c4*x4 + c5*x5
+ *
+ * y: memory usage
+ * x1: edges loaded
+ * x2: vertices loaded
+ * x3: vertices processed
+ * x4: bytes received due to messages
+ * x5: bytes loaded/stored from/to disk due to OOC.
+ *
+ */
+public class MemoryEstimatorOracle implements OutOfCoreOracle {
+  /** Memory check interval in msec */
+  public static final LongConfOption CHECK_MEMORY_INTERVAL =
+new LongConfOption("giraph.garbageEstimator.checkMemoryInterval", 1000,
+"The interval where memory checker thread wakes up and " +
+"monitors memory footprint (in milliseconds)");
+  /**
+   * If mem-usage is above this threshold and no Full GC has been called,
+   * we call it manually
+   */
+  public static final FloatConfOption MANUAL_GC_MEMORY_PRESSURE =
+new FloatConfOption("giraph.garbageEstimator.manualGCPressure", 0.95f,
+"The threshold above which GC is called manually if Full GC has not " +
+"happened in a while");
+  /** Used to detect a high memory pressure situation */
+  public static final FloatConfOption GC_MINIMUM_RECLAIM_FRACTION =
+new FloatConfOption("giraph.garbageEstimator.gcReclaimFraction", 0.05f,
+"Minimum percentage of memory we expect to be reclaimed after a Full " +
+"GC. If less than this amount is reclaimed, it is sage to say " +
+"we are in a high memory situation and the estimation mechanism " +
+"has not recognized it yet!");
+  /** If mem-usage is above this threshold, active threads are set to 0 */
+  public static final FloatConfOption AM_HIGH_THRESHOLD =
+new FloatConfOption("giraph.amHighThreshold", 0.95f,
+"If mem-usage is above this threshold, all active threads " +
+"(compute/input) are paused.");
+  /** If mem-usage is 

[jira] [Commented] (GIRAPH-1125) Add memory estimation mechanism to out-of-core

2016-12-19 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/GIRAPH-1125?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15762464#comment-15762464
 ] 

ASF GitHub Bot commented on GIRAPH-1125:


Github user heslami commented on a diff in the pull request:

https://github.com/apache/giraph/pull/12#discussion_r93131260
  
--- Diff: 
giraph-core/src/main/java/org/apache/giraph/ooc/policy/MemoryEstimatorOracle.java
 ---
@@ -0,0 +1,851 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.ooc.policy;
+
+import com.sun.management.GarbageCollectionNotificationInfo;
+import org.apache.commons.math.stat.regression.OLSMultipleLinearRegression;
+import org.apache.giraph.comm.NetworkMetrics;
+import org.apache.giraph.conf.FloatConfOption;
+import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
+import org.apache.giraph.conf.LongConfOption;
+import org.apache.giraph.edge.AbstractEdgeStore;
+import org.apache.giraph.ooc.OutOfCoreEngine;
+import org.apache.giraph.ooc.command.IOCommand;
+import org.apache.giraph.ooc.command.LoadPartitionIOCommand;
+import org.apache.giraph.ooc.command.WaitIOCommand;
+import org.apache.giraph.worker.EdgeInputSplitsCallable;
+import org.apache.giraph.worker.VertexInputSplitsCallable;
+import org.apache.giraph.worker.WorkerProgress;
+import org.apache.log4j.Logger;
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryPoolMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.List;
+import java.util.Map;
+import java.util.Vector;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import static com.google.common.base.Preconditions.checkState;
+
+/**
+ * Implementation of {@link OutOfCoreOracle} that uses a linear regression model
+ * to estimate actual memory usage based on the current state of computation.
+ * The model takes into consideration 5 parameters:
+ *
+ * y = c0 + c1*x1 + c2*x2 + c3*x3 + c4*x4 + c5*x5
+ *
+ * y: memory usage
+ * x1: edges loaded
+ * x2: vertices loaded
+ * x3: vertices processed
+ * x4: bytes received due to messages
+ * x5: bytes loaded/stored from/to disk due to OOC.
+ *
+ */
+public class MemoryEstimatorOracle implements OutOfCoreOracle {
+  /** Memory check interval in msec */
+  public static final LongConfOption CHECK_MEMORY_INTERVAL =
+new LongConfOption("giraph.garbageEstimator.checkMemoryInterval", 1000,
+"The interval where memory checker thread wakes up and " +
+"monitors memory footprint (in milliseconds)");
+  /**
+   * If mem-usage is above this threshold and no Full GC has been called,
+   * we call it manually
+   */
+  public static final FloatConfOption MANUAL_GC_MEMORY_PRESSURE =
+new FloatConfOption("giraph.garbageEstimator.manualGCPressure", 0.95f,
+"The threshold above which GC is called manually if Full GC has not " +
+"happened in a while");
+  /** Used to detect a high memory pressure situation */
+  public static final FloatConfOption GC_MINIMUM_RECLAIM_FRACTION =
+new FloatConfOption("giraph.garbageEstimator.gcReclaimFraction", 0.05f,
+"Minimum percentage of memory we expect to be reclaimed after a Full " +
+"GC. If less than this amount is reclaimed, it is sage to say " +
+"we are in a high memory situation and the estimation mechanism " +
+"has not recognized it yet!");
+  /** If mem-usage is above this threshold, active threads are set to 0 */
+  public static final FloatConfOption AM_HIGH_THRESHOLD =
+new FloatConfOption("giraph.amHighThreshold", 0.95f,
+"If mem-usage is above this threshold, all active threads " +
+"(compute/input) are paused.");
+  /** If mem-usage is 

[jira] [Commented] (GIRAPH-1125) Add memory estimation mechanism to out-of-core

2016-12-19 Thread ASF GitHub Bot (JIRA)

[ 
https://issues.apache.org/jira/browse/GIRAPH-1125?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15762437#comment-15762437
 ] 

ASF GitHub Bot commented on GIRAPH-1125:


Github user heslami commented on a diff in the pull request:

https://github.com/apache/giraph/pull/12#discussion_r93129286
  
--- Diff: 
giraph-core/src/main/java/org/apache/giraph/ooc/policy/MemoryEstimatorOracle.java
 ---
@@ -0,0 +1,851 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.ooc.policy;
+
+import com.sun.management.GarbageCollectionNotificationInfo;
+import org.apache.commons.math.stat.regression.OLSMultipleLinearRegression;
+import org.apache.giraph.comm.NetworkMetrics;
+import org.apache.giraph.conf.FloatConfOption;
+import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
+import org.apache.giraph.conf.LongConfOption;
+import org.apache.giraph.edge.AbstractEdgeStore;
+import org.apache.giraph.ooc.OutOfCoreEngine;
+import org.apache.giraph.ooc.command.IOCommand;
+import org.apache.giraph.ooc.command.LoadPartitionIOCommand;
+import org.apache.giraph.ooc.command.WaitIOCommand;
+import org.apache.giraph.worker.EdgeInputSplitsCallable;
+import org.apache.giraph.worker.VertexInputSplitsCallable;
+import org.apache.giraph.worker.WorkerProgress;
+import org.apache.log4j.Logger;
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryPoolMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.List;
+import java.util.Map;
+import java.util.Vector;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import static com.google.common.base.Preconditions.checkState;
+
+/**
+ * Implementation of {@link OutOfCoreOracle} that uses a linear regression model
+ * to estimate actual memory usage based on the current state of computation.
+ * The model takes into consideration 5 parameters:
+ *
+ * y = c0 + c1*x1 + c2*x2 + c3*x3 + c4*x4 + c5*x5
+ *
+ * y: memory usage
+ * x1: edges loaded
+ * x2: vertices loaded
+ * x3: vertices processed
+ * x4: bytes received due to messages
+ * x5: bytes loaded/stored from/to disk due to OOC.
+ *
+ */
+public class MemoryEstimatorOracle implements OutOfCoreOracle {
+  /** Memory check interval in msec */
+  public static final LongConfOption CHECK_MEMORY_INTERVAL =
+new LongConfOption("giraph.garbageEstimator.checkMemoryInterval", 1000,
+"The interval where memory checker thread wakes up and " +
+"monitors memory footprint (in milliseconds)");
+  /**
+   * If mem-usage is above this threshold and no Full GC has been called,
+   * we call it manually
+   */
+  public static final FloatConfOption MANUAL_GC_MEMORY_PRESSURE =
+new FloatConfOption("giraph.garbageEstimator.manualGCPressure", 0.95f,
+"The threshold above which GC is called manually if Full GC has not " +
+"happened in a while");
+  /** Used to detect a high memory pressure situation */
+  public static final FloatConfOption GC_MINIMUM_RECLAIM_FRACTION =
+new FloatConfOption("giraph.garbageEstimator.gcReclaimFraction", 0.05f,
+"Minimum percentage of memory we expect to be reclaimed after a Full " +
+"GC. If less than this amount is reclaimed, it is sage to say " +
+"we are in a high memory situation and the estimation mechanism " +
+"has not recognized it yet!");
+  /** If mem-usage is above this threshold, active threads are set to 0 */
+  public static final FloatConfOption AM_HIGH_THRESHOLD =
+new FloatConfOption("giraph.amHighThreshold", 0.95f,
+"If mem-usage is above this threshold, all active threads " +
+"(compute/input) are paused.");
+  /** If mem-usage is