Michael Blow has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/2426
Change subject: [NO ISSUE] Set MaxGCPauseMillis to not exceed 1/2 of dead node detection threshold
......................................................................

[NO ISSUE] Set MaxGCPauseMillis to not exceed 1/2 of dead node detection threshold

Help prevent nodes under heavy gc from missing too many heartbeats

Change-Id: I7e51db5ccfbb4771ba1f6e0264abfd69f833e7e7
---
M hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
1 file changed, 30 insertions(+), 7 deletions(-)

  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/26/2426/1

diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
index aa7a4fe..38175f0 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
@@ -25,8 +25,10 @@
 import java.io.StringWriter;
 import java.net.Socket;
 
+import org.apache.hyracks.api.config.IApplicationConfig;
 import org.apache.hyracks.api.config.Section;
 import org.apache.hyracks.control.cc.ClusterControllerService;
+import org.apache.hyracks.control.common.config.ConfigManager;
 import org.apache.hyracks.control.common.controllers.NCConfig;
 import org.apache.hyracks.control.common.controllers.ServiceConstants.ServiceCommand;
 import org.apache.hyracks.control.common.work.AbstractWork;
@@ -34,6 +36,7 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.ini4j.Ini;
+import org.ini4j.Profile;
 
 /**
  * A work which is run at CC startup for each NC specified in the configuration file.
@@ -64,7 +67,7 @@
                         ObjectOutputStream oos = new ObjectOutputStream(s.getOutputStream());
                         oos.writeUTF(NC_SERVICE_MAGIC_COOKIE);
                         oos.writeUTF(ServiceCommand.START_NC.name());
-                        oos.writeUTF(TriggerNCWork.this.serializeIni(ccs.getCCConfig().getIni()));
+                        oos.writeUTF(TriggerNCWork.this.serializeIni());
                         oos.close();
                         return;
                         // QQQ Should probably have an ACK here
@@ -83,14 +86,30 @@
 
     /**
      * Given an Ini object, serialize it to String with some enhancements.
-     * @param ccini the ini file to decorate and forward to NC
      */
-    private String serializeIni(Ini ccini) throws IOException {
+    private String serializeIni() throws IOException {
         StringWriter iniString = new StringWriter();
-        ccini.get(Section.NC.sectionName()).putIfAbsent(NCConfig.Option.CLUSTER_ADDRESS.ini(),
-                ccs.getCCConfig().getClusterPublicAddress());
-        ccini.get(Section.NC.sectionName()).putIfAbsent(NCConfig.Option.CLUSTER_PORT.ini(),
-                String.valueOf(ccs.getCCConfig().getClusterPublicPort()));
+        ConfigManager configManager = ccs.getCCConfig().getConfigManager();
+        Ini ccini = configManager.toIni(false);
+        IApplicationConfig ncConfig = configManager.getNodeEffectiveConfig(ncId);
+        String sectionName = Section.NC.sectionName() + "/" + ncId;
+        Profile.Section ncSection = ccini.get(sectionName);
+        if (ncSection == null) {
+            ncSection = ccini.add(sectionName);
+        }
+        if (ncConfig.getString(NCConfig.Option.CLUSTER_ADDRESS) == null) {
+            ncSection.put(NCConfig.Option.CLUSTER_ADDRESS.ini(), ccs.getCCConfig().getClusterPublicAddress());
+            ncSection.put(NCConfig.Option.CLUSTER_PORT.ini(), String.valueOf(ccs.getCCConfig().getClusterPublicPort()));
+        }
+
+        // if not already configured, set GC max pause time millis to not exceed 1/2 the total max heartbeat miss period...
+        String ncJvmArgs = ncConfig.getString(NCConfig.Option.JVM_ARGS);
+        if (ncJvmArgs == null || !ncJvmArgs.contains("-XX:MaxGCPauseMillis")) {
+            String gcMaxPauseArg = "-XX:MaxGCPauseMillis=" + getGcMaxPauseMillis();
+            ncSection.put(NCConfig.Option.JVM_ARGS.ini(),
+                    ncJvmArgs == null ? gcMaxPauseArg : ncJvmArgs + " " + gcMaxPauseArg);
+        }
+
         // Finally insert *this* NC's name into localnc section - this is a fixed
         // entry point so that NCs can determine where all their config is.
         ccini.put(Section.LOCALNC.sectionName(), NCConfig.Option.NODE_ID.ini(), ncId);
@@ -100,4 +119,8 @@
         }
         return iniString.toString();
     }
+
+    private long getGcMaxPauseMillis() {
+        return ccs.getCCConfig().getHeartbeatPeriodMillis() * ccs.getCCConfig().getHeartbeatMaxMisses() / 2;
+    }
 }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/2426
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7e51db5ccfbb4771ba1f6e0264abfd69f833e7e7
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Michael Blow <mb...@apache.org>