This is an automated email from the ASF dual-hosted git repository.

elserj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ratis.git

commit c01c5e9cce712fc5f6500bb5c27f80193eec7fbe
Author: Josh Elser <[email protected]>
AuthorDate: Tue Apr 23 16:23:40 2019 -0400

    RATIS-533. Set better default Raft properties from Ratis
    
    Include brief summary from the Jira issue as to what/why we
    changed, things we might want to tune in the future, and
    validate that config isn't set which would break the
    log service.
    
    Closes #18
---
 ratis-logservice/TUNING.md                         | 58 ++++++++++++++++++++++
 .../apache/ratis/logservice/server/BaseServer.java | 32 ++++++++++++
 .../apache/ratis/logservice/server/LogServer.java  | 35 +++++++++++--
 .../ratis/logservice/server/MetadataServer.java    | 12 +++--
 .../ratis/logservice/server/TestBaseServer.java    | 49 ++++++++++++++++++
 5 files changed, 177 insertions(+), 9 deletions(-)

diff --git a/ratis-logservice/TUNING.md b/ratis-logservice/TUNING.md
new file mode 100644
index 0000000..9940237
--- /dev/null
+++ b/ratis-logservice/TUNING.md
@@ -0,0 +1,58 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+# Tuning for the Log Service
+
+This is a list of Ratis configuration properties which have been
+found to be relevant/important to control how Ratis operates for
+the purposes of the LogService.
+
+## RAFT Log
+
+The default RAFT log implementation uses "segments" on disk to avoid
+a single file growing to be very large. By default, each segment is
+`8MB` in size and can be set by the API 
`RaftServerConfigKeys.Log.setSegmentSizeMax()`.
+When a new segment is created, Ratis will "preallocate" that segment by writing
+data into the file to reduce the risk of latency when we first try to append
+entries to the RAFT log. By default, the segment is preallocated with `4MB`
+and can be changed via `RaftServerConfigKeys.Log.setPreallocatedSize()`.
+
+Up to 2 log segments are cached in memory (including the segment actively being
+written to). This is controlled by 
`RaftServerConfigKeys.Log.setMaxCachedSegmentNum()`.
+Increasing this configuration would use more memory but should reduce the 
latency
+of reading entries from the RAFT log.
+
+Writes to the RAFT log are buffered using a Java Direct ByteBuffer (offheap). By default,
+this buffer is `64KB` in size and can be changed via `RaftServerConfigKeys.Log.setWriteBufferSize()`.
+Beware that when one LogServer is hosting multiple RAFT groups (multiple "LogService Logs"), each
+group will have its own buffer. Thus, high concurrency will result in multiple buffers.
+
+## RAFT Server
+
+Every RAFT server maintains a queue of I/O actions that it needs to execute. 
As with
+much of Ratis, these actions are executed asynchronously and the client can 
block on
+completion of these tasks as necessary. To prevent saturating memory, this 
queue of
+items can be limited in size by both number of entries and size of the 
elements in the queue.
+The former defaults to 4096 elements and is controlled by `RaftServerConfigKeys.Log.setElementLimit()`,
+while the latter defaults to `64MB` and is controlled by `RaftServerConfigKeys.Log.setByteLimit()`.
+
+## Do Not Set
+
+Running a snapshot indicates that we can truncate part of the RAFT log, as the 
expectation is that
+a snapshot is an equivalent representation of all of the updates from the log. 
However, the LogService
+is written to expect that we maintain these records. As such, we must not 
allow snapshots to automatically
+happen as we may lose records from the RAFT log. 
`RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled()`
+defaults to `false` and should not be set to `true`.
diff --git 
a/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/BaseServer.java
 
b/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/BaseServer.java
index 717d5b6..dcd7573 100644
--- 
a/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/BaseServer.java
+++ 
b/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/BaseServer.java
@@ -20,9 +20,15 @@ package org.apache.ratis.logservice.server;
 import java.io.Closeable;
 import java.net.InetSocketAddress;
 import java.util.Objects;
+import java.util.concurrent.TimeUnit;
 
+import org.apache.ratis.conf.RaftProperties;
+import org.apache.ratis.grpc.GrpcConfigKeys;
 import org.apache.ratis.logservice.util.LogServiceUtils;
+import org.apache.ratis.netty.NettyConfigKeys;
+import org.apache.ratis.server.RaftServerConfigKeys;
 import org.apache.ratis.util.NetUtils;
+import org.apache.ratis.util.TimeDuration;
 
 /**
  * A base class to encapsulate functionality around a long-lived Java process 
which runs a state machine.
@@ -39,6 +45,32 @@ public abstract class BaseServer implements Closeable {
     return opts;
   }
 
+  /**
+   * Sets common Ratis server properties for both the log and metadata state 
machines.
+   */
+  void setRaftProperties(RaftProperties properties) {
+    // Set the ports for the server
+    GrpcConfigKeys.Server.setPort(properties, opts.getPort());
+    NettyConfigKeys.Server.setPort(properties, opts.getPort());
+
+    // Ozone sets the leader election timeout (min) to 1 second.
+    TimeDuration leaderElectionTimeoutMin = TimeDuration.valueOf(1, 
TimeUnit.SECONDS);
+    RaftServerConfigKeys.Rpc.setTimeoutMin(properties, 
leaderElectionTimeoutMin);
+    TimeDuration leaderElectionMaxTimeout = TimeDuration.valueOf(
+        leaderElectionTimeoutMin.toLong(TimeUnit.MILLISECONDS) + 200,
+        TimeUnit.MILLISECONDS);
+    RaftServerConfigKeys.Rpc.setTimeoutMax(properties, 
leaderElectionMaxTimeout);
+  }
+
+  /**
+   * Validates that there are no properties set which are in conflict with the 
LogService.
+   */
+  void validateRaftProperties(RaftProperties properties) {
+    if (RaftServerConfigKeys.Snapshot.autoTriggerEnabled(properties)) {
+      throw new IllegalStateException("Auto triggering snapshots is disallowed 
by the LogService");
+    }
+  }
+
   static ServerOpts buildOpts(String hostname, String metaQuorum, int port, 
String workingDir) {
     ServerOpts opts = new ServerOpts();
     opts.setHost(hostname);
diff --git 
a/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/LogServer.java
 
b/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/LogServer.java
index d25b0b3..bed6b2c 100644
--- 
a/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/LogServer.java
+++ 
b/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/LogServer.java
@@ -23,13 +23,13 @@ import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.util.Collections;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.ratis.client.RaftClient;
+import org.apache.ratis.client.RaftClientConfigKeys;
 import org.apache.ratis.conf.RaftProperties;
-import org.apache.ratis.grpc.GrpcConfigKeys;
 import org.apache.ratis.logservice.util.LogServiceUtils;
 import org.apache.ratis.logservice.util.MetaServiceProtoUtil;
-import org.apache.ratis.netty.NettyConfigKeys;
 import org.apache.ratis.protocol.ClientId;
 import org.apache.ratis.protocol.RaftGroup;
 import org.apache.ratis.protocol.RaftGroupId;
@@ -38,6 +38,8 @@ import org.apache.ratis.protocol.RaftPeerId;
 import org.apache.ratis.server.RaftServer;
 import org.apache.ratis.server.RaftServerConfigKeys;
 import org.apache.ratis.statemachine.StateMachine;
+import org.apache.ratis.util.SizeInBytes;
+import org.apache.ratis.util.TimeDuration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -51,6 +53,7 @@ public class LogServer extends BaseServer {
 
     public LogServer(ServerOpts opts) {
       super(opts);
+      LOG.debug("Log Server options: {}", opts);
     }
 
     public RaftServer getServer() {
@@ -61,13 +64,31 @@ public class LogServer extends BaseServer {
         return new Builder();
     }
 
+    @Override
+    void setRaftProperties(RaftProperties properties) {
+      super.setRaftProperties(properties);
+
+      // Increase the client timeout
+      RaftClientConfigKeys.Rpc.setRequestTimeout(properties, 
TimeDuration.valueOf(100, TimeUnit.SECONDS));
+
+      // Increase the segment size to avoid rolling so quickly
+      SizeInBytes segmentSize = SizeInBytes.valueOf("32MB");
+      RaftServerConfigKeys.Log.setSegmentSizeMax(properties, segmentSize);
+      RaftServerConfigKeys.Log.setPreallocatedSize(properties, segmentSize);
+
+      // TODO this seems to cause errors, not sure if pushing Ratis too hard?
+      // SizeInBytes writeBufferSize = SizeInBytes.valueOf("128KB");
+      // RaftServerConfigKeys.Log.setWriteBufferSize(properties, 
writeBufferSize);
+    }
+
     public void start() throws IOException {
         final ServerOpts opts = getServerOpts();
         Set<RaftPeer> peers = 
LogServiceUtils.getPeersFromQuorum(opts.getMetaQuorum());
         RaftProperties properties = new RaftProperties();
-        properties.set("raft.client.rpc.request.timeout", "100000");
-        GrpcConfigKeys.Server.setPort(properties, opts.getPort());
-        NettyConfigKeys.Server.setPort(properties, opts.getPort());
+
+        // Set properties for the log server state machine
+        setRaftProperties(properties);
+
         InetSocketAddress addr = new InetSocketAddress(opts.getHost(), 
opts.getPort());
         if(opts.getWorkingDir() != null) {
             RaftServerConfigKeys.setStorageDirs(properties, 
Collections.singletonList(new File(opts.getWorkingDir())));
@@ -77,6 +98,10 @@ public class LogServer extends BaseServer {
         final RaftGroupId logServerGroupId = 
RaftGroupId.valueOf(opts.getLogServerGroupId());
         RaftGroup all = RaftGroup.valueOf(logServerGroupId, peer);
         RaftGroup meta = 
RaftGroup.valueOf(RaftGroupId.valueOf(opts.getMetaGroupId()), peers);
+
+        // Make sure that we aren't setting any invalid/harmful properties
+        validateRaftProperties(properties);
+
         raftServer = RaftServer.newBuilder()
                 .setStateMachineRegistry(new StateMachine.Registry() {
                     @Override
diff --git 
a/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/MetadataServer.java
 
b/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/MetadataServer.java
index 1d658fe..c7c56b3 100644
--- 
a/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/MetadataServer.java
+++ 
b/ratis-logservice/src/main/java/org/apache/ratis/logservice/server/MetadataServer.java
@@ -20,9 +20,7 @@ package org.apache.ratis.logservice.server;
 
 import com.beust.jcommander.JCommander;
 import org.apache.ratis.conf.RaftProperties;
-import org.apache.ratis.grpc.GrpcConfigKeys;
 import org.apache.ratis.logservice.util.LogServiceUtils;
-import org.apache.ratis.netty.NettyConfigKeys;
 import org.apache.ratis.protocol.*;
 import org.apache.ratis.server.RaftServer;
 import org.apache.ratis.server.RaftServerConfigKeys;
@@ -72,12 +70,18 @@ public class MetadataServer extends BaseServer {
         if(opts.getWorkingDir() != null) {
             RaftServerConfigKeys.setStorageDirs(properties, 
Collections.singletonList(new File(opts.getWorkingDir())));
         }
-        GrpcConfigKeys.Server.setPort(properties, opts.getPort());
-        NettyConfigKeys.Server.setPort(properties, opts.getPort());
+
+        // Set properties common to all log service state machines
+        setRaftProperties(properties);
+
         Set<RaftPeer> peers = getPeersFromQuorum(opts.getMetaQuorum());
         RaftGroupId raftMetaGroupId = 
RaftGroupId.valueOf(opts.getMetaGroupId());
         RaftGroup metaGroup = RaftGroup.valueOf(raftMetaGroupId, peers);
         metaStateMachine = new MetaStateMachine(raftMetaGroupId, 
RaftGroupId.valueOf(opts.getLogServerGroupId()));
+
+        // Make sure that we aren't setting any invalid/harmful properties
+        validateRaftProperties(properties);
+
         server = RaftServer.newBuilder()
                 .setGroup(metaGroup)
                 .setServerId(RaftPeerId.valueOf(id))
diff --git 
a/ratis-logservice/src/test/java/org/apache/ratis/logservice/server/TestBaseServer.java
 
b/ratis-logservice/src/test/java/org/apache/ratis/logservice/server/TestBaseServer.java
new file mode 100644
index 0000000..9ed9e65
--- /dev/null
+++ 
b/ratis-logservice/src/test/java/org/apache/ratis/logservice/server/TestBaseServer.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ratis.logservice.server;
+
+import org.apache.ratis.conf.RaftProperties;
+import org.apache.ratis.server.RaftServerConfigKeys;
+import org.junit.Test;
+
+public class TestBaseServer {
+  private static class MockServer extends BaseServer {
+    public MockServer() {
+      super(new ServerOpts());
+    }
+
+    @Override
+    public void close() {}
+  }
+
+  @Test
+  public void testDefaultPropertiesAreValid() {
+    RaftProperties props = new RaftProperties();
+    try (MockServer server = new MockServer()) {
+      server.validateRaftProperties(props);
+    }
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testAutoSnapshotIsInvalid() {
+    RaftProperties props = new RaftProperties();
+    RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(props, true);
+    try (MockServer server = new MockServer()) {
+      server.validateRaftProperties(props);
+    }
+  }
+}

Reply via email to