Maxwell-Guo commented on code in PR #2894:
URL: https://github.com/apache/cassandra/pull/2894#discussion_r1403941235


##########
src/java/org/apache/cassandra/db/commitlog/DirectIOSegment.java:
##########
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.db.commitlog;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+
+import com.sun.nio.file.ExtendedOpenOption;
+import net.openhft.chronicle.core.util.ThrowingFunction;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.io.FSWriteError;
+import org.apache.cassandra.io.compress.BufferType;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.io.util.FileUtils;
+import org.apache.cassandra.io.util.SimpleCachedBufferPool;
+import org.apache.cassandra.utils.ByteBufferUtil;
+import sun.nio.ch.DirectBuffer;
+
+/*
+ * Direct-IO segment. Allocates ByteBuffer using ByteBuffer.allocateDirect and 
align
+ * ByteBuffer.position, ByteBuffer.limit and FileChannel.position to page size 
(4K).
+ * Java-11 forces minimum page size to be written to disk with Direct-IO.
+ */
+public class DirectIOSegment extends CommitLogSegment
+{
+    private final int fsBlockSize;
+    private final int fsBlockQuotientMask;
+    private final int fsBlockRemainderMask;
+
+    // Needed to track number of bytes written to disk in multiple of page 
size.
+    long lastWritten = 0;
+
+    /**
+     * Constructs a new segment file.
+     */
+    DirectIOSegment(AbstractCommitLogSegmentManager manager, 
ThrowingFunction<Path, FileChannel, IOException> channelFactory, int 
fsBlockSize)
+    {
+        super(manager, channelFactory);
+
+        assert Integer.highestOneBit(fsBlockSize) == fsBlockSize : 
"fsBlockSize must be a power of 2";
+
+        // mark the initial sync marker as uninitialised
+        int firstSync = buffer.position();
+        buffer.putInt(firstSync + 0, 0);
+        buffer.putInt(firstSync + 4, 0);
+
+        this.fsBlockSize = fsBlockSize;
+        this.fsBlockRemainderMask = fsBlockSize - 1;
+        this.fsBlockQuotientMask = ~fsBlockRemainderMask;
+    }
+
+    @Override
+    void writeLogHeader()
+    {
+        super.writeLogHeader();
+        // Testing shows writing initial bytes takes some time for Direct I/O. 
During peak load,
+        // it is better to make "COMMIT-LOG-ALLOCATOR" thread to write these 
few bytes of each
+        // file and this helps syncer thread to speedup the flush activity.
+        flush(0, lastSyncedOffset);
+    }
+
+    @Override
+    void write(int startMarker, int nextMarker)
+    {
+        // if there's room in the discard section to write an empty header,
+        // zero out the next sync marker so replayer can cleanly exit
+        if (nextMarker <= buffer.capacity() - SYNC_MARKER_SIZE)
+        {
+            buffer.putInt(nextMarker, 0);
+            buffer.putInt(nextMarker + 4, 0);
+        }
+
+        // write previous sync marker to point to next sync marker
+        // we don't chain the crcs here to ensure this method is idempotent if 
it fails
+        writeSyncMarker(id, buffer, startMarker, startMarker, nextMarker);
+    }
+
+    @Override
+    protected void flush(int startMarker, int nextMarker)
+    {
+        try
+        {
+            // lastSyncedOffset is synced to disk. Align lastSyncedOffset to 
start of its block
+            // and nextMarker to end of its block to avoid write errors.
+            int flushPosition = lastSyncedOffset;
+            ByteBuffer duplicate = buffer.duplicate();
+
+            // Aligned file position if not aligned to start of a block.
+            if ((flushPosition & fsBlockRemainderMask) != 0)
+            {
+                flushPosition = flushPosition & fsBlockQuotientMask;
+                channel.position(flushPosition);
+            }
+            duplicate.position(flushPosition);
+
+            int flushLimit = nextMarker;
+
+            // Align last byte to end of block.
+            if ((flushLimit & fsBlockRemainderMask) != 0)
+                flushLimit = (flushLimit + fsBlockSize) & fsBlockQuotientMask;
+
+            duplicate.limit(flushLimit);
+
+            channel.write(duplicate);
+
+            // Direct I/O always writes flushes in block size and writes more 
than the flush size.
+            // File size on disk will always multiple of block size and taking 
this into account
+            // helps testcases to pass. Avoid counting same block more than 
once.
+            if (flushLimit > lastWritten)
+            {
+                manager.addSize(flushLimit - lastWritten);
+                lastWritten = flushLimit;
+            }
+        }
+        catch (IOException e)
+        {
+            throw new FSWriteError(e, getPath());
+        }
+    }
+
+    @Override
+    public long onDiskSize()
+    {
+        return lastWritten;
+    }
+
+    @Override
+    protected void internalClose()
+    {
+        try
+        {
+            manager.getBufferPool().releaseBuffer(buffer);
+            super.internalClose();
+        }
+        finally
+        {
+            manager.notifyBufferFreed();
+        }
+    }
+
+    protected static class DirectIOSegmentBuilder extends 
CommitLogSegment.Builder<DirectIOSegment>
+    {
+        public final int fsBlockSize;
+
+        public DirectIOSegmentBuilder(AbstractCommitLogSegmentManager 
segmentManager)
+        {
+            super(segmentManager);
+            this.fsBlockSize = FileUtils.getBlockSize(new 
File(segmentManager.storageDirectory));
+        }
+
+        @Override
+        public DirectIOSegment build()
+        {
+            return new DirectIOSegment(segmentManager,
+                                       path -> FileChannel.open(path, 
StandardOpenOption.WRITE, StandardOpenOption.READ, StandardOpenOption.CREATE, 
ExtendedOpenOption.DIRECT),
+                                       fsBlockSize);
+        }
+
+        @Override
+        public SimpleCachedBufferPool createBufferPool()
+        {
+            // The direct buffer must be aligned with the file system block 
size. We cannot enforce that during
+            // allocation, but we can get an aligned slice from the allocated 
buffer. The buffer must be oversized by the
+            // alignment unit to make it possible.
+            return new 
SimpleCachedBufferPool(DatabaseDescriptor.getCommitLogMaxCompressionBuffersInPool(),
+                                              
DatabaseDescriptor.getCommitLogSegmentSize() + fsBlockSize,
+                                              BufferType.OFF_HEAP) {
+                @Override
+                public ByteBuffer createBuffer()
+                {
+                    int segmentSize = 
DatabaseDescriptor.getCommitLogSegmentSize();
+
+                    ByteBuffer original =  super.createBuffer();

Review Comment:
   an extra space before "super.createBuffer();"



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to