Phillippko commented on code in PR #6636: URL: https://github.com/apache/ignite-3/pull/6636#discussion_r2375541692
########## modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFile.java: ########## @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.raft.storage.segstore; + +import static org.apache.ignite.internal.util.IgniteUtils.atomicMoveFile; +import static org.apache.ignite.internal.util.IgniteUtils.fsyncFile; + +import java.io.IOException; +import java.nio.file.Path; +import org.apache.ignite.internal.logger.IgniteLogger; +import org.apache.ignite.internal.logger.Loggers; + +/** + * Represents an index file create by an {@link IndexFileManager}. + */ +class IndexFile { + private static final IgniteLogger LOG = Loggers.forClass(IndexFile.class); + + private final String name; + + private Path path; + + IndexFile(String name, Path path) { + this.name = name; + this.path = path; + } + + void syncAndMove() throws IOException { + fsyncFile(path); + + path = atomicMoveFile(path, path.getParent().resolve(name), LOG); + } + + /** Returns the name of the index file. */ + String name() { + return name; + } + + /** + * Returns the current path to the index file. + * + * <p>The file is originally created as a temporary file until it gets renamed by calling {@link #syncAndMove}. Review Comment: Should we document that this class or at least this method are not thread-safe? ########## modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileManager.java: ########## @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.raft.storage.segstore; + +import static java.nio.file.StandardOpenOption.CREATE_NEW; +import static java.nio.file.StandardOpenOption.WRITE; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Iterator; +import java.util.Map.Entry; + +/** + * File manager responsible for persisting {@link IndexMemTable}s to index files. + * + * <p>When a checkpoint is triggered on segment file rollover, the current index memtable is scheduled for being saved to a file. The + * format of this file is as follows: + * <pre> + * +--------+---------------------+-----+---------------------+ + * | Header | Payload for group 1 | ... | Payload for group N | + * +--------+---------------------+-----+---------------------+ + * </pre> + * + * <p>Header structure consists of a common meta and a meta for each Raft group present in the memtable. The common meta is as follows: + * <pre> + * +------------------------------------------------------------------+ + * | Common meta | + * +------------------------------------------------------------------+ + * | Magic (4 bytes) | Version (4 bytes) | Number of groups (4 bytes) | + * +------------------------------------------------------------------+ + * </pre> + * + * <p>Raft group meta is as follows: + * <pre> + * +----------------------------------------------------------------------------------------------------------------+-----+ + * | Raft group 1 meta | ... | + * +----------------------------------------------------------------------------------------------------------------+-----+ + * | Group ID (8 bytes) | Flags (4 bytes) | Offset (4 bytes) | First Log Index (8 bytes) | Last Log Index (8 bytes) | ... | + * +----------------------------------------------------------------------------------------------------------------+-----+ + * </pre> + * + * <p>Payload of the index files has the following structure: + * <pre> + * +-------------------------------------------------------------------------+-----+ + * | Payload for group 1 | ... | + * +-------------------------------------------------------------------------+-----+ + * | Segment file offset 1 (4 bytes) | ... | Segment file offset N (4 bytes) | ... | + * +-------------------------------------------------------------------------+-----+ + * </pre> + * + * @see IndexMemTable + * @see SegmentFileManager + */ +class IndexFileManager { + static final int MAGIC_NUMBER = 0xCAFEBABE; + + static final int FORMAT_VERSION = 1; + + private static final String INDEX_FILE_NAME_FORMAT = "index-%010d-%010d.bin"; + + // Magic number + format version + number of Raft groups. + static final int COMMON_META_SIZE = Integer.BYTES + Integer.BYTES + Integer.BYTES; + + // Group ID + flags + file offset + start log index + end log index. + static final int GROUP_META_SIZE = Long.BYTES + Integer.BYTES + Integer.BYTES + Long.BYTES + Long.BYTES; + + static final ByteOrder BYTE_ORDER = ByteOrder.LITTLE_ENDIAN; + + private final Path baseDir; + + /** + * Current index file index (used to generate index file names). + * + * <p>No synchronized access is needed because this field is only updated by the checkpoint thread. + */ + private int curFileIndex = 0; + + IndexFileManager(Path baseDir) { + this.baseDir = baseDir; + } + + /** + * Saves the given index memtable to a file. + * + * <p>The files is saved into a temporary location and it is expected to be later renamed using {@link IndexFile#syncAndMove}. + */ + IndexFile saveIndexMemtable(IndexMemTable indexMemTable) throws IOException { + String fileName = indexFileName(curFileIndex++, 0); + + Path path = baseDir.resolve(fileName + ".tmp"); + + try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path, CREATE_NEW, WRITE))) { + os.write(header(indexMemTable)); + + Iterator<Entry<Long, SegmentInfo>> it = indexMemTable.iterator(); + + while (it.hasNext()) { + os.write(payload(it.next().getValue())); + } + } + + return new IndexFile(fileName, path); + } + + private static byte[] header(IndexMemTable indexMemTable) { + int numGroups = indexMemTable.numGroups(); + + int headerSize = headerSize(numGroups); + + ByteBuffer headerBuffer = ByteBuffer.allocate(headerSize) + .order(BYTE_ORDER) + .putInt(MAGIC_NUMBER) + .putInt(FORMAT_VERSION) + .putInt(numGroups); + + int payloadOffset = headerSize; + + Iterator<Entry<Long, SegmentInfo>> it = indexMemTable.iterator(); + + while (it.hasNext()) { + Entry<Long, SegmentInfo> entry = it.next(); + + long groupId = entry.getKey(); + + SegmentInfo segmentInfo = entry.getValue(); + + headerBuffer + .putLong(groupId) + .putInt(0) // Flags. Review Comment: let's make it a constant, so it is easier to notice that flags are always set to 0 without looking into this method? Should we add a todo with a ticket where we will start to write real flags? ########## modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileManager.java: ########## @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.raft.storage.segstore; + +import static java.nio.file.StandardOpenOption.CREATE_NEW; +import static java.nio.file.StandardOpenOption.WRITE; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Iterator; +import java.util.Map.Entry; + +/** + * File manager responsible for persisting {@link IndexMemTable}s to index files. + * + * <p>When a checkpoint is triggered on segment file rollover, the current index memtable is scheduled for being saved to a file. The + * format of this file is as follows: + * <pre> + * +--------+---------------------+-----+---------------------+ + * | Header | Payload for group 1 | ... | Payload for group N | + * +--------+---------------------+-----+---------------------+ + * </pre> + * + * <p>Header structure consists of a common meta and a meta for each Raft group present in the memtable. The common meta is as follows: + * <pre> + * +------------------------------------------------------------------+ + * | Common meta | + * +------------------------------------------------------------------+ + * | Magic (4 bytes) | Version (4 bytes) | Number of groups (4 bytes) | + * +------------------------------------------------------------------+ + * </pre> + * + * <p>Raft group meta is as follows: + * <pre> + * +----------------------------------------------------------------------------------------------------------------+-----+ + * | Raft group 1 meta | ... | + * +----------------------------------------------------------------------------------------------------------------+-----+ + * | Group ID (8 bytes) | Flags (4 bytes) | Offset (4 bytes) | First Log Index (8 bytes) | Last Log Index (8 bytes) | ... | + * +----------------------------------------------------------------------------------------------------------------+-----+ + * </pre> + * + * <p>Payload of the index files has the following structure: + * <pre> + * +-------------------------------------------------------------------------+-----+ + * | Payload for group 1 | ... | + * +-------------------------------------------------------------------------+-----+ + * | Segment file offset 1 (4 bytes) | ... | Segment file offset N (4 bytes) | ... | + * +-------------------------------------------------------------------------+-----+ + * </pre> + * + * @see IndexMemTable + * @see SegmentFileManager + */ +class IndexFileManager { + static final int MAGIC_NUMBER = 0xCAFEBABE; Review Comment: Should we really use this magic number? Let's use something unique ########## modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFile.java: ########## @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.raft.storage.segstore; + +import static org.apache.ignite.internal.util.IgniteUtils.atomicMoveFile; +import static org.apache.ignite.internal.util.IgniteUtils.fsyncFile; + +import java.io.IOException; +import java.nio.file.Path; +import org.apache.ignite.internal.logger.IgniteLogger; +import org.apache.ignite.internal.logger.Loggers; + +/** + * Represents an index file create by an {@link IndexFileManager}. + */ +class IndexFile { + private static final IgniteLogger LOG = Loggers.forClass(IndexFile.class); + + private final String name; + + private Path path; + + IndexFile(String name, Path path) { + this.name = name; + this.path = path; + } + + void syncAndMove() throws IOException { Review Comment: It is not obvious what "move" means. I assume it is just renamed? Could we name it "syncAndRename"? Or "completeIndexFile", similar to page memory's "completeDeltaFile" ########## modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileManager.java: ########## @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.raft.storage.segstore; + +import static java.nio.file.StandardOpenOption.CREATE_NEW; +import static java.nio.file.StandardOpenOption.WRITE; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Iterator; +import java.util.Map.Entry; + +/** + * File manager responsible for persisting {@link IndexMemTable}s to index files. + * + * <p>When a checkpoint is triggered on segment file rollover, the current index memtable is scheduled for being saved to a file. The + * format of this file is as follows: + * <pre> + * +--------+---------------------+-----+---------------------+ + * | Header | Payload for group 1 | ... | Payload for group N | + * +--------+---------------------+-----+---------------------+ + * </pre> + * + * <p>Header structure consists of a common meta and a meta for each Raft group present in the memtable. The common meta is as follows: + * <pre> + * +------------------------------------------------------------------+ + * | Common meta | + * +------------------------------------------------------------------+ + * | Magic (4 bytes) | Version (4 bytes) | Number of groups (4 bytes) | + * +------------------------------------------------------------------+ + * </pre> + * + * <p>Raft group meta is as follows: + * <pre> + * +----------------------------------------------------------------------------------------------------------------+-----+ + * | Raft group 1 meta | ... | + * +----------------------------------------------------------------------------------------------------------------+-----+ + * | Group ID (8 bytes) | Flags (4 bytes) | Offset (4 bytes) | First Log Index (8 bytes) | Last Log Index (8 bytes) | ... | + * +----------------------------------------------------------------------------------------------------------------+-----+ + * </pre> + * + * <p>Payload of the index files has the following structure: + * <pre> + * +-------------------------------------------------------------------------+-----+ + * | Payload for group 1 | ... | + * +-------------------------------------------------------------------------+-----+ + * | Segment file offset 1 (4 bytes) | ... | Segment file offset N (4 bytes) | ... | + * +-------------------------------------------------------------------------+-----+ + * </pre> + * + * @see IndexMemTable + * @see SegmentFileManager + */ +class IndexFileManager { + static final int MAGIC_NUMBER = 0xCAFEBABE; + + static final int FORMAT_VERSION = 1; + + private static final String INDEX_FILE_NAME_FORMAT = "index-%010d-%010d.bin"; + + // Magic number + format version + number of Raft groups. + static final int COMMON_META_SIZE = Integer.BYTES + Integer.BYTES + Integer.BYTES; + + // Group ID + flags + file offset + start log index + end log index. + static final int GROUP_META_SIZE = Long.BYTES + Integer.BYTES + Integer.BYTES + Long.BYTES + Long.BYTES; + + static final ByteOrder BYTE_ORDER = ByteOrder.LITTLE_ENDIAN; + + private final Path baseDir; + + /** + * Current index file index (used to generate index file names). + * + * <p>No synchronized access is needed because this field is only updated by the checkpoint thread. + */ + private int curFileIndex = 0; + + IndexFileManager(Path baseDir) { + this.baseDir = baseDir; + } + + /** + * Saves the given index memtable to a file. + * + * <p>The files is saved into a temporary location and it is expected to be later renamed using {@link IndexFile#syncAndMove}. Review Comment: ```suggestion * <p>The file is saved into a temporary location and is expected to be later renamed using {@link IndexFile#syncAndMove}. ``` ########## modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexMemTable.java: ########## @@ -64,9 +67,65 @@ int getSegmentFileOffset(long groupId, long logIndex) { return segmentInfo == null ? 0 : segmentInfo.getOffset(logIndex); } + /** + * Returns the number of Raft Group IDs stored in this memtable. + */ + int numGroups() { Review Comment: Documentation doesn't cover that this method and iterator are not safe in relation to writes to memtable (as their size could change), does it? Docs says only about concurrent writes / reads to one groupId ########## modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/IndexFileManager.java: ########## @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.raft.storage.segstore; + +import static java.nio.file.StandardOpenOption.CREATE_NEW; +import static java.nio.file.StandardOpenOption.WRITE; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Iterator; +import java.util.Map.Entry; + +/** + * File manager responsible for persisting {@link IndexMemTable}s to index files. + * + * <p>When a checkpoint is triggered on segment file rollover, the current index memtable is scheduled for being saved to a file. The + * format of this file is as follows: + * <pre> + * +--------+---------------------+-----+---------------------+ + * | Header | Payload for group 1 | ... | Payload for group N | + * +--------+---------------------+-----+---------------------+ + * </pre> + * + * <p>Header structure consists of a common meta and a meta for each Raft group present in the memtable. The common meta is as follows: + * <pre> + * +------------------------------------------------------------------+ + * | Common meta | + * +------------------------------------------------------------------+ + * | Magic (4 bytes) | Version (4 bytes) | Number of groups (4 bytes) | + * +------------------------------------------------------------------+ + * </pre> + * + * <p>Raft group meta is as follows: + * <pre> + * +----------------------------------------------------------------------------------------------------------------+-----+ + * | Raft group 1 meta | ... | + * +----------------------------------------------------------------------------------------------------------------+-----+ + * | Group ID (8 bytes) | Flags (4 bytes) | Offset (4 bytes) | First Log Index (8 bytes) | Last Log Index (8 bytes) | ... | + * +----------------------------------------------------------------------------------------------------------------+-----+ + * </pre> + * + * <p>Payload of the index files has the following structure: + * <pre> + * +-------------------------------------------------------------------------+-----+ + * | Payload for group 1 | ... | + * +-------------------------------------------------------------------------+-----+ + * | Segment file offset 1 (4 bytes) | ... | Segment file offset N (4 bytes) | ... | + * +-------------------------------------------------------------------------+-----+ + * </pre> + * + * @see IndexMemTable + * @see SegmentFileManager + */ +class IndexFileManager { + static final int MAGIC_NUMBER = 0xCAFEBABE; + + static final int FORMAT_VERSION = 1; + + private static final String INDEX_FILE_NAME_FORMAT = "index-%010d-%010d.bin"; + + // Magic number + format version + number of Raft groups. + static final int COMMON_META_SIZE = Integer.BYTES + Integer.BYTES + Integer.BYTES; + + // Group ID + flags + file offset + start log index + end log index. + static final int GROUP_META_SIZE = Long.BYTES + Integer.BYTES + Integer.BYTES + Long.BYTES + Long.BYTES; + + static final ByteOrder BYTE_ORDER = ByteOrder.LITTLE_ENDIAN; + + private final Path baseDir; + + /** + * Current index file index (used to generate index file names). + * + * <p>No synchronized access is needed because this field is only updated by the checkpoint thread. Review Comment: ```suggestion * <p>No synchronized access is needed because this field is only used by the checkpoint thread. ``` ########## modules/raft/src/main/java/org/apache/ignite/internal/raft/storage/segstore/SegmentInfo.java: ########## @@ -122,4 +129,36 @@ int getOffset(long logIndex) { return segmentFileOffsets.get((int) offsetIndex); } + + /** + * Returns the inclusive lower bound of log indices stored in this memtable. + */ + long minLogIndex() { + return logIndexBase; + } + + /** + * Returns the non-inclusive upper bound of log indices stored in this memtable. + */ + long maxLogIndex() { Review Comment: we use both maxLogIndex and endLogIndex / minLogIndex and startLogIndex. Should we choose one term? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
