vinothchandar commented on code in PR #9209: URL: https://github.com/apache/hudi/pull/9209#discussion_r1280082204
########## hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java: ########## @@ -18,75 +18,125 @@ package org.apache.hudi.common.table.timeline; -import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.avro.model.HoodieArchivedMetaEntry; -import org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan; -import org.apache.hudi.common.fs.HoodieWrapperFileSystem; -import org.apache.hudi.common.model.HoodieLogFile; -import org.apache.hudi.common.model.HoodiePartitionMetadata; -import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.avro.model.HoodieArchivedInstant; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.log.HoodieLogFormat; -import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; -import org.apache.hudi.common.table.log.block.HoodieLogBlock; -import org.apache.hudi.common.util.collection.ClosableIterator; +import org.apache.hudi.common.util.ArchivedInstantReadSchemas; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.collection.ClosableIterator; +import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.io.storage.HoodieAvroParquetReader; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; +import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.IOException; import java.io.Serializable; -import 
java.nio.charset.StandardCharsets; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.Spliterator; -import java.util.Spliterators; +import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.stream.StreamSupport; +import java.util.stream.Collectors; /** - * Represents the Archived Timeline for the Hoodie table. Instants for the last 12 hours (configurable) is in the - * ActiveTimeline and the rest are in ArchivedTimeline. - * <p> - * </p> - * Instants are read from the archive file during initialization and never refreshed. To refresh, clients need to call - * reload() - * <p> - * </p> - * This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized. + * Represents the Archived Timeline for the Hoodie table. + * + * <p>After several instants are accumulated as a batch on the active timeline, they would be archived as a parquet file into the archived timeline. + * In general the archived timeline is comprised with parquet files with LSM style file layout. Each new operation to the archived timeline generates + * a new snapshot version. Theoretically, there could be multiple snapshot versions on the archived timeline. + * + * <p><h2>The Archived Timeline Layout</h2> + * + * <pre> + * t111, t112 ... t120 ... -> + * \ / + * \ / + * | + * V + * t111_t120_0.parquet, t101_t110_0.parquet,... t11_t20_0.parquet L0 + * \ / + * \ / + * | + * V + * t11_t100_1.parquet L1 + * + * manifest_1, manifest_2, ... manifest_12 + * | | | + * V V V + * _version_1, _version_2, ... 
_version_12 + * </pre> + * + * <p><h2>The LSM Tree Compaction</h2> + * Use the universal compaction strategy, that is: when N (10 by default) parquet files exist in the current layer, they are merged and flushed as a large file in the next layer. + * There is no limit on the number of layers. Assuming there are 10 instants for each file in L0, there could be 100 instants per file in L1, + * so 3000 instants could be represented as 3 parquet files in L2, which is pretty fast to read if we use concurrent reads. + * + * <p>The benchmark shows that reading 1000 instants costs about 10 ms. + * + * <p><h2>The Archiver &amp; Reader Snapshot Isolation</h2> + * + * <p>To provide snapshot isolation between writes to and reads from the archived timeline, we add two kinds of metadata files for the LSM tree version management: + * <ol> + * <li>Manifest file: Each new file in layer 0 or each compaction generates a new manifest file, which records the valid file handles of the latest snapshot;</li> + * <li>Version file: A version file is generated right after a complete manifest file is formed.</li> + * </ol> + * + * <p><h2>The Reader Workflow</h2> + * <ul> + * <li>read the latest version;</li> Review Comment: I agree with the version file and it points to latest file list/manifest -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org