This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch compaction_review
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/compaction_review by this push:
     new 35d47387bfe fix comments
35d47387bfe is described below

commit 35d47387bfe0646d29943715d20e8c9616cf3889
Author: jt2594838 <[email protected]>
AuthorDate: Tue Jun 11 09:40:41 2024 +0800

    fix comments
---
 .../task/InsertionCrossSpaceCompactionTask.java    |  3 ++-
 .../utils/CrossSpaceCompactionCandidate.java       | 30 +++++++++++++++-------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/execute/task/InsertionCrossSpaceCompactionTask.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/execute/task/InsertionCrossSpaceCompactionTask.java
index 32c92678eef..41ac8ea7111 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/execute/task/InsertionCrossSpaceCompactionTask.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/execute/task/InsertionCrossSpaceCompactionTask.java
@@ -266,9 +266,10 @@ public class InsertionCrossSpaceCompactionTask extends 
AbstractCompactionTask {
         throw new CompactionRecoverException("Can not recover 
InsertionCrossSpaceCompactionTask");
       }
       if (shouldRollback()) {
+        // the target file is not fully generated, remove the partially 
generated result
         rollback();
       } else {
-        // That finishTask() is revoked means
+        //  the target file is fully generated, remove the source file
         finishTask();
       }
     } catch (Exception e) {
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/selector/utils/CrossSpaceCompactionCandidate.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/selector/utils/CrossSpaceCompactionCandidate.java
index de75a3d9d21..8cf29534bba 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/selector/utils/CrossSpaceCompactionCandidate.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/compaction/selector/utils/CrossSpaceCompactionCandidate.java
@@ -63,6 +63,8 @@ public class CrossSpaceCompactionCandidate {
     if (nextUnseqFileIndex >= unseqFiles.size()) {
       return false;
     }
+    // unseq files should be compacted in order to avoid old data overwriting 
new data
+    // if any unseq file fails to be selected, the remaining ones cannot be 
selected either
     return prepareNextSplit();
   }
 
@@ -78,7 +80,8 @@ public class CrossSpaceCompactionCandidate {
     // check one by one. And we cannot skip any device in the unseq file 
because it may lead to
     // omission of target seq file
     if (!unseqFile.hasDetailedDeviceInfo()) {
-      // unseq file resource has been deleted due to TTL and cannot upgrade to 
DEVICE_TIME_INDEX
+      // unseq file resource is unsealed or has been deleted due to TTL and 
cannot upgrade to
+      // DEVICE_TIME_INDEX
       return false;
     }
     for (DeviceInfo unseqDeviceInfo : unseqFile.getDevices()) {
@@ -87,6 +90,12 @@ public class CrossSpaceCompactionCandidate {
       // The `previousSeqFile` means the seqFile which contains the device and 
its endTime is just
       // be smaller than startTime of the device in unseqFile
       TsFileResourceCandidate previousSeqFile = null;
+
+      // if a seq file actually overlaps or effectively overlaps with the 
unseq file, the seq
+      // file should be merged with the unseq file
+      // e.g.: two seq files seq1 and seq2. seq1 ranges [0, 100] and seq2 
ranges [200, 300]. One
+      // unseq file unseq1 ranges [80, 150]. Then the [80, 100] part from 
unseq1 actually
+      // overlaps with seq1, while the [101, 150] part effectively overlaps 
with seq2.
       for (TsFileResourceCandidate seqFile : seqFiles) {
         // If the seqFile may need to be selected but its invalid, the 
selection should be
         // terminated.
@@ -122,14 +131,17 @@ public class CrossSpaceCompactionCandidate {
           }
         }
       }
-      // Most of cases, one unsetFile should have at least one conresponding 
seqFile whose startTime
-      // is larger than unseqFiles's endTime for each device in unseqFile. But 
some scenario will
-      // break this rule such as:
-      // 1. Delete or TTL operation deletes the seqFile
-      // 2. the unseqFile is created by load operation
-      // In these scenario, the data for soem device in unseqFile will be 
written to wrong seqFile,
-      // which lead to failed overlap check after compaction. The following 
changes ensure the
-      // correct seqFile won't be lost in this selection.
+
+      // If: 1.some seq files are removed by deletion or TTL; 2. the unseq 
file is loaded; the
+      // unseq file may have larger timestamps and does not overlap with any 
seqFile.
+      //
+      // For example, previously we have two seqFiles, ranging [0, 100] [200, 
300] respectively.
+      // Then the seqFile of [200, 300] is deleted and an unseqFile of [150, 
180] is written/loaded.
+      // The unseqFile does not overlap with any seqFile and not seqFile has 
larger timestamp
+      // than it, thus the unseqFile cannot select any candidate seqFiles 
throughput the loop above.
+      //
+      // In this case, the unseqFile should be merged with the last seqFile if 
possible.
+      // TODO: let insertionCompaction handle this case
 
       // That this judgement is true indicates `previousSeqFile` is 
unnecessary.
       if (atLeastOneSeqFileSelected || previousSeqFile == null) {

Reply via email to