This is an automated email from the ASF dual-hosted git repository.

jackietien pushed a commit to branch mergemaster0808
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit 502819fe277ae02f94db6333d68f8e4a91d2a316
Author: Jiang Tian <[email protected]>
AuthorDate: Fri Aug 2 15:23:26 2024 +0800

    Refactor TsFileValidationTool with abstracting the sequential scan process 
(#13066)
    
    * Abstract with TsFileSequenceScan
    
    * update lastPageEndTime
    
    * add test
    
    * allow providing a directory with only TsFiles
    add license
    
    * Fix initialization
    
    * Rename method
    
    * Remove redundant line
    
    * Fix file generation
    
    * add ignoreFileOverlap
    
    * Use @Before and @After
    
    (cherry picked from commit b9ed555a3aa214749ee1fa3c66973a5509209ac7)
---
 .../iotdb/db/tools/utils/TsFileSequenceScan.java   | 168 +++++++++
 .../iotdb/db/tools/utils/TsFileValidationScan.java | 397 +++++++++++++++++++
 .../db/tools/validate/TsFileValidationTool.java    | 419 ++-------------------
 .../compaction/AbstractCompactionTest.java         |   4 +-
 .../FastCrossCompactionPerformerTest.java          |   4 +-
 ...eCompactionWithFastPerformerValidationTest.java |   2 +-
 ...actionWithReadPointPerformerValidationTest.java |   2 +-
 .../iotdb/db/tools/TsFileValidationScanTest.java   | 203 ++++++++++
 8 files changed, 808 insertions(+), 391 deletions(-)

diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/utils/TsFileSequenceScan.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/utils/TsFileSequenceScan.java
new file mode 100644
index 00000000000..d9ea636948f
--- /dev/null
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/utils/TsFileSequenceScan.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iotdb.db.tools.utils;
+
+import org.apache.tsfile.common.conf.TSFileConfig;
+import org.apache.tsfile.file.MetaMarker;
+import org.apache.tsfile.file.header.ChunkGroupHeader;
+import org.apache.tsfile.file.header.ChunkHeader;
+import org.apache.tsfile.file.header.PageHeader;
+import org.apache.tsfile.file.metadata.IDeviceID;
+import org.apache.tsfile.read.TsFileSequenceReader;
+import org.apache.tsfile.utils.Pair;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.nio.ByteBuffer;
+
+/**
+ * Skeleton for one sequential pass over the data section of a TsFile.
+ *
+ * <p>{@link #scanTsFile(File)} opens the file, reads one marker byte at a time until {@code
+ * MetaMarker.SEPARATOR} is met and dispatches to the {@code on*} hooks declared here. Subclasses
+ * implement the three page hooks and {@link #onException(Throwable)}; the remaining hooks may be
+ * overridden to maintain per-file, per-chunk-group or per-chunk state.
+ *
+ * <p>All scan state is kept in unsynchronized instance fields, so a single instance must not be
+ * used to scan several files at the same time.
+ */
+public abstract class TsFileSequenceScan {
+
+  /** Optional second sink for {@link #printBoth(String)}; {@code null} means stdout only. */
+  protected PrintWriter printWriter = null;
+
+  /** Reader of the file being scanned; assigned by {@link #scanTsFile(File)}. */
+  protected TsFileSequenceReader reader;
+
+  /** The marker byte most recently read by {@link #scanTsFile(File)}. */
+  protected byte marker;
+
+  /** The file being scanned (or the last one scanned). */
+  protected File file;
+
+  /** Device taken from the most recently read chunk group header. */
+  protected IDeviceID currDeviceID;
+
+  /** Measurement taken from the most recently read non-empty chunk header. */
+  protected String currMeasurementID;
+
+  /** {@code (currDeviceID, currMeasurementID)} of the most recently read non-empty chunk. */
+  protected Pair<IDeviceID, String> currTimeseriesID;
+
+  /** True when the current chunk was introduced by one of the {@code ONLY_ONE_PAGE_*} markers. */
+  protected boolean currChunkOnePage;
+
+  public TsFileSequenceScan() {}
+
+  /**
+   * Hook invoked once the reader is open and before the first marker is read. The default
+   * implementation moves the reader past the head magic string plus one more byte and remembers
+   * {@code file}.
+   *
+   * @param file the file that has just been opened
+   * @return true if the file should be scanned
+   * @throws IOException if the reader cannot be repositioned
+   */
+  protected boolean onFileOpen(File file) throws IOException {
+    // skip head magic string (the extra byte is presumably the version number -- TODO confirm)
+    reader.position((long) TSFileConfig.MAGIC_STRING.getBytes().length + 1);
+    this.file = file;
+    return true;
+  }
+
+  /**
+   * Hook invoked after the separator marker has been reached. Does nothing by default.
+   *
+   * @throws IOException declared for the benefit of overriding implementations
+   */
+  protected void onFileEnd() throws IOException {}
+
+  /**
+   * Reads the header of the chunk announced by {@link #marker} and then feeds every page of that
+   * chunk to {@code pageVisitor}. A chunk whose data size is zero is skipped without touching
+   * {@link #currMeasurementID} or {@link #currTimeseriesID}.
+   *
+   * @param pageVisitor callback that receives each page of the chunk
+   * @throws IOException if the header or a page cannot be read, or the visitor fails
+   */
+  protected void onChunk(PageVisitor pageVisitor) throws IOException {
+    ChunkHeader chunkHeader = reader.readChunkHeader(marker);
+    int remainingBytes = chunkHeader.getDataSize();
+    if (remainingBytes == 0) {
+      // empty value chunk
+      return;
+    }
+    currMeasurementID = chunkHeader.getMeasurementID();
+    currTimeseriesID = new Pair<>(currDeviceID, currMeasurementID);
+
+    // true when the low six bits of the chunk type equal CHUNK_HEADER; it is forwarded as the
+    // second argument of readPageHeader and does not change while the chunk is being read
+    boolean lowBitsAreChunkHeader =
+        (chunkHeader.getChunkType() & 0x3F) == MetaMarker.CHUNK_HEADER;
+
+    while (remainingBytes > 0) {
+      PageHeader pageHeader =
+          reader.readPageHeader(chunkHeader.getDataType(), lowBitsAreChunkHeader);
+      ByteBuffer pageData = reader.readPage(pageHeader, chunkHeader.getCompressionType());
+      pageVisitor.onPage(pageHeader, pageData, chunkHeader);
+      remainingBytes -= pageHeader.getSerializedPageSize();
+    }
+  }
+
+  /**
+   * Reads the chunk group header at the current position and makes its device the current one.
+   *
+   * @throws IOException if the header cannot be read
+   */
+  protected void onChunkGroup() throws IOException {
+    ChunkGroupHeader chunkGroupHeader = reader.readChunkGroupHeader();
+    currDeviceID = chunkGroupHeader.getDeviceID();
+  }
+
+  /** Receives each page of a chunk introduced by a {@code *TIME_CHUNK_HEADER} marker. */
+  protected abstract void onTimePage(
+      PageHeader pageHeader, ByteBuffer pageData, ChunkHeader chunkHeader) throws IOException;
+
+  /** Receives each page of a chunk introduced by a {@code *VALUE_CHUNK_HEADER} marker. */
+  protected abstract void onValuePage(
+      PageHeader pageHeader, ByteBuffer pageData, ChunkHeader chunkHeader) throws IOException;
+
+  /** Receives each page of a chunk introduced by a plain {@code *CHUNK_HEADER} marker. */
+  protected abstract void onNonAlignedPage(
+      PageHeader pageHeader, ByteBuffer pageData, ChunkHeader chunkHeader) throws IOException;
+
+  /** Callback used by {@link #onChunk(PageVisitor)} to hand over one page at a time. */
+  protected interface PageVisitor {
+    void onPage(PageHeader pageHeader, ByteBuffer pageData, ChunkHeader chunkHeader)
+        throws IOException;
+  }
+
+  /**
+   * Prints {@code msg} to stdout and, when a print writer has been set, to that writer as well.
+   *
+   * @param msg the line to print
+   */
+  @SuppressWarnings("java:S106")
+  protected void printBoth(String msg) {
+    System.out.println(msg);
+    if (printWriter != null) {
+      printWriter.println(msg);
+    }
+  }
+
+  /**
+   * Receives anything thrown while a file is opened or scanned. {@link #file} already refers to
+   * the file passed to {@link #scanTsFile(File)} when this is called.
+   *
+   * @param t the failure
+   */
+  protected abstract void onException(Throwable t);
+
+  /**
+   * Scans one TsFile from the first marker up to the separator marker, dispatching to the hooks
+   * of this class. Nothing is thrown to the caller: every {@link Throwable} is routed to {@link
+   * #onException(Throwable)}.
+   *
+   * @param tsFile the file to scan
+   */
+  @SuppressWarnings("java:S1181")
+  public void scanTsFile(File tsFile) {
+    // Publish the file before the reader is opened. If the reader constructor throws (e.g. on a
+    // truncated file), onException must see the file that failed rather than null or the file
+    // of the previous scan.
+    this.file = tsFile;
+    try (TsFileSequenceReader r = new TsFileSequenceReader(tsFile.getAbsolutePath())) {
+      this.reader = r;
+      boolean shouldScan = onFileOpen(tsFile);
+      if (!shouldScan) {
+        return;
+      }
+      // start reading data points in sequence
+      while ((marker = reader.readMarker()) != MetaMarker.SEPARATOR) {
+        switch (marker) {
+          case MetaMarker.ONLY_ONE_PAGE_CHUNK_HEADER:
+            currChunkOnePage = true;
+            onChunk(this::onNonAlignedPage);
+            break;
+          case MetaMarker.CHUNK_HEADER:
+            currChunkOnePage = false;
+            onChunk(this::onNonAlignedPage);
+            break;
+          case MetaMarker.ONLY_ONE_PAGE_TIME_CHUNK_HEADER:
+            currChunkOnePage = true;
+            onChunk(this::onTimePage);
+            break;
+          case MetaMarker.TIME_CHUNK_HEADER:
+            currChunkOnePage = false;
+            onChunk(this::onTimePage);
+            break;
+          case MetaMarker.ONLY_ONE_PAGE_VALUE_CHUNK_HEADER:
+            currChunkOnePage = true;
+            onChunk(this::onValuePage);
+            break;
+          case MetaMarker.VALUE_CHUNK_HEADER:
+            currChunkOnePage = false;
+            onChunk(this::onValuePage);
+            break;
+          case MetaMarker.CHUNK_GROUP_HEADER:
+            onChunkGroup();
+            break;
+          case MetaMarker.OPERATION_INDEX_RANGE:
+            reader.readPlanIndex();
+            break;
+          default:
+            MetaMarker.handleUnexpectedMarker(marker);
+        }
+      }
+
+      onFileEnd();
+    } catch (Throwable e) {
+      onException(e);
+    }
+  }
+
+  /**
+   * Sets the additional sink used by {@link #printBoth(String)}.
+   *
+   * @param printWriter the writer to mirror output to, or {@code null} for stdout only
+   */
+  public void setPrintWriter(PrintWriter printWriter) {
+    this.printWriter = printWriter;
+  }
+}
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/utils/TsFileValidationScan.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/utils/TsFileValidationScan.java
new file mode 100644
index 00000000000..1e6c6e2268e
--- /dev/null
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/utils/TsFileValidationScan.java
@@ -0,0 +1,397 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iotdb.db.tools.utils;
+
+import org.apache.iotdb.commons.utils.TestOnly;
+import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResource;
+
+import org.apache.tsfile.common.conf.TSFileDescriptor;
+import org.apache.tsfile.encoding.decoder.Decoder;
+import org.apache.tsfile.enums.TSDataType;
+import org.apache.tsfile.file.header.ChunkHeader;
+import org.apache.tsfile.file.header.PageHeader;
+import org.apache.tsfile.file.metadata.IDeviceID;
+import org.apache.tsfile.file.metadata.PlainDeviceID;
+import org.apache.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.tsfile.read.common.BatchData;
+import org.apache.tsfile.read.reader.page.PageReader;
+import org.apache.tsfile.read.reader.page.TimePageReader;
+import org.apache.tsfile.utils.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A {@link TsFileSequenceScan} that reports "bad" sequence TsFiles.
+ *
+ * <p>A file is counted as bad when, for some timeseries, a timestamp is not strictly greater than
+ * the largest timestamp already seen for that series (in this file or in a file scanned earlier
+ * by the same instance), when the start time of a device in the file's resource is not greater
+ * than the end time recorded for that device from an earlier file, or when scanning the file
+ * throws. Files without a {@code .resource} companion are skipped.
+ *
+ * <p>The current file is reported through {@link #printBoth(String)}; earlier files that turn
+ * out to be overlapped are collected in {@link #getPreviousBadFileMsgs()}.
+ */
+@SuppressWarnings("java:S2175")
+public class TsFileValidationScan extends TsFileSequenceScan {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(TsFileValidationScan.class);
+  private static final String STR_FIND_BAD_FILE = "-- Find the bad file ";
+  private static final String STR_OVERLAP_LATTER_FILE = ", overlap with latter files.";
+  private static final String STR_OVERLAP_BETWEEN_FILE =
+      " overlap between files, with previous file ";
+  private static final String STR_TIMESERIES = "-------- Timeseries ";
+  private static final String FILE_NON_EXIST = "";
+  private static final IDeviceID EMPTY_DEVICE_ID = new PlainDeviceID("");
+
+  protected boolean printDetails;
+  protected boolean ignoreFileOverlap = false;
+
+  // chunk/page context
+  protected long currentChunkEndTime;
+  protected long currentPageEndTime;
+  protected long lastPageEndTime;
+
+  // single file context
+  protected TsFileResource resource;
+  // deviceID -> has checked overlap or not
+  protected Map<IDeviceID, Boolean> hasCheckedDeviceOverlap;
+  // (deviceID, measurementID) -> which kinds of overlap were already printed for this file
+  protected Map<Pair<IDeviceID, String>, SeriesOverlapPrintInfo> hasSeriesPrintedDetails;
+  // (deviceID, measurementID) -> lastChunkEndTime in current file
+  // (the field name is a historical typo of "lastChunkEndTime"; kept because it is protected)
+  protected Map<Pair<IDeviceID, String>, Long> lashChunkEndTime;
+
+  // global context
+  // (deviceID, measurementID) -> <fileName, [lastTime, endTimeInLastFile]>
+  protected Map<Pair<IDeviceID, String>, FileLastTimeInfo> timeseriesLastTimeMap = new HashMap<>();
+  // deviceID -> <fileName, endTime>, the endTime of device in the last seq file
+  protected Map<IDeviceID, FileLastTimeInfo> deviceEndTime = new HashMap<>();
+  // fileName -> isBadFile
+  protected Map<String, Boolean> isBadFileMap = new HashMap<>();
+  protected int badFileNum;
+  protected List<String> previousBadFileMsgs = new ArrayList<>();
+
+  public TsFileValidationScan() {
+    super();
+  }
+
+  /**
+   * Resets the per-file context and loads the {@code .resource} companion of {@code file}.
+   *
+   * @param file the TsFile that has just been opened
+   * @return false when the resource file does not exist (the TsFile is skipped), true otherwise
+   * @throws IOException if the reader cannot be positioned or the resource cannot be read
+   */
+  @Override
+  protected boolean onFileOpen(File file) throws IOException {
+    super.onFileOpen(file);
+
+    hasCheckedDeviceOverlap = new HashMap<>();
+    currDeviceID = EMPTY_DEVICE_ID;
+    hasSeriesPrintedDetails = new HashMap<>();
+    lashChunkEndTime = new HashMap<>();
+
+    resource = new TsFileResource(file);
+    String resourcePath = file.getAbsolutePath() + TsFileResource.RESOURCE_SUFFIX;
+    if (!new File(resourcePath).exists()) {
+      // resource file does not exist, tsfile may not be flushed yet
+      LOGGER.warn("{} does not exist ,skip it.", resourcePath);
+      return false;
+    }
+    resource.deserialize();
+    isBadFileMap.put(file.getName(), false);
+    return true;
+  }
+
+  /**
+   * Publishes, for every timeseries met in the current file, the end time of its last chunk into
+   * the global map, then drops the global context when file overlap is ignored.
+   *
+   * <p>NOTE(review): the end time of the last device of a file is never stored in {@link
+   * #deviceEndTime}, because {@link #onChunkGroup()} only records a device when the next chunk
+   * group header is read. This was also the behavior before the refactoring; confirm whether it
+   * is intended.
+   */
+  @Override
+  protected void onFileEnd() {
+    // record the end time of each timeseries in current file
+    for (Map.Entry<Pair<IDeviceID, String>, Long> entry : lashChunkEndTime.entrySet()) {
+      // key the global map by the series of this entry; using currTimeseriesID here would fold
+      // every series of the file into the one that happened to be read last
+      FileLastTimeInfo seriesInfo =
+          timeseriesLastTimeMap.computeIfAbsent(entry.getKey(), id -> new FileLastTimeInfo());
+      if (seriesInfo.endTimeInLastFile <= entry.getValue()) {
+        seriesInfo.endTimeInLastFile = entry.getValue();
+        seriesInfo.lastFileName = file.getName();
+      }
+    }
+    if (ignoreFileOverlap) {
+      reset(false);
+    }
+  }
+
+  /**
+   * Records the end time of the device whose chunk group has just ended, switches to the device
+   * of the new chunk group and checks that device once per file against the end time recorded
+   * from earlier files.
+   *
+   * @throws IOException if the chunk group header cannot be read
+   */
+  @Override
+  protected void onChunkGroup() throws IOException {
+    if (!currDeviceID.equals(EMPTY_DEVICE_ID)) {
+      // record the end time of last device in current file
+      FileLastTimeInfo finishedDeviceInfo =
+          deviceEndTime.computeIfAbsent(currDeviceID, k -> new FileLastTimeInfo());
+      long endTime = resource.getEndTime(currDeviceID);
+      // compare with the stored end time: lastTime is never written for device entries, so
+      // comparing with it would let a smaller end time overwrite a larger one
+      if (endTime > finishedDeviceInfo.endTimeInLastFile) {
+        finishedDeviceInfo.lastFileName = file.getName();
+        finishedDeviceInfo.endTimeInLastFile = endTime;
+      }
+    }
+
+    super.onChunkGroup();
+
+    FileLastTimeInfo deviceInfo =
+        deviceEndTime.computeIfAbsent(currDeviceID, k -> new FileLastTimeInfo());
+    boolean alreadyChecked =
+        Boolean.TRUE.equals(hasCheckedDeviceOverlap.getOrDefault(currDeviceID, false));
+    if (!alreadyChecked && resource.getStartTime(currDeviceID) <= deviceInfo.endTimeInLastFile) {
+      // device overlap, find bad file
+      recordDeviceOverlap(deviceInfo.lastFileName);
+    }
+
+    hasCheckedDeviceOverlap.put(currDeviceID, true);
+  }
+
+  /**
+   * Marks both the earlier file and the current file as bad because the current device overlaps
+   * between them.
+   *
+   * @param badFileName name of the earlier file that holds the overlapped data of the device
+   */
+  private void recordDeviceOverlap(String badFileName) {
+    // add previous bad file msg to list
+    markBetweenFileOverlap(badFileName);
+    // print current file
+    reportCurrentFileAsBad(STR_FIND_BAD_FILE, "");
+    if (printDetails) {
+      printBoth("---- Device " + currDeviceID + STR_OVERLAP_BETWEEN_FILE + badFileName);
+    }
+  }
+
+  /**
+   * Reads all timestamps of a time page of an aligned chunk and checks each of them.
+   *
+   * @throws IOException if the page cannot be decoded
+   */
+  @Override
+  protected void onTimePage(PageHeader pageHeader, ByteBuffer pageData, ChunkHeader chunkHeader)
+      throws IOException {
+    // Time Chunk
+    TimePageReader timePageReader =
+        new TimePageReader(pageHeader, pageData, newDefaultTimeDecoder());
+    long[] timeBatch = timePageReader.getNextTimeBatch();
+    FileLastTimeInfo seriesInfo =
+        timeseriesLastTimeMap.computeIfAbsent(currTimeseriesID, id -> new FileLastTimeInfo());
+    for (long timestamp : timeBatch) {
+      onTimeStamp(timestamp, seriesInfo);
+    }
+    lastPageEndTime = Math.max(lastPageEndTime, currentPageEndTime);
+  }
+
+  /** Marks the current file as bad and prints it, unless it has been marked already. */
+  protected void markInFileOverlap() {
+    reportCurrentFileAsBad(STR_FIND_BAD_FILE, "");
+  }
+
+  /**
+   * Marks an earlier file as bad, unless it has been marked already. The message is queued in
+   * {@link #previousBadFileMsgs} instead of being printed; the path is built from the parent
+   * directory of the current file.
+   *
+   * @param overLapFile name of the earlier file
+   */
+  private void markBetweenFileOverlap(String overLapFile) {
+    // overlap between file, then add previous bad file path to list
+    if (Boolean.TRUE.equals(isBadFileMap.getOrDefault(overLapFile, false))) {
+      return;
+    }
+    String overlappedPath = file.getParentFile().getAbsolutePath() + File.separator + overLapFile;
+    if (printDetails) {
+      previousBadFileMsgs.add(STR_FIND_BAD_FILE + overlappedPath + STR_OVERLAP_LATTER_FILE);
+    } else {
+      previousBadFileMsgs.add(overlappedPath);
+    }
+    isBadFileMap.put(overLapFile, true);
+    badFileNum++;
+  }
+
+  /**
+   * Marks the current file as bad, unless it has been marked already, and prints either the
+   * detailed message {@code detailPrefix + path + detailSuffix} or, without details, the bare
+   * absolute path.
+   */
+  private void reportCurrentFileAsBad(String detailPrefix, String detailSuffix) {
+    if (Boolean.TRUE.equals(isBadFileMap.get(file.getName()))) {
+      return;
+    }
+    String path = file.getAbsolutePath();
+    printBoth(printDetails ? detailPrefix + path + detailSuffix : path);
+    isBadFileMap.put(file.getName(), true);
+    badFileNum++;
+  }
+
+  /**
+   * Prints, at most once per series, file and kind, which kind of overlap an out-of-order
+   * timestamp belongs to: between files, between chunks, between pages or within one page.
+   *
+   * @param timestamp the offending timestamp
+   * @param seriesLastTime what is known about the current series from earlier data
+   */
+  protected void printOverlapDetails(long timestamp, FileLastTimeInfo seriesLastTime) {
+    SeriesOverlapPrintInfo printed =
+        hasSeriesPrintedDetails.computeIfAbsent(
+            currTimeseriesID, k -> new SeriesOverlapPrintInfo());
+    if (timestamp <= seriesLastTime.endTimeInLastFile) {
+      if (!printed.betweenFileOverlapPrinted) {
+        printBoth(
+            STR_TIMESERIES
+                + currTimeseriesID
+                + STR_OVERLAP_BETWEEN_FILE
+                + seriesLastTime.lastFileName);
+        printed.betweenFileOverlapPrinted = true;
+      }
+    } else if (timestamp <= lashChunkEndTime.getOrDefault(currTimeseriesID, Long.MIN_VALUE)) {
+      if (!printed.chunkOverlapPrinted) {
+        printBoth(STR_TIMESERIES + currTimeseriesID + " overlap between chunks");
+        printed.chunkOverlapPrinted = true;
+      }
+    } else if (timestamp <= lastPageEndTime) {
+      if (!printed.crossPageOverlapPrinted) {
+        printBoth(STR_TIMESERIES + currTimeseriesID + " overlap between pages");
+        printed.crossPageOverlapPrinted = true;
+      }
+    } else {
+      if (!printed.inPagePOverlapPrinted) {
+        printBoth(STR_TIMESERIES + currTimeseriesID + " overlap within one page");
+        printed.inPagePOverlapPrinted = true;
+      }
+    }
+  }
+
+  @Override
+  protected void onValuePage(PageHeader pageHeader, ByteBuffer pageData, ChunkHeader chunkHeader) {
+    // Value Page, skip it
+  }
+
+  /**
+   * Decodes a page of a non-aligned chunk and checks every timestamp in it.
+   *
+   * @throws IOException if the page cannot be decoded
+   */
+  @Override
+  protected void onNonAlignedPage(
+      PageHeader pageHeader, ByteBuffer pageData, ChunkHeader chunkHeader) throws IOException {
+    // NonAligned Chunk
+    Decoder valueDecoder =
+        Decoder.getDecoderByType(chunkHeader.getEncodingType(), chunkHeader.getDataType());
+    PageReader pageReader =
+        new PageReader(
+            pageData, chunkHeader.getDataType(), valueDecoder, newDefaultTimeDecoder());
+    BatchData batchData = pageReader.getAllSatisfiedPageData();
+    FileLastTimeInfo seriesInfo =
+        timeseriesLastTimeMap.computeIfAbsent(currTimeseriesID, id -> new FileLastTimeInfo());
+    while (batchData.hasCurrent()) {
+      onTimeStamp(batchData.currentTime(), seriesInfo);
+      batchData.next();
+    }
+    lastPageEndTime = Math.max(lastPageEndTime, currentPageEndTime);
+  }
+
+  /** Creates a decoder for timestamps using the time encoding of the TsFile configuration. */
+  private static Decoder newDefaultTimeDecoder() {
+    return Decoder.getDecoderByType(
+        TSEncoding.valueOf(TSFileDescriptor.getInstance().getConfig().getTimeEncoder()),
+        TSDataType.INT64);
+  }
+
+  /**
+   * Checks one timestamp of the current series. A timestamp that is not strictly greater than
+   * the largest one seen so far marks the file (and possibly an earlier file) as bad; otherwise
+   * it becomes the new largest timestamp of the series, page and chunk.
+   */
+  private void onTimeStamp(long timestamp, FileLastTimeInfo seriesInfo) {
+    if (timestamp > seriesInfo.lastTime) {
+      seriesInfo.lastTime = timestamp;
+      currentPageEndTime = timestamp;
+      currentChunkEndTime = timestamp;
+      return;
+    }
+
+    // find bad file
+    markInFileOverlap();
+
+    if (timestamp <= seriesInfo.endTimeInLastFile) {
+      markBetweenFileOverlap(seriesInfo.lastFileName);
+    }
+
+    if (printDetails) {
+      printOverlapDetails(timestamp, seriesInfo);
+    }
+  }
+
+  /**
+   * Scans one chunk and afterwards remembers, per series, the largest chunk end time seen in
+   * the current file.
+   *
+   * @throws IOException if the chunk cannot be read
+   */
+  @Override
+  protected void onChunk(PageVisitor pageVisitor) throws IOException {
+    currentChunkEndTime = Long.MIN_VALUE;
+    lastPageEndTime = Long.MIN_VALUE;
+
+    super.onChunk(pageVisitor);
+
+    long knownEndTime = lashChunkEndTime.getOrDefault(currTimeseriesID, Long.MIN_VALUE);
+    lashChunkEndTime.put(currTimeseriesID, Math.max(knownEndTime, currentChunkEndTime));
+  }
+
+  /**
+   * @return the live list of messages (or bare paths) of earlier files found to be overlapped
+   */
+  public List<String> getPreviousBadFileMsgs() {
+    return previousBadFileMsgs;
+  }
+
+  /** Logs the failure and marks the current file as bad, unless it has been marked already. */
+  @Override
+  protected void onException(Throwable t) {
+    LOGGER.error("Meet errors in reading file {} , skip it.", file.getAbsolutePath(), t);
+    reportCurrentFileAsBad("-- Meet errors in reading file ", ", tsfile may be corrupted.");
+  }
+
+  public int getBadFileNum() {
+    return badFileNum;
+  }
+
+  @TestOnly
+  public void setBadFileNum(int badFileNum) {
+    this.badFileNum = badFileNum;
+  }
+
+  /**
+   * Clears the cross-file context (series last times, device end times and the bad-file flags).
+   * The queued messages of {@link #getPreviousBadFileMsgs()} are left untouched.
+   *
+   * @param resetBadFileNum whether the bad file counter is set back to zero as well
+   */
+  public void reset(boolean resetBadFileNum) {
+    if (resetBadFileNum) {
+      badFileNum = 0;
+    }
+    timeseriesLastTimeMap.clear();
+    deviceEndTime.clear();
+    isBadFileMap.clear();
+  }
+
+  /** What is known about a series or a device from the data scanned so far. */
+  protected static class FileLastTimeInfo {
+
+    public FileLastTimeInfo() {
+      lastFileName = FILE_NON_EXIST;
+      lastTime = Long.MIN_VALUE;
+      endTimeInLastFile = Long.MIN_VALUE;
+    }
+
+    // name of the file endTimeInLastFile was taken from
+    private String lastFileName;
+    // largest timestamp accepted so far; only maintained for series entries
+    private long lastTime;
+    // end time recorded when an earlier file (or chunk group, for devices) was finished
+    private long endTimeInLastFile;
+  }
+
+  /** Remembers which kinds of overlap were already printed for one series in one file. */
+  protected static class SeriesOverlapPrintInfo {
+
+    private boolean betweenFileOverlapPrinted;
+    private boolean chunkOverlapPrinted;
+    private boolean crossPageOverlapPrinted;
+    private boolean inPagePOverlapPrinted;
+  }
+
+  public void setPrintDetails(boolean printDetails) {
+    this.printDetails = printDetails;
+  }
+
+  public void setIgnoreFileOverlap(boolean ignoreFileOverlap) {
+    this.ignoreFileOverlap = ignoreFileOverlap;
+  }
+}
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/validate/TsFileValidationTool.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/validate/TsFileValidationTool.java
index 0d7b3d6e0c5..0dae81fa653 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/validate/TsFileValidationTool.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/tools/validate/TsFileValidationTool.java
@@ -18,24 +18,8 @@
  */
 package org.apache.iotdb.db.tools.validate;
 
-import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResource;
+import org.apache.iotdb.db.tools.utils.TsFileValidationScan;
 
-import org.apache.tsfile.common.conf.TSFileConfig;
-import org.apache.tsfile.common.conf.TSFileDescriptor;
-import org.apache.tsfile.common.constant.TsFileConstant;
-import org.apache.tsfile.encoding.decoder.Decoder;
-import org.apache.tsfile.enums.TSDataType;
-import org.apache.tsfile.file.MetaMarker;
-import org.apache.tsfile.file.header.ChunkGroupHeader;
-import org.apache.tsfile.file.header.ChunkHeader;
-import org.apache.tsfile.file.header.PageHeader;
-import org.apache.tsfile.file.metadata.IDeviceID;
-import org.apache.tsfile.file.metadata.enums.TSEncoding;
-import org.apache.tsfile.read.TsFileSequenceReader;
-import org.apache.tsfile.read.common.BatchData;
-import org.apache.tsfile.read.reader.page.PageReader;
-import org.apache.tsfile.read.reader.page.TimePageReader;
-import org.apache.tsfile.utils.Pair;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -43,18 +27,14 @@ import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.PrintWriter;
-import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.Objects;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
-import static org.apache.iotdb.commons.conf.IoTDBConstant.PATH_SEPARATOR;
 import static org.apache.tsfile.common.constant.TsFileConstant.TSFILE_SUFFIX;
 
 /**
@@ -77,6 +57,7 @@ public class TsFileValidationTool {
 
   // print to local file or not
   private static boolean printToFile = false;
+  private static boolean ignoreFileOverlap = false;
 
   private static String outFilePath = "TsFile_validation_view.txt";
 
@@ -85,16 +66,8 @@ public class TsFileValidationTool {
   private static final Logger logger = 
LoggerFactory.getLogger(TsFileValidationTool.class);
   private static final List<File> seqDataDirList = new ArrayList<>();
   private static final List<File> fileList = new ArrayList<>();
-  public static int badFileNum = 0;
 
-  // measurementID -> <fileName, [lastTime, endTimeInLastFile]>
-  private static final Map<String, Pair<String, long[]>> measurementLastTime = 
new HashMap<>();
-
-  // deviceID -> <fileName, endTime>, the endTime of device in the last seq 
file
-  private static final Map<IDeviceID, Pair<String, Long>> deviceEndTime = new 
HashMap<>();
-
-  // fileName -> isBadFile
-  private static final Map<String, Boolean> isBadFileMap = new HashMap<>();
+  private static TsFileValidationScan validationScan = new 
TsFileValidationScan();
 
   /**
    * The form of param is: [path of data dir or tsfile] [-pd = print details 
or not] [-f = path of
@@ -108,14 +81,17 @@ public class TsFileValidationTool {
     }
     if (printToFile) {
       pw = new PrintWriter(new FileWriter(outFilePath));
+      validationScan.setPrintWriter(pw);
     }
     if (printDetails) {
       printBoth("Start checking seq files ...");
+      validationScan.setPrintDetails(printDetails);
     }
+    validationScan.setIgnoreFileOverlap(ignoreFileOverlap);
 
-    // check tsfile, which will only check for correctness inside a single 
tsfile
+    // check tsfile
     for (File f : fileList) {
-      findUncorrectFiles(Collections.singletonList(f));
+      findIncorrectFiles(Collections.singletonList(f));
     }
 
     // check tsfiles in data dir, which will check for correctness inside one 
single tsfile and
@@ -125,7 +101,14 @@ public class TsFileValidationTool {
       if (!checkIsDirectory(seqDataDir)) {
         continue;
       }
-      File[] sgDirs = seqDataDir.listFiles();
+      List<File> rootTsFiles =
+          Arrays.asList(
+              Objects.requireNonNull(
+                  seqDataDir.listFiles(file -> 
file.getName().endsWith(TSFILE_SUFFIX))));
+      findIncorrectFiles(rootTsFiles);
+
+      List<File> sgDirs =
+          
Arrays.asList(Objects.requireNonNull(seqDataDir.listFiles(File::isDirectory)));
       for (File sgDir : Objects.requireNonNull(sgDirs)) {
         if (!checkIsDirectory(sgDir)) {
           continue;
@@ -171,7 +154,7 @@ public class TsFileValidationTool {
                       : timeDiff;
                 });
 
-            findUncorrectFiles(tsFiles);
+            findIncorrectFiles(tsFiles);
           }
           // clear map
           clearMap(false);
@@ -179,358 +162,19 @@ public class TsFileValidationTool {
       }
     }
     if (printDetails) {
-      printBoth("Finish checking successfully, totally find " + badFileNum + " 
bad files.");
+      printBoth("Finish checking successfully, totally find " + 
getBadFileNum() + " bad files.");
     }
     if (printToFile) {
       pw.close();
     }
   }
 
-  public static void findUncorrectFiles(List<File> tsFiles) {
+  public static void findIncorrectFiles(List<File> tsFiles) {
     for (File tsFile : tsFiles) {
-      List<String> previousBadFileMsgs = new ArrayList<>();
-      try {
-        TsFileResource resource = new TsFileResource(tsFile);
-        if (!new File(tsFile.getAbsolutePath() + 
TsFileResource.RESOURCE_SUFFIX).exists()) {
-          // resource file does not exist, tsfile may not be flushed yet
-          logger.warn(
-              "{} does not exist ,skip it.",
-              tsFile.getAbsolutePath() + TsFileResource.RESOURCE_SUFFIX);
-          continue;
-        } else {
-          resource.deserialize();
-        }
-        isBadFileMap.put(tsFile.getName(), false);
-
-        try (TsFileSequenceReader reader = new 
TsFileSequenceReader(tsFile.getAbsolutePath())) {
-          // deviceID -> has checked overlap or not
-          Map<IDeviceID, Boolean> hasCheckedDeviceOverlap = new HashMap<>();
-          reader.position((long) TSFileConfig.MAGIC_STRING.getBytes().length + 
1);
-          byte marker;
-          IDeviceID deviceID = IDeviceID.Factory.DEFAULT_FACTORY.create("");
-          Map<String, boolean[]> hasMeasurementPrintedDetails = new 
HashMap<>();
-          // measurementId -> lastChunkEndTime in current file
-          Map<String, Long> lashChunkEndTime = new HashMap<>();
-
-          // start reading data points in sequence
-          while ((marker = reader.readMarker()) != MetaMarker.SEPARATOR) {
-            switch (marker) {
-              case MetaMarker.CHUNK_HEADER:
-              case MetaMarker.TIME_CHUNK_HEADER:
-              case MetaMarker.VALUE_CHUNK_HEADER:
-              case MetaMarker.ONLY_ONE_PAGE_CHUNK_HEADER:
-              case MetaMarker.ONLY_ONE_PAGE_TIME_CHUNK_HEADER:
-              case MetaMarker.ONLY_ONE_PAGE_VALUE_CHUNK_HEADER:
-                ChunkHeader header = reader.readChunkHeader(marker);
-                if (header.getDataSize() == 0) {
-                  // empty value chunk
-                  break;
-                }
-                long currentChunkEndTime = Long.MIN_VALUE;
-                String measurementID =
-                    deviceID.toString() + PATH_SEPARATOR + 
header.getMeasurementID();
-                hasMeasurementPrintedDetails.computeIfAbsent(measurementID, k 
-> new boolean[4]);
-                measurementLastTime.computeIfAbsent(
-                    measurementID,
-                    k -> {
-                      long[] arr = new long[2];
-                      Arrays.fill(arr, Long.MIN_VALUE);
-                      return new Pair<>("", arr);
-                    });
-                Decoder defaultTimeDecoder =
-                    Decoder.getDecoderByType(
-                        TSEncoding.valueOf(
-                            
TSFileDescriptor.getInstance().getConfig().getTimeEncoder()),
-                        TSDataType.INT64);
-                Decoder valueDecoder =
-                    Decoder.getDecoderByType(header.getEncodingType(), 
header.getDataType());
-                int dataSize = header.getDataSize();
-                long lastPageEndTime = Long.MIN_VALUE;
-                while (dataSize > 0) {
-                  valueDecoder.reset();
-                  PageHeader pageHeader =
-                      reader.readPageHeader(
-                          header.getDataType(),
-                          (header.getChunkType() & 0x3F) == 
MetaMarker.CHUNK_HEADER);
-                  ByteBuffer pageData = reader.readPage(pageHeader, 
header.getCompressionType());
-                  long currentPageEndTime = Long.MIN_VALUE;
-                  if ((header.getChunkType() & (byte) 
TsFileConstant.TIME_COLUMN_MASK)
-                      == (byte) TsFileConstant.TIME_COLUMN_MASK) {
-                    // Time Chunk
-                    TimePageReader timePageReader =
-                        new TimePageReader(pageHeader, pageData, 
defaultTimeDecoder);
-                    long[] timeBatch = timePageReader.getNextTimeBatch();
-                    for (int i = 0; i < timeBatch.length; i++) {
-                      long timestamp = timeBatch[i];
-                      if (timestamp <= 
measurementLastTime.get(measurementID).right[0]) {
-                        // find bad file
-                        if (timestamp <= 
measurementLastTime.get(measurementID).right[1]) {
-                          // overlap between file, then add previous bad file 
path to list
-                          String lastBadFile = 
measurementLastTime.get(measurementID).left;
-                          if (!isBadFileMap.get(lastBadFile)) {
-                            if (printDetails) {
-                              previousBadFileMsgs.add(
-                                  "-- Find the bad file "
-                                      + 
tsFile.getParentFile().getAbsolutePath()
-                                      + File.separator
-                                      + lastBadFile
-                                      + ", overlap with later files.");
-                            } else {
-                              previousBadFileMsgs.add(
-                                  tsFile.getParentFile().getAbsolutePath()
-                                      + File.separator
-                                      + lastBadFile);
-                            }
-                            isBadFileMap.put(lastBadFile, true);
-                            badFileNum++;
-                          }
-                        }
-
-                        if (!isBadFileMap.get(tsFile.getName())) {
-                          if (printDetails) {
-                            printBoth("-- Find the bad file " + 
tsFile.getAbsolutePath());
-                          } else {
-                            printBoth(tsFile.getAbsolutePath());
-                          }
-                          isBadFileMap.put(tsFile.getName(), true);
-                          badFileNum++;
-                        }
-                        if (printDetails) {
-                          if (timestamp <= 
measurementLastTime.get(measurementID).right[1]) {
-                            if 
(!hasMeasurementPrintedDetails.get(measurementID)[0]) {
-                              printBoth(
-                                  "-------- Timeseries "
-                                      + measurementID
-                                      + " overlap between files, with previous 
file "
-                                      + 
measurementLastTime.get(measurementID).left);
-                              
hasMeasurementPrintedDetails.get(measurementID)[0] = true;
-                            }
-                          } else if (timestamp
-                              <= lashChunkEndTime.getOrDefault(measurementID, 
Long.MIN_VALUE)) {
-                            if 
(!hasMeasurementPrintedDetails.get(measurementID)[1]) {
-                              printBoth(
-                                  "-------- Timeseries "
-                                      + measurementID
-                                      + " overlap between chunks");
-                              
hasMeasurementPrintedDetails.get(measurementID)[1] = true;
-                            }
-                          } else if (timestamp <= lastPageEndTime) {
-                            if 
(!hasMeasurementPrintedDetails.get(measurementID)[2]) {
-                              printBoth(
-                                  "-------- Timeseries "
-                                      + measurementID
-                                      + " overlap between pages");
-                              
hasMeasurementPrintedDetails.get(measurementID)[2] = true;
-                            }
-                          } else {
-                            if 
(!hasMeasurementPrintedDetails.get(measurementID)[3]) {
-                              printBoth(
-                                  "-------- Timeseries "
-                                      + measurementID
-                                      + " overlap within one page");
-                              
hasMeasurementPrintedDetails.get(measurementID)[3] = true;
-                            }
-                          }
-                        }
-                      } else {
-                        measurementLastTime.get(measurementID).right[0] = 
timestamp;
-                        currentPageEndTime = timestamp;
-                        currentChunkEndTime = timestamp;
-                      }
-                    }
-                  } else if ((header.getChunkType() & (byte) 
TsFileConstant.VALUE_COLUMN_MASK)
-                      == (byte) TsFileConstant.VALUE_COLUMN_MASK) {
-                    // Value Chunk, skip it
-                  } else {
-                    // NonAligned Chunk
-                    PageReader pageReader =
-                        new PageReader(
-                            pageData, header.getDataType(), valueDecoder, 
defaultTimeDecoder);
-                    BatchData batchData = pageReader.getAllSatisfiedPageData();
-                    while (batchData.hasCurrent()) {
-                      long timestamp = batchData.currentTime();
-                      if (timestamp <= 
measurementLastTime.get(measurementID).right[0]) {
-                        // find bad file
-                        if (timestamp <= 
measurementLastTime.get(measurementID).right[1]) {
-                          // overlap between file, then add previous bad file 
path to list
-                          if 
(!isBadFileMap.get(measurementLastTime.get(measurementID).left)) {
-                            if (printDetails) {
-                              previousBadFileMsgs.add(
-                                  "-- Find the bad file "
-                                      + 
tsFile.getParentFile().getAbsolutePath()
-                                      + File.separator
-                                      + 
measurementLastTime.get(measurementID).left
-                                      + ", overlap with later files.");
-                            } else {
-                              previousBadFileMsgs.add(
-                                  tsFile.getParentFile().getAbsolutePath()
-                                      + File.separator
-                                      + 
measurementLastTime.get(measurementID).left);
-                            }
-                            badFileNum++;
-                            
isBadFileMap.put(measurementLastTime.get(measurementID).left, true);
-                          }
-                        }
-                        if (!isBadFileMap.get(tsFile.getName())) {
-                          if (printDetails) {
-                            printBoth("-- Find the bad file " + 
tsFile.getAbsolutePath());
-                          } else {
-                            printBoth(tsFile.getAbsolutePath());
-                          }
-                          isBadFileMap.put(tsFile.getName(), true);
-                          badFileNum++;
-                        }
-                        if (printDetails) {
-                          if (timestamp <= 
measurementLastTime.get(measurementID).right[1]) {
-                            if 
(!hasMeasurementPrintedDetails.get(measurementID)[0]) {
-                              printBoth(
-                                  "-------- Timeseries "
-                                      + measurementID
-                                      + " overlap between files, with previous 
file "
-                                      + 
measurementLastTime.get(measurementID).left);
-                              
hasMeasurementPrintedDetails.get(measurementID)[0] = true;
-                            }
-                          } else if (timestamp
-                              <= lashChunkEndTime.getOrDefault(measurementID, 
Long.MIN_VALUE)) {
-                            if 
(!hasMeasurementPrintedDetails.get(measurementID)[1]) {
-                              printBoth(
-                                  "-------- Timeseries "
-                                      + measurementID
-                                      + " overlap between chunks");
-                              
hasMeasurementPrintedDetails.get(measurementID)[1] = true;
-                            }
-                          } else if (timestamp <= lastPageEndTime) {
-                            if 
(!hasMeasurementPrintedDetails.get(measurementID)[2]) {
-                              printBoth(
-                                  "-------- Timeseries "
-                                      + measurementID
-                                      + " overlap between pages");
-                              
hasMeasurementPrintedDetails.get(measurementID)[2] = true;
-                            }
-                          } else {
-                            if 
(!hasMeasurementPrintedDetails.get(measurementID)[3]) {
-                              printBoth(
-                                  "-------- Timeseries "
-                                      + measurementID
-                                      + " overlap within one page");
-                              
hasMeasurementPrintedDetails.get(measurementID)[3] = true;
-                            }
-                          }
-                        }
-                      } else {
-                        measurementLastTime.get(measurementID).right[0] = 
timestamp;
-                        currentPageEndTime = timestamp;
-                        currentChunkEndTime = timestamp;
-                      }
-                      batchData.next();
-                    }
-                  }
-                  dataSize -= pageHeader.getSerializedPageSize();
-                  lastPageEndTime = Math.max(lastPageEndTime, 
currentPageEndTime);
-                }
-                lashChunkEndTime.put(
-                    measurementID,
-                    Math.max(
-                        lashChunkEndTime.getOrDefault(measurementID, 
Long.MIN_VALUE),
-                        currentChunkEndTime));
-                break;
-              case MetaMarker.CHUNK_GROUP_HEADER:
-                if 
(!deviceID.equals(IDeviceID.Factory.DEFAULT_FACTORY.create(""))) {
-                  // record the end time of last device in current file
-                  if (resource.getEndTime(deviceID)
-                      > deviceEndTime.computeIfAbsent(
-                              deviceID,
-                              k -> {
-                                return new Pair<>("", Long.MIN_VALUE);
-                              })
-                          .right) {
-                    deviceEndTime.get(deviceID).left = tsFile.getName();
-                    deviceEndTime.get(deviceID).right = 
resource.getEndTime(deviceID);
-                  }
-                }
-                ChunkGroupHeader chunkGroupHeader = 
reader.readChunkGroupHeader();
-                deviceID = chunkGroupHeader.getDeviceID();
-                if (!hasCheckedDeviceOverlap.getOrDefault(deviceID, false)
-                    && resource.getStartTime(deviceID)
-                        <= deviceEndTime.computeIfAbsent(
-                                deviceID,
-                                k -> {
-                                  return new Pair<>("", Long.MIN_VALUE);
-                                })
-                            .right) {
-                  // find bad file
-                  // add prevous bad file msg to list
-                  if (!isBadFileMap.get(deviceEndTime.get(deviceID).left)) {
-                    if (printDetails) {
-                      previousBadFileMsgs.add(
-                          "-- Find the bad file "
-                              + tsFile.getParentFile().getAbsolutePath()
-                              + File.separator
-                              + deviceEndTime.get(deviceID).left
-                              + ", overlap with later files.");
-                    } else {
-                      previousBadFileMsgs.add(
-                          tsFile.getParentFile().getAbsolutePath()
-                              + File.separator
-                              + deviceEndTime.get(deviceID).left);
-                    }
-                    isBadFileMap.put(deviceEndTime.get(deviceID).left, true);
-                    badFileNum++;
-                  }
-                  // print current file
-                  if (!isBadFileMap.get(tsFile.getName())) {
-                    if (printDetails) {
-                      printBoth("-- Find the bad file " + 
tsFile.getAbsolutePath());
-                    } else {
-                      printBoth(tsFile.getAbsolutePath());
-                    }
-                    isBadFileMap.put(tsFile.getName(), true);
-                    badFileNum++;
-                  }
-                  if (printDetails) {
-                    printBoth(
-                        "---- Device "
-                            + deviceID
-                            + " overlap between files, with previous file "
-                            + deviceEndTime.get(deviceID).left);
-                  }
-                }
-                hasCheckedDeviceOverlap.put(deviceID, true);
-                break;
-              case MetaMarker.OPERATION_INDEX_RANGE:
-                reader.readPlanIndex();
-                break;
-              default:
-                MetaMarker.handleUnexpectedMarker(marker);
-            }
-          }
-
-          // record the end time of each timeseries in current file
-          for (Map.Entry<String, Long> entry : lashChunkEndTime.entrySet()) {
-            if (measurementLastTime.get(entry.getKey()).right[1] <= 
entry.getValue()) {
-              measurementLastTime.get(entry.getKey()).right[1] = 
entry.getValue();
-              measurementLastTime.get(entry.getKey()).left = tsFile.getName();
-            }
-          }
-        }
-      } catch (Throwable e) {
-        logger.error("Meet errors in reading file {} , skip it.", 
tsFile.getAbsolutePath(), e);
-        if (!isBadFileMap.get(tsFile.getName())) {
-          if (printDetails) {
-            printBoth(
-                "-- Meet errors in reading file "
-                    + tsFile.getAbsolutePath()
-                    + ", tsfile may be corrupted.");
-          } else {
-            printBoth(tsFile.getAbsolutePath());
-          }
-          isBadFileMap.put(tsFile.getName(), true);
-          badFileNum++;
-        }
-      } finally {
-        for (String msg : previousBadFileMsgs) {
-          printBoth(msg);
-        }
+      // Empty the scan's message list first so that only messages produced while scanning
+      // this file are printed below.
+      validationScan.getPreviousBadFileMsgs().clear();
+      validationScan.scanTsFile(tsFile);
+      // Print the messages the scan queued for this file (the inline code this replaces used
+      // the list for reports about earlier files that overlap with the current one).
+      // NOTE(review): the replaced code printed these in a finally block; here nothing is
+      // printed if scanTsFile throws - confirm scanTsFile handles its own read errors.
+      for (String msg : validationScan.getPreviousBadFileMsgs()) {
+        printBoth(msg);
       }
     }
   }
@@ -544,6 +188,8 @@ public class TsFileValidationTool {
       for (String arg : args) {
         if (arg.startsWith("-pd")) {
           printDetails = Boolean.parseBoolean(arg.split("=")[1]);
+        } else if (arg.startsWith("-if")) {
+          ignoreFileOverlap = Boolean.parseBoolean(arg.split("=")[1]);
         } else if (arg.startsWith("-f")) {
           printToFile = true;
           outFilePath = arg.split("=")[1];
@@ -576,12 +222,7 @@ public class TsFileValidationTool {
   }
 
   public static void clearMap(boolean resetBadFileNum) {
-    if (resetBadFileNum) {
-      badFileNum = 0;
-    }
-    measurementLastTime.clear();
-    deviceEndTime.clear();
-    isBadFileMap.clear();
+    // The per-run state now lives in the shared scan, so clearing is delegated to it. The flag
+    // is forwarded unchanged; in the inline version it additionally zeroed the bad-file
+    // counter - presumably reset() keeps that meaning (confirm in TsFileValidationScan).
+    validationScan.reset(resetBadFileNum);
   }
 
   private static boolean checkIsDirectory(File dir) {
@@ -599,4 +240,12 @@ public class TsFileValidationTool {
       pw.println(msg);
     }
   }
+
+  /**
+   * Returns the bad-file count tracked by the shared validation scan.
+   *
+   * @return the value of {@code validationScan.getBadFileNum()}
+   */
+  public static int getBadFileNum() {
+    return validationScan.getBadFileNum();
+  }
+
+  /**
+   * Overwrites the bad-file count of the shared validation scan. Callers such as the
+   * compaction validation tests pass 0 to reset the counter between test cases.
+   *
+   * @param badFileNum value forwarded to {@code validationScan.setBadFileNum}
+   */
+  public static void setBadFileNum(int badFileNum) {
+    validationScan.setBadFileNum(badFileNum);
+  }
 }
diff --git 
a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/AbstractCompactionTest.java
 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/AbstractCompactionTest.java
index afba46fde89..1abe4eaa2fe 100644
--- 
a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/AbstractCompactionTest.java
+++ 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/AbstractCompactionTest.java
@@ -527,8 +527,8 @@ public class AbstractCompactionTest {
     for (TsFileResource resource : tsFileManager.getTsFileList(isSeq)) {
       files.add(resource.getTsFile());
     }
-    TsFileValidationTool.findUncorrectFiles(files);
-    Assert.assertEquals(0, TsFileValidationTool.badFileNum);
+    TsFileValidationTool.findIncorrectFiles(files);
+    Assert.assertEquals(0, TsFileValidationTool.getBadFileNum());
   }
 
   protected Map<IFullPath, List<TimeValuePair>> readSourceFiles(
diff --git 
a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/FastCrossCompactionPerformerTest.java
 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/FastCrossCompactionPerformerTest.java
index cc9978a8717..7c4dd962e85 100644
--- 
a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/FastCrossCompactionPerformerTest.java
+++ 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/FastCrossCompactionPerformerTest.java
@@ -4709,7 +4709,7 @@ public class FastCrossCompactionPerformerTest extends 
AbstractCompactionTest {
     for (TsFileResource resource : targetResources) {
       files.add(resource.getTsFile());
     }
-    TsFileValidationTool.findUncorrectFiles(files);
-    Assert.assertEquals(0, TsFileValidationTool.badFileNum);
+    TsFileValidationTool.findIncorrectFiles(files);
+    Assert.assertEquals(0, TsFileValidationTool.getBadFileNum());
   }
 }
diff --git 
a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/cross/CrossSpaceCompactionWithFastPerformerValidationTest.java
 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/cross/CrossSpaceCompactionWithFastPerformerValidationTest.java
index 9aa512aa0dd..cafe8ac2768 100644
--- 
a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/cross/CrossSpaceCompactionWithFastPerformerValidationTest.java
+++ 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/cross/CrossSpaceCompactionWithFastPerformerValidationTest.java
@@ -101,7 +101,7 @@ public class 
CrossSpaceCompactionWithFastPerformerValidationTest extends Abstrac
     Thread.currentThread().setName(oldThreadName);
     FileReaderManager.getInstance().closeAndRemoveAllOpenedReaders();
     SystemInfo.getInstance().setMemorySizeForCompaction(compactionMemory);
-    TsFileValidationTool.badFileNum = 0;
+    TsFileValidationTool.setBadFileNum(0);
   }
 
   /**
diff --git 
a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/cross/CrossSpaceCompactionWithReadPointPerformerValidationTest.java
 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/cross/CrossSpaceCompactionWithReadPointPerformerValidationTest.java
index df97ac9257b..451a1bb9883 100644
--- 
a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/cross/CrossSpaceCompactionWithReadPointPerformerValidationTest.java
+++ 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/storageengine/dataregion/compaction/cross/CrossSpaceCompactionWithReadPointPerformerValidationTest.java
@@ -100,7 +100,7 @@ public class 
CrossSpaceCompactionWithReadPointPerformerValidationTest
     Thread.currentThread().setName(oldThreadName);
     FileReaderManager.getInstance().closeAndRemoveAllOpenedReaders();
     SystemInfo.getInstance().setMemorySizeForCompaction(compactionMemory);
-    TsFileValidationTool.badFileNum = 0;
+    TsFileValidationTool.setBadFileNum(0);
   }
 
   /**
diff --git 
a/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/tools/TsFileValidationScanTest.java
 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/tools/TsFileValidationScanTest.java
new file mode 100644
index 00000000000..97053737571
--- /dev/null
+++ 
b/iotdb-core/datanode/src/test/java/org/apache/iotdb/db/tools/TsFileValidationScanTest.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iotdb.db.tools;
+
+import org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileResource;
+import org.apache.iotdb.db.tools.utils.TsFileValidationScan;
+import org.apache.iotdb.db.utils.constant.TestConstant;
+
+import org.apache.tsfile.enums.TSDataType;
+import org.apache.tsfile.file.metadata.PlainDeviceID;
+import org.apache.tsfile.write.chunk.ChunkWriterImpl;
+import org.apache.tsfile.write.schema.MeasurementSchema;
+import org.apache.tsfile.write.writer.TsFileIOWriter;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Exercises {@link TsFileValidationScan} on five small TsFiles written under {@code
+ * TestConstant.BASE_OUTPUT_PATH}: three files that repeat a timestamp internally (across
+ * chunks, across pages, within one page) and two internally ordered files whose time ranges
+ * overlap each other.
+ */
+public class TsFileValidationScanTest {
+
+  // Files created by prepareTsFiles(), in order 1.tsfile .. 5.tsfile (indexes 0 .. 4).
+  private List<File> files;
+
+  // Regenerates the fixture files before every test.
+  @Before
+  public void setUp() throws Exception {
+    files = prepareTsFiles();
+  }
+
+  // Removes the fixture files after every test.
+  @After
+  public void tearDown() throws Exception {
+    files.forEach(
+        file -> {
+          // Delete the TsFile itself, then let TsFileResource remove what it manages
+          // (presumably the serialized resource file written in prepareTsFiles - confirm).
+          file.delete();
+          new TsFileResource(file).remove();
+        });
+  }
+
+  /**
+   * Scans each fixture with a fresh scan (so no state carries over between cases) and checks
+   * the reported bad-file count, then scans files 4 and 5 with one shared scan to check that
+   * an overlap between files is reported.
+   */
+  @Test
+  public void testValidation() throws IOException {
+    // overlap between chunks
+    TsFileValidationScan tsFileValidationScan = new TsFileValidationScan();
+    tsFileValidationScan.scanTsFile(files.get(0));
+    assertEquals(1, tsFileValidationScan.getBadFileNum());
+
+    // overlap between page
+    tsFileValidationScan = new TsFileValidationScan();
+    tsFileValidationScan.scanTsFile(files.get(1));
+    assertEquals(1, tsFileValidationScan.getBadFileNum());
+
+    // overlap within page
+    tsFileValidationScan = new TsFileValidationScan();
+    tsFileValidationScan.scanTsFile(files.get(2));
+    assertEquals(1, tsFileValidationScan.getBadFileNum());
+
+    // normal
+    tsFileValidationScan = new TsFileValidationScan();
+    tsFileValidationScan.scanTsFile(files.get(3));
+    assertEquals(0, tsFileValidationScan.getBadFileNum());
+
+    // normal
+    tsFileValidationScan = new TsFileValidationScan();
+    tsFileValidationScan.scanTsFile(files.get(4));
+    assertEquals(0, tsFileValidationScan.getBadFileNum());
+
+    // overlap between files
+    // Both files go through the same scan this time; each is fine on its own (see above), and
+    // the expected count of 2 means both the earlier and the later file are reported as bad.
+    tsFileValidationScan = new TsFileValidationScan();
+    tsFileValidationScan.scanTsFile(files.get(3));
+    tsFileValidationScan.scanTsFile(files.get(4));
+    assertEquals(2, tsFileValidationScan.getBadFileNum());
+  }
+
+  /**
+   * Scans the same overlapping pair (files 4 and 5) with ignoreFileOverlap enabled and expects
+   * no bad file to be reported.
+   */
+  @Test
+  public void testIgnoreFileOverlap() throws IOException {
+    TsFileValidationScan tsFileValidationScan;
+
+    // overlap between files
+    tsFileValidationScan = new TsFileValidationScan();
+    tsFileValidationScan.setIgnoreFileOverlap(true);
+    tsFileValidationScan.setPrintDetails(true);
+    tsFileValidationScan.scanTsFile(files.get(3));
+    tsFileValidationScan.scanTsFile(files.get(4));
+    assertEquals(0, tsFileValidationScan.getBadFileNum());
+  }
+
+  /**
+   * Writes the five fixture TsFiles for device root.sg1.d1 / measurement s1 and serializes a
+   * TsFileResource with the device start and end time for each of them.
+   *
+   * <p>The first argument of every {@code chunkWriter.write} call is the timestamp (it matches
+   * the start/end times recorded in the resources); the second is the INT32 value.
+   *
+   * @return the files in creation order: 1.tsfile (overlap between chunks), 2.tsfile (overlap
+   *     between pages), 3.tsfile (overlap within a page), 4.tsfile (ordered, times 1..4),
+   *     5.tsfile (ordered, times 3..6, overlapping 4.tsfile)
+   */
+  private static List<File> prepareTsFiles() throws IOException {
+    List<File> files = new ArrayList<>();
+    PlainDeviceID plainDeviceID = new PlainDeviceID("root.sg1.d1");
+    // overlap between chunks
+    // timestamp 1 is written, flushed to the file writer, then written and flushed again
+    File file = new File(TestConstant.BASE_OUTPUT_PATH, "1.tsfile");
+    TsFileResource resource = new TsFileResource(file);
+    files.add(file);
+    TsFileIOWriter tsFileIOWriter = new TsFileIOWriter(file);
+    tsFileIOWriter.startChunkGroup(plainDeviceID);
+    ChunkWriterImpl chunkWriter =
+        new ChunkWriterImpl(new MeasurementSchema("s1", TSDataType.INT32));
+    chunkWriter.write(1, 1);
+    chunkWriter.writeToFileWriter(tsFileIOWriter);
+    chunkWriter.write(1, 1);
+    chunkWriter.writeToFileWriter(tsFileIOWriter);
+    tsFileIOWriter.endChunkGroup();
+    tsFileIOWriter.endFile();
+    resource.updateStartTime(plainDeviceID, 1);
+    resource.updateEndTime(plainDeviceID, 1);
+    resource.serialize();
+
+    // overlap between page
+    // timestamp 1 is written on both sides of a sealCurrentPage() call, then flushed once
+    file = new File(TestConstant.BASE_OUTPUT_PATH, "2.tsfile");
+    resource = new TsFileResource(file);
+    files.add(file);
+    tsFileIOWriter = new TsFileIOWriter(file);
+    tsFileIOWriter.startChunkGroup(plainDeviceID);
+    chunkWriter = new ChunkWriterImpl(new MeasurementSchema("s1", 
TSDataType.INT32));
+    chunkWriter.write(1, 1);
+    chunkWriter.sealCurrentPage();
+    chunkWriter.write(1, 1);
+    chunkWriter.writeToFileWriter(tsFileIOWriter);
+    tsFileIOWriter.endChunkGroup();
+    tsFileIOWriter.endFile();
+    resource.updateStartTime(plainDeviceID, 1);
+    resource.updateEndTime(plainDeviceID, 1);
+    resource.serialize();
+
+    // overlap within page
+    // timestamp 1 is written twice with no page seal or flush in between
+    file = new File(TestConstant.BASE_OUTPUT_PATH, "3.tsfile");
+    resource = new TsFileResource(file);
+    files.add(file);
+    tsFileIOWriter = new TsFileIOWriter(file);
+    tsFileIOWriter.startChunkGroup(plainDeviceID);
+    chunkWriter = new ChunkWriterImpl(new MeasurementSchema("s1", 
TSDataType.INT32));
+    chunkWriter.write(1, 1);
+    chunkWriter.write(1, 1);
+    chunkWriter.writeToFileWriter(tsFileIOWriter);
+    tsFileIOWriter.endChunkGroup();
+    tsFileIOWriter.endFile();
+    resource.updateStartTime(plainDeviceID, 1);
+    resource.updateEndTime(plainDeviceID, 1);
+    resource.serialize();
+
+    // normal
+    // strictly increasing timestamps 1..4: (1 | 2) flushed, then (3 | 4) flushed
+    file = new File(TestConstant.BASE_OUTPUT_PATH, "4.tsfile");
+    resource = new TsFileResource(file);
+    files.add(file);
+    tsFileIOWriter = new TsFileIOWriter(file);
+    tsFileIOWriter.startChunkGroup(plainDeviceID);
+    chunkWriter = new ChunkWriterImpl(new MeasurementSchema("s1", 
TSDataType.INT32));
+    chunkWriter.write(1, 1);
+    chunkWriter.sealCurrentPage();
+    chunkWriter.write(2, 1);
+    chunkWriter.writeToFileWriter(tsFileIOWriter);
+    chunkWriter.write(3, 1);
+    chunkWriter.sealCurrentPage();
+    chunkWriter.write(4, 1);
+    chunkWriter.writeToFileWriter(tsFileIOWriter);
+    tsFileIOWriter.endChunkGroup();
+    tsFileIOWriter.endFile();
+    resource.updateStartTime(plainDeviceID, 1);
+    resource.updateEndTime(plainDeviceID, 4);
+    resource.serialize();
+
+    // normal but overlap with 4.tsfile
+    // strictly increasing timestamps 3..6; the range starts before 4.tsfile ends (time 4)
+    file = new File(TestConstant.BASE_OUTPUT_PATH, "5.tsfile");
+    resource = new TsFileResource(file);
+    files.add(file);
+    tsFileIOWriter = new TsFileIOWriter(file);
+    tsFileIOWriter.startChunkGroup(plainDeviceID);
+    chunkWriter = new ChunkWriterImpl(new MeasurementSchema("s1", 
TSDataType.INT32));
+    chunkWriter.write(3, 1);
+    chunkWriter.sealCurrentPage();
+    chunkWriter.write(4, 1);
+    chunkWriter.writeToFileWriter(tsFileIOWriter);
+    chunkWriter.write(5, 1);
+    chunkWriter.sealCurrentPage();
+    chunkWriter.write(6, 1);
+    chunkWriter.writeToFileWriter(tsFileIOWriter);
+    tsFileIOWriter.endChunkGroup();
+    tsFileIOWriter.endFile();
+    resource.updateStartTime(plainDeviceID, 3);
+    resource.updateEndTime(plainDeviceID, 6);
+    resource.serialize();
+    return files;
+  }
+}

Reply via email to