jt2594838 commented on code in PR #17279:
URL: https://github.com/apache/iotdb/pull/17279#discussion_r2963753137
##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/thrift/impl/DataNodeInternalRPCServiceImpl.java:
##########
@@ -3392,71 +3366,62 @@ private long findEarliestTimeslotInDatabase(File
databaseDir, Map<String, Long>
ThreadName.FIND_EARLIEST_TIME_SLOT_PARALLEL_POOL.getName(),
new ThreadPoolExecutor.CallerRunsPolicy());
- try (Stream<Path> databasePaths = Files.list(databaseDir.toPath())) {
- databasePaths
- .filter(Files::isDirectory)
- .forEach(
- regionPath -> {
- Future<?> future =
- findEarliestTimeSlotExecutor.submit(
- () -> {
- try (Stream<Path> regionPaths =
Files.list(regionPath)) {
- regionPaths
- .filter(Files::isDirectory)
- .forEach(
- timeSlotPath -> {
- try {
- Optional<Path> matchedFile =
- Files.find(
- timeSlotPath,
- 1,
- (path, attrs) ->
- attrs.isRegularFile()
- && path.toString()
- .endsWith(
-
DataPartitionTableGenerator
-
.SCAN_FILE_SUFFIX_NAME))
- .findFirst();
- if (!matchedFile.isPresent()) {
- return;
- }
- String timeSlotName =
timeSlotPath.getFileName().toString();
- long timeslot =
Long.parseLong(timeSlotName);
- databaseEarliestRegionMap.compute(
- databaseName,
- (k, v) ->
- v == null ? timeslot :
Math.min(v, timeslot));
- } catch (IOException e) {
- LOGGER.error(
- "Failed to find any {} files in
the {} directory",
-
DataPartitionTableGenerator.SCAN_FILE_SUFFIX_NAME,
- timeSlotPath,
- e);
- }
- });
- } catch (IOException e) {
- LOGGER.error("Failed to scan {}", regionPath, e);
- }
- });
- futureList.add(future);
- });
- } catch (IOException e) {
- LOGGER.error("Failed to walk database directory: {}", databaseDir, e);
- }
-
- for (Future<?> future : futureList) {
- try {
- future.get();
- } catch (InterruptedException | ExecutionException e) {
- LOGGER.error("Failed to wait for task completion", e);
- Thread.currentThread().interrupt();
- }
+ for (DataRegion dataRegion :
StorageEngine.getInstance().getAllDataRegions()) {
+ CompletableFuture<Void> regionFuture =
+ CompletableFuture.runAsync(
+ () -> {
+ TsFileManager tsFileManager = dataRegion.getTsFileManager();
+ String databaseName = dataRegion.getDatabaseName();
+ if (ignoreDatabase.contains(databaseName)) {
+ return;
+ }
+
+ tsFileManager.readLock();
+ List<TsFileResource> seqTsFileList =
tsFileManager.getTsFileList(true);
+ List<TsFileResource> unseqTsFileList =
tsFileManager.getTsFileList(false);
+ tsFileManager.readUnlock();
+
+ long earliestTimeSlotId = Long.MIN_VALUE;
+
+ earliestTimeSlotId =
findEarliestTimeslotInFiles(seqTsFileList, earliestTimeSlotId);
+ earliestTimeSlotId =
+ findEarliestTimeslotInFiles(unseqTsFileList,
earliestTimeSlotId);
+
+ if (earliestTimeSlotId == Long.MIN_VALUE) {
+ LOGGER.info("No time slot info is found in the seq and unseq
directory");
+ return;
+ }
+
+ long finalEarliestTimeSlotId = earliestTimeSlotId;
+ earliestTimeslots.compute(
+ databaseName,
+ (k, v) ->
+ v == null ? finalEarliestTimeSlotId :
Math.min(finalEarliestTimeSlotId, v));
+ },
Review Comment:
Using
org.apache.iotdb.db.storageengine.dataregion.tsfile.TsFileManager#getTimePartitions
is enough here.
##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/partition/DataPartitionTableGenerator.java:
##########
@@ -213,15 +219,17 @@ private void constructDataPartitionMap(
TSeriesPartitionSlot seriesSlotId =
seriesPartitionExecutor.getSeriesPartitionSlot(deviceId);
TTimePartitionSlot timePartitionSlot =
- new
TTimePartitionSlot(TimePartitionUtils.getTimeByPartitionId(timeSlotId));
+ new
TTimePartitionSlot(TimePartitionUtils.getStartTimeByPartitionId(timeSlotId));
dataPartitionMap
.computeIfAbsent(
seriesSlotId, empty ->
newSeriesPartitionTable(consensusGroupId, timeSlotId))
.putDataPartition(timePartitionSlot, consensusGroupId);
}
processedFiles.incrementAndGet();
+ totalFiles.incrementAndGet();
} catch (Exception e) {
failedFiles.incrementAndGet();
+ totalFiles.incrementAndGet();
Review Comment:
If you are using totalFiles to calculate the progress, then this is not
the right way to update it.
You should either calculate the total up front, before processing starts,
or use another method for progress calculation, for example the total
number of regions/partitions and the number already processed.
##########
iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/partition/DataPartitionTableIntegrityCheckProcedure.java:
##########
@@ -658,42 +718,49 @@ public void serialize(final DataOutputStream stream)
throws IOException {
// data written for a single object
stream.write(data);
} catch (IOException | TException e) {
- LOG.error("{} serialize failed for dataNodeId: {}",
this.getClass().getSimpleName(), entry.getKey(), e);
+ LOG.error(
+ "[DataPartitionIntegrity] {} serialize failed for dataNodeId:
{}",
+ this.getClass().getSimpleName(),
+ entry.getKey(),
+ e);
throw new IOException("Failed to serialize dataPartitionTables", e);
}
}
}
- stream.writeInt(lostDataPartitionsOfDatabases.size());
- for (String database : lostDataPartitionsOfDatabases) {
- stream.writeUTF(database);
+ stream.writeInt(databasesWithLostDataPartition.size());
+ for (String database : databasesWithLostDataPartition) {
+ ReadWriteIOUtils.write(database, stream);
}
- if (finalDataPartitionTables != null &&
!finalDataPartitionTables.isEmpty()) {
- stream.writeInt(finalDataPartitionTables.size());
+ if (finalDataPartitionTables != null &&
!finalDataPartitionTables.isEmpty()) {
+ stream.writeInt(finalDataPartitionTables.size());
- for (Map.Entry<String, DataPartitionTable> entry :
finalDataPartitionTables.entrySet()) {
- stream.writeUTF(entry.getKey());
+ for (Map.Entry<String, DataPartitionTable> entry :
finalDataPartitionTables.entrySet()) {
+ ReadWriteIOUtils.write(entry.getKey(), stream);
- try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(baos)) {
+ try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(baos)) {
- TTransport transport = new TIOStreamTransport(dos);
- TBinaryProtocol protocol = new TBinaryProtocol(transport);
+ TTransport transport = new TIOStreamTransport(dos);
+ TBinaryProtocol protocol = new TBinaryProtocol(transport);
- entry.getValue().serialize(dos, protocol);
+ entry.getValue().serialize(dos, protocol);
- byte[] data = baos.toByteArray();
- stream.writeInt(data.length);
- stream.write(data);
- } catch (IOException | TException e) {
- LOG.error("{} serialize finalDataPartitionTables failed",
this.getClass().getSimpleName(), e);
- throw new IOException("Failed to serialize
finalDataPartitionTables", e);
- }
+ byte[] data = baos.toByteArray();
+ stream.writeInt(data.length);
+ stream.write(data);
+ } catch (IOException | TException e) {
Review Comment:
You may use PublicBAOS here, so that baos.toByteArray() can be replaced with baos.getBuf().
##########
iotdb-core/node-commons/src/assembly/resources/conf/iotdb-system.properties.template:
##########
@@ -742,6 +742,21 @@ failure_detector_phi_acceptable_pause_in_ms=10000
# Datatype: double(percentage)
disk_space_warning_threshold=0.05
+# The number of threads used for parallel scanning in the partition table
recovery
+# effectiveMode: restart
+# Datatype: Integer
+partition_table_recover_worker_num=10
+
+# Limit the number of bytes read per second from a file, the unit is MB
Review Comment:
This limit is for data partition repair.
Please state that in the comment itself, rather than only explaining it.
##########
iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/rateLimiter/LeakyBucketRateLimiter.java:
##########
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iotdb.commons.utils.rateLimiter;
+
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.LockSupport;
+
+/**
+ * A global leaky-bucket rate limiter for bytes throughput. Features: - Strict
throughput limiting
+ * (no burst) - Smooth bandwidth shaping - Thread-safe - Fair for multi-thread
- Low contention
+ */
+public class LeakyBucketRateLimiter {
Review Comment:
We have already used it in other modules, so using it here will not add a
new dependency
##########
iotdb-core/datanode/src/main/java/org/apache/iotdb/db/protocol/thrift/impl/DataNodeInternalRPCServiceImpl.java:
##########
@@ -3150,23 +3147,13 @@ public TGetEarliestTimeslotsResp getEarliestTimeslots()
{
TGetEarliestTimeslotsResp resp = new TGetEarliestTimeslotsResp();
try {
- Map<String, Long> earliestTimeslots = new HashMap<>();
-
- // Get data directories from configuration
- String[] dataDirs =
IoTDBDescriptor.getInstance().getConfig().getDataDirs();
-
- for (String dataDir : dataDirs) {
- File dir = new File(dataDir);
- if (dir.exists() && dir.isDirectory()) {
- processDataDirectoryForEarliestTimeslots(dir, earliestTimeslots);
- }
- }
+ Map<String, Long> earliestTimeslots = new ConcurrentHashMap<>();
+ processDataDirectoryForEarliestTimeslots(earliestTimeslots);
Review Comment:
The method name is outdated: it no longer processes a DataDirectory.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]