swamirishi commented on code in PR #8214:
URL: https://github.com/apache/ozone/pull/8214#discussion_r2024119830
##########
hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java:
##########
@@ -1382,6 +1393,62 @@ public void pruneSstFiles() {
}
}
+  /**
+   * Defines the task that removes OMKeyInfo from SST files in the backup
+   * directory to save disk space.
+   */
+  public void pruneSstFileValues() {
+    if (!shouldRun()) {
+      return;
+    }
+    Path sstBackupDirPath = Paths.get(sstBackupDir);
+    String kVSeparator = ",";
+
+    try (Stream<Path> files = Files.list(sstBackupDirPath)
+        .filter(file -> file.toString().endsWith(ROCKSDB_SST_SUFFIX))) {
+
+      for (Path file : files.collect(Collectors.toList())) {
+        // Write the file.sst => file.sst.tmp
+        File sstFile = file.toFile();
+        File prunedSSTFile = Files.createFile(sstBackupDirPath
+            .resolve(sstFile.getName() + ".tmp")).toFile();
+
+        ManagedEnvOptions envOptions = new ManagedEnvOptions();
+        ManagedOptions managedOptions = new ManagedOptions();
+        ManagedSstFileWriter sstFileWriter =
+            new ManagedSstFileWriter(envOptions, managedOptions);
+        sstFileWriter.open(prunedSSTFile.getAbsolutePath());
+
+        ManagedRawSSTFileReader<String> sstFileReader = new ManagedRawSSTFileReader<>(
+            managedOptions, sstFile.getAbsolutePath(), 2 * 1024 * 1024);
+        ManagedRawSSTFileIterator<String> itr = sstFileReader.newIterator(
+            keyValue -> StringUtils.bytes2String(keyValue.getKey()) + kVSeparator
+                + StringUtils.bytes2String(keyValue.getValue()), null, null);
+
+        while (itr.hasNext()) {
+          // Split with limit -1 so an empty value still yields two elements.
+          String[] keyValue = itr.next().split(kVSeparator, -1);
+          if (keyValue[1].isEmpty()) {
+            sstFileWriter.delete(keyValue[0].getBytes(UTF_8));
+          } else {
+            sstFileWriter.put(keyValue[0].getBytes(UTF_8), "\0".getBytes(UTF_8));
+          }
+        }
+        sstFileWriter.finish();
+
+        // Acquire a mutex
+        try (BootstrapStateHandler.Lock lock = getBootstrapStateLock().lock()) {
Review Comment:
We should take a lock here that is also held while computing the hardlinks
during snapshot diff computation:
https://github.com/apache/ozone/blob/b11b80707d4e6721c06966936ad87063ec107da6/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java#L839-L844
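For illustration, a minimal sketch of the idea, assuming one shared lock object
guards both the pruner's file swap and the hardlink walk (all names below are
hypothetical):

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.concurrent.locks.ReentrantLock;

class SstFileGuard {
  // Hypothetical shared lock: the pruner and the snapshot-diff hardlink
  // computation must not see each other's half-finished state.
  private final ReentrantLock sstFileLock = new ReentrantLock();

  void swapPrunedFile(Path prunedSstFile, Path sstFile) throws IOException {
    sstFileLock.lock();
    try {
      // Atomically replace file.sst with the pruned file.sst.tmp.
      Files.move(prunedSstFile, sstFile,
          StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING);
    } finally {
      sstFileLock.unlock();
    }
  }

  void computeHardlinks() {
    sstFileLock.lock();
    try {
      // Walk the backup directory and record hardlinks here, so no SST
      // file is swapped out from under the walk (body elided).
    } finally {
      sstFileLock.unlock();
    }
  }
}
```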
##########
hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java:
##########
@@ -1382,6 +1393,62 @@ public void pruneSstFiles() {
}
}
+  /**
+   * Defines the task that removes OMKeyInfo from SST files in the backup
+   * directory to save disk space.
+   */
+  public void pruneSstFileValues() {
+    if (!shouldRun()) {
+      return;
+    }
+    Path sstBackupDirPath = Paths.get(sstBackupDir);
+    String kVSeparator = ",";
+
+    try (Stream<Path> files = Files.list(sstBackupDirPath)
+        .filter(file -> file.toString().endsWith(ROCKSDB_SST_SUFFIX))) {
+
+      for (Path file : files.collect(Collectors.toList())) {
+        // Write the file.sst => file.sst.tmp
+        File sstFile = file.toFile();
+        File prunedSSTFile = Files.createFile(sstBackupDirPath
+            .resolve(sstFile.getName() + ".tmp")).toFile();
+
+        ManagedEnvOptions envOptions = new ManagedEnvOptions();
+        ManagedOptions managedOptions = new ManagedOptions();
+        ManagedSstFileWriter sstFileWriter =
+            new ManagedSstFileWriter(envOptions, managedOptions);
+        sstFileWriter.open(prunedSSTFile.getAbsolutePath());
+
+        ManagedRawSSTFileReader<String> sstFileReader = new ManagedRawSSTFileReader<>(
+            managedOptions, sstFile.getAbsolutePath(), 2 * 1024 * 1024);
+        ManagedRawSSTFileIterator<String> itr = sstFileReader.newIterator(
+            keyValue -> StringUtils.bytes2String(keyValue.getKey()) + kVSeparator
+                + StringUtils.bytes2String(keyValue.getValue()), null, null);
+
+        while (itr.hasNext()) {
+          // Split with limit -1 so an empty value still yields two elements.
+          String[] keyValue = itr.next().split(kVSeparator, -1);
+          if (keyValue[1].isEmpty()) {
+            sstFileWriter.delete(keyValue[0].getBytes(UTF_8));
+          } else {
+            sstFileWriter.put(keyValue[0].getBytes(UTF_8), "\0".getBytes(UTF_8));
+          }
+        }
+        sstFileWriter.finish();
+
+        // Acquire a mutex
+        try (BootstrapStateHandler.Lock lock = getBootstrapStateLock().lock()) {
Review Comment:
Using the BootstrapLock is wrong here.
##########
hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java:
##########
@@ -1382,6 +1393,62 @@ public void pruneSstFiles() {
}
}
+  /**
+   * Defines the task that removes OMKeyInfo from SST files in the backup
+   * directory to save disk space.
+   */
+  public void pruneSstFileValues() {
+    if (!shouldRun()) {
+      return;
+    }
+    Path sstBackupDirPath = Paths.get(sstBackupDir);
+    String kVSeparator = ",";
+
+    try (Stream<Path> files = Files.list(sstBackupDirPath)
+        .filter(file -> file.toString().endsWith(ROCKSDB_SST_SUFFIX))) {
+
+      for (Path file : files.collect(Collectors.toList())) {
+        // Write the file.sst => file.sst.tmp
+        File sstFile = file.toFile();
+        File prunedSSTFile = Files.createFile(sstBackupDirPath
+            .resolve(sstFile.getName() + ".tmp")).toFile();
+
+        ManagedEnvOptions envOptions = new ManagedEnvOptions();
+        ManagedOptions managedOptions = new ManagedOptions();
+        ManagedSstFileWriter sstFileWriter =
+            new ManagedSstFileWriter(envOptions, managedOptions);
+        sstFileWriter.open(prunedSSTFile.getAbsolutePath());
+
+        ManagedRawSSTFileReader<String> sstFileReader = new ManagedRawSSTFileReader<>(
+            managedOptions, sstFile.getAbsolutePath(), 2 * 1024 * 1024);
+        ManagedRawSSTFileIterator<String> itr = sstFileReader.newIterator(
+            keyValue -> StringUtils.bytes2String(keyValue.getKey()) + kVSeparator
+                + StringUtils.bytes2String(keyValue.getValue()), null, null);
Review Comment:
We don't need the value.
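A sketch of what that could look like, reusing the reader from the quoted diff
(with its type parameter changed to match); the transformer returns only the
raw key bytes:

```java
// Sketch: no need to join key and value into a delimited string when
// only the key is consumed downstream.
ManagedRawSSTFileIterator<byte[]> itr = sstFileReader.newIterator(
    keyValue -> keyValue.getKey(), null, null);
while (itr.hasNext()) {
  byte[] key = itr.next();
  // The put-vs-delete decision can come from the entry type rather
  // than from an empty value (see the next comment).
}
```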
##########
hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java:
##########
@@ -1382,6 +1393,62 @@ public void pruneSstFiles() {
}
}
+  /**
+   * Defines the task that removes OMKeyInfo from SST files in the backup
+   * directory to save disk space.
+   */
+  public void pruneSstFileValues() {
+    if (!shouldRun()) {
+      return;
+    }
+    Path sstBackupDirPath = Paths.get(sstBackupDir);
+    String kVSeparator = ",";
+
+    try (Stream<Path> files = Files.list(sstBackupDirPath)
+        .filter(file -> file.toString().endsWith(ROCKSDB_SST_SUFFIX))) {
+
+      for (Path file : files.collect(Collectors.toList())) {
+        // Write the file.sst => file.sst.tmp
+        File sstFile = file.toFile();
+        File prunedSSTFile = Files.createFile(sstBackupDirPath
+            .resolve(sstFile.getName() + ".tmp")).toFile();
+
+        ManagedEnvOptions envOptions = new ManagedEnvOptions();
+        ManagedOptions managedOptions = new ManagedOptions();
+        ManagedSstFileWriter sstFileWriter =
+            new ManagedSstFileWriter(envOptions, managedOptions);
+        sstFileWriter.open(prunedSSTFile.getAbsolutePath());
+
+        ManagedRawSSTFileReader<String> sstFileReader = new ManagedRawSSTFileReader<>(
+            managedOptions, sstFile.getAbsolutePath(), 2 * 1024 * 1024);
+        ManagedRawSSTFileIterator<String> itr = sstFileReader.newIterator(
+            keyValue -> StringUtils.bytes2String(keyValue.getKey()) + kVSeparator
+                + StringUtils.bytes2String(keyValue.getValue()), null, null);
Review Comment:
We can instead look at the type of the entry (put vs. delete).
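For instance, a sketch assuming ManagedRawSSTFileIterator.KeyValue exposes the
raw RocksDB entry type through an accessor like getType(), with 0 for a
deletion and 1 for a value (treat the accessor and the constants as
assumptions):

```java
// Sketch: pass the KeyValue through unchanged and branch on its type
// instead of comparing the value against the empty string.
ManagedRawSSTFileIterator<ManagedRawSSTFileIterator.KeyValue> itr =
    sstFileReader.newIterator(keyValue -> keyValue, null, null);
while (itr.hasNext()) {
  ManagedRawSSTFileIterator.KeyValue entry = itr.next();
  if (entry.getType() == 0) {
    // Tombstone: carry the delete marker over to the pruned file.
    sstFileWriter.delete(entry.getKey());
  } else {
    // Regular entry: keep the key, replace the OMKeyInfo value.
    sstFileWriter.put(entry.getKey(), "\0".getBytes(UTF_8));
  }
}
```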
##########
hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java:
##########
@@ -1382,6 +1393,62 @@ public void pruneSstFiles() {
}
}
+  /**
+   * Defines the task that removes OMKeyInfo from SST files in the backup
+   * directory to save disk space.
+   */
+  public void pruneSstFileValues() {
+    if (!shouldRun()) {
+      return;
+    }
+    Path sstBackupDirPath = Paths.get(sstBackupDir);
+    String kVSeparator = ",";
+
+    try (Stream<Path> files = Files.list(sstBackupDirPath)
+        .filter(file -> file.toString().endsWith(ROCKSDB_SST_SUFFIX))) {
+
+      for (Path file : files.collect(Collectors.toList())) {
+        // Write the file.sst => file.sst.tmp
+        File sstFile = file.toFile();
+        File prunedSSTFile = Files.createFile(sstBackupDirPath
+            .resolve(sstFile.getName() + ".tmp")).toFile();
+
+        ManagedEnvOptions envOptions = new ManagedEnvOptions();
+        ManagedOptions managedOptions = new ManagedOptions();
+        ManagedSstFileWriter sstFileWriter =
+            new ManagedSstFileWriter(envOptions, managedOptions);
+        sstFileWriter.open(prunedSSTFile.getAbsolutePath());
+
+        ManagedRawSSTFileReader<String> sstFileReader = new ManagedRawSSTFileReader<>(
+            managedOptions, sstFile.getAbsolutePath(), 2 * 1024 * 1024);
+        ManagedRawSSTFileIterator<String> itr = sstFileReader.newIterator(
+            keyValue -> StringUtils.bytes2String(keyValue.getKey()) + kVSeparator
+                + StringUtils.bytes2String(keyValue.getValue()), null, null);
+
+        while (itr.hasNext()) {
+          // Split with limit -1 so an empty value still yields two elements.
+          String[] keyValue = itr.next().split(kVSeparator, -1);
+          if (keyValue[1].isEmpty()) {
+            sstFileWriter.delete(keyValue[0].getBytes(UTF_8));
+          } else {
+            sstFileWriter.put(keyValue[0].getBytes(UTF_8), "\0".getBytes(UTF_8));
+          }
+        }
+        sstFileWriter.finish();
+
+        // Acquire a mutex
+        try (BootstrapStateHandler.Lock lock = getBootstrapStateLock().lock()) {
Review Comment:
You need to take an Ozone Manager lock here. Create an SST_FILE_LOCK resource in
OzoneManagerLock and take it on just the SST file being pruned, not as one
global lock.
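A rough sketch of that suggestion, assuming SST_FILE_LOCK has been added to
OzoneManagerLock.Resource and is keyed by file name (the new resource is the
proposal here, not existing API):

```java
// Hypothetical per-file lock: only the SST file being swapped is locked,
// so unrelated work is not serialized behind the pruner.
omLock.acquireWriteLock(OzoneManagerLock.Resource.SST_FILE_LOCK, sstFile.getName());
try {
  // Atomically replace file.sst with the pruned file.sst.tmp.
  Files.move(prunedSSTFile.toPath(), sstFile.toPath(),
      StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING);
} finally {
  omLock.releaseWriteLock(OzoneManagerLock.Resource.SST_FILE_LOCK, sstFile.getName());
}
```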
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]