xuanyuanking commented on a change in pull request #32933:
URL: https://github.com/apache/spark/pull/32933#discussion_r661505905



##########
File path: 
sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala
##########
@@ -102,6 +102,72 @@ class RocksDBSuite extends SparkFunSuite {
     }
   }
 
+  test("RocksDB: cleanup old files") {
+    val remoteDir = Utils.createTempDir().toString
+    val conf = RocksDBConf().copy(compactOnCommit = true, minVersionsToRetain 
= 10)
+
+    def versionsPresent: Seq[Long] = {
+      remoteDir.listFiles.filter(_.getName.endsWith(".zip"))
+        .map(_.getName.stripSuffix(".zip"))
+        .map(_.toLong)
+        .sorted
+    }
+
+    withDB(remoteDir, conf = conf) { db =>
+      // Generate versions without cleaning up
+      for (version <- 1 to 50) {
+        db.put(version.toString, version.toString)  // update "1" -> "1", "2" 
-> "2", ...
+        db.commit()
+      }
+
+      // Clean up and verify version files and SST files were deleted
+      require(versionsPresent === (1L to 50L))
+      val sstDir = new File(remoteDir, "SSTs")
+      val numSstFiles = listFiles(sstDir).length
+      db.cleanup()
+      assert(versionsPresent === (41L to 50L))
+      assert(listFiles(sstDir).length < numSstFiles)

Review comment:
       Right. We do a RocksDB checkpoint for each commit operation, and each 
checkpoint is a full snapshot that includes all data. In this UT we have 50 
versions but only retain 10 of them, so the SST files for the deleted versions 
(1 to 40) will be deleted.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to