HeartSaVioR commented on a change in pull request #27557: [SPARK-30804][SS] 
Measure and log elapsed time for "compact" operation in CompactibleFileStreamLog
URL: https://github.com/apache/spark/pull/27557#discussion_r378655218
 
 

 ##########
 File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala
 ##########
 @@ -177,16 +178,29 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : 
ClassTag](
    * corresponding `batchId` file. It will delete expired files as well if 
enabled.
    */
   private def compact(batchId: Long, logs: Array[T]): Boolean = {
-    val validBatches = getValidBatchesBeforeCompactionBatch(batchId, 
compactInterval)
-    val allLogs = validBatches.flatMap { id =>
-      super.get(id).getOrElse {
-        throw new IllegalStateException(
-          s"${batchIdToPath(id)} doesn't exist when compacting batch $batchId 
" +
-            s"(compactInterval: $compactInterval)")
-      }
-    } ++ logs
+    val (allLogs, loadElapsedMs) = Utils.timeTakenMs {
+      val validBatches = getValidBatchesBeforeCompactionBatch(batchId, 
compactInterval)
+      validBatches.flatMap { id =>
+        super.get(id).getOrElse {
+          throw new IllegalStateException(
+            s"${batchIdToPath(id)} doesn't exist when compacting batch 
$batchId " +
+              s"(compactInterval: $compactInterval)")
+        }
+      } ++ logs
+    }
+    logInfo(s"It took $loadElapsedMs ms to load ${allLogs.size} entries " +
+      s"(${SizeEstimator.estimate(allLogs)} bytes in memory) for compact batch 
$batchId.")
 
 Review comment:
   I've added the estimated memory usage as well, because this operation 
increases memory usage in the "driver", where it is easy to assume that not much memory is needed.
   (https://issues.apache.org/jira/browse/SPARK-30462)

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to