HeartSaVioR commented on a change in pull request #27557: [SPARK-30804][SS] 
Measure and log elapsed time for "compact" operation in CompactibleFileStreamLog
URL: https://github.com/apache/spark/pull/27557#discussion_r378654769
 
 

 ##########
 File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala
 ##########
 @@ -177,16 +178,29 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : 
ClassTag](
    * corresponding `batchId` file. It will delete expired files as well if 
enabled.
    */
   private def compact(batchId: Long, logs: Array[T]): Boolean = {
-    val validBatches = getValidBatchesBeforeCompactionBatch(batchId, 
compactInterval)
-    val allLogs = validBatches.flatMap { id =>
-      super.get(id).getOrElse {
-        throw new IllegalStateException(
-          s"${batchIdToPath(id)} doesn't exist when compacting batch $batchId 
" +
-            s"(compactInterval: $compactInterval)")
-      }
-    } ++ logs
+    val (allLogs, loadElapsedMs) = Utils.timeTakenMs {
+      val validBatches = getValidBatchesBeforeCompactionBatch(batchId, 
compactInterval)
+      validBatches.flatMap { id =>
+        super.get(id).getOrElse {
+          throw new IllegalStateException(
+            s"${batchIdToPath(id)} doesn't exist when compacting batch 
$batchId " +
+              s"(compactInterval: $compactInterval)")
+        }
+      } ++ logs
+    }
+    logInfo(s"It took $loadElapsedMs ms to load ${allLogs.size} entries " +
 
 Review comment:
   Personally, I don't think logging this at INFO will be bothersome, since it 
is only printed once per compact interval. InMemoryFileIndex also logs its 
file-seeking latency at INFO, which makes sense, and the same approach can be 
applied here as well.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to