[GitHub] [carbondata] jackylk commented on a change in pull request #3798: [CARBONDATA-3875] Support show segments include stage

GitBox Sat, 27 Jun 2020 23:47:39 -0700


jackylk commented on a change in pull request #3798:
URL: https://github.com/apache/carbondata/pull/3798#discussion_r446609223




##########
File path: 
integration/spark/src/main/scala/org/apache/carbondata/api/CarbonStore.scala
##########
@@ -63,13 +68,101 @@ object CarbonStore {
     }
 
     if (limit.isDefined) {
-      val lim = Integer.parseInt(limit.get)
-      segmentsMetadataDetails.slice(0, lim)
+      segmentsMetadataDetails.slice(0, limit.get)
     } else {
       segmentsMetadataDetails
     }
   }
 
+  /**
+   * Read stage files and return input files
+   */
+  def readStages(
+      tablePath: String,
+      configuration: Configuration): Seq[StageInput] = {
+    val stageFiles = listStageFiles(
+      CarbonTablePath.getStageDir(tablePath), configuration)
+    var output = Collections.synchronizedList(new util.ArrayList[StageInput]())
+    output.addAll(readStageInput(stageFiles._1,
+      StageInput.StageStatus.Unload).asJavaCollection)
+    output.addAll(readStageInput(stageFiles._2,
+      StageInput.StageStatus.Loading).asJavaCollection)
+    Collections.sort(output, new Comparator[StageInput]() {
+      def compare(stageInput1: StageInput, stageInput2: StageInput): Int = {
+        (stageInput2.getCreateTime - stageInput1.getCreateTime).intValue()
+      }
+    })
+    output.asScala
+  }
+
+  /**
+   * Read stage files and return input files
+   */
+  def readStageInput(
+      stageFiles: Seq[CarbonFile],
+      status: StageInput.StageStatus): Seq[StageInput] = {
+    val gson = new Gson()
+    val output = Collections.synchronizedList(new util.ArrayList[StageInput]())
+    stageFiles.map { stage =>
+      val filePath = stage.getAbsolutePath
+      val stream = FileFactory.getDataInputStream(filePath)
+      try {
+        val stageInput = gson.fromJson(new InputStreamReader(stream), 
classOf[StageInput])
+        stageInput.setCreateTime(stage.getLastModifiedTime)
+        stageInput.setStatus(status)
+        output.add(stageInput)
+      } finally {
+        stream.close()
+      }
+    }
+    output.asScala
+  }
+
+  /*
+   * Collect all stage files and matched success files.
+   * A stage file without success file will not be collected
+   */
+  def listStageFiles(

Review comment:
       It seems this it is only for test, can you add it to test folder instead 
of adding new API?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

[GitHub] [carbondata] jackylk commented on a change in pull request #3798: [CARBONDATA-3875] Support show segments include stage

Reply via email to