danny0405 commented on code in PR #10255:
URL: https://github.com/apache/hudi/pull/10255#discussion_r1424872867


##########
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/CompletionTimeQueryView.java:
##########
@@ -175,42 +190,109 @@ public Option<String> getCompletionTime(String 
startTime) {
    *
    * <p>By default, assumes there is at most 1 day time of duration for an 
instant to accelerate the queries.
    *
-   * @param startCompletionTime The start completion time.
-   * @param endCompletionTime   The end completion time.
+   * @param readTimeline The read timeline.
+   * @param startTime    The start completion time.
+   * @param endTime      The end completion time.
+   * @param rangeType    The range type.
    *
-   * @return The instant time set.
+   * @return The sorted instant time list.
    */
-  public Set<String> getStartTimeSet(String startCompletionTime, String 
endCompletionTime) {
+  public List<String> getStartTime(
+      HoodieTimeline readTimeline,
+      @Nullable String startTime,
+      @Nullable String endTime,
+      InstantRange.RangeType rangeType) {
     // assumes any instant/transaction lasts at most 1 day to optimize the 
query efficiency.
-    return getStartTimeSet(startCompletionTime, endCompletionTime, s -> 
HoodieInstantTimeGenerator.instantTimeMinusMillis(s, MILLI_SECONDS_IN_ONE_DAY));
+    return getStartTime(readTimeline, startTime, endTime, rangeType, s -> 
HoodieInstantTimeGenerator.instantTimeMinusMillis(s, MILLI_SECONDS_IN_ONE_DAY));
+  }
+
+  /**
+   * Queries the instant start time with given completion time range.
+   *
+   * @param startTime               The start completion time.
+   * @param endTime                 The end completion time.
+   * @param earliestInstantTimeFunc The function to generate the earliest 
start time boundary
+   *                                with the minimum completion time.
+   *
+   * @return The sorted instant time list.
+   */
+  @VisibleForTesting
+  public List<String> getStartTime(
+      @Nullable String startTime,
+      @Nullable String endTime,
+      Function<String, String> earliestInstantTimeFunc) {
+    return 
getStartTime(metaClient.getCommitsTimeline().filterCompletedInstants(), 
startTime, endTime, InstantRange.RangeType.CLOSE_CLOSE, 
earliestInstantTimeFunc);
   }
 
   /**
    * Queries the instant start time with given completion time range.
    *
-   * @param startCompletionTime   The start completion time.
-   * @param endCompletionTime     The end completion time.
-   * @param earliestStartTimeFunc The function to generate the earliest start 
time boundary
-   *                              with the minimum completion time {@code 
startCompletionTime}.
+   * @param readTimeline            The read timeline.
+   * @param startTime               The start completion time.
+   * @param endTime                 The end completion time.
+   * @param rangeType               The range type.
+   * @param earliestInstantTimeFunc The function to generate the earliest 
start time boundary
+   *                                with the minimum completion time.
    *
-   * @return The instant time set.
+   * @return The sorted instant time list.
    */
-  public Set<String> getStartTimeSet(String startCompletionTime, String 
endCompletionTime, Function<String, String> earliestStartTimeFunc) {
-    String startInstant = earliestStartTimeFunc.apply(startCompletionTime);
+  public List<String> getStartTime(
+      HoodieTimeline readTimeline,
+      @Nullable String startTime,
+      @Nullable String endTime,
+      InstantRange.RangeType rangeType,
+      Function<String, String> earliestInstantTimeFunc) {
+    final boolean startFromEarliest = 
START_COMMIT_EARLIEST.equalsIgnoreCase(startTime);
+    String earliestInstantToLoad = null;
+    if (startTime != null && !startFromEarliest) {
+      earliestInstantToLoad = earliestInstantTimeFunc.apply(startTime);
+    } else if (endTime != null) {
+      earliestInstantToLoad = earliestInstantTimeFunc.apply(endTime);
+    }
+
+    // ensure the earliest instant boundary be loaded.
+    if (earliestInstantToLoad != null && 
HoodieTimeline.compareTimestamps(this.cursorInstant, GREATER_THAN, 
earliestInstantToLoad)) {
+      loadCompletionTimeIncrementally(earliestInstantToLoad);
+    }
+
+    if (startTime == null && endTime != null) {
+      // returns the last instant that finished at or before the given 
completion time 'endTime'.
+      String maxInstantTime = readTimeline.getInstantsAsStream()
+          .filter(instant -> instant.isCompleted() && 
HoodieTimeline.compareTimestamps(instant.getCompletionTime(), 
LESSER_THAN_OR_EQUALS, endTime))
+          
.max(Comparator.comparing(HoodieInstant::getCompletionTime)).map(HoodieInstant::getTimestamp).orElse(null);
+      if (maxInstantTime != null) {
+        return Collections.singletonList(maxInstantTime);
+      }
+      // fallback to archived timeline
+      return this.startToCompletionInstantTimeMap.entrySet().stream()
+          .filter(entry -> HoodieTimeline.compareTimestamps(entry.getValue(), 
LESSER_THAN_OR_EQUALS, endTime))
+          .map(Map.Entry::getKey).collect(Collectors.toList());
+    }
+
+    if (startFromEarliest) {
+      // expedience for snapshot read: ['earliest', _) to avoid loading 
unnecessary instants.
+      startTime = null;

Review Comment:
   For Flink incremental reads, we have an option that lets the user consume 
from the earliest instant. I want to reuse the code path for `startTime == null 
&& endTime == null` in that case, so I put the handling here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to