danny0405 commented on code in PR #10255: URL: https://github.com/apache/hudi/pull/10255#discussion_r1425055040
########## hudi-common/src/main/java/org/apache/hudi/common/table/timeline/CompletionTimeQueryView.java: ########## @@ -175,42 +190,109 @@ public Option<String> getCompletionTime(String startTime) { * * <p>By default, assumes there is at most 1 day time of duration for an instant to accelerate the queries. * - * @param startCompletionTime The start completion time. - * @param endCompletionTime The end completion time. + * @param readTimeline The read timeline. + * @param startTime The start completion time. + * @param endTime The end completion time. + * @param rangeType The range type. * - * @return The instant time set. + * @return The sorted instant time list. */ - public Set<String> getStartTimeSet(String startCompletionTime, String endCompletionTime) { + public List<String> getStartTime( + HoodieTimeline readTimeline, + @Nullable String startTime, + @Nullable String endTime, + InstantRange.RangeType rangeType) { // assumes any instant/transaction lasts at most 1 day to optimize the query efficiency. - return getStartTimeSet(startCompletionTime, endCompletionTime, s -> HoodieInstantTimeGenerator.instantTimeMinusMillis(s, MILLI_SECONDS_IN_ONE_DAY)); + return getStartTime(readTimeline, startTime, endTime, rangeType, s -> HoodieInstantTimeGenerator.instantTimeMinusMillis(s, MILLI_SECONDS_IN_ONE_DAY)); + } + + /** + * Queries the instant start time with given completion time range. + * + * @param startTime The start completion time. + * @param endTime The end completion time. + * @param earliestInstantTimeFunc The function to generate the earliest start time boundary + * with the minimum completion time. + * + * @return The sorted instant time list. 
+ */ + @VisibleForTesting + public List<String> getStartTime( + @Nullable String startTime, + @Nullable String endTime, + Function<String, String> earliestInstantTimeFunc) { + return getStartTime(metaClient.getCommitsTimeline().filterCompletedInstants(), startTime, endTime, InstantRange.RangeType.CLOSE_CLOSE, earliestInstantTimeFunc); } /** * Queries the instant start time with given completion time range. * - * @param startCompletionTime The start completion time. - * @param endCompletionTime The end completion time. - * @param earliestStartTimeFunc The function to generate the earliest start time boundary - * with the minimum completion time {@code startCompletionTime}. + * @param readTimeline The read timeline. + * @param startTime The start completion time. + * @param endTime The end completion time. + * @param rangeType The range type. + * @param earliestInstantTimeFunc The function to generate the earliest start time boundary + * with the minimum completion time. * - * @return The instant time set. + * @return The sorted instant time list. */ - public Set<String> getStartTimeSet(String startCompletionTime, String endCompletionTime, Function<String, String> earliestStartTimeFunc) { - String startInstant = earliestStartTimeFunc.apply(startCompletionTime); + public List<String> getStartTime( + HoodieTimeline readTimeline, + @Nullable String startTime, + @Nullable String endTime, + InstantRange.RangeType rangeType, + Function<String, String> earliestInstantTimeFunc) { + final boolean startFromEarliest = START_COMMIT_EARLIEST.equalsIgnoreCase(startTime); + String earliestInstantToLoad = null; + if (startTime != null && !startFromEarliest) { + earliestInstantToLoad = earliestInstantTimeFunc.apply(startTime); + } else if (endTime != null) { + earliestInstantToLoad = earliestInstantTimeFunc.apply(endTime); + } + + // ensure the earliest instant boundary be loaded. 
+ if (earliestInstantToLoad != null && HoodieTimeline.compareTimestamps(this.cursorInstant, GREATER_THAN, earliestInstantToLoad)) { + loadCompletionTimeIncrementally(earliestInstantToLoad); + } + + if (startTime == null && endTime != null) { + // returns the last instant that finished at or before the given completion time 'endTime'. + String maxInstantTime = readTimeline.getInstantsAsStream() + .filter(instant -> instant.isCompleted() && HoodieTimeline.compareTimestamps(instant.getCompletionTime(), LESSER_THAN_OR_EQUALS, endTime)) + .max(Comparator.comparing(HoodieInstant::getCompletionTime)).map(HoodieInstant::getTimestamp).orElse(null); + if (maxInstantTime != null) { + return Collections.singletonList(maxInstantTime); + } + // fallback to archived timeline + return this.startToCompletionInstantTimeMap.entrySet().stream() Review Comment: We should. Here we want to filter the timeline first and then figure out the max instant; the `startToCompletionInstantTimeMap` contains all the completed instants (which is by design). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org