maytasm commented on a change in pull request #10742: URL: https://github.com/apache/druid/pull/10742#discussion_r561439141
########## File path: server/src/test/java/org/apache/druid/segment/indexing/granularity/IntervalsByGranularityTest.java ########## @@ -76,6 +79,21 @@ public void testDups() Assert.assertTrue(count == 61); } + + @Test + public void testCondenseDoesNotMaterialize() Review comment: how does this test that condense does not materialize? ########## File path: core/src/main/java/org/apache/druid/java/util/common/JodaUtils.java ########## @@ -50,15 +49,25 @@ sortedIntervals.add(interval); } } + return condenseIntervals(sortedIntervals.iterator()); + } - if (sortedIntervals.isEmpty()) { - return new ArrayList<>(); + /** + * Caller needs to ensure that sortedIntervals is sorted in ascending order Review comment: Can you add a comment that this function does not materialize the input intervals and how it does that. ########## File path: indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java ########## @@ -106,7 +106,8 @@ ); } - @VisibleForTesting // Only for testing + @VisibleForTesting + // Only for testing Review comment: Comment not needed as the annotation is saying the same thing ########## File path: indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java ########## @@ -373,23 +373,25 @@ private boolean tryLockWithDetermineResult(TaskActionClient client, LockGranular } } + protected boolean tryTimeChunkLock(TaskActionClient client, List<Interval> intervals) throws IOException { - - // In this case, the intervals to lock must be aligned with segmentGranularity if it's defined - final Iterator<Interval> intervalIterator; + // The given intervals are first converted to align with segment granularity. This is because, + // when an overwriting task finds a version for a given input row, it expects the interval + // associated with each version to be equal or larger than the time bucket where the input row falls in. + // See ParallelIndexSupervisorTask.findVersion(). 
+ final Set<Interval> uniqueCondensedIntervals = new HashSet<>(); final Granularity segmentGranularity = getSegmentGranularity(); if (segmentGranularity == null) { - Set<Interval> uniqueIntervals = new HashSet<>(intervals); - ArrayList<Interval> condensedIntervals = JodaUtils.condenseIntervals(() -> uniqueIntervals.iterator()); - intervalIterator = condensedIntervals.iterator(); + uniqueCondensedIntervals.addAll(JodaUtils.condenseIntervals(intervals)); } else { IntervalsByGranularity intervalsByGranularity = new IntervalsByGranularity(intervals, segmentGranularity); - intervalIterator = intervalsByGranularity.granularityIntervalsIterator(); + // the following is calling a condense that does not materialize the intervals: + uniqueCondensedIntervals.addAll(JodaUtils.condenseIntervals(intervalsByGranularity.granularityIntervalsIterator())); Review comment: doesn't addAll materialize all the intervals? Is materializing the condensed intervals fine as long as we are not materializing the original intervals? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For additional commands, e-mail: commits-h...@druid.apache.org