Hi, Please kindly review the attached patch for RandomAccessSpliterator implementation.
Currently default implementation of spliterator is an IteratorSpliterator which is not optimal for RandomAccess data structures (besides ArrayList and Vector). This patch is to provide a default RandomAccessSpliterator implementation for RandomAccess data structure. The figures below demonstrate the performance difference before and after the change. Note the significant performance improvement in test SelectTest.parallel_lazy_streams_gsc (parallel streams performance test for non-JDK Lists that implement RandomAccess but don't yet implement their own spliterators). Benchmark code: https://github.com/goldmansachs/gs-collections/blob/master/jmh-tests/src/main/java/com/gs/collections/impl/jmh/SelectTest.java With IteratorSpliterator as default: Benchmark Mode Cnt Score Error Units SelectTest.parallel_lazy_jdk thrpt 20 172.671 ± 14.231 ops/s SelectTest.parallel_lazy_streams_gsc thrpt 20 20.662 ± 0.493 ops/s SelectTest.serial_lazy_jdk thrpt 20 89.384 ± 4.431 ops/s SelectTest.serial_lazy_streams_gsc thrpt 20 80.831 ± 7.875 ops/s With RandomAccessSpliterator as default: Benchmark Mode Cnt Score Error Units SelectTest.parallel_lazy_jdk thrpt 20 174.190 ± 16.870 ops/s SelectTest.parallel_lazy_streams_gsc thrpt 20 180.740 ± 9.594 ops/s SelectTest.serial_lazy_jdk thrpt 20 85.719 ± 2.414 ops/s SelectTest.serial_lazy_streams_gsc thrpt 20 78.760 ± 1.029 ops/s Majority of the patch is contributed by Paul Sandoz and he should be credited in the Contributed-by field. Along with this patch submission, we have a question for SubList spliterator implementation that we retained old behavior for now (i.e. return IteratorSpliterator, refer to RandomAccessSubList#spliterator()). We have found that Vector's subList is wrapped by RandomAccessSubList, that is RandomAccess but not a Vector anymore, and it expects to use IteratorSpliterator. We were not sure what's the best approach to distinguish Vector from other RandomAccess data structure within RandomAccessSublist, so we kept it return IteratorSpliterator for now. One approach could be to introduce VectorSubList that returns IteratorSpliterator (or an implementation similar to VectorSpliterator?). Then we could revert the spliterator() override in RandomAccessSublist. What would be your suggestion to handle this? Depending on your suggestion, we might fix the subList spliterator in this patch, or submit a separate patch if the amount of the change is significant. Thanks, Hiroshi
# HG changeset patch # User Hiroshi Ito <hiroshi....@gs.com> # Date 1462175817 -28800 # Mon May 02 15:56:57 2016 +0800 # Node ID 3cb964821c22962c34bf23b1d3b075cd289732dc # Parent 8c293ee99d5ace8938d2117ce380ba9724fedb0a Add RandomAccessSpliterator to Spliterators. diff -r 8c293ee99d5a -r 3cb964821c22 src/java.base/share/classes/java/util/AbstractList.java --- a/src/java.base/share/classes/java/util/AbstractList.java Thu Apr 07 10:07:02 2016 -0700 +++ b/src/java.base/share/classes/java/util/AbstractList.java Mon May 02 15:56:57 2016 +0800 @@ -25,6 +25,8 @@ package java.util; +import java.util.function.Consumer; + /** * This class provides a skeletal implementation of the {@link List} * interface to minimize the effort required to implement this interface @@ -634,6 +636,84 @@ return "Index: "+index+", Size: "+size(); } + /** Index-based split-by-two, lazily initialized Spliterator */ + static final class RandomAccessSpliterator<E> implements Spliterator<E> { + + private final List<E> list; + private int index; // current index, modified on advance/split + private int fence; // -1 until used; then one past last index + + RandomAccessSpliterator(List<E> list) { + this(list, 0, -1); + assert list instanceof RandomAccess; + } + + /** Create new spliterator covering the given range */ + RandomAccessSpliterator(List<E> list, int origin, int fence) { + this.list = list; // OK if null unless traversed + this.index = origin; + this.fence = fence; + } + + private int getFence() { // initialize fence to size on first use + int hi; // (a specialized variant appears in method forEach) + List<E> lst; + if ((hi = fence) < 0) { + if ((lst = list) == null) + hi = fence = 0; + else { + hi = fence = lst.size(); + } + } + return hi; + } + + public RandomAccessSpliterator<E> trySplit() { + int hi = getFence(), lo = index, mid = (lo + hi) >>> 1; + return (lo >= mid) ? null : // divide range in half unless too small + new RandomAccessSpliterator<>(this.list, lo, index = mid); + } + + public boolean tryAdvance(Consumer<? super E> action) { + if (action == null) + throw new NullPointerException(); + int hi = getFence(), i = index; + if (i < hi) { + index = i + 1; + @SuppressWarnings("unchecked") E e = this.list.get(i); + action.accept(e); + return true; + } + return false; + } + + public void forEachRemaining(Consumer<? super E> action) { + int i, hi; // hoist accesses and checks from loop + List<E> lst; + if (action == null) + throw new NullPointerException(); + if ((lst = list) != null) { + if ((hi = fence) < 0) { + hi = lst.size(); + } + if ((i = index) >= 0 && (index = hi) <= lst.size()) { + for (; i < hi; ++i) { + @SuppressWarnings("unchecked") E e = lst.get(i); + action.accept(e); + } + } + } + } + + public long estimateSize() { + return (long) (getFence() - index); + } + + public int characteristics() { + return Spliterator.ORDERED | Spliterator.SIZED | Spliterator.SUBSIZED; + } + } + private static class SubList<E> extends AbstractList<E> { private final AbstractList<E> root; private final SubList<E> parent; @@ -827,5 +907,10 @@ subListRangeCheck(fromIndex, toIndex, size); return new RandomAccessSubList<>(this, fromIndex, toIndex); } + + @Override + public Spliterator<E> spliterator() { + return Spliterators.spliterator(this, Spliterator.ORDERED); + } } } diff -r 8c293ee99d5a -r 3cb964821c22 src/java.base/share/classes/java/util/List.java --- a/src/java.base/share/classes/java/util/List.java Thu Apr 07 10:07:02 2016 -0700 +++ b/src/java.base/share/classes/java/util/List.java Mon May 02 15:56:57 2016 +0800 @@ -751,7 +751,11 @@ */ @Override default Spliterator<E> spliterator() { - return Spliterators.spliterator(this, Spliterator.ORDERED); + if (this instanceof RandomAccess) { + return new AbstractList.RandomAccessSpliterator<>(this); + } else { + return Spliterators.spliterator(this, Spliterator.ORDERED); + } } /** diff -r 8c293ee99d5a -r 3cb964821c22 test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java --- a/test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java Thu Apr 07 10:07:02 2016 -0700 +++ b/test/java/util/Spliterator/SpliteratorTraversingAndSplittingTest.java Mon May 02 15:56:57 2016 +0800 @@ -51,6 +51,7 @@ import java.util.List; import java.util.Map; import java.util.PriorityQueue; +import java.util.RandomAccess; import java.util.Set; import java.util.SortedSet; import java.util.Spliterator; @@ -278,6 +279,26 @@ db.addCollection( c -> new AbstractListImpl(c)); + class AbstractRandomAccessListImpl extends AbstractList<Integer> implements RandomAccess { + Integer[] ia; + + AbstractRandomAccessListImpl(Collection<Integer> c) { + this.ia = c.toArray(new Integer[c.size()]); + } + + @Override + public Integer get(int index) { + return ia[index]; + } + + @Override + public int size() { + return ia.length; + } + } + db.addCollection( + c -> new AbstractRandomAccessListImpl(c)); + class AbstractSetImpl extends AbstractSet<Integer> { Set<Integer> s;