This is an automated email from the ASF dual-hosted git repository. emkornfield pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 3f767ce ARROW-5719: [Java] Support in-place vector sorting 3f767ce is described below commit 3f767ce38866b87e29aa5a964176e978ddf4899e Author: liyafan82 <fan_li...@foxmail.com> AuthorDate: Sun Jul 7 21:11:31 2019 -0700 ARROW-5719: [Java] Support in-place vector sorting Support in-place sorting for vectors. An in-place sorter sorts the vector by directly modifying the vector data, so the input and output vectors are the same one. Author: liyafan82 <fan_li...@foxmail.com> Closes #4699 from liyafan82/fly_0625_isort and squashes the following commits: 2b4fc8233 <liyafan82> Remove type width 1f210d9d3 <liyafan82> Remove data copier e3400a9cb <liyafan82> Support in-place vector sorting --- .../sort/FixedWidthInPlaceVectorSorter.java | 83 +++++++++++++++++++++ .../arrow/algorithm/sort/InPlaceVectorSorter.java | 37 ++++++++++ .../sort/TestFixedWidthInPlaceVectorSorter.java | 86 ++++++++++++++++++++++ 3 files changed, 206 insertions(+) diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java new file mode 100644 index 0000000..a8d2b70 --- /dev/null +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.algorithm.sort; + +import org.apache.arrow.vector.BaseFixedWidthVector; + +/** + * Default in-place sorter for fixed-width vectors. + * It is based on quick-sort, with average time complexity O(n*log(n)). + * @param <V> vector type. + */ +public class FixedWidthInPlaceVectorSorter<V extends BaseFixedWidthVector> implements InPlaceVectorSorter<V> { + + private VectorValueComparator<V> comparator; + + /** + * The vector to sort. + */ + private V vec; + + /** + * The buffer to hold the pivot. + * It always has length 1. + */ + private V pivotBuffer; + + @Override + public void sortInPlace(V vec, VectorValueComparator<V> comparator) { + try { + this.vec = vec; + this.comparator = comparator; + this.pivotBuffer = (V) vec.getField().createVector(vec.getAllocator()); + this.pivotBuffer.allocateNew(1); + + comparator.attachVectors(vec, pivotBuffer); + quickSort(0, vec.getValueCount() - 1); + } finally { + this.pivotBuffer.close(); + } + } + + private void quickSort(int low, int high) { + if (low < high) { + int mid = partition(low, high); + quickSort(low, mid - 1); + quickSort(mid + 1, high); + } + } + + private int partition(int low, int high) { + pivotBuffer.copyFrom(low, 0, vec); + + while (low < high) { + while (low < high && comparator.compare(high, 0) >= 0) { + high -= 1; + } + vec.copyFrom(high, low, vec); + + while (low < high && comparator.compare(low, 0) <= 0) { + low += 1; + } + vec.copyFrom(low, high, vec); + } + + vec.copyFrom(0, low, pivotBuffer); + return low; + } +} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java new file mode 100644 index 0000000..19817fe --- /dev/null +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.algorithm.sort; + +import org.apache.arrow.vector.ValueVector; + +/** + * Basic interface for sorting a vector in-place. + * That is, the sorting is performed by modifying the input vector, + * without creating a new sorted vector. + * + * @param <V> the vector type. + */ +public interface InPlaceVectorSorter<V extends ValueVector> { + + /** + * Sort a vector in-place. + * @param vec the vector to sort. + * @param comparator the criteria for sort. + */ + void sortInPlace(V vec, VectorValueComparator<V> comparator); +} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java new file mode 100644 index 0000000..1a71a53 --- /dev/null +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.algorithm.sort; + +import static org.junit.Assert.assertTrue; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Test cases for {@link FixedWidthInPlaceVectorSorter}. + */ +public class TestFixedWidthInPlaceVectorSorter { + + private BufferAllocator allocator; + + @Before + public void prepare() { + allocator = new RootAllocator(1024 * 1024); + } + + @After + public void shutdown() { + allocator.close(); + } + + @Test + public void testSortInt() { + try (IntVector vec = new IntVector("", allocator)) { + vec.allocateNew(10); + vec.setValueCount(10); + + // fill data to sort + vec.set(0, 10); + vec.set(1, 8); + vec.setNull(2); + vec.set(3, 10); + vec.set(4, 12); + vec.set(5, 17); + vec.setNull(6); + vec.set(7, 23); + vec.set(8, 35); + vec.set(9, 2); + + // sort the vector + FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); + DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator(); + + sorter.sortInPlace(vec, comparator); + + // verify results + Assert.assertEquals(10, vec.getValueCount()); + + assertTrue(vec.isNull(0)); + assertTrue(vec.isNull(1)); + Assert.assertEquals(2, vec.get(2)); + Assert.assertEquals(8, vec.get(3)); + Assert.assertEquals(10, vec.get(4)); + Assert.assertEquals(10, vec.get(5)); + Assert.assertEquals(12, vec.get(6)); + Assert.assertEquals(17, vec.get(7)); + Assert.assertEquals(23, vec.get(8)); + Assert.assertEquals(35, vec.get(9)); + } + } +}