This is an automated email from the ASF dual-hosted git repository. bdeggleston pushed a commit to branch cassandra-3.0 in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-3.0 by this push: new 9382186 Minimize clustering values in metadata collector 9382186 is described below commit 9382186f70cf0560c5308dc324411bef52cf461f Author: Blake Eggleston <bdeggles...@gmail.com> AuthorDate: Thu Nov 7 10:48:51 2019 -0800 Minimize clustering values in metadata collector Patch by Blake Eggleston; Reviewed by Marcus Eriksson for CASSANDRA-15400 --- CHANGES.txt | 1 + src/java/org/apache/cassandra/db/Clustering.java | 8 +++++ .../org/apache/cassandra/db/ClusteringPrefix.java | 6 ++++ .../org/apache/cassandra/db/RangeTombstone.java | 8 +++++ src/java/org/apache/cassandra/db/Slice.java | 8 +++++ .../io/sstable/metadata/MetadataCollector.java | 35 ++-------------------- .../org/apache/cassandra/utils/ByteBufferUtil.java | 23 ++++++++++++++ .../cassandra/io/sstable/SSTableMetadataTest.java | 6 ++++ 8 files changed, 62 insertions(+), 33 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 547bb1a..6dd6d0a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.0.20 + * Minimize clustering values in metadata collector (CASSANDRA-15400) * Avoid over-trimming of results in mixed mode clusters (CASSANDRA-15405) * validate value sizes in LegacyLayout (CASSANDRA-15373) * Ensure that tracing doesn't break connections in 3.x/4.0 mixed mode by default (CASSANDRA-15385) diff --git a/src/java/org/apache/cassandra/db/Clustering.java b/src/java/org/apache/cassandra/db/Clustering.java index a40cc1f..62af0f1 100644 --- a/src/java/org/apache/cassandra/db/Clustering.java +++ b/src/java/org/apache/cassandra/db/Clustering.java @@ -25,6 +25,7 @@ import org.apache.cassandra.config.CFMetaData; import org.apache.cassandra.config.ColumnDefinition; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.io.util.*; +import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.memory.AbstractAllocator; /** @@ -88,6 +89,13 @@ public class Clustering extends AbstractClusteringPrefix return Kind.CLUSTERING; } + public ClusteringPrefix minimize() + { + if (!ByteBufferUtil.canMinimize(values)) + return this; + return new Clustering(ByteBufferUtil.minimizeBuffers(values)); + } + public Clustering copy(AbstractAllocator allocator) { // Important for STATIC_CLUSTERING (but no point in being wasteful in general). diff --git a/src/java/org/apache/cassandra/db/ClusteringPrefix.java b/src/java/org/apache/cassandra/db/ClusteringPrefix.java index 3b826c9..c1000e4 100644 --- a/src/java/org/apache/cassandra/db/ClusteringPrefix.java +++ b/src/java/org/apache/cassandra/db/ClusteringPrefix.java @@ -246,6 +246,12 @@ public interface ClusteringPrefix extends IMeasurableMemory, Clusterable */ public ByteBuffer[] getRawValues(); + /** + * If the prefix contains byte buffers that can be minimized (see {@link ByteBufferUtil#minimalBufferFor(ByteBuffer)}), + * this will return a copy of the prefix with minimized values, otherwise it returns itself. + */ + public ClusteringPrefix minimize(); + public static class Serializer { public void serialize(ClusteringPrefix clustering, DataOutputPlus out, int version, List<AbstractType<?>> types) throws IOException diff --git a/src/java/org/apache/cassandra/db/RangeTombstone.java b/src/java/org/apache/cassandra/db/RangeTombstone.java index 8af3b97..4a26581 100644 --- a/src/java/org/apache/cassandra/db/RangeTombstone.java +++ b/src/java/org/apache/cassandra/db/RangeTombstone.java @@ -26,6 +26,7 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.rows.RangeTombstoneMarker; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.memory.AbstractAllocator; @@ -174,6 +175,13 @@ public class RangeTombstone return new Bound(kind(), newValues); } + public ClusteringPrefix minimize() + { + if (!ByteBufferUtil.canMinimize(values)) + return this; + return new Bound(kind, ByteBufferUtil.minimizeBuffers(values)); + } + @Override public Bound withNewKind(Kind kind) { diff --git a/src/java/org/apache/cassandra/db/Slice.java b/src/java/org/apache/cassandra/db/Slice.java index f90c195..fb75b8e 100644 --- a/src/java/org/apache/cassandra/db/Slice.java +++ b/src/java/org/apache/cassandra/db/Slice.java @@ -25,6 +25,7 @@ import org.apache.cassandra.config.CFMetaData; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.DataOutputPlus; +import org.apache.cassandra.utils.ByteBufferUtil; /** * A slice represents the selection of a range of rows. @@ -494,6 +495,13 @@ public class Slice return sb.append(')').toString(); } + public ClusteringPrefix minimize() + { + if (!ByteBufferUtil.canMinimize(values)) + return this; + return new Bound(kind, ByteBufferUtil.minimizeBuffers(values)); + } + /** * Serializer for slice bounds. * <p> diff --git a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java index 437d80f..867e9a1 100644 --- a/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java +++ b/src/java/org/apache/cassandra/io/sstable/metadata/MetadataCollector.java @@ -229,42 +229,11 @@ public class MetadataCollector implements PartitionStatisticsCollector public MetadataCollector updateClusteringValues(ClusteringPrefix clustering) { - minClustering = minClustering == null || comparator.compare(clustering, minClustering) < 0 ? clustering : minClustering; - maxClustering = maxClustering == null || comparator.compare(clustering, maxClustering) > 0 ? clustering : maxClustering; + minClustering = minClustering == null || comparator.compare(clustering, minClustering) < 0 ? clustering.minimize() : minClustering; + maxClustering = maxClustering == null || comparator.compare(clustering, maxClustering) > 0 ? clustering.minimize() : maxClustering; return this; } - private static ByteBuffer maybeMinimize(ByteBuffer buffer) - { - // ByteBuffer.minimalBufferFor doesn't handle null, but we can get it in this case since it's possible - // for some clustering values to be null - return buffer == null ? null : ByteBufferUtil.minimalBufferFor(buffer); - } - - private static ByteBuffer min(ByteBuffer b1, ByteBuffer b2, AbstractType<?> comparator) - { - if (b1 == null) - return b2; - if (b2 == null) - return b1; - - if (comparator.compare(b1, b2) >= 0) - return b2; - return b1; - } - - private static ByteBuffer max(ByteBuffer b1, ByteBuffer b2, AbstractType<?> comparator) - { - if (b1 == null) - return b2; - if (b2 == null) - return b1; - - if (comparator.compare(b1, b2) >= 0) - return b1; - return b2; - } - public void updateHasLegacyCounterShards(boolean hasLegacyCounterShards) { this.hasLegacyCounterShards = this.hasLegacyCounterShards || hasLegacyCounterShards; diff --git a/src/java/org/apache/cassandra/utils/ByteBufferUtil.java b/src/java/org/apache/cassandra/utils/ByteBufferUtil.java index 0e69f7e..e5a3bb8 100644 --- a/src/java/org/apache/cassandra/utils/ByteBufferUtil.java +++ b/src/java/org/apache/cassandra/utils/ByteBufferUtil.java @@ -610,12 +610,35 @@ public class ByteBufferUtil return prefix.equals(value.duplicate().limit(value.remaining() - diff)); } + public static boolean canMinimize(ByteBuffer buf) + { + return buf != null && (buf.capacity() > buf.remaining() || !buf.hasArray()); + } + /** trims size of bytebuffer to exactly number of bytes in it, to do not hold too much memory */ public static ByteBuffer minimalBufferFor(ByteBuffer buf) { return buf.capacity() > buf.remaining() || !buf.hasArray() ? ByteBuffer.wrap(getArray(buf)) : buf; } + public static ByteBuffer[] minimizeBuffers(ByteBuffer[] src) + { + ByteBuffer[] dst = new ByteBuffer[src.length]; + for (int i=0; i<src.length; i++) + dst[i] = src[i] != null ? minimalBufferFor(src[i]) : null; + return dst; + } + + public static boolean canMinimize(ByteBuffer[] src) + { + for (ByteBuffer buffer : src) + { + if (canMinimize(buffer)) + return true; + } + return false; + } + // Doesn't change bb position public static int getShortLength(ByteBuffer bb, int position) { diff --git a/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataTest.java b/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataTest.java index 419eb25..f39cf3b 100644 --- a/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/SSTableMetadataTest.java @@ -221,6 +221,9 @@ public class SSTableMetadataTest { assertEquals(ByteBufferUtil.string(sstable.getSSTableMetadata().minClusteringValues.get(0)), "0col100"); assertEquals(ByteBufferUtil.string(sstable.getSSTableMetadata().maxClusteringValues.get(0)), "7col149"); + // make sure the clustering values are minimised + assertTrue(sstable.getSSTableMetadata().minClusteringValues.get(0).capacity() < 50); + assertTrue(sstable.getSSTableMetadata().maxClusteringValues.get(0).capacity() < 50); } String key = "row2"; @@ -240,6 +243,9 @@ public class SSTableMetadataTest { assertEquals(ByteBufferUtil.string(sstable.getSSTableMetadata().minClusteringValues.get(0)), "0col100"); assertEquals(ByteBufferUtil.string(sstable.getSSTableMetadata().maxClusteringValues.get(0)), "9col298"); + // and make sure the clustering values are still minimised after compaction + assertTrue(sstable.getSSTableMetadata().minClusteringValues.get(0).capacity() < 50); + assertTrue(sstable.getSSTableMetadata().maxClusteringValues.get(0).capacity() < 50); } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org