This is an automated email from the ASF dual-hosted git repository.
lakshsingla pushed a commit to branch 29.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/29.0.0 by this push:
new 8c2d42bdec2 Fix
HllSketchHolderObjectStrategy#isSafeToConvertToNullSketch. (#15860) (#15861)
8c2d42bdec2 is described below
commit 8c2d42bdec29ee6d15e7d36d02f3bc5393feec1e
Author: Adarsh Sanjeev <[email protected]>
AuthorDate: Thu Feb 8 12:21:57 2024 +0530
Fix HllSketchHolderObjectStrategy#isSafeToConvertToNullSketch. (#15860)
(#15861)
* Fix HllSketchHolderObjectStrategy#isSafeToConvertToNullSketch.
The prior code from #15162 was reading only the low-order byte of an int
representing the size of a coupon set. As a result, it would erroneously
believe that a coupon set with a multiple of 256 elements was empty.
Co-authored-by: Gian Merlino <[email protected]>
---
.../hll/HllSketchHolderObjectStrategy.java | 9 +--
...java => HllSketchHolderObjectStrategyTest.java} | 75 +++++++++++++++++++++-
2 files changed, 79 insertions(+), 5 deletions(-)
diff --git
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategy.java
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategy.java
index 93597a2621e..60b1cc87461 100644
---
a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategy.java
+++
b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategy.java
@@ -85,7 +85,7 @@ public class HllSketchHolderObjectStrategy implements
ObjectStrategy<HllSketchHo
* Checks the initial 8 byte header to find the type of internal sketch
implementation, then uses the logic the
* corresponding implementation uses to tell if a sketch is empty while
deserializing it.
*/
- private static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size)
+ static boolean isSafeToConvertToNullSketch(ByteBuffer buf, int size)
{
if (size < 8) {
// Sanity check.
@@ -110,13 +110,14 @@ public class HllSketchHolderObjectStrategy implements
ObjectStrategy<HllSketchHo
int listCount = buf.get(position + 6) & 0xFF; // get(LIST_COUNT_BYTE)
& 0xFF
return listCount == 0;
case 1: // SET
- if (preInts != 3 || size < 9) {
+ if (preInts != 3 || size < 12) {
// preInts should be HASH_SET_PREINTS, Sanity check.
- // We also need to read an additional byte for Set implementations.
+ // We also need to read an additional int for Set implementations.
return false;
}
// Based on
org.apache.datasketches.hll.PreambleUtil.extractHashSetCount
- int setCount = buf.get(position + 8); // get(HASH_SET_COUNT_INT)
+ // Endianness of buf doesn't matter, since we're checking for equality
with zero.
+ int setCount = buf.getInt(position + 8); // getInt(HASH_SET_COUNT_INT)
return setCount == 0;
case 2: // HLL
if (preInts != 10) {
diff --git
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchObjectStrategyTest.java
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategyTest.java
similarity index 50%
rename from
extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchObjectStrategyTest.java
rename to
extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategyTest.java
index 97eaf80fd64..c25135ef2d3 100644
---
a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchObjectStrategyTest.java
+++
b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchHolderObjectStrategyTest.java
@@ -21,14 +21,16 @@ package org.apache.druid.query.aggregation.datasketches.hll;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.hll.TgtHllType;
import org.apache.druid.java.util.common.StringUtils;
import org.junit.Assert;
import org.junit.Test;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
+import java.util.Random;
-public class HllSketchObjectStrategyTest
+public class HllSketchHolderObjectStrategyTest
{
@Test
public void testSafeRead()
@@ -74,4 +76,75 @@ public class HllSketchObjectStrategyTest
() -> objectStrategy.fromByteBufferSafe(buf4,
garbageLonger.length).getSketch().copy()
);
}
+
+ @Test
+ public void testHllSketchIsNullEquivalent()
+ {
+ final Random random = new Random(0);
+ for (final TgtHllType tgtHllType : TgtHllType.values()) {
+ for (int lgK = 7; lgK < 22; lgK++) {
+ for (int sz : new int[]{0, 1, 2, 127, 128, 129, 255, 256, 257, 511,
512, 513, 16383, 16384, 16385}) {
+ final String description = StringUtils.format("tgtHllType[%s],
lgK[%s], sz[%s]", tgtHllType, lgK, sz);
+ final HllSketch sketch = new HllSketch(lgK, tgtHllType);
+ for (int i = 0; i < sz; i++) {
+ sketch.update(random.nextLong());
+ }
+
+ final boolean expectEmpty = sz == 0;
+
+ // --------------------------------
+ // Compact array, little endian buf
+ final byte[] compactBytes = sketch.toCompactByteArray();
+ // Add a byte of padding on either side
+ ByteBuffer buf = ByteBuffer.allocate(compactBytes.length + 2);
+ buf.order(ByteOrder.LITTLE_ENDIAN);
+ buf.position(1);
+ buf.put(compactBytes);
+ buf.position(1);
+ Assert.assertEquals(
+ "Compact array littleEndian " + description,
+ expectEmpty,
+ HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf,
compactBytes.length)
+ );
+ Assert.assertEquals(1, buf.position());
+
+ // -----------------------------
+ // Compact array, big endian buf
+ buf.order(ByteOrder.BIG_ENDIAN);
+ Assert.assertEquals(
+ "Compact array bigEndian " + description,
+ expectEmpty,
+ HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf,
compactBytes.length)
+ );
+ Assert.assertEquals(1, buf.position());
+
+ // ----------------------------------
+ // Updatable array, little endian buf
+ final byte[] updatableBytes = sketch.toUpdatableByteArray();
+ // Add a byte of padding on either side
+ buf = ByteBuffer.allocate(updatableBytes.length + 2);
+ buf.order(ByteOrder.LITTLE_ENDIAN);
+ buf.position(1);
+ buf.put(updatableBytes);
+ buf.position(1);
+ Assert.assertEquals(
+ "Updatable array littleEndian " + description,
+ expectEmpty,
+ HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf,
updatableBytes.length)
+ );
+ Assert.assertEquals(1, buf.position());
+
+ // -------------------------------
+ // Updatable array, big endian buf
+ buf.order(ByteOrder.BIG_ENDIAN);
+ Assert.assertEquals(
+ "Updatable array bigEndian " + description,
+ expectEmpty,
+ HllSketchHolderObjectStrategy.isSafeToConvertToNullSketch(buf,
updatableBytes.length)
+ );
+ Assert.assertEquals(1, buf.position());
+ }
+ }
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]