This is an automated email from the ASF dual-hosted git repository. jmalkin pushed a commit to branch 5.0.X-backport in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 6ae6956c4741b30cc02f524d2c8b2f098af3b5b0 Author: Lee Rhodes <[email protected]> AuthorDate: Fri Feb 23 10:23:08 2024 -0800 Changes required to remove vulnerabilities to Finalize Attacks and satisfy SpotBugs warnings. --- .../org/apache/datasketches/hll/CouponList.java | 9 +- .../apache/datasketches/hll/DirectCouponList.java | 19 +- .../apache/datasketches/hll/DirectHllArray.java | 9 +- .../apache/datasketches/kll/KllItemsSketch.java | 152 +++++++++++- .../datasketches/kll/KllItemsSketchSortedView.java | 137 ----------- .../quantiles/DoublesSketchAccessor.java | 24 +- .../quantiles/HeapDoublesSketchAccessor.java | 2 +- .../apache/datasketches/quantiles/ItemsSketch.java | 91 ++++++- .../quantiles/ItemsSketchSortedView.java | 203 ++++++++------- .../GenericPartitionBoundaries.java | 7 +- .../theta/DirectQuickSelectSketch.java | 51 ++-- .../apache/datasketches/tuple/CompactSketch.java | 20 +- .../datasketches/tuple/QuickSelectSketch.java | 271 +++++++++++++-------- .../java/org/apache/datasketches/tuple/Sketch.java | 8 +- .../apache/datasketches/tuple/UpdatableSketch.java | 4 +- .../ArrayOfDoublesQuickSelectSketch.java | 4 +- .../DirectArrayOfDoublesQuickSelectSketch.java | 78 ++++-- .../tuple/strings/ArrayOfStringsSketchTest.java | 4 +- tools/FindBugsExcludeFilter.xml | 21 +- 19 files changed, 668 insertions(+), 446 deletions(-) diff --git a/src/main/java/org/apache/datasketches/hll/CouponList.java b/src/main/java/org/apache/datasketches/hll/CouponList.java index 45f42656..eb1f0481 100644 --- a/src/main/java/org/apache/datasketches/hll/CouponList.java +++ b/src/main/java/org/apache/datasketches/hll/CouponList.java @@ -41,9 +41,9 @@ class CouponList extends AbstractCoupons { int couponCount; int[] couponIntArr; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + private static int checkLgConfigK(final CurMode curMode, final int lgConfigK) { + if (curMode == CurMode.SET) { assert lgConfigK > 7; } + return lgConfigK; } /** @@ -53,12 +53,11 @@ class CouponList extends AbstractCoupons { * @param curMode LIST or SET */ CouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode) { - super(lgConfigK, tgtHllType, curMode); + super(checkLgConfigK(curMode, lgConfigK), tgtHllType, curMode); if (curMode == CurMode.LIST) { lgCouponArrInts = LG_INIT_LIST_SIZE; } else { //SET lgCouponArrInts = LG_INIT_SET_SIZE; - assert lgConfigK > 7; } couponIntArr = new int[1 << lgCouponArrInts]; couponCount = 0; diff --git a/src/main/java/org/apache/datasketches/hll/DirectCouponList.java b/src/main/java/org/apache/datasketches/hll/DirectCouponList.java index e8b6ad2e..daf1da41 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectCouponList.java +++ b/src/main/java/org/apache/datasketches/hll/DirectCouponList.java @@ -61,24 +61,21 @@ class DirectCouponList extends AbstractCoupons { Memory mem; final boolean compact; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + private static int checkMemCompactFlag(final WritableMemory wmem, final int lgConfigK) { + assert !extractCompactFlag(wmem); + return lgConfigK; } - //called from newInstance, writableWrap and DirectCouponHashSet - DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, - final WritableMemory wmem) { - super(lgConfigK, tgtHllType, curMode); + //called from newInstance, writableWrap and DirectCouponHashSet, must be compact + DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, final WritableMemory wmem) { + super(checkMemCompactFlag(wmem, lgConfigK), tgtHllType, curMode); this.wmem = wmem; mem = wmem; compact = extractCompactFlag(wmem); - assert !compact; } - //called from HllSketch.wrap and from DirectCouponHashSet constructor, may be compact - DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, - final Memory mem) { + //called from HllSketch.wrap and from DirectCouponHashSet constructor, may or may not be compact + DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, final Memory mem) { super(lgConfigK, tgtHllType, curMode); wmem = null; this.mem = mem; diff --git a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java index 7d4270c1..5b3e1a4f 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java +++ b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java @@ -59,20 +59,19 @@ abstract class DirectHllArray extends AbstractHllArray { long memAdd; final boolean compact; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + private static int checkMemCompactFlag(final WritableMemory wmem, final int lgConfigK) { + assert !extractCompactFlag(wmem); + return lgConfigK; } //Memory must be already initialized and may have data DirectHllArray(final int lgConfigK, final TgtHllType tgtHllType, final WritableMemory wmem) { - super(lgConfigK, tgtHllType, CurMode.HLL); + super(checkMemCompactFlag(wmem, lgConfigK), tgtHllType, CurMode.HLL); this.wmem = wmem; mem = wmem; memObj = wmem.getArray(); memAdd = wmem.getCumulativeOffset(0L); compact = extractCompactFlag(mem); - assert !compact; insertEmptyFlag(wmem, false); } diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java index 589c1fa3..6f3fbafc 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java @@ -25,18 +25,19 @@ import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH; import java.lang.reflect.Array; +import java.util.Arrays; import java.util.Comparator; import java.util.Objects; import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; import org.apache.datasketches.quantilescommon.PartitioningFeature; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; -import org.apache.datasketches.quantilescommon.QuantilesAPI; import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator; @@ -154,7 +155,7 @@ public abstract class KllItemsSketch<T> extends KllSketch implements QuantilesGe @Override public GenericPartitionBoundaries<T> getPartitionBoundaries(final int numEquallySized, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } + if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } refreshSortedView(); return kllItemsSV.getPartitionBoundaries(numEquallySized, searchCrit); } @@ -307,13 +308,6 @@ public abstract class KllItemsSketch<T> extends KllSketch implements QuantilesGe @Override abstract int getMinMaxSizeBytes(); - private final KllItemsSketchSortedView<T> refreshSortedView() { - final KllItemsSketchSortedView<T> sv = (kllItemsSV == null) - ? kllItemsSV = new KllItemsSketchSortedView<>(this) - : kllItemsSV; - return sv; - } - abstract T[] getRetainedItemsArray(); @Override @@ -374,4 +368,144 @@ public abstract class KllItemsSketch<T> extends KllSketch implements QuantilesGe throw new SketchesArgumentException(UNSUPPORTED_MSG + "Sketch not writable."); } + void updateMinMax(final T item) { + if (isEmpty()) { + setMinItem(item); + setMaxItem(item); + } else { + setMinItem(Util.minT(getMinItem(), item, comparator)); + setMaxItem(Util.maxT(getMaxItem(), item, comparator)); + } + } + + private final KllItemsSketchSortedView<T> refreshSortedView() { + if (kllItemsSV == null) { + final CreateSortedView csv = new CreateSortedView(); + kllItemsSV = csv.getSV(); + } + return kllItemsSV; + } + + private final class CreateSortedView { + T[] quantiles; + long[] cumWeights; + + KllItemsSketchSortedView<T> getSV() { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + if (getN() == 0) { throw new SketchesArgumentException(EMPTY_MSG); } + final T[] srcQuantiles = getTotalItemsArray(); + final int[] srcLevels = levelsArr; + final int srcNumLevels = getNumLevels(); + + if (!isLevelZeroSorted()) { + Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1], comparator); + if (!hasMemory()) { setLevelZeroSorted(true); } + } + final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove free space + quantiles = (T[]) Array.newInstance(serDe.getClassOfT(), numQuantiles); + cumWeights = new long[numQuantiles]; + populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles); + return new KllItemsSketchSortedView<>(quantiles, cumWeights, getN(), comparator, getMaxItem(), getMinItem()); + } + + private void populateFromSketch(final Object[] srcQuantiles, final int[] srcLevels, + final int srcNumLevels, final int numItems) { + final int[] myLevels = new int[srcNumLevels + 1]; + final int offset = srcLevels[0]; + System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems); + int srcLevel = 0; + int dstLevel = 0; + long weight = 1; + while (srcLevel < srcNumLevels) { + final int fromIndex = srcLevels[srcLevel] - offset; + final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive + if (fromIndex < toIndex) { // if equal, skip empty level + Arrays.fill(cumWeights, fromIndex, toIndex, weight); + myLevels[dstLevel] = fromIndex; + myLevels[dstLevel + 1] = toIndex; + dstLevel++; + } + srcLevel++; + weight *= 2; + } + final int numLevels = dstLevel; + blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels, comparator); //create unit weights + KllHelper.convertToCumulative(cumWeights); + } + } + + private static <T> void blockyTandemMergeSort(final Object[] quantiles, final long[] weights, + final int[] levels, final int numLevels, final Comparator<? super T> comp) { + if (numLevels == 1) { return; } + + // duplicate the input in preparation for the "ping-pong" copy reduction strategy. + final Object[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length); + final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one here + + blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels, comp); + } + + private static <T> void blockyTandemMergeSortRecursion( + final Object[] quantilesSrc, final long[] weightsSrc, + final Object[] quantilesDst, final long[] weightsDst, + final int[] levels, final int startingLevel, final int numLevels, final Comparator<? super T> comp) { + if (numLevels == 1) { return; } + final int numLevels1 = numLevels / 2; + final int numLevels2 = numLevels - numLevels1; + assert numLevels1 >= 1; + assert numLevels2 >= numLevels1; + final int startingLevel1 = startingLevel; + final int startingLevel2 = startingLevel + numLevels1; + // swap roles of src and dst + blockyTandemMergeSortRecursion( + quantilesDst, weightsDst, + quantilesSrc, weightsSrc, + levels, startingLevel1, numLevels1, comp); + blockyTandemMergeSortRecursion( + quantilesDst, weightsDst, + quantilesSrc, weightsSrc, + levels, startingLevel2, numLevels2, comp); + tandemMerge( + quantilesSrc, weightsSrc, + quantilesDst, weightsDst, + levels, + startingLevel1, numLevels1, + startingLevel2, numLevels2, comp); + } + + private static <T> void tandemMerge( + final Object[] quantilesSrc, final long[] weightsSrc, + final Object[] quantilesDst, final long[] weightsDst, + final int[] levelStarts, + final int startingLevel1, final int numLevels1, + final int startingLevel2, final int numLevels2, final Comparator<? super T> comp) { + final int fromIndex1 = levelStarts[startingLevel1]; + final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive + final int fromIndex2 = levelStarts[startingLevel2]; + final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive + int iSrc1 = fromIndex1; + int iSrc2 = fromIndex2; + int iDst = fromIndex1; + + while (iSrc1 < toIndex1 && iSrc2 < toIndex2) { + if (Util.lt((T) quantilesSrc[iSrc1], (T) quantilesSrc[iSrc2], comp)) { + quantilesDst[iDst] = quantilesSrc[iSrc1]; + weightsDst[iDst] = weightsSrc[iSrc1]; + iSrc1++; + } else { + quantilesDst[iDst] = quantilesSrc[iSrc2]; + weightsDst[iDst] = weightsSrc[iSrc2]; + iSrc2++; + } + iDst++; + } + if (iSrc1 < toIndex1) { + System.arraycopy(quantilesSrc, iSrc1, quantilesDst, iDst, toIndex1 - iSrc1); + System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1); + } else if (iSrc2 < toIndex2) { + System.arraycopy(quantilesSrc, iSrc2, quantilesDst, iDst, toIndex2 - iSrc2); + System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2); + } + } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java index 783ce9bc..553b93af 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java @@ -26,11 +26,9 @@ import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpaced import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.lang.reflect.Array; -import java.util.Arrays; import java.util.Comparator; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; import org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequality; import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; import org.apache.datasketches.quantilescommon.GenericSortedView; @@ -56,11 +54,6 @@ public class KllItemsSketchSortedView<T> implements GenericSortedView<T>, Partit private final T minItem; private final Class<T> clazz; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** * Construct from elements for testing only. * @param quantiles sorted array of quantiles @@ -86,34 +79,6 @@ public class KllItemsSketchSortedView<T> implements GenericSortedView<T>, Partit this.clazz = (Class<T>)quantiles[0].getClass(); } - /** - * Constructs this Sorted View given the sketch - * @param sketch the given KllItemsSketch. - */ - @SuppressWarnings("unchecked") - KllItemsSketchSortedView(final KllItemsSketch<T> sketch) { - if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - this.totalN = sketch.getN(); - final T[] srcQuantiles = sketch.getTotalItemsArray(); - final int[] srcLevels = sketch.levelsArr; - final int srcNumLevels = sketch.getNumLevels(); - this.comparator = sketch.comparator; - this.maxItem = sketch.getMaxItem(); - this.minItem = sketch.getMinItem(); - this.clazz = (Class<T>)sketch.serDe.getClassOfT(); - - if (totalN == 0) { throw new SketchesArgumentException(EMPTY_MSG); } - if (!sketch.isLevelZeroSorted()) { - Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1], comparator); - if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); } - } - - final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove garbage - quantiles = (T[]) Array.newInstance(sketch.serDe.getClassOfT(), numQuantiles); - cumWeights = new long[numQuantiles]; - populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles); - } - //end of constructors @Override @@ -253,106 +218,4 @@ public class KllItemsSketchSortedView<T> implements GenericSortedView<T>, Partit return new GenericSortedViewIterator<>(quantiles, cumWeights); } - //restricted methods - - private void populateFromSketch(final Object[] srcQuantiles, final int[] srcLevels, - final int srcNumLevels, final int numItems) { - final int[] myLevels = new int[srcNumLevels + 1]; - final int offset = srcLevels[0]; - System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems); - int srcLevel = 0; - int dstLevel = 0; - long weight = 1; - while (srcLevel < srcNumLevels) { - final int fromIndex = srcLevels[srcLevel] - offset; - final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive - if (fromIndex < toIndex) { // if equal, skip empty level - Arrays.fill(cumWeights, fromIndex, toIndex, weight); - myLevels[dstLevel] = fromIndex; - myLevels[dstLevel + 1] = toIndex; - dstLevel++; - } - srcLevel++; - weight *= 2; - } - final int numLevels = dstLevel; - blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels, comparator); //create unit weights - KllHelper.convertToCumulative(cumWeights); - } - - private static <T> void blockyTandemMergeSort(final Object[] quantiles, final long[] weights, - final int[] levels, final int numLevels, final Comparator<? super T> comp) { - if (numLevels == 1) { return; } - - // duplicate the input in preparation for the "ping-pong" copy reduction strategy. - final Object[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length); - final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one here - - blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels, comp); - } - - private static <T> void blockyTandemMergeSortRecursion( - final Object[] quantilesSrc, final long[] weightsSrc, - final Object[] quantilesDst, final long[] weightsDst, - final int[] levels, final int startingLevel, final int numLevels, final Comparator<? super T> comp) { - if (numLevels == 1) { return; } - final int numLevels1 = numLevels / 2; - final int numLevels2 = numLevels - numLevels1; - assert numLevels1 >= 1; - assert numLevels2 >= numLevels1; - final int startingLevel1 = startingLevel; - final int startingLevel2 = startingLevel + numLevels1; - // swap roles of src and dst - blockyTandemMergeSortRecursion( - quantilesDst, weightsDst, - quantilesSrc, weightsSrc, - levels, startingLevel1, numLevels1, comp); - blockyTandemMergeSortRecursion( - quantilesDst, weightsDst, - quantilesSrc, weightsSrc, - levels, startingLevel2, numLevels2, comp); - tandemMerge( - quantilesSrc, weightsSrc, - quantilesDst, weightsDst, - levels, - startingLevel1, numLevels1, - startingLevel2, numLevels2, comp); - } - - @SuppressWarnings("unchecked") - private static <T> void tandemMerge( - final Object[] quantilesSrc, final long[] weightsSrc, - final Object[] quantilesDst, final long[] weightsDst, - final int[] levelStarts, - final int startingLevel1, final int numLevels1, - final int startingLevel2, final int numLevels2, final Comparator<? super T> comp) { - final int fromIndex1 = levelStarts[startingLevel1]; - final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive - final int fromIndex2 = levelStarts[startingLevel2]; - final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive - int iSrc1 = fromIndex1; - int iSrc2 = fromIndex2; - int iDst = fromIndex1; - - while (iSrc1 < toIndex1 && iSrc2 < toIndex2) { - if (Util.lt((T) quantilesSrc[iSrc1], (T) quantilesSrc[iSrc2], comp)) { - quantilesDst[iDst] = quantilesSrc[iSrc1]; - weightsDst[iDst] = weightsSrc[iSrc1]; - iSrc1++; - } else { - quantilesDst[iDst] = quantilesSrc[iSrc2]; - weightsDst[iDst] = weightsSrc[iSrc2]; - iSrc2++; - } - iDst++; - } - if (iSrc1 < toIndex1) { - System.arraycopy(quantilesSrc, iSrc1, quantilesDst, iDst, toIndex1 - iSrc1); - System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1); - } else if (iSrc2 < toIndex2) { - System.arraycopy(quantilesSrc, iSrc2, quantilesDst, iDst, toIndex2 - iSrc2); - System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2); - } - } - } diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java index 54f83f18..1c5913b7 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java @@ -22,6 +22,7 @@ package org.apache.datasketches.quantiles; import static org.apache.datasketches.quantiles.PreambleUtil.COMBINED_BUFFER; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; /** * This allows access to package-private levels and data in whatever quantiles sketch you give @@ -39,18 +40,30 @@ abstract class DoublesSketchAccessor extends DoublesBufferAccessor { int numItems_; int offset_; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + DoublesSketchAccessor( + final DoublesSketch ds, + final boolean forceSize, + final int level) { + this(checkLvl(level), ds, forceSize, level); } - DoublesSketchAccessor(final DoublesSketch ds, final boolean forceSize, final int level) { + private DoublesSketchAccessor( + final boolean secure, + final DoublesSketch ds, + final boolean forceSize, + final int level) { ds_ = ds; forceSize_ = forceSize; - setLevel(level); } + private static final boolean checkLvl(final int level) { + if (level != BB_LVL_IDX && level < 0) { + throw new SketchesArgumentException("Parameter level is < 0."); + } + return true; + } + static DoublesSketchAccessor wrap(final DoublesSketch ds) { return wrap(ds, false); } @@ -71,7 +84,6 @@ abstract class DoublesSketchAccessor extends DoublesBufferAccessor { numItems_ = (forceSize_ ? ds_.getK() * 2 : ds_.getBaseBufferCount()); offset_ = (ds_.hasMemory() ? COMBINED_BUFFER : 0); } else { - assert lvl >= 0; if (((ds_.getBitPattern() & (1L << lvl)) > 0) || forceSize_) { numItems_ = ds_.getK(); } else { diff --git a/src/main/java/org/apache/datasketches/quantiles/HeapDoublesSketchAccessor.java b/src/main/java/org/apache/datasketches/quantiles/HeapDoublesSketchAccessor.java index 10fb3f71..87383ae1 100644 --- a/src/main/java/org/apache/datasketches/quantiles/HeapDoublesSketchAccessor.java +++ b/src/main/java/org/apache/datasketches/quantiles/HeapDoublesSketchAccessor.java @@ -24,7 +24,7 @@ import java.util.Arrays; /** * @author Jon Malkin */ -class HeapDoublesSketchAccessor extends DoublesSketchAccessor { +final class HeapDoublesSketchAccessor extends DoublesSketchAccessor { HeapDoublesSketchAccessor(final DoublesSketch ds, final boolean forceSize, final int level) { diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java index 6b247347..8a6b7831 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java @@ -36,7 +36,9 @@ import static org.apache.datasketches.quantiles.PreambleUtil.extractK; import static org.apache.datasketches.quantiles.PreambleUtil.extractN; import static org.apache.datasketches.quantiles.PreambleUtil.extractPreLongs; import static org.apache.datasketches.quantiles.PreambleUtil.extractSerVer; +//import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import java.lang.reflect.Array; import java.util.Arrays; import java.util.Comparator; import java.util.Objects; @@ -44,6 +46,7 @@ import java.util.Random; import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; @@ -545,13 +548,6 @@ public final class ItemsSketch<T> implements QuantilesGenericAPI<T>, Partitionin // Restricted - private final ItemsSketchSortedView<T> refreshSortedView() { - final ItemsSketchSortedView<T> sv = (classicQisSV == null) - ? classicQisSV = new ItemsSketchSortedView<>(this) - : classicQisSV; - return sv; - } - /** * Returns the base buffer count * @return the base buffer count @@ -626,4 +622,85 @@ public final class ItemsSketch<T> implements QuantilesGenericAPI<T>, Partitionin sketch.combinedBuffer_ = Arrays.copyOf(baseBuffer, newSize); } + private final ItemsSketchSortedView<T> refreshSortedView() { + if (classicQisSV == null) { + final CreateSortedView csv = new CreateSortedView(); + classicQisSV = csv.getSV(); + } + return classicQisSV; + } + + @SuppressWarnings({"rawtypes", "unchecked"}) + private final class CreateSortedView { + final long n = getN(); + final int numQuantiles = getNumRetained(); + T[] quantiles = (T[]) Array.newInstance(clazz, numQuantiles); + long[] cumWeights = new long[numQuantiles]; + final int k = getK(); + + final T[] combinedBuffer = (T[]) getCombinedBuffer(); + final int baseBufferCount = getBaseBufferCount(); + final Comparator<? super T> comparator = ItemsSketch.this.comparator_; + + ItemsSketchSortedView<T> getSV() { + long weight = 1; + int index = 0; + long bits = getBitPattern(); + assert bits == (n / (2L * k)); // internal consistency check + for (int lvl = 0; bits != 0L; lvl++, bits >>>= 1) { + weight *= 2; + if ((bits & 1L) > 0L) { + final int offset = (2 + lvl) * k; + for (int i = 0; i < k; i++) { + quantiles[index] = combinedBuffer[i + offset]; + cumWeights[index] = weight; + index++; + } + } + } + + weight = 1; //NOT a mistake! We just copied the highest level; now we need to copy the base buffer + final int startOfBaseBufferBlock = index; + + // Copy BaseBuffer over, along with weight = 1 + for (int i = 0; i < baseBufferCount; i++) { + quantiles[index] = combinedBuffer[i]; + cumWeights[index] = weight; + index++; + } + assert index == numQuantiles; + + // Must sort the items that came from the base buffer. + // Don't need to sort the corresponding weights because they are all the same. + Arrays.sort(quantiles, startOfBaseBufferBlock, numQuantiles, comparator); + + // Sort the first "numSamples" slots of the two arrays in tandem, + // taking advantage of the already sorted blocks of length k + ItemsMergeImpl.blockyTandemMergeSort(quantiles, cumWeights, numQuantiles, k, comparator); + + if (convertToCumulative(cumWeights) != n) { + throw new SketchesStateException("Sorted View is misconfigured. TotalN does not match cumWeights."); + } + + return new ItemsSketchSortedView( + quantiles, cumWeights, getN(), comparator, getMaxItem(), getMinItem(), getK()); + } + + } + + /** + * Convert the individual weights into cumulative weights. + * An array of {1,1,1,1} becomes {1,2,3,4} + * @param array of actual weights from the sketch, none of the weights may be zero + * @return total weight + */ + private static long convertToCumulative(final long[] array) { + long subtotal = 0; + for (int i = 0; i < array.length; i++) { + final long newSubtotal = subtotal + array[i]; + subtotal = array[i] = newSubtotal; + } + return subtotal; + } + } diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java index 827d9d08..e9117b93 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java @@ -26,11 +26,11 @@ import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpaced import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.lang.reflect.Array; -import java.util.Arrays; +//import java.util.Arrays; import java.util.Comparator; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesStateException; +//import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequality; import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; import org.apache.datasketches.quantilescommon.GenericSortedView; @@ -56,13 +56,8 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition private final T minItem; private final Class<T> clazz; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** - * Construct from elements for testing. + * Construct from elements, also used in testing. * @param quantiles sorted array of quantiles * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. @@ -85,39 +80,39 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition this.clazz = (Class<T>)quantiles[0].getClass(); } - /** - * Constructs this Sorted View given the sketch - * @param sketch the given Classic Quantiles ItemsSketch - */ - @SuppressWarnings("unchecked") - ItemsSketchSortedView(final ItemsSketch<T> sketch) { - if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - this.totalN = sketch.getN(); - final int k = sketch.getK(); - final int numQuantiles = sketch.getNumRetained(); - this.quantiles = (T[]) Array.newInstance(sketch.clazz, numQuantiles); - this.minItem = sketch.minItem_; - this.maxItem = sketch.maxItem_; - cumWeights = new long[numQuantiles]; - comparator = sketch.getComparator(); - clazz = sketch.clazz; - - final Object[] combinedBuffer = sketch.getCombinedBuffer(); - final int baseBufferCount = sketch.getBaseBufferCount(); - - // Populate from ItemsSketch: - // copy over the "levels" and then the base buffer, all with appropriate weights - populateFromItemsSketch(k, totalN, sketch.getBitPattern(), (T[]) combinedBuffer, baseBufferCount, - numQuantiles, quantiles, cumWeights, sketch.getComparator()); - - // Sort the first "numSamples" slots of the two arrays in tandem, - // taking advantage of the already sorted blocks of length k - ItemsMergeImpl.blockyTandemMergeSort(quantiles, cumWeights, numQuantiles, k, sketch.getComparator()); - - if (convertToCumulative(cumWeights) != totalN) { - throw new SketchesStateException("Sorted View is misconfigured. TotalN does not match cumWeights."); - } - } +// /** +// * Constructs this Sorted View given the sketch +// * @param sketch the given Classic Quantiles ItemsSketch +// */ +// @SuppressWarnings("unchecked") +// ItemsSketchSortedView(final ItemsSketch<T> sketch) { +// if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } +// this.totalN = sketch.getN(); +// final int numQuantiles = sketch.getNumRetained(); +// this.quantiles = (T[]) Array.newInstance(sketch.clazz, numQuantiles); +// this.minItem = sketch.minItem_; +// this.maxItem = sketch.maxItem_; +// this.cumWeights = new long[numQuantiles]; +// this.comparator = sketch.getComparator(); +// this.clazz = sketch.clazz; +// this.k = sketch.getK(); +// +// final Object[] combinedBuffer = sketch.getCombinedBuffer(); +// final int baseBufferCount = sketch.getBaseBufferCount(); +// +// // Populate from ItemsSketch: +// // copy over the "levels" and then the base buffer, all with appropriate weights +// populateFromItemsSketch(k, totalN, sketch.getBitPattern(), (T[]) combinedBuffer, baseBufferCount, +// numQuantiles, quantiles, cumWeights, sketch.getComparator()); +// +// // Sort the first "numSamples" slots of the two arrays in tandem, +// // taking advantage of the already sorted blocks of length k +// ItemsMergeImpl.blockyTandemMergeSort(quantiles, cumWeights, numQuantiles, k, sketch.getComparator()); +// +// if (convertToCumulative(cumWeights) != totalN) { +// throw new SketchesStateException("Sorted View is misconfigured. TotalN does not match cumWeights."); +// } +// } //end of constructors @@ -256,68 +251,68 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition //restricted methods - /** - * Populate the arrays and registers from an ItemsSketch - * @param <T> the data type - * @param k K parameter of sketch - * @param n The current size of the stream - * @param bitPattern the bit pattern for valid log levels - * @param combinedBuffer the combined buffer reference - * @param baseBufferCount the count of the base buffer - * @param numQuantiles number of retained quantiles in the sketch - * @param quantilesArr the consolidated array of all quantiles from the sketch - * @param weightsArr the weights for each item from the sketch - * @param comparator the given comparator for data type T - */ - private final static <T> void populateFromItemsSketch( - final int k, final long n, final long bitPattern, final T[] combinedBuffer, - final int baseBufferCount, final int numQuantiles, final T[] quantilesArr, final long[] weightsArr, - final Comparator<? super T> comparator) { - long weight = 1; - int nxt = 0; - long bits = bitPattern; - assert bits == (n / (2L * k)); // internal consistency check - for (int lvl = 0; bits != 0L; lvl++, bits >>>= 1) { - weight *= 2; - if ((bits & 1L) > 0L) { - final int offset = (2 + lvl) * k; - for (int i = 0; i < k; i++) { - quantilesArr[nxt] = combinedBuffer[i + offset]; - weightsArr[nxt] = weight; - nxt++; - } - } - } - - weight = 1; //NOT a mistake! We just copied the highest level; now we need to copy the base buffer - final int startOfBaseBufferBlock = nxt; - - // Copy BaseBuffer over, along with weight = 1 - for (int i = 0; i < baseBufferCount; i++) { - quantilesArr[nxt] = combinedBuffer[i]; - weightsArr[nxt] = weight; - nxt++; - } - assert nxt == numQuantiles; - - // Must sort the items that came from the base buffer. - // Don't need to sort the corresponding weights because they are all the same. - Arrays.sort(quantilesArr, startOfBaseBufferBlock, numQuantiles, comparator); - } - - /** - * Convert the individual weights into cumulative weights. - * An array of {1,1,1,1} becomes {1,2,3,4} - * @param array of actual weights from the sketch, none of the weights may be zero - * @return total weight - */ - private static long convertToCumulative(final long[] array) { - long subtotal = 0; - for (int i = 0; i < array.length; i++) { - final long newSubtotal = subtotal + array[i]; - subtotal = array[i] = newSubtotal; - } - return subtotal; - } +// /** +// * Populate the arrays and registers from an ItemsSketch +// * @param <T> the data type +// * @param k K parameter of sketch +// * @param n The current size of the stream +// * @param bitPattern the bit pattern for valid log levels +// * @param combinedBuffer the combined buffer reference +// * @param baseBufferCount the count of the base buffer +// * @param numQuantiles number of retained quantiles in the sketch +// * @param quantilesArr the consolidated array of all quantiles from the sketch +// * @param weightsArr the weights for each item from the sketch +// * @param comparator the given comparator for data type T +// */ +// private final static <T> void populateFromItemsSketch( +// final int k, final long n, final long bitPattern, final T[] combinedBuffer, +// final int baseBufferCount, final int numQuantiles, final T[] quantilesArr, final long[] weightsArr, +// final Comparator<? super T> comparator) { +// long weight = 1; +// int nxt = 0; +// long bits = bitPattern; +// assert bits == (n / (2L * k)); // internal consistency check +// for (int lvl = 0; bits != 0L; lvl++, bits >>>= 1) { +// weight *= 2; +// if ((bits & 1L) > 0L) { +// final int offset = (2 + lvl) * k; +// for (int i = 0; i < k; i++) { +// quantilesArr[nxt] = combinedBuffer[i + offset]; +// weightsArr[nxt] = weight; +// nxt++; +// } +// } +// } +// +// weight = 1; //NOT a mistake! We just copied the highest level; now we need to copy the base buffer +// final int startOfBaseBufferBlock = nxt; +// +// // Copy BaseBuffer over, along with weight = 1 +// for (int i = 0; i < baseBufferCount; i++) { +// quantilesArr[nxt] = combinedBuffer[i]; +// weightsArr[nxt] = weight; +// nxt++; +// } +// assert nxt == numQuantiles; +// +// // Must sort the items that came from the base buffer. +// // Don't need to sort the corresponding weights because they are all the same. +// Arrays.sort(quantilesArr, startOfBaseBufferBlock, numQuantiles, comparator); +// } +// +// /** +// * Convert the individual weights into cumulative weights. +// * An array of {1,1,1,1} becomes {1,2,3,4} +// * @param array of actual weights from the sketch, none of the weights may be zero +// * @return total weight +// */ +// private static long convertToCumulative(final long[] array) { +// long subtotal = 0; +// for (int i = 0; i < array.length; i++) { +// final long newSubtotal = subtotal + array[i]; +// subtotal = array[i] = newSubtotal; +// } +// return subtotal; +// } } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java index 9cba9fbd..c89a4aa2 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java @@ -27,7 +27,7 @@ import org.apache.datasketches.common.SketchesStateException; /** * Implements PartitionBoundaries */ -public class GenericPartitionBoundaries<T> implements PartitionBoundaries { +final public class GenericPartitionBoundaries<T> implements PartitionBoundaries { private long totalN; //totalN of source sketch private T[] boundaries; //quantiles at the boundaries private long[] natRanks; //natural ranks at the boundaries @@ -39,11 +39,6 @@ public class GenericPartitionBoundaries<T> implements PartitionBoundaries { private long[] numDeltaItems; //num of items in each part private int numPartitions; //num of partitions - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - public GenericPartitionBoundaries( final long totalN, final T[] boundaries, diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index 27174b52..33efe064 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -56,6 +56,7 @@ import static org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; @@ -81,11 +82,6 @@ class DirectQuickSelectSketch extends DirectQuickSelectSketchR { super(seed, wmem); } - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** * Construct a new sketch instance and initialize the given Memory as its backing store. * @@ -111,8 +107,29 @@ class DirectQuickSelectSketch extends DirectQuickSelectSketchR { final MemoryRequestServer memReqSvr, final WritableMemory dstMem, final boolean unionGadget) { - super(seed, dstMem); + this( + checkMemSize(lgNomLongs, rf, dstMem, unionGadget), + //SpotBugs CT_CONSTRUCTOR_THROW is false positive. + //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J + lgNomLongs, + seed, + p, + rf, + memReqSvr, + dstMem, + unionGadget); + } + private DirectQuickSelectSketch( + final boolean secure, + final int lgNomLongs, + final long seed, + final float p, + final ResizeFactor rf, + final MemoryRequestServer memReqSvr, + final WritableMemory dstMem, + final boolean unionGadget) { + super(seed, dstMem); //Choose family, preambleLongs final Family family; final int preambleLongs; @@ -128,14 +145,6 @@ class DirectQuickSelectSketch extends DirectQuickSelectSketchR { //Choose RF, minReqBytes, lgArrLongs. final int lgRF = rf.lg(); final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; - final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); - - //Make sure Memory is large enough - final long curMemCapBytes = dstMem.getCapacity(); - if (curMemCapBytes < minReqBytes) { - throw new SketchesArgumentException( - "Memory capacity is too small: " + curMemCapBytes + " < " + minReqBytes); - } //@formatter:off //Build preamble @@ -164,6 +173,20 @@ class DirectQuickSelectSketch extends DirectQuickSelectSketchR { memReqSvr_ = memReqSvr; } + private static final boolean checkMemSize( + final int lgNomLongs, final ResizeFactor rf, final Memory dstMem, final boolean unionGadget) { + final int preambleLongs = (unionGadget) ? Family.UNION.getMinPreLongs() : Family.QUICKSELECT.getMinPreLongs(); + final int lgRF = rf.lg(); + final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; + final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); + final long curMemCapBytes = dstMem.getCapacity(); + if (curMemCapBytes < minReqBytes) { + throw new SketchesArgumentException( + "Memory capacity is too small: " + curMemCapBytes + " < " + minReqBytes); + } + return true; + } + /** * Wrap a sketch around the given source Memory containing sketch data that originated from * this sketch. diff --git a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java index c77a2800..ec58a5e7 100644 --- a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java @@ -39,23 +39,18 @@ import org.apache.datasketches.memory.Memory; * * @param <S> type of Summary */ -public class CompactSketch<S extends Summary> extends Sketch<S> { +public final class CompactSketch<S extends Summary> extends Sketch<S> { private static final byte serialVersionWithSummaryClassNameUID = 1; private static final byte serialVersionUIDLegacy = 2; private static final byte serialVersionUID = 3; private static final short defaultSeedHash = (short) 37836; // for compatibility with C++ - private long[] hashArr_; + private final long[] hashArr_; private S[] summaryArr_; private enum FlagsLegacy { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED } private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED } - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** * Create a CompactSketch from correct components * @param hashArr compacted hash array @@ -64,10 +59,11 @@ public class CompactSketch<S extends Summary> extends Sketch<S> { * @param empty empty flag */ CompactSketch(final long[] hashArr, final S[] summaryArr, final long thetaLong, final boolean empty) { + super(thetaLong, empty, null); + super.thetaLong_ = thetaLong; + super.empty_ = empty; hashArr_ = hashArr; summaryArr_ = summaryArr; - thetaLong_ = thetaLong; - empty_ = empty; } /** @@ -77,6 +73,7 @@ public class CompactSketch<S extends Summary> extends Sketch<S> { * @param deserializer the SummaryDeserializer */ CompactSketch(final Memory mem, final SummaryDeserializer<S> deserializer) { + super(Long.MAX_VALUE, true, null); int offset = 0; final byte preambleLongs = mem.getByte(offset++); final byte version = mem.getByte(offset++); @@ -114,6 +111,7 @@ public class CompactSketch<S extends Summary> extends Sketch<S> { offset += classNameLength; } hashArr_ = new long[count]; + for (int i = 0; i < count; i++) { hashArr_[i] = mem.getLong(offset); offset += Long.BYTES; @@ -121,6 +119,9 @@ public class CompactSketch<S extends Summary> extends Sketch<S> { for (int i = 0; i < count; i++) { offset += readSummary(mem, offset, i, count, deserializer); } + } else { + hashArr_ = new long[0]; + summaryArr_ = null; } } else { // current serial format offset++; //skip unused byte @@ -143,6 +144,7 @@ public class CompactSketch<S extends Summary> extends Sketch<S> { } } hashArr_ = new long[count]; + for (int i = 0; i < count; i++) { hashArr_[i] = mem.getLong(offset); offset += Long.BYTES; diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java index 2cc0d989..59e27a42 100644 --- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java @@ -47,24 +47,18 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED } - static final int DEFAULT_LG_RESIZE_FACTOR = ResizeFactor.X8.lg(); + private static final int DEFAULT_LG_RESIZE_FACTOR = ResizeFactor.X8.lg(); private final int nomEntries_; - private int lgCurrentCapacity_; private final int lgResizeFactor_; - private int count_; private final float samplingProbability_; + private int lgCurrentCapacity_; + private int retEntries_; private int rebuildThreshold_; private long[] hashTable_; S[] summaryTable_; - @Override - protected final void finalize() throws Throwable { - super.finalize(); - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** - * This is to create an instance of a QuickSelectSketch with default resize factor. + * This is to create a new instance of a QuickSelectSketch with default resize factor. * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than * given value. * @param summaryFactory An instance of a SummaryFactory. @@ -76,7 +70,7 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { } /** - * This is to create an instance of a QuickSelectSketch with custom resize factor + * This is to create a new instance of a QuickSelectSketch with custom resize factor * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than * given value. * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: @@ -96,7 +90,7 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { } /** - * This is to create an instance of a QuickSelectSketch with custom resize factor and sampling + * This is to create a new instance of a QuickSelectSketch with custom resize factor and sampling * probability * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than * or equal to the given value. @@ -124,38 +118,50 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { ); } - QuickSelectSketch( + /** + * Target constructor for above constructors for a new instance. + * @param nomEntries Nominal number of entries. + * @param lgResizeFactor log2(resizeFactor) + * @param samplingProbability the given sampling probability + * @param summaryFactory An instance of a SummaryFactory. + * @param startingSize starting size of the sketch. + */ + private QuickSelectSketch( final int nomEntries, final int lgResizeFactor, final float samplingProbability, final SummaryFactory<S> summaryFactory, final int startingSize) { + super( + (long) (Long.MAX_VALUE * (double) samplingProbability), + true, + summaryFactory); nomEntries_ = ceilingIntPowerOf2(nomEntries); lgResizeFactor_ = lgResizeFactor; samplingProbability_ = samplingProbability; - summaryFactory_ = summaryFactory; //super - thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability); //super lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingSize); - hashTable_ = new long[startingSize]; + retEntries_ = 0; + hashTable_ = new long[startingSize]; //must be before setRebuildThreshold + rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); summaryTable_ = null; // wait for the first summary to call Array.newInstance() - setRebuildThreshold(); } /** * Copy constructor - * @param sketch the QuickSelectSketch to be copied. + * @param sketch the QuickSelectSketch to be deep copied. */ QuickSelectSketch(final QuickSelectSketch<S> sketch) { + super( + sketch.thetaLong_, + sketch.empty_, + sketch.summaryFactory_); nomEntries_ = sketch.nomEntries_; - lgCurrentCapacity_ = sketch.lgCurrentCapacity_; lgResizeFactor_ = sketch.lgResizeFactor_; - count_ = sketch.count_; samplingProbability_ = sketch.samplingProbability_; - rebuildThreshold_ = sketch.rebuildThreshold_; - thetaLong_ = sketch.thetaLong_; - empty_ = sketch.empty_; - summaryFactory_ = sketch.summaryFactory_; + lgCurrentCapacity_ = sketch.lgCurrentCapacity_; + retEntries_ = sketch.retEntries_; hashTable_ = sketch.hashTable_.clone(); + rebuildThreshold_ = sketch.rebuildThreshold_; summaryTable_ = Util.copySummaryArray(sketch.summaryTable_); } @@ -164,75 +170,128 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { * @param mem Memory object with serialized QuickSelectSketch * @param deserializer the SummaryDeserializer * @param summaryFactory the SummaryFactory - * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. * This capability will be removed in a future release. * Heapifying a CompactSketch is not deprecated. */ - @Deprecated QuickSelectSketch( final Memory mem, final SummaryDeserializer<S> deserializer, final SummaryFactory<S> summaryFactory) { - Objects.requireNonNull(mem, "SourceMemory must not be null."); - Objects.requireNonNull(deserializer, "Deserializer must not be null."); - checkBounds(0, 8, mem.getCapacity()); - summaryFactory_ = summaryFactory; - int offset = 0; - final byte preambleLongs = mem.getByte(offset++); //byte 0 PreLongs - final byte version = mem.getByte(offset++); //byte 1 SerVer - final byte familyId = mem.getByte(offset++); //byte 2 FamID - SerializerDeserializer.validateFamily(familyId, preambleLongs); - if (version > serialVersionUID) { - throw new SketchesArgumentException( - "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " - + version); - } - SerializerDeserializer.validateType(mem.getByte(offset++), //byte 3 - SerializerDeserializer.SketchType.QuickSelectSketch); - final byte flags = mem.getByte(offset++); //byte 4 - final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0; - if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { - throw new SketchesArgumentException("Endian byte order mismatch"); - } - nomEntries_ = 1 << mem.getByte(offset++); //byte 5 - lgCurrentCapacity_ = mem.getByte(offset++); //byte 6 - lgResizeFactor_ = mem.getByte(offset++); //byte 7 - - checkBounds(0, preambleLongs * 8L, mem.getCapacity()); - final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0; - samplingProbability_ = isInSamplingMode ? mem.getFloat(offset) : 1f; //bytes 8 - 11 - if (isInSamplingMode) { - offset += Float.BYTES; - } + this(new Validate<>(), mem, deserializer, summaryFactory); + } - final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0; - if (isThetaIncluded) { - thetaLong_ = mem.getLong(offset); - offset += Long.BYTES; - } else { - thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability_); - } + /* + * This private constructor is used to protect against "Finalizer attacks". + * The private static inner class Validate performs validation and deserialization + * from the input Memory and may throw exceptions. In order to protect against the attack, we must + * perform this validation prior to the constructor's super reaches the Object class. + * Making QuickSelectSketch final won't work here because UpdatableSketch is a subclass. + * Using an empty final finalizer() is not recommended and is deprecated as of Java9. + */ + private QuickSelectSketch( + final Validate<S> val, + final Memory mem, + final SummaryDeserializer<S> deserializer, + final SummaryFactory<S> summaryFactory) { + super(val.validate(mem, deserializer), val.myEmpty, summaryFactory); + nomEntries_ = val.myNomEntries; + lgResizeFactor_ = val.myLgResizeFactor; + samplingProbability_ = val.mySamplingProbability; + lgCurrentCapacity_ = val.myLgCurrentCapacity; + retEntries_ = val.myRetEntries; + rebuildThreshold_ = val.myRebuildThreshold; + hashTable_ = val.myHashTable; + summaryTable_ = val.mySummaryTable; + } + + private static final class Validate<S> { + //super fields + long myThetaLong; + boolean myEmpty; + //this fields + int myNomEntries; + int myLgResizeFactor; + float mySamplingProbability; + int myLgCurrentCapacity; + int myRetEntries; + int myRebuildThreshold; + long[] myHashTable; + S[] mySummaryTable; + + @SuppressWarnings("unchecked") + long validate( + final Memory mem, + final SummaryDeserializer<?> deserializer) { + Objects.requireNonNull(mem, "SourceMemory must not be null."); + Objects.requireNonNull(deserializer, "Deserializer must not be null."); + checkBounds(0, 8, mem.getCapacity()); + + int offset = 0; + final byte preambleLongs = mem.getByte(offset++); //byte 0 PreLongs + final byte version = mem.getByte(offset++); //byte 1 SerVer + final byte familyId = mem.getByte(offset++); //byte 2 FamID + SerializerDeserializer.validateFamily(familyId, preambleLongs); + if (version > serialVersionUID) { + throw new SketchesArgumentException( + "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " + + version); + } + SerializerDeserializer.validateType(mem.getByte(offset++), //byte 3 + SerializerDeserializer.SketchType.QuickSelectSketch); + final byte flags = mem.getByte(offset++); //byte 4 + final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0; + if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { + throw new SketchesArgumentException("Endian byte order mismatch"); + } + myNomEntries = 1 << mem.getByte(offset++); //byte 5 + myLgCurrentCapacity = mem.getByte(offset++); //byte 6 + myLgResizeFactor = mem.getByte(offset++); //byte 7 + + checkBounds(0, preambleLongs * 8L, mem.getCapacity()); + final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0; + mySamplingProbability = isInSamplingMode ? mem.getFloat(offset) : 1f; //bytes 8 - 11 + if (isInSamplingMode) { + offset += Float.BYTES; + } - int count = 0; - final boolean hasEntries = (flags & 1 << Flags.HAS_ENTRIES.ordinal()) > 0; - if (hasEntries) { - count = mem.getInt(offset); - offset += Integer.BYTES; - } - final int currentCapacity = 1 << lgCurrentCapacity_; - hashTable_ = new long[currentCapacity]; - for (int i = 0; i < count; i++) { - final long hash = mem.getLong(offset); - offset += Long.BYTES; - final Memory memRegion = mem.region(offset, mem.getCapacity() - offset); - final DeserializeResult<S> summaryResult = deserializer.heapifySummary(memRegion); - final S summary = summaryResult.getObject(); - offset += summaryResult.getSize(); - insert(hash, summary); + final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0; + if (isThetaIncluded) { + myThetaLong = mem.getLong(offset); + offset += Long.BYTES; + } else { + myThetaLong = (long) (Long.MAX_VALUE * (double) mySamplingProbability); + } + + int count = 0; + final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0; + if (hasEntries) { + count = mem.getInt(offset); + offset += Integer.BYTES; + } + final int currentCapacity = 1 << myLgCurrentCapacity; + myHashTable = new long[currentCapacity]; + for (int i = 0; i < count; i++) { + final long hash = mem.getLong(offset); + offset += Long.BYTES; + final Memory memRegion = mem.region(offset, mem.getCapacity() - offset); + final DeserializeResult<?> summaryResult = deserializer.heapifySummary(memRegion); + final S summary = (S) summaryResult.getObject(); + offset += summaryResult.getSize(); + //in-place equivalent to insert(hash, summary): + final int index = HashOperations.hashInsertOnly(myHashTable, myLgCurrentCapacity, hash); + if (mySummaryTable == null) { + mySummaryTable = (S[]) Array.newInstance(summary.getClass(), myHashTable.length); + } + mySummaryTable[index] = summary; + myRetEntries++; + myEmpty = false; + } + myEmpty = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; + myRebuildThreshold = setRebuildThreshold(myHashTable, myNomEntries); + return myThetaLong; } - empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; - setRebuildThreshold(); - } + + } //end class Validate /** * @return a deep copy of this sketch @@ -247,7 +306,7 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { @Override public int getRetainedEntries() { - return count_; + return retEntries_; } @Override @@ -303,7 +362,7 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { * Rebuilds reducing the actual number of entries to the nominal number of entries if needed */ public void trim() { - if (count_ > nomEntries_) { + if (retEntries_ > nomEntries_) { updateTheta(); resize(hashTable_.length); } @@ -314,13 +373,13 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { */ public void reset() { empty_ = true; - count_ = 0; + retEntries_ = 0; thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability_); final int startingCapacity = Util.getStartingCapacity(nomEntries_, lgResizeFactor_); lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); hashTable_ = new long[startingCapacity]; summaryTable_ = null; // wait for the first summary to call Array.newInstance() - setRebuildThreshold(); + rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); } /** @@ -364,8 +423,8 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { public byte[] toByteArray() { byte[][] summariesBytes = null; int summariesBytesLength = 0; - if (count_ > 0) { - summariesBytes = new byte[count_][]; + if (retEntries_ > 0) { + summariesBytes = new byte[retEntries_][]; int i = 0; for (int j = 0; j < summaryTable_.length; j++) { if (summaryTable_[j] != null) { @@ -392,10 +451,10 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { if (isThetaIncluded) { sizeBytes += Long.BYTES; } - if (count_ > 0) { + if (retEntries_ > 0) { sizeBytes += Integer.BYTES; // count } - sizeBytes += Long.BYTES * count_ + summariesBytesLength; + sizeBytes += Long.BYTES * retEntries_ + summariesBytesLength; final byte[] bytes = new byte[sizeBytes]; int offset = 0; bytes[offset++] = PREAMBLE_LONGS; @@ -407,7 +466,7 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) | (isInSamplingMode() ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0) | (empty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) - | (count_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) + | (retEntries_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) | (isThetaIncluded ? 1 << Flags.IS_THETA_INCLUDED.ordinal() : 0) ); bytes[offset++] = (byte) Integer.numberOfTrailingZeros(nomEntries_); @@ -421,11 +480,11 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { ByteArrayUtil.putLongLE(bytes, offset, thetaLong_); offset += Long.BYTES; } - if (count_ > 0) { - ByteArrayUtil.putIntLE(bytes, offset, count_); + if (retEntries_ > 0) { + ByteArrayUtil.putIntLE(bytes, offset, retEntries_); offset += Integer.BYTES; } - if (count_ > 0) { + if (retEntries_ > 0) { int i = 0; for (int j = 0; j < hashTable_.length; j++) { if (summaryTable_[j] != null) { @@ -473,13 +532,13 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { int findOrInsert(final long hash) { final int index = HashOperations.hashSearchOrInsert(hashTable_, lgCurrentCapacity_, hash); if (index < 0) { - count_++; + retEntries_++; } return index; } boolean rebuildIfNeeded() { - if (count_ <= rebuildThreshold_) { + if (retEntries_ <= rebuildThreshold_) { return false; } if (hashTable_.length > nomEntries_) { @@ -498,12 +557,12 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { void insert(final long hash, final S summary) { final int index = HashOperations.hashInsertOnly(hashTable_, lgCurrentCapacity_, hash); insertSummary(index, summary); - count_++; + retEntries_++; empty_ = false; } private void updateTheta() { - final long[] hashArr = new long[count_]; + final long[] hashArr = new long[retEntries_]; int i = 0; //Because of the association of the hashTable with the summaryTable we cannot destroy the // hashTable structure. So we must copy. May as well compact at the same time. @@ -514,7 +573,7 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { hashArr[i++] = hashTable_[j]; } } - thetaLong_ = QuickSelect.select(hashArr, 0, count_ - 1, nomEntries_); + thetaLong_ = QuickSelect.select(hashArr, 0, retEntries_ - 1, nomEntries_); } private void resize(final int newSize) { @@ -523,20 +582,20 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> { hashTable_ = new long[newSize]; summaryTable_ = Util.newSummaryArray(summaryTable_, newSize); lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newSize); - count_ = 0; + retEntries_ = 0; for (int i = 0; i < oldHashTable.length; i++) { if (oldSummaryTable[i] != null && oldHashTable[i] < thetaLong_) { insert(oldHashTable[i], oldSummaryTable[i]); } } - setRebuildThreshold(); + rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); } - private void setRebuildThreshold() { - if (hashTable_.length > nomEntries_) { - rebuildThreshold_ = (int) (hashTable_.length * ThetaUtil.REBUILD_THRESHOLD); + private static int setRebuildThreshold(final long[] hashTable, final int nomEntries) { + if (hashTable.length > nomEntries) { + return (int) (hashTable.length * ThetaUtil.REBUILD_THRESHOLD); } else { - rebuildThreshold_ = (int) (hashTable_.length * ThetaUtil.RESIZE_THRESHOLD); + return (int) (hashTable.length * ThetaUtil.RESIZE_THRESHOLD); } } diff --git a/src/main/java/org/apache/datasketches/tuple/Sketch.java b/src/main/java/org/apache/datasketches/tuple/Sketch.java index b4aeb13f..dcc7a2c1 100644 --- a/src/main/java/org/apache/datasketches/tuple/Sketch.java +++ b/src/main/java/org/apache/datasketches/tuple/Sketch.java @@ -35,9 +35,13 @@ public abstract class Sketch<S extends Summary> { long thetaLong_; boolean empty_ = true; - protected SummaryFactory<S> summaryFactory_; + protected SummaryFactory<S> summaryFactory_ = null; - Sketch() {} + Sketch(final long thetaLong, final boolean empty, final SummaryFactory<S> summaryFactory) { + this.thetaLong_ = thetaLong; + this.empty_ = empty; + this.summaryFactory_ = summaryFactory; + } /** * Converts this sketch to a CompactSketch on the Java heap. diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java index f4987278..135ce6d0 100644 --- a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java @@ -69,7 +69,9 @@ public class UpdatableSketch<U, S extends UpdatableSummary<U>> extends QuickSele * Heapifying a CompactSketch is not deprecated. */ @Deprecated - public UpdatableSketch(final Memory srcMem, final SummaryDeserializer<S> deserializer, + public UpdatableSketch( + final Memory srcMem, + final SummaryDeserializer<S> deserializer, final SummaryFactory<S> summaryFactory) { super(srcMem, deserializer, summaryFactory); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java index c4a896a2..9a9962b1 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java @@ -57,7 +57,9 @@ abstract class ArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesUpdatableSk int rebuildThreshold_; //absolute value relative to current capacity int lgCurrentCapacity_; - ArrayOfDoublesQuickSelectSketch(final int numValues, final long seed) { + ArrayOfDoublesQuickSelectSketch( + final int numValues, + final long seed) { super(numValues, seed); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 8c08b3a2..60a3a909 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -45,11 +45,6 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke private int keysOffset_; private int valuesOffset_; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** * Construct a new sketch using the given Memory as its backing store. * @@ -66,12 +61,33 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See seed</a> * @param dstMem <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a> */ - DirectArrayOfDoublesQuickSelectSketch(final int nomEntries, final int lgResizeFactor, - final float samplingProbability, final int numValues, final long seed, final WritableMemory dstMem) { + DirectArrayOfDoublesQuickSelectSketch( + final int nomEntries, + final int lgResizeFactor, + final float samplingProbability, + final int numValues, + final long seed, + final WritableMemory dstMem) { + this(validate1(nomEntries, lgResizeFactor, numValues, dstMem), + nomEntries, + lgResizeFactor, + samplingProbability, + numValues, + seed, + dstMem); + } + + private DirectArrayOfDoublesQuickSelectSketch( + final boolean secure, + final int nomEntries, + final int lgResizeFactor, + final float samplingProbability, + final int numValues, + final long seed, + final WritableMemory dstMem) { super(numValues, seed); mem_ = dstMem; final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); - checkIfEnoughMemory(dstMem, startingCapacity, numValues); mem_.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); mem_.putByte(SERIAL_VERSION_BYTE, serialVersionUID); mem_.putByte(FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); @@ -99,19 +115,52 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke setRebuildThreshold(); } + private static final boolean validate1( + final int nomEntries, + final int lgResizeFactor, + final int numValues, + final WritableMemory dstMem) { + final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); + checkIfEnoughMemory(dstMem, startingCapacity, numValues); + return true; + } + /** * Wraps the given Memory. * @param mem <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a> * @param seed update seed */ - DirectArrayOfDoublesQuickSelectSketch(final WritableMemory mem, final long seed) { + DirectArrayOfDoublesQuickSelectSketch( + final WritableMemory mem, + final long seed) { + this(validate2(mem), mem, seed); + //SpotBugs CT_CONSTRUCTOR_THROW is false positive. + //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J + } + + private DirectArrayOfDoublesQuickSelectSketch( + final boolean secure, + final WritableMemory mem, + final long seed) { super(mem.getByte(NUM_VALUES_BYTE), seed); mem_ = mem; SerializerDeserializer.validateFamily(mem.getByte(FAMILY_ID_BYTE), mem.getByte(PREAMBLE_LONGS_BYTE)); SerializerDeserializer.validateType(mem_.getByte(SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch); - final byte version = mem_.getByte(SERIAL_VERSION_BYTE); + + Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), Util.computeSeedHash(seed)); + keysOffset_ = ENTRIES_START; + valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity()); + // to do: make parent take care of its own parts + lgCurrentCapacity_ = Integer.numberOfTrailingZeros(getCurrentCapacity()); + thetaLong_ = mem_.getLong(THETA_LONG); + isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; + setRebuildThreshold(); + } + + private static final boolean validate2(final Memory mem) { + final byte version = mem.getByte(SERIAL_VERSION_BYTE); if (version != serialVersionUID) { throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID + ", actual: " + version); @@ -121,14 +170,7 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), Util.computeSeedHash(seed)); - keysOffset_ = ENTRIES_START; - valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity()); - // to do: make parent take care of its own parts - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(getCurrentCapacity()); - thetaLong_ = mem_.getLong(THETA_LONG); - isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; - setRebuildThreshold(); + return true; } @Override diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java index 2253be04..803c2f59 100644 --- a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java @@ -46,11 +46,11 @@ public class ArrayOfStringsSketchTest { sketch1.update(strArrArr[i], strArrArr[i]); } sketch1.update(strArrArr[0], strArrArr[0]); //insert duplicate - //printSummaries(sketch1.iterator()); + printSummaries(sketch1.iterator()); byte[] array = sketch1.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(array); ArrayOfStringsSketch sketch2 = new ArrayOfStringsSketch(wmem); - //printSummaries(sketch2.iterator()); + printSummaries(sketch2.iterator()); checkSummaries(sketch2, sketch2); String[] strArr3 = {"g", "h" }; diff --git a/tools/FindBugsExcludeFilter.xml b/tools/FindBugsExcludeFilter.xml index 32d176d7..993bbbcf 100644 --- a/tools/FindBugsExcludeFilter.xml +++ b/tools/FindBugsExcludeFilter.xml @@ -79,10 +79,27 @@ under the License. <Class name="org.apache.datasketches.quantilescommon.GenericSortedViewIterator"/> </Match> - <Match> + <Match> <!-- False Positive: These are intentional --> <Bug pattern="FE_FLOATING_POINT_EQUALITY" /> <Class name="org.apache.datasketches.sampling.EbppsItemsSample" /> - <Method name="merge" /> + <Or> + <Method name="merge" /> + <Method name="downsample" /> + </Or> + </Match> + + <Match> <!-- False Positive: Application is single threaded. --> + <Bug pattern="SING_SINGLETON_GETTER_NOT_SYNCHRONIZED" /> + <Class name="org.apache.datasketches.theta.EmptyCompactSketch"/> + <Method name="getHeapInstance" /> + </Match> + + <Match> <!-- False Positive: Code is the recommended solution for Finalizer Attack. --> + <Bug pattern="CT_CONSTRUCTOR_THROW" /> + <Or> + <Class name="org.apache.datasketches.tuple.arrayofdoubles.DirectArrayOfDoublesQuickSelectSketch"/> + <Class name="org.apache.datasketches.theta.DirectQuickSelectSketch"/> + </Or> </Match> </FindBugsFilter> --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
