Log Merkle tree precision stats patch by Benedict; reviewed by yukim for CASSANDRA-2698
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/18f0234b Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/18f0234b Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/18f0234b Branch: refs/heads/trunk Commit: 18f0234bf908d1f970bb694d0bb58539ab427387 Parents: 3205c10 Author: Benedict <s...@laerad.com> Authored: Wed Aug 7 12:19:09 2013 -0500 Committer: Yuki Morishita <yu...@apache.org> Committed: Wed Aug 7 12:19:58 2013 -0500 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../org/apache/cassandra/repair/Validator.java | 61 ++++- .../cassandra/utils/EstimatedHistogram.java | 65 ++++++ .../cassandra/utils/HistogramBuilder.java | 158 +++++++++++++ .../org/apache/cassandra/utils/MerkleTree.java | 233 ++++++++++++++----- .../cassandra/repair/DifferencerTest.java | 2 +- .../cassandra/utils/HistogramBuilderTest.java | 109 +++++++++ .../apache/cassandra/utils/MerkleTreeTest.java | 9 +- 8 files changed, 572 insertions(+), 66 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/18f0234b/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 9509113..4dca1b8 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,6 @@ 2.0.1 * Notify indexer of columns shadowed by range tombstones (CASSANDRA-5614) + * Log Merkle tree stats (CASSANDRA-2698) 2.0.0 http://git-wip-us.apache.org/repos/asf/cassandra/blob/18f0234b/src/java/org/apache/cassandra/repair/Validator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/repair/Validator.java b/src/java/org/apache/cassandra/repair/Validator.java index 4e8e865..e7efcaa 100644 --- a/src/java/org/apache/cassandra/repair/Validator.java +++ b/src/java/org/apache/cassandra/repair/Validator.java @@ -61,8 +61,6 @@ public class Validator implements Runnable private transient MerkleTree.TreeRangeIterator ranges; private transient DecoratedKey lastKey; - public final static MerkleTree.RowHash EMPTY_ROW = new MerkleTree.RowHash(null, new byte[0]); - /** * Create Validator with default size of initial Merkle Tree. */ @@ -145,7 +143,7 @@ public class Validator implements Runnable while (!range.contains(row.key.token)) { // add the empty hash, and move to the next range - range.addHash(EMPTY_ROW); + range.ensureHashInitialised(); range = ranges.next(); } @@ -153,13 +151,52 @@ public class Validator implements Runnable range.addHash(rowHash(row)); } + static class CountingDigest extends MessageDigest + { + private long count; + private MessageDigest underlying; + + public CountingDigest(MessageDigest underlying) + { + super(underlying.getAlgorithm()); + this.underlying = underlying; + } + + @Override + protected void engineUpdate(byte input) + { + underlying.update(input); + count += 1; + } + + @Override + protected void engineUpdate(byte[] input, int offset, int len) + { + underlying.update(input, offset, len); + count += len; + } + + @Override + protected byte[] engineDigest() + { + return underlying.digest(); + } + + @Override + protected void engineReset() + { + underlying.reset(); + } + + } + private MerkleTree.RowHash rowHash(AbstractCompactedRow row) { validated++; // MerkleTree uses XOR internally, so we want lots of output bits here - MessageDigest digest = FBUtilities.newMessageDigest("SHA-256"); + CountingDigest digest = new CountingDigest(FBUtilities.newMessageDigest("SHA-256")); row.update(digest); - return new MerkleTree.RowHash(row.key.token, digest.digest()); + return new MerkleTree.RowHash(row.key.token, digest.digest(), digest.count); } /** @@ -170,7 +207,15 @@ public class Validator implements Runnable completeTree(); StageManager.getStage(Stage.ANTI_ENTROPY).execute(this); - logger.debug("Validated " + validated + " rows into AEService tree for " + desc); + + if (logger.isDebugEnabled()) + { + // log distribution of rows in tree + logger.debug("Validated " + validated + " rows into AEService tree for " + desc + " with row count distribution:"); + tree.histogramOfRowCountPerLeaf().log(logger); + logger.debug("Validated " + validated + " rows into AEService tree for " + desc + " with row size distribution:"); + tree.histogramOfRowSizePerLeaf().log(logger); + } } @VisibleForTesting @@ -179,11 +224,11 @@ public class Validator implements Runnable assert ranges != null : "Validator was not prepared()"; if (range != null) - range.addHash(EMPTY_ROW); + range.ensureHashInitialised(); while (ranges.hasNext()) { range = ranges.next(); - range.addHash(EMPTY_ROW); + range.ensureHashInitialised(); } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/18f0234b/src/java/org/apache/cassandra/utils/EstimatedHistogram.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/EstimatedHistogram.java b/src/java/org/apache/cassandra/utils/EstimatedHistogram.java index 54407a4..3a30290 100644 --- a/src/java/org/apache/cassandra/utils/EstimatedHistogram.java +++ b/src/java/org/apache/cassandra/utils/EstimatedHistogram.java @@ -27,6 +27,7 @@ import com.google.common.base.Objects; import org.apache.cassandra.db.TypeSizes; import org.apache.cassandra.io.ISerializer; +import org.slf4j.Logger; public class EstimatedHistogram { @@ -226,6 +227,70 @@ public class EstimatedHistogram return buckets.get(buckets.length() - 1) > 0; } + /** + * log.debug() every record in the histogram + * + * @param log + */ + public void log(Logger log) + { + + // only print overflow if there is any + int nameCount; + if (buckets.get(buckets.length() - 1) == 0) + nameCount = buckets.length() - 1; + else + nameCount = buckets.length(); + String[] names = new String[nameCount]; + + int maxNameLength = 0; + for (int i = 0; i < nameCount; i++) + { + names[i] = nameOfRange(bucketOffsets, i); + maxNameLength = Math.max(maxNameLength, names[i].length()); + } + + // emit log records + String formatstr = "%" + maxNameLength + "s: %d"; + for (int i = 0; i < nameCount; i++) + { + long count = buckets.get(i); + // sort-of-hack to not print empty ranges at the start that are only used to demarcate the + // first populated range. for code clarity we don't omit this record from the maxNameLength + // calculation, and accept the unnecessary whitespace prefixes that will occasionally occur + if (i == 0 && count == 0) + continue; + log.debug(String.format(formatstr, names[i], count)); + } + } + + private static String nameOfRange(long[] bucketOffsets, int index) + { + StringBuilder sb = new StringBuilder(); + appendRange(sb, bucketOffsets, index); + return sb.toString(); + } + + private static void appendRange(StringBuilder sb, long[] bucketOffsets, int index) + { + sb.append("["); + if (index == 0) + if (bucketOffsets[0] > 0) + // by original definition, this histogram is for values greater than zero only; + // if values of 0 or less are required, an entry of lb-1 must be inserted at the start + sb.append("1"); + else + sb.append("-Inf"); + else + sb.append(bucketOffsets[index - 1] + 1); + sb.append(".."); + if (index == bucketOffsets.length) + sb.append("Inf"); + else + sb.append(bucketOffsets[index]); + sb.append("]"); + } + @Override public boolean equals(Object o) { http://git-wip-us.apache.org/repos/asf/cassandra/blob/18f0234b/src/java/org/apache/cassandra/utils/HistogramBuilder.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/HistogramBuilder.java b/src/java/org/apache/cassandra/utils/HistogramBuilder.java new file mode 100644 index 0000000..c2accb2 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/HistogramBuilder.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.utils; + +import java.util.Arrays; + +/** + * Simple class for constructing an EsimtatedHistogram from a set of predetermined values + */ +public class HistogramBuilder +{ + + public HistogramBuilder() {} + public HistogramBuilder(long[] values) + { + for (long value : values) + { + add(value); + } + } + + private long[] values = new long[10]; + int count = 0; + + public void add(long value) + { + if (count == values.length) + values = Arrays.copyOf(values, values.length << 1); + values[count++] = value; + } + + /** + * See {@link #buildWithStdevRangesAroundMean(int)} + * @return buildWithStdevRangesAroundMean(3) + */ + public EstimatedHistogram buildWithStdevRangesAroundMean() + { + return buildWithStdevRangesAroundMean(3); + } + + /** + * Calculate the min, mean, max and standard deviation of the items in the builder, and + * generate an EstimatedHistogram with upto <code>maxdev</code> stdev size ranges either + * side of the mean, until min/max are hit; if either min/max are not reached a further range is + * inserted at the relevant ends. e.g., with a <code>maxdevs</code> of 3, there may be <i>up to</i> 8 ranges + * (between 9 boundaries, the middle being the mean); the middle 6 will have the same size (stdev) + * with the outermost two stretching out to min and max. + * + * @param maxdevs + * @return + */ + public EstimatedHistogram buildWithStdevRangesAroundMean(int maxdevs) + { + if (maxdevs < 0) + throw new IllegalArgumentException("maxdevs must be greater than or equal to zero"); + + final int count = this.count; + final long[] values = this.values; + + if (count == 0) + return new EstimatedHistogram(new long[] { }, new long[] { 0 }); + + long min = Long.MAX_VALUE, max = Long.MIN_VALUE; + double sum = 0, sumsq = 0; + for (int i = 0 ; i < count ; i++) + { + final long value = values[i]; + sum += value; + sumsq += value * value; + if (value < min) + min = value; + if (value > max) + max = value; + } + + final long mean = (long)Math.round(sum / count); + final double stdev = + Math.sqrt((sumsq / count) - (mean * (double) mean)); + + // build the ranges either side of the mean + final long[] lowhalf = buildRange(mean, min, true, stdev, maxdevs); + final long[] highhalf = buildRange(mean, max, false, stdev, maxdevs); + + // combine the ranges + final long[] ranges = new long[lowhalf.length + highhalf.length + 1]; + System.arraycopy(lowhalf, 0, ranges, 0, lowhalf.length); + ranges[lowhalf.length] = mean; + System.arraycopy(highhalf, 0, ranges, lowhalf.length + 1, highhalf.length); + + final EstimatedHistogram hist = new EstimatedHistogram(ranges, new long[ranges.length + 1]); + for (int i = 0 ; i < count ; i++) + hist.add(values[i]); + return hist; + } + + private static long[] buildRange(long mean, long minormax, boolean ismin, double stdev, int maxdevs) + { + if (minormax == mean) + // minormax == mean we have no range to produce, but given the exclusive starts + // that begin at zero by default (or -Inf) in EstimatedHistogram we have to generate a min range + // to indicate where we start from + return ismin ? new long[] { mean - 1 } : new long[0]; + + if (stdev < 1) + { + // deal with stdevs too small to generate sensible ranges + return ismin ? new long[] { minormax - 1, mean - 1 } : + new long[] { minormax }; + } + + long larger, smaller; + if (ismin) { larger = mean; smaller = minormax; } + else { larger = minormax; smaller = mean; } + + double stdevsTo = (larger - smaller) / stdev; + if (stdevsTo > 0 && stdevsTo < 1) + // always round up if there's just one non-empty range + stdevsTo = 1; + else + // otherwise round to the nearest half stdev, to avoid tiny ranges at the start/end + stdevsTo = Math.round(stdevsTo); + + // limit to 4 stdev ranges - last range will contain everything to boundary + final int len = Math.min(maxdevs + 1, (int) stdevsTo); + final long[] range = new long[len]; + long next = ismin ? minormax - 1 : minormax; + for (int i = 0 ; i < range.length ; i++) + { + long delta = (range.length - (i + 1)) * (long) stdev; + if (ismin) + { + range[i] = next; + next = mean - delta; + } + else + { + range[len - 1 - i] = next; + next = mean + delta; + } + } + return range; + } +} http://git-wip-us.apache.org/repos/asf/cassandra/blob/18f0234b/src/java/org/apache/cassandra/utils/MerkleTree.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/MerkleTree.java b/src/java/org/apache/cassandra/utils/MerkleTree.java index a16c2b9..ce71ec4 100644 --- a/src/java/org/apache/cassandra/utils/MerkleTree.java +++ b/src/java/org/apache/cassandra/utils/MerkleTree.java @@ -64,6 +64,7 @@ public class MerkleTree implements Serializable public static final int CONSISTENT = 0; public static final int FULLY_INCONSISTENT = 1; public static final int PARTIALLY_INCONSISTENT = 2; + private static final byte[] EMPTY_HASH = new byte[0]; public final byte hashdepth; @@ -227,11 +228,14 @@ public class MerkleTree implements Serializable if (!ltree.fullRange.equals(rtree.fullRange)) throw new IllegalArgumentException("Difference only make sense on tree covering the same range (but " + ltree.fullRange + " != " + rtree.fullRange + ")"); - List<TreeRange> diff = new ArrayList<TreeRange>(); - TreeRange active = new TreeRange(null, ltree.fullRange.left, ltree.fullRange.right, (byte)0, null); + List<TreeRange> diff = new ArrayList<>(); + TreeDifference active = new TreeDifference(ltree.fullRange.left, ltree.fullRange.right, (byte)0); - byte[] lhash = ltree.hash(active); - byte[] rhash = rtree.hash(active); + Hashable lnode = ltree.find(active); + Hashable rnode = rtree.find(active); + byte[] lhash = lnode.hash(); + byte[] rhash = rnode.hash(); + active.setSize(lnode.sizeOfRange(), rnode.sizeOfRange()); if (lhash != null && rhash != null && !Arrays.equals(lhash, rhash)) { @@ -256,14 +260,19 @@ public class MerkleTree implements Serializable return CONSISTENT; Token midpoint = ltree.partitioner().midpoint(active.left, active.right); - TreeRange left = new TreeRange(null, active.left, midpoint, inc(active.depth), null); - TreeRange right = new TreeRange(null, midpoint, active.right, inc(active.depth), null); - byte[] lhash; - byte[] rhash; + TreeDifference left = new TreeDifference(active.left, midpoint, inc(active.depth)); + TreeDifference right = new TreeDifference(midpoint, active.right, inc(active.depth)); + byte[] lhash, rhash; + Hashable lnode, rnode; // see if we should recurse left - lhash = ltree.hash(left); - rhash = rtree.hash(left); + lnode = ltree.find(left); + rnode = rtree.find(left); + lhash = lnode.hash(); + rhash = rnode.hash(); + left.setSize(lnode.sizeOfRange(), rnode.sizeOfRange()); + left.setRows(lnode.rowsInRange(), rnode.rowsInRange()); + int ldiff = CONSISTENT; boolean lreso = lhash != null && rhash != null; if (lreso && !Arrays.equals(lhash, rhash)) @@ -271,10 +280,14 @@ public class MerkleTree implements Serializable else if (!lreso) ldiff = FULLY_INCONSISTENT; - // see if we should recurse right - lhash = ltree.hash(right); - rhash = rtree.hash(right); + lnode = ltree.find(right); + rnode = rtree.find(right); + lhash = lnode.hash(); + rhash = rnode.hash(); + right.setSize(lnode.sizeOfRange(), rnode.sizeOfRange()); + right.setRows(lnode.rowsInRange(), rnode.rowsInRange()); + int rdiff = CONSISTENT; boolean rreso = lhash != null && rhash != null; if (rreso && !Arrays.equals(lhash, rhash)) @@ -363,54 +376,57 @@ public class MerkleTree implements Serializable */ public byte[] hash(Range<Token> range) { + return find(range).hash(); + } + + /** + * Find the {@link Hashable} node that matches the given {@code range}. + * + * @param range Range to find + * @return {@link Hashable} found. If nothing found, return {@link Leaf} with null hash. + */ + private Hashable find(Range<Token> range) + { try { - return hashHelper(root, new Range<Token>(fullRange.left, fullRange.right), range); + return findHelper(root, new Range<Token>(fullRange.left, fullRange.right), range); } catch (StopRecursion e) { - return null; + return new Leaf(); } } /** * @throws StopRecursion If no match could be found for the range. */ - private byte[] hashHelper(Hashable hashable, Range<Token> active, Range<Token> range) throws StopRecursion + private Hashable findHelper(Hashable current, Range<Token> activeRange, Range<Token> find) throws StopRecursion { - if (hashable instanceof Leaf) + if (current instanceof Leaf) { - if (!range.contains(active)) + if (!find.contains(activeRange)) // we are not fully contained in this range! throw new StopRecursion.BadRange(); - return hashable.hash(); + return current; } // else: node. - Inner node = (Inner)hashable; - Range<Token> leftactive = new Range<Token>(active.left, node.token); - Range<Token> rightactive = new Range<Token>(node.token, active.right); + Inner node = (Inner)current; + Range<Token> leftRange = new Range<Token>(activeRange.left, node.token); + Range<Token> rightRange = new Range<Token>(node.token, activeRange.right); - if (range.contains(active)) - { + if (find.contains(activeRange)) // this node is fully contained in the range - if (node.hash() != null) - // we had a cached value - return node.hash(); - // continue recursing to hash our children - byte[] lhash = hashHelper(node.lchild(), leftactive, range); - byte[] rhash = hashHelper(node.rchild(), rightactive, range); - // cache the computed value (even if it is null) - node.hash(lhash, rhash); - return node.hash(); - } // else: one of our children contains the range - - if (leftactive.contains(range)) + return node.calc(); + + // else: one of our children contains the range + + if (leftRange.contains(find)) // left child contains/matches the range - return hashHelper(node.lchild, leftactive, range); - else if (rightactive.contains(range)) + return findHelper(node.lchild, leftRange, find); + else if (rightRange.contains(find)) // right child contains/matches the range - return hashHelper(node.rchild, rightactive, range); + return findHelper(node.rchild, rightRange, find); else throw new StopRecursion.BadRange(); } @@ -479,6 +495,26 @@ public class MerkleTree implements Serializable return new TreeRangeIterator(this); } + public EstimatedHistogram histogramOfRowSizePerLeaf() + { + HistogramBuilder histbuild = new HistogramBuilder(); + for (TreeRange range : new TreeRangeIterator(this)) + { + histbuild.add(range.hashable.sizeOfRange); + } + return histbuild.buildWithStdevRangesAroundMean(); + } + + public EstimatedHistogram histogramOfRowCountPerLeaf() + { + HistogramBuilder histbuild = new HistogramBuilder(); + for (TreeRange range : new TreeRangeIterator(this)) + { + histbuild.add(range.hashable.rowsInRange); + } + return histbuild.buildWithStdevRangesAroundMean(); + } + @Override public String toString() { @@ -489,6 +525,59 @@ public class MerkleTree implements Serializable return buff.toString(); } + public static class TreeDifference extends TreeRange + { + private static final long serialVersionUID = 6363654174549968183L; + + private long sizeOnLeft; + private long sizeOnRight; + private long rowsOnLeft; + private long rowsOnRight; + + void setSize(long sizeOnLeft, long sizeOnRight) + { + this.sizeOnLeft = sizeOnLeft; + this.sizeOnRight = sizeOnRight; + } + + void setRows(long rowsOnLeft, long rowsOnRight) + { + this.rowsOnLeft = rowsOnLeft; + this.rowsOnRight = rowsOnRight; + } + + public long sizeOnLeft() + { + return sizeOnLeft; + } + + public long sizeOnRight() + { + return sizeOnRight; + } + + public long rowsOnLeft() + { + return rowsOnLeft; + } + + public long rowsOnRight() + { + return rowsOnRight; + } + + public TreeDifference(Token left, Token right, byte depth) + { + super(null, left, right, depth, null); + } + + public long totalRows() + { + return rowsOnLeft + rowsOnRight; + } + + } + /** * The public interface to a range in the tree. * @@ -531,7 +620,16 @@ public class MerkleTree implements Serializable assert tree != null : "Not intended for modification!"; assert hashable instanceof Leaf; - hashable.addHash(entry.hash); + hashable.addHash(entry.hash, entry.size); + } + + public void ensureHashInitialised() + { + assert tree != null : "Not intended for modification!"; + assert hashable instanceof Leaf; + + if (hashable.hash == null) + hashable.hash = EMPTY_HASH; } public void addAll(Iterator<RowHash> entries) @@ -662,6 +760,21 @@ public class MerkleTree implements Serializable rchild = child; } + Hashable calc() + { + if (hash == null) + { + // hash and size haven't been calculated; calc children then compute + Hashable lnode = lchild.calc(); + Hashable rnode = rchild.calc(); + // cache the computed value + hash(lnode.hash, rnode.hash); + sizeOfRange = lnode.sizeOfRange + rnode.sizeOfRange; + rowsInRange = lnode.rowsInRange + rnode.rowsInRange; + } + return this; + } + /** * Recursive toString. */ @@ -729,12 +842,12 @@ public class MerkleTree implements Serializable public long serializedSize(Inner inner, int version) { int size = inner.hash == null - ? TypeSizes.NATIVE.sizeof(-1) - : TypeSizes.NATIVE.sizeof(inner.hash().length) + inner.hash().length; + ? TypeSizes.NATIVE.sizeof(-1) + : TypeSizes.NATIVE.sizeof(inner.hash().length) + inner.hash().length; size += Token.serializer.serializedSize(inner.token, TypeSizes.NATIVE) - + Hashable.serializer.serializedSize(inner.lchild, version) - + Hashable.serializer.serializedSize(inner.rchild, version); + + Hashable.serializer.serializedSize(inner.lchild, version) + + Hashable.serializer.serializedSize(inner.rchild, version); return size; } } @@ -768,11 +881,6 @@ public class MerkleTree implements Serializable super(hash); } - public Leaf(byte[] lefthash, byte[] righthash) - { - super(Hashable.binaryHash(lefthash, righthash)); - } - public void toString(StringBuilder buff, int maxdepth) { buff.append(toString()); @@ -826,16 +934,18 @@ public class MerkleTree implements Serializable { public final Token token; public final byte[] hash; - public RowHash(Token token, byte[] hash) + public final long size; + public RowHash(Token token, byte[] hash, long size) { this.token = token; this.hash = hash; + this.size = size; } @Override public String toString() { - return "#<RowHash " + token + " " + Hashable.toString(hash) + ">"; + return "#<RowHash " + token + " " + Hashable.toString(hash) + " @ " + size + " bytes>"; } } @@ -848,6 +958,8 @@ public class MerkleTree implements Serializable private static final IVersionedSerializer<Hashable> serializer = new HashableSerializer(); protected byte[] hash; + protected long sizeOfRange; + protected long rowsInRange; protected Hashable(byte[] hash) { @@ -859,11 +971,26 @@ public class MerkleTree implements Serializable return hash; } + public long sizeOfRange() + { + return sizeOfRange; + } + + public long rowsInRange() + { + return rowsInRange; + } + void hash(byte[] hash) { this.hash = hash; } + Hashable calc() + { + return this; + } + /** * Sets the value of this hash to binaryHash of its children. * @param lefthash Hash of left child. @@ -878,12 +1005,14 @@ public class MerkleTree implements Serializable * Mixes the given value into our hash. If our hash is null, * our hash will become the given value. */ - void addHash(byte[] righthash) + void addHash(byte[] righthash, long sizeOfRow) { if (hash == null) hash = righthash; else hash = binaryHash(hash, righthash); + this.sizeOfRange += sizeOfRow; + this.rowsInRange += 1; } /** http://git-wip-us.apache.org/repos/asf/cassandra/blob/18f0234b/test/unit/org/apache/cassandra/repair/DifferencerTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/repair/DifferencerTest.java b/test/unit/org/apache/cassandra/repair/DifferencerTest.java index 2502620..3f259f2 100644 --- a/test/unit/org/apache/cassandra/repair/DifferencerTest.java +++ b/test/unit/org/apache/cassandra/repair/DifferencerTest.java @@ -132,7 +132,7 @@ public class DifferencerTest extends SchemaLoader tree.init(); for (MerkleTree.TreeRange r : tree.invalids()) { - r.addHash(Validator.EMPTY_ROW); + r.ensureHashInitialised(); } return tree; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/18f0234b/test/unit/org/apache/cassandra/utils/HistogramBuilderTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/utils/HistogramBuilderTest.java b/test/unit/org/apache/cassandra/utils/HistogramBuilderTest.java new file mode 100644 index 0000000..dfceaf3 --- /dev/null +++ b/test/unit/org/apache/cassandra/utils/HistogramBuilderTest.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.utils; + +import java.util.concurrent.atomic.AtomicLongArray; + +import org.junit.Test; + +import static org.junit.Assert.*; + +public class HistogramBuilderTest +{ + + @Test + public void testStdevEmpty() + { + EstimatedHistogram hist = new HistogramBuilder().buildWithStdevRangesAroundMean(); + assertArrayEquals(new long[] { }, hist.getBucketOffsets()); + assertArrayEquals(new long[] { 0 }, toArray(hist.buckets)); + } + + @Test + public void testStdevSingletonRanges() + { + EstimatedHistogram hist; + hist = new HistogramBuilder(new long[] { 5, 5, 5, 5, 5 }).buildWithStdevRangesAroundMean(); + assertArrayEquals(new long[] { 4, 5 }, hist.getBucketOffsets()); + assertArrayEquals(new long[] { 0, 5, 0 }, toArray(hist.buckets)); + // should behave exactly the same for negative numbers + hist = new HistogramBuilder(new long[] { -1 }).buildWithStdevRangesAroundMean(); + assertArrayEquals(new long[] { -2, -1 }, hist.getBucketOffsets()); + assertArrayEquals(new long[] { 0, 1, 0 }, toArray(hist.buckets)); + } + + @Test + public void testStdevNearZeroStdev() + { + EstimatedHistogram hist; + long[] vals = new long[100000]; + vals[0] = 99; + vals[1] = 101; + for (int i = 2 ; i < vals.length ; i++) + vals[i] = 100; + hist = new HistogramBuilder(vals).buildWithStdevRangesAroundMean(); + assertArrayEquals(new long[] { 98, 99, 100, 101 }, hist.getBucketOffsets()); + assertArrayEquals(new long[] { 0, 1, vals.length - 2, 1, 0 }, toArray(hist.buckets)); + } + + @Test + public void testStdev() + { + long[] vals; + EstimatedHistogram hist; + vals = new long[] { -10, -3, -2, -2, -1, -1, -1, -1, -1, -1, -1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 10 }; + hist = new HistogramBuilder(vals).buildWithStdevRangesAroundMean(2); + assertArrayEquals(new long[] { -11, -6, -3, 0, 3, 6, 10 }, hist.getBucketOffsets()); + assertArrayEquals(new long[] { 0, 1, 1, 10, 10, 0, 1, 0 }, toArray(hist.buckets)); + } + + private static long[] toArray(AtomicLongArray a) + { + final long[] r = new long[a.length()]; + for (int i = 0 ; i < r.length ; i++) + r[i] = a.get(i); + return r; + } + + @Test + public void testStdevLargeNumbers() + { + long[] vals; + EstimatedHistogram hist; + vals = new long[100000]; + for (int i = 0 ; i < vals.length ; i++) + { + if (i < vals.length * 0.6f) + vals[i] = 60; + else if (i < vals.length * 0.8f) + vals[i] = 120; + else if (i < vals.length * 0.9f) + vals[i] = 180; + else if (i < vals.length * 0.95f) + vals[i] = 240; + else if (i < vals.length * 0.98f) + vals[i] = 320; + else + vals[i] = 1000; + } + hist = new HistogramBuilder(vals).buildWithStdevRangesAroundMean(2); + assertArrayEquals(new long[] { 59, 120, 260, 400, 1000 }, hist.getBucketOffsets()); + assertArrayEquals(new long[] { 0, 80000, 15000, 3000, 2000, 0 }, toArray(hist.buckets)); + } + +} http://git-wip-us.apache.org/repos/asf/cassandra/blob/18f0234b/test/unit/org/apache/cassandra/utils/MerkleTreeTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/utils/MerkleTreeTest.java b/test/unit/org/apache/cassandra/utils/MerkleTreeTest.java index 70fe3a0..2b459c6 100644 --- a/test/unit/org/apache/cassandra/utils/MerkleTreeTest.java +++ b/test/unit/org/apache/cassandra/utils/MerkleTreeTest.java @@ -25,16 +25,15 @@ import com.google.common.collect.AbstractIterator; import com.google.common.io.ByteArrayDataInput; import com.google.common.io.ByteArrayDataOutput; import com.google.common.io.ByteStreams; -import org.apache.hadoop.hdfs.server.common.Storage; + import org.junit.Before; import org.junit.Test; - import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.dht.*; import org.apache.cassandra.net.MessagingService; -import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.MerkleTree.Hashable; import org.apache.cassandra.utils.MerkleTree.RowHash; +import org.apache.cassandra.utils.MerkleTree.TreeDifference; import org.apache.cassandra.utils.MerkleTree.TreeRange; import org.apache.cassandra.utils.MerkleTree.TreeRangeIterator; @@ -322,7 +321,7 @@ public class MerkleTreeTest // validate the tree TreeRangeIterator ranges = mt.invalids(); for (TreeRange range : ranges) - range.addHash(new RowHash(range.right, new byte[0])); + range.addHash(new RowHash(range.right, new byte[0], 0)); assert mt.hash(new Range<>(tok(-1), tok(-1))) != null : "Could not hash tree " + mt; @@ -491,7 +490,7 @@ public class MerkleTreeTest public RowHash computeNext() { if (tokens.hasNext()) - return new RowHash(tokens.next(), DUMMY); + return new RowHash(tokens.next(), DUMMY, DUMMY.length); return endOfData(); } }