garydgregory commented on a change in pull request #83: WIP: Initial bloom filter code contribution URL: https://github.com/apache/commons-collections/pull/83#discussion_r327646708
########## File path: src/main/java/org/apache/commons/collections4/bloomfilters/ProtoBloomFilter.java ########## @@ -0,0 +1,558 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilters; + +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Stream; + +/** + * A prototypical bloom filter definition. + * <p> + * The ProtoBloomFilter contains the information necessary to create a concrete + * bloom filter with a given filter configuration. The construction of the + * ProtoBloomFilter is far more compute expensive than making the concrete bloom + * filter from the proto filter. Concrete implementations of BloomFilter are + * built from the ProtoBloomFilter. + * </p> + * + * @since 4.5 + * @see FilterConfig + * + */ +public final class ProtoBloomFilter implements Comparable<ProtoBloomFilter> { + + private final List<Hash> hashes; + private transient Integer hashCode; + + /** + * An empty ProtoBloomFilter. Used to create empty BloomFilters. + */ + public static final ProtoBloomFilter EMPTY = new ProtoBloomFilter(Collections.emptyList()); + + /** + * Get a builder . + * + * @return a new builder. + */ + public static Builder builder() { + return new Builder(); + } + + /* package private for testing */ + /** + * Constructor + * + * @param hashes the two longs that were created by the murmur hash function. + */ + ProtoBloomFilter(Collection<Hash> hashes) { + this.hashes = new ArrayList<Hash>(); + this.hashes.addAll(hashes); + // sort so compareTo and equals work properly + Collections.sort(this.hashes); + } + + private Object writeReplace() { + return new ProtoSerProxy(this); + } + + /** + * Get the count of hashed items included in this proto bloom filter. + * + * @return The number of unique items in this proto filter. + */ + public int getItemCount() { + return hashes.size(); + } + + /** + * Get the stream of hashes included in this proto bloom filter. + * + * @return the stream of hashes. + */ + public Stream<Hash> getHashes() { + return hashes.stream(); + } + + /** + * Get the count of unique hashed items included in this proto bloom filter. + * + * @return the number of unique items hashed into the proto bloom filter. + */ + public int getUniqueItemCount() { + return (int) getUniqueHashes().count(); + } + + /** + * Get the stream of uniques hashes included in this proto bloom filter. + * + * @return the stream of unique hashes. + */ + public Stream<Hash> getUniqueHashes() { + return hashes.stream().distinct(); + } + + @Override + public int hashCode() { + if (hashCode == null) { + hashCode = Objects.hash(hashes); + } + return hashCode.intValue(); + } + + @Override + public int compareTo(ProtoBloomFilter other) { + Iterator<Hash> otherIter = other.getHashes().iterator(); + Iterator<Hash> iter = hashes.iterator(); + int result; + while (iter.hasNext() && otherIter.hasNext()) { + result = iter.next().compareTo(otherIter.next()); + if (result != 0) { + return result; + } + } + return otherIter.hasNext() ? -1 : 0; + } + + @Override + public boolean equals(Object o) { + if (o instanceof ProtoBloomFilter) { + return compareTo((ProtoBloomFilter) o) == 0; + } + return false; + } + + @Override + public String toString() { + return String.format("ProtoBloomFilter[ %s, %s]", hashes.size(), hashCode()); + } + + /** + * A Serialization proxy for a ProtoBloomFilter. + * + */ + private static class ProtoSerProxy implements Serializable { Review comment: No need to abbreviate the class name here, I would spell out Serializable in full. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services