(datasketches-go) 28/34: Add remaing tests and cleanup

alsay Thu, 21 Dec 2023 15:16:34 -0800

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git


commit 3c48b48bdd5205f501e11d4a90d4d84a18689082
Author: Pierre Lacave <[email protected]>
AuthorDate: Thu Dec 21 13:35:51 2023 +0100

    Add remaing tests and cleanup
---
 common/testutils/utils.go                      |  14 ++
 common/utils.go                                |   7 +-
 frequencies/dist_test.go                       |  13 --
 frequencies/error_types.go                     |  39 ----
 frequencies/longs_sketch.go                    | 232 +++++++++++++++------
 frequencies/longs_sketch_serialization_test.go | 145 +++++++++++++
 frequencies/longs_sketch_test.go               | 273 +++++++++++++++----------
 frequencies/row.go                             |  18 +-
 frequencies/utils.go                           |  33 +++
 hll/hll_sketch_serialization_test.go           |  50 ++---
 hll/hll_sketch_test.go                         |   6 -
 11 files changed, 554 insertions(+), 276 deletions(-)

diff --git a/common/testutils/utils.go b/common/testutils/utils.go
new file mode 100644
index 0000000..7f8586a
--- /dev/null
+++ b/common/testutils/utils.go
@@ -0,0 +1,14 @@
+package testutils
+
+const (
+       DSketchTestGenerateGo = "DSKETCH_TEST_GENERATE_GO"
+       DSketchTestCrossJava  = "DSKETCH_TEST_CROSS_JAVA"
+       DSketchTestCrossCpp   = "DSKETCH_TEST_CROSS_CPP"
+       DSketchTestCrossGo    = "DSKETCH_TEST_CROSS_GO"
+)
+
+const (
+       JavaPath = "../serialization_test_data/java_generated_files"
+       CppPath  = "../serialization_test_data/cpp_generated_files"
+       GoPath   = "../serialization_test_data/go_generated_files"
+)
diff --git a/common/utils.go b/common/utils.go
index 305fa00..80a8b0d 100644
--- a/common/utils.go
+++ b/common/utils.go
@@ -28,6 +28,7 @@ const (
        InverseGolden = float64(0.6180339887498949025)
 )
 
+
 // InvPow2 returns 2^(-e).
 func InvPow2(e int) (float64, error) {
        if (e | 1024 - e - 1) < 0 {
@@ -49,14 +50,14 @@ func CeilPowerOf2(n int) int {
 }
 
 func ExactLog2(powerOf2 int) (int, error) {
-       if !isPowerOf2(powerOf2) {
+       if !IsPowerOf2(powerOf2) {
                return 0, fmt.Errorf("argument 'powerOf2' must be a positive 
power of 2")
        }
        return bits.TrailingZeros64(uint64(powerOf2)), nil
 }
 
-// isPowerOf2 returns true if the given number is a power of 2.
-func isPowerOf2(powerOf2 int) bool {
+// IsPowerOf2 returns true if the given number is a power of 2.
+func IsPowerOf2(powerOf2 int) bool {
        return powerOf2 > 0 && (powerOf2&(powerOf2-1)) == 0
 }
 
diff --git a/frequencies/dist_test.go b/frequencies/dist_test.go
deleted file mode 100644
index 2b67ae0..0000000
--- a/frequencies/dist_test.go
+++ /dev/null
@@ -1,13 +0,0 @@
-package frequencies
-
-import (
-       "math"
-       "math/rand"
-)
-
-func randomGeometricDist(prob float64) int64 {
-       if prob <= 0.0 || prob >= 1.0 {
-               panic("prob must be in (0, 1)")
-       }
-       return int64(1 + math.Log(rand.Float64())/math.Log(1.0-prob))
-}
diff --git a/frequencies/error_types.go b/frequencies/error_types.go
deleted file mode 100644
index 730f331..0000000
--- a/frequencies/error_types.go
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package frequencies
-
-type ErrorType struct {
-       id   int
-       Name string
-}
-
-type ErrorTypes struct {
-       NO_FALSE_POSITIVES ErrorType
-       NO_FALSE_NEGATIVES ErrorType
-}
-
-var ErrorTypeEnum = &ErrorTypes{
-       NO_FALSE_POSITIVES: ErrorType{
-               id:   1,
-               Name: "NO_FALSE_POSITIVES",
-       },
-       NO_FALSE_NEGATIVES: ErrorType{
-               id:   2,
-               Name: "NO_FALSE_NEGATIVES",
-       },
-}
diff --git a/frequencies/longs_sketch.go b/frequencies/longs_sketch.go
index 6574c33..41c6b34 100644
--- a/frequencies/longs_sketch.go
+++ b/frequencies/longs_sketch.go
@@ -19,6 +19,7 @@ package frequencies
 
 import (
        "encoding/binary"
+       "errors"
        "fmt"
        "github.com/apache/datasketches-go/common"
        "math/bits"
@@ -59,13 +60,15 @@ const (
  * map managed by this sketch.
  */
 
-// NewLongSketch returns a new LongsSketch with the given lgMaxMapSize and 
lgCurMapSize.
+// NewLongsSketch returns a new LongsSketch with the given lgMaxMapSize and 
lgCurMapSize.
+//
 // lgMaxMapSize is the log2 of the physical size of the internal hash map 
managed by this
 // sketch. The maximum capacity of this internal hash map is 0.75 times 
2^lgMaxMapSize.
 // Both the ultimate accuracy and size of this sketch are a function of 
lgMaxMapSize.
+//
 // lgCurMapSize is the log2 of the starting (current) physical size of the 
internal hash
 // map managed by this sketch.
-func NewLongSketch(lgMaxMapSize int, lgCurMapSize int) (*LongsSketch, error) {
+func NewLongsSketch(lgMaxMapSize int, lgCurMapSize int) (*LongsSketch, error) {
        //set initial size of hash map
        lgMaxMapSize = max(lgMaxMapSize, _LG_MIN_MAP_SIZE)
        lgCurMapSize = max(lgCurMapSize, _LG_MIN_MAP_SIZE)
@@ -86,21 +89,26 @@ func NewLongSketch(lgMaxMapSize int, lgCurMapSize int) 
(*LongsSketch, error) {
        }, nil
 }
 
-// NewLongSketchWithMaxMapSize constructs a new LongsSketch with the given 
maxMapSize and the
+// NewLongsSketchWithMaxMapSize constructs a new LongsSketch with the given 
maxMapSize and the
 // default initialMapSize (8).
+//
 // maxMapSize determines the physical size of the internal hash map managed by 
this
 // sketch and must be a power of 2.  The maximum capacity of this internal 
hash map is
 // 0.75 times * maxMapSize. Both the ultimate accuracy and size of this sketch 
are a
 // function of maxMapSize.
-func NewLongSketchWithMaxMapSize(maxMapSize int) (*LongsSketch, error) {
+func NewLongsSketchWithMaxMapSize(maxMapSize int) (*LongsSketch, error) {
        log2OfInt, err := common.ExactLog2(maxMapSize)
        if err != nil {
                return nil, fmt.Errorf("maxMapSize, %e", err)
        }
-       return NewLongSketch(log2OfInt, _LG_MIN_MAP_SIZE)
+       return NewLongsSketch(log2OfInt, _LG_MIN_MAP_SIZE)
 }
 
-func NewLongSketchFromSlice(slc []byte) (*LongsSketch, error) {
+// NewLongsSketchFromSlice returns a sketch instance of this class from the 
given slice,
+// which must be a byte slice representation of this sketch class.
+//
+// slc is a byte slice representation of a sketch of this class.
+func NewLongsSketchFromSlice(slc []byte) (*LongsSketch, error) {
        pre0, err := checkPreambleSize(slc)
        if err != nil {
                return nil, err
@@ -130,14 +138,14 @@ func NewLongSketchFromSlice(slc []byte) (*LongsSketch, 
error) {
                return nil, fmt.Errorf("Possible Corruption: Empty Flag set 
incorrectly: %t", preLongsEq1)
        }
        if empty {
-               return NewLongSketch(lgMaxMapSize, _LG_MIN_MAP_SIZE)
+               return NewLongsSketch(lgMaxMapSize, _LG_MIN_MAP_SIZE)
        }
        // get full preamble
        preArr := make([]int64, preLongs)
        for i := 0; i < preLongs; i++ {
                preArr[i] = int64(binary.LittleEndian.Uint64(slc[i<<3:]))
        }
-       fls, err := NewLongSketch(lgMaxMapSize, lgCurMapSize)
+       fls, err := NewLongsSketch(lgMaxMapSize, lgCurMapSize)
        if err != nil {
                return nil, err
        }
@@ -173,7 +181,11 @@ func NewLongSketchFromSlice(slc []byte) (*LongsSketch, 
error) {
        return fls, nil
 }
 
-func NewLongSketchFromString(str string) (*LongsSketch, error) {
+// NewLongsSketchFromString returns a sketch instance of this class from the 
given string,
+// which must be a string representation of this sketch class.
+//
+// str is a string representation of a sketch of this class.
+func NewLongsSketchFromString(str string) (*LongsSketch, error) {
        if len(str) < 1 {
                return nil, fmt.Errorf("String is empty")
        }
@@ -237,7 +249,7 @@ func NewLongSketchFromString(str string) (*LongsSketch, 
error) {
                return nil, fmt.Errorf("Possible Corruption: Incorrect # of 
tokens: %d, numActive: %d", numTokens, numActive)
        }
        //    if ((2 * numActive) != (numTokens - STR_PREAMBLE_TOKENS - 2)) {
-       sk, err := NewLongSketch(int(lgMax), int(lgCur))
+       sk, err := NewLongsSketch(int(lgMax), int(lgCur))
        if err != nil {
                return nil, err
        }
@@ -250,7 +262,43 @@ func NewLongSketchFromString(str string) (*LongsSketch, 
error) {
        return sk, nil
 }
 
-func (s *LongsSketch) getEstimate(item int64) (int64, error) {
+// GetAprioriErrorLongsSketch returns the estimated a priori error given the 
maxMapSize for the sketch and the
+// estimatedTotalStreamWeight.
+//
+// maxMapSize is the planned map size to be used when constructing this sketch.
+// estimatedTotalStreamWeight is the estimated total stream weight.
+func GetAprioriErrorLongsSketch(maxMapSize int, estimatedTotalStreamWeight 
int64) (float64, error) {
+       epsilon, err := GetEpsilonLongsSketch(maxMapSize)
+       if err != nil {
+               return 0, err
+       }
+       return epsilon * float64(estimatedTotalStreamWeight), nil
+}
+
+// GetCurrentMapCapacity returns the current number of counters the sketch is 
configured to support.
+func (s *LongsSketch) GetCurrentMapCapacity() int {
+       return s.curMapCap
+}
+
+// GetEpsilonLongsSketch returns epsilon used to compute a priori error.
+// This is just the value 3.5 / maxMapSize.
+//
+// maxMapSize is the planned map size to be used when constructing this sketch.
+func GetEpsilonLongsSketch(maxMapSize int) (float64, error) {
+       if !common.IsPowerOf2(maxMapSize) {
+               return 0, errors.New("maxMapSize is not a power of 2")
+       }
+       return 3.5 / float64(maxMapSize), nil
+}
+
+// GetEstimate gets the estimate of the frequency of the given item.
+// Note: The true frequency of a item would be the sum of the counts as a 
result of the
+// two update functions.
+//
+// item is the given item
+//
+// return the estimate of the frequency of the given item
+func (s *LongsSketch) GetEstimate(item int64) (int64, error) {
        itemCount, err := s.hashMap.get(item)
        if err != nil {
                return 0, err
@@ -258,12 +306,25 @@ func (s *LongsSketch) getEstimate(item int64) (int64, 
error) {
        return itemCount + s.offset, nil
 }
 
-func (s *LongsSketch) getLowerBound(item int64) (int64, error) {
+// GetLowerBound gets the guaranteed lower bound frequency of the given item, 
which can never be
+// negative.
+//
+// item is the given item.
+//
+// return the guaranteed lower bound frequency of the given item. That is, a 
number which
+// is guaranteed to be no larger than the real frequency.
+func (s *LongsSketch) GetLowerBound(item int64) (int64, error) {
        // LB = itemCount
        return s.hashMap.get(item)
 }
 
-func (s *LongsSketch) getUpperBound(item int64) (int64, error) {
+// GetUpperBound gets the guaranteed upper bound frequency of the given item.
+//
+// item is the given item.
+//
+// return the guaranteed upper bound frequency of the given item. That is, a 
number which
+// is guaranteed to be no smaller than the real frequency.
+func (s *LongsSketch) GetUpperBound(item int64) (int64, error) {
        itemCount, err := s.hashMap.get(item)
        if err != nil {
                return 0, err
@@ -271,43 +332,91 @@ func (s *LongsSketch) getUpperBound(item int64) (int64, 
error) {
        return itemCount + s.offset, nil
 }
 
-func (s *LongsSketch) getNumActiveItems() int {
+// GetFrequentItemsWithThreshold returns an array of Rows that include 
frequent items, estimates, upper and
+// lower bounds given a threshold and an ErrorCondition. If the threshold is 
lower than
+// getMaximumError(), then getMaximumError() will be used instead.
+//
+// The method first examines all active items in the sketch (items that have a 
counter).
+//
+// If errorType = NO_FALSE_NEGATIVES, this will include an item in the result 
list if
+// GetUpperBound(item) > threshold. There will be no false negatives, i.e., no 
Type II error.
+// There may be items in the set with true frequencies less than the threshold 
(false positives).
+//
+// If errorType = NO_FALSE_POSITIVES, this will include an item in the result 
list if
+// GetLowerBound(item) > threshold. There will be no false positives, i.e., no 
Type I error.
+// There may be items omitted from the set with true frequencies greater than 
the threshold
+// (false negatives). This is a subset of the NO_FALSE_NEGATIVES case.
+//
+// threshold to include items in the result list
+// errorType determines whether no false positives or no false negatives are 
desired.
+// an array of frequent items
+func (s *LongsSketch) GetFrequentItemsWithThreshold(threshold int64, errorType 
ErrorType) ([]*Row, error) {
+       finalThreshold := s.GetMaximumError()
+       if threshold > finalThreshold {
+               finalThreshold = threshold
+       }
+       return sortItems(s, finalThreshold, errorType)
+}
+
+// GetFrequentItems returns an array of Rows that include frequent items, 
estimates, upper and
+// lower bounds given an ErrorCondition and the default threshold.
+// This is the same as GetFrequentItemsWithThreshold(getMaximumError(), 
errorType)
+//
+// errorType determines whether no false positives or no false negatives are 
desired.
+func (s *LongsSketch) GetFrequentItems(errorType ErrorType) ([]*Row, error) {
+       return sortItems(s, s.GetMaximumError(), errorType)
+}
+
+// GetNumActiveItems returns the number of active items in the sketch.
+func (s *LongsSketch) GetNumActiveItems() int {
        return s.hashMap.numActive
 }
 
-// getMaximumMapCapacity returns the maximum number of counters the sketch is 
configured to
+// GetMaximumError return an upper bound on the maximum error of 
GetEstimate(item) for any item.
+// This is equivalent to the maximum distance between the upper bound and the 
lower bound
+// for any item.
+func (s *LongsSketch) GetMaximumError() int64 {
+       return s.offset
+}
+
+// GetMaximumMapCapacity returns the maximum number of counters the sketch is 
configured to
 // support.
-func (s *LongsSketch) getMaximumMapCapacity() int {
+func (s *LongsSketch) GetMaximumMapCapacity() int {
        return int(float64(uint64(1<<s.lgMaxMapSize)) * 
reversePurgeLongHashMapLoadFactor)
 }
 
-func (s *LongsSketch) getStorageBytes() int {
-       if s.isEmpty() {
+// GetStorageBytes returns the number of bytes required to store this sketch 
as slice
+func (s *LongsSketch) GetStorageBytes() int {
+       if s.IsEmpty() {
                return 8
        }
-       return (4 * 8) + (16 * s.getNumActiveItems())
+       return (4 * 8) + (16 * s.GetNumActiveItems())
 }
 
-func (s *LongsSketch) getCurrentMapCapacity() int {
-       return s.curMapCap
-}
-
-func (s *LongsSketch) getMaximumError() int64 {
-       return s.offset
-}
-
-func (s *LongsSketch) getStreamLength() int64 {
+// GetStreamLength returns the sum of the frequencies (weights or counts) in 
the stream seen
+// so far by the sketch
+func (s *LongsSketch) GetStreamLength() int64 {
        return s.streamWeight
 }
 
-func (s *LongsSketch) isEmpty() bool {
-       return s.getNumActiveItems() == 0
+// IsEmpty returns true if this sketch is empty
+func (s *LongsSketch) IsEmpty() bool {
+       return s.GetNumActiveItems() == 0
 }
 
+// Update update this sketch with an item and a frequency count of one.
+//
+// item for which the frequency should be increased.
 func (s *LongsSketch) Update(item int64) error {
        return s.UpdateMany(item, 1)
 }
 
+// UpdateMany update this sketch with a item and a positive frequency count 
(or weight).
+//
+// item for which the frequency should be increased. The item can be any long 
value
+// and is only used by the sketch to determine uniqueness.
+// count the amount by which the frequency of the item should be increased.
+// An count of zero is a no-op, and a negative count will throw an exception.
 func (s *LongsSketch) UpdateMany(item int64, count int64) error {
        if count == 0 {
                return nil
@@ -330,7 +439,7 @@ func (s *LongsSketch) UpdateMany(item int64, count int64) 
error {
                } else {
                        // At tgt size, must purge
                        s.offset += s.hashMap.purge(s.sampleSize)
-                       if s.getNumActiveItems() > s.getMaximumMapCapacity() {
+                       if s.GetNumActiveItems() > s.GetMaximumMapCapacity() {
                                return fmt.Errorf("purge did not reduce active 
items")
                        }
                }
@@ -338,23 +447,17 @@ func (s *LongsSketch) UpdateMany(item int64, count int64) 
error {
        return nil
 }
 
-func (s *LongsSketch) String() string {
-       var sb strings.Builder
-       sb.WriteString("FrequentLongsSketch:")
-       sb.WriteString("\n")
-       sb.WriteString("  Stream Length    : " + 
strconv.FormatInt(s.streamWeight, 10))
-       sb.WriteString("\n")
-       sb.WriteString("  Max Error Offset : " + strconv.FormatInt(s.offset, 
10))
-       sb.WriteString("\n")
-       sb.WriteString(s.hashMap.String())
-       return sb.String()
-}
-
-func (s *LongsSketch) merge(other *LongsSketch) (*LongsSketch, error) {
-       if other == nil || other.isEmpty() {
+// Merge merges the other sketch into this one. The other sketch may be of a 
different size.
+//
+// other sketch of this class
+//
+// return a sketch whose estimates are within the guarantees of the largest 
error tolerance
+// of the two merged sketches.
+func (s *LongsSketch) Merge(other *LongsSketch) (*LongsSketch, error) {
+       if other == nil || other.IsEmpty() {
                return s, nil
        }
-       streamWt := s.streamWeight + other.streamWeight //capture before merge
+       streamWt := s.streamWeight + other.streamWeight //capture before Merge
        iter := other.hashMap.iterator()
        for iter.next() {
                err := s.UpdateMany(iter.getKey(), iter.getValue())
@@ -367,7 +470,8 @@ func (s *LongsSketch) merge(other *LongsSketch) 
(*LongsSketch, error) {
        return s, nil
 }
 
-func (s *LongsSketch) serializeToString() (string, error) {
+// ToString returns a String representation of this sketch
+func (s *LongsSketch) ToString() (string, error) {
        var sb strings.Builder
        //start the string with parameters of the sketch
        serVer := _SER_VER //0
@@ -385,9 +489,10 @@ func (s *LongsSketch) serializeToString() (string, error) {
        return sb.String(), nil
 }
 
-func (s *LongsSketch) toSlice() ([]byte, error) {
-       emtpy := s.isEmpty()
-       activeItems := s.getNumActiveItems()
+// ToSlice returns a slice representation of this sketch
+func (s *LongsSketch) ToSlice() ([]byte, error) {
+       emtpy := s.IsEmpty()
+       activeItems := s.GetNumActiveItems()
        preLongs := 1
        outBytes := 8
        if !emtpy {
@@ -428,6 +533,7 @@ func (s *LongsSketch) toSlice() ([]byte, error) {
        return outArr, nil
 }
 
+// Reset resets this sketch to a virgin state.
 func (s *LongsSketch) Reset() {
        hasMap, _ := NewReversePurgeLongHashMap(1 << _LG_MIN_MAP_SIZE)
        s.curMapCap = hasMap.getCapacity()
@@ -436,20 +542,14 @@ func (s *LongsSketch) Reset() {
        s.hashMap = hasMap
 }
 
-/*
-  public Row[] getFrequentItems(final long threshold, final ErrorType 
errorType) {
-    return sortItems(threshold > getMaximumError() ? threshold : 
getMaximumError(), errorType);
-  }
-*/
-
-func (s *LongsSketch) getFrequentItemsWithThreshold(threshold int64, errorType 
ErrorType) ([]*Row, error) {
-       finalThreshold := s.getMaximumError()
-       if threshold > finalThreshold {
-               finalThreshold = threshold
-       }
-       return sortItems(s, finalThreshold, errorType)
-}
-
-func (s *LongsSketch) getFrequentItems(errorType ErrorType) ([]*Row, error) {
-       return sortItems(s, s.getMaximumError(), errorType)
+func (s *LongsSketch) String() string {
+       var sb strings.Builder
+       sb.WriteString("FrequentLongsSketch:")
+       sb.WriteString("\n")
+       sb.WriteString("  Stream Length    : " + 
strconv.FormatInt(s.streamWeight, 10))
+       sb.WriteString("\n")
+       sb.WriteString("  Max Error Offset : " + strconv.FormatInt(s.offset, 
10))
+       sb.WriteString("\n")
+       sb.WriteString(s.hashMap.String())
+       return sb.String()
 }
diff --git a/frequencies/longs_sketch_serialization_test.go 
b/frequencies/longs_sketch_serialization_test.go
new file mode 100644
index 0000000..c54351d
--- /dev/null
+++ b/frequencies/longs_sketch_serialization_test.go
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package frequencies
+
+import (
+       "fmt"
+       "github.com/apache/datasketches-go/common/testutils"
+       "github.com/stretchr/testify/assert"
+       "os"
+       "testing"
+)
+
+func TestGenerateGoBinariesForCompatibilityTestingLongsSketch(t *testing.T) {
+       if len(os.Getenv(testutils.DSketchTestGenerateGo)) == 0 {
+               t.Skipf("%s not set", testutils.DSketchTestGenerateGo)
+       }
+
+       nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
+       for _, n := range nArr {
+               sk, err := NewLongsSketchWithMaxMapSize(64)
+               assert.NoError(t, err)
+               for i := 1; i <= n; i++ {
+                       err = sk.Update(int64(i))
+                       assert.NoError(t, err)
+               }
+               if n == 0 {
+                       assert.True(t, sk.IsEmpty())
+               } else {
+                       assert.False(t, sk.IsEmpty())
+               }
+               if n > 10 {
+                       assert.True(t, sk.GetMaximumError() > 0)
+               } else {
+                       assert.Equal(t, sk.GetMaximumError(), int64(0))
+               }
+               err = os.MkdirAll(testutils.GoPath, os.ModePerm)
+               assert.NoError(t, err)
+
+               slc, err := sk.ToSlice()
+               assert.NoError(t, err)
+               err = os.WriteFile(fmt.Sprintf("%s/frequent_long_n%d_go.sk", 
testutils.GoPath, n), slc, 0644)
+               if err != nil {
+                       t.Errorf("err != nil")
+               }
+       }
+}
+
+func TestGoCompat(t *testing.T) {
+       if len(os.Getenv(testutils.DSketchTestCrossGo)) == 0 {
+               t.Skipf("%s not set", testutils.DSketchTestCrossGo)
+       }
+
+       nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
+       for _, n := range nArr {
+               bytes, err := 
os.ReadFile(fmt.Sprintf("%s/frequent_long_n%d_go.sk", testutils.GoPath, n))
+               assert.NoError(t, err)
+               sketch, err := NewLongsSketchFromSlice(bytes)
+               if err != nil {
+                       return
+               }
+
+               if n == 0 {
+                       assert.True(t, sketch.IsEmpty())
+               } else {
+                       assert.False(t, sketch.IsEmpty())
+               }
+               if n > 10 {
+                       assert.True(t, sketch.GetMaximumError() > 0)
+               } else {
+                       assert.Equal(t, sketch.GetMaximumError(), int64(0))
+               }
+               assert.Equal(t, sketch.GetStreamLength(), int64(n))
+       }
+}
+
+func TestJavaCompat(t *testing.T) {
+       if len(os.Getenv(testutils.DSketchTestCrossJava)) == 0 {
+               t.Skipf("%s not set", testutils.DSketchTestCrossJava)
+       }
+
+       nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
+       for _, n := range nArr {
+               bytes, err := 
os.ReadFile(fmt.Sprintf("%s/frequent_long_n%d_java.sk", testutils.JavaPath, n))
+               assert.NoError(t, err)
+               sketch, err := NewLongsSketchFromSlice(bytes)
+               if err != nil {
+                       return
+               }
+
+               if n == 0 {
+                       assert.True(t, sketch.IsEmpty())
+               } else {
+                       assert.False(t, sketch.IsEmpty())
+               }
+               if n > 10 {
+                       assert.True(t, sketch.GetMaximumError() > 0)
+               } else {
+                       assert.Equal(t, sketch.GetMaximumError(), int64(0))
+               }
+               assert.Equal(t, sketch.GetStreamLength(), int64(n))
+       }
+}
+
+func TestCppCompat(t *testing.T) {
+       if len(os.Getenv(testutils.DSketchTestCrossCpp)) == 0 {
+               t.Skipf("%s not set", testutils.DSketchTestCrossCpp)
+       }
+
+       nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
+       for _, n := range nArr {
+               bytes, err := 
os.ReadFile(fmt.Sprintf("%s/frequent_long_n%d_cpp.sk", testutils.CppPath, n))
+               assert.NoError(t, err)
+               sketch, err := NewLongsSketchFromSlice(bytes)
+               if err != nil {
+                       return
+               }
+
+               if n == 0 {
+                       assert.True(t, sketch.IsEmpty())
+               } else {
+                       assert.False(t, sketch.IsEmpty())
+               }
+               if n > 10 {
+                       assert.True(t, sketch.GetMaximumError() > 0)
+               } else {
+                       assert.Equal(t, sketch.GetMaximumError(), int64(0))
+               }
+               assert.Equal(t, sketch.GetStreamLength(), int64(n))
+       }
+}
diff --git a/frequencies/longs_sketch_test.go b/frequencies/longs_sketch_test.go
index 20c9898..895c18b 100644
--- a/frequencies/longs_sketch_test.go
+++ b/frequencies/longs_sketch_test.go
@@ -27,9 +27,9 @@ import (
 )
 
 func TestFrequentItemsStringSerial(t *testing.T) {
-       sketch, err := NewLongSketchWithMaxMapSize(8)
+       sketch, err := NewLongsSketchWithMaxMapSize(8)
        assert.NoError(t, err)
-       sketch2, err := NewLongSketchWithMaxMapSize(128)
+       sketch2, err := NewLongsSketchWithMaxMapSize(128)
        assert.NoError(t, err)
        sketch.UpdateMany(10, 100)
        sketch.UpdateMany(10, 100)
@@ -37,15 +37,15 @@ func TestFrequentItemsStringSerial(t *testing.T) {
        sketch.UpdateMany(1000001, 1010230)
        sketch.UpdateMany(1000002, 1010230)
 
-       ser, err := sketch.serializeToString()
+       ser, err := sketch.ToString()
        assert.NoError(t, err)
-       newSk0, err := NewLongSketchFromString(ser)
+       newSk0, err := NewLongsSketchFromString(ser)
        assert.NoError(t, err)
-       newSer0, err := newSk0.serializeToString()
+       newSer0, err := newSk0.ToString()
        assert.NoError(t, err)
        assert.Equal(t, ser, newSer0)
-       assert.Equal(t, sketch.getMaximumMapCapacity(), 
newSk0.getMaximumMapCapacity())
-       assert.Equal(t, sketch.getCurrentMapCapacity(), 
newSk0.getCurrentMapCapacity())
+       assert.Equal(t, sketch.GetMaximumMapCapacity(), 
newSk0.GetMaximumMapCapacity())
+       assert.Equal(t, sketch.GetCurrentMapCapacity(), 
newSk0.GetCurrentMapCapacity())
 
        sketch2.UpdateMany(190, 12902390)
        sketch2.UpdateMany(191, 12902390)
@@ -67,44 +67,44 @@ func TestFrequentItemsStringSerial(t *testing.T) {
        sketch2.UpdateMany(207, 12902390)
        sketch2.UpdateMany(208, 12902390)
 
-       s2, err := sketch2.serializeToString()
+       s2, err := sketch2.ToString()
        assert.NoError(t, err)
-       newSk2, err := NewLongSketchFromString(s2)
+       newSk2, err := NewLongsSketchFromString(s2)
        assert.NoError(t, err)
-       newS2, err := newSk2.serializeToString()
+       newS2, err := newSk2.ToString()
        assert.NoError(t, err)
        assert.Equal(t, s2, newS2)
-       assert.Equal(t, sketch2.getMaximumMapCapacity(), 
newSk2.getMaximumMapCapacity())
-       assert.Equal(t, sketch2.getCurrentMapCapacity(), 
newSk2.getCurrentMapCapacity())
-       assert.Equal(t, sketch2.getStreamLength(), newSk2.getStreamLength())
+       assert.Equal(t, sketch2.GetMaximumMapCapacity(), 
newSk2.GetMaximumMapCapacity())
+       assert.Equal(t, sketch2.GetCurrentMapCapacity(), 
newSk2.GetCurrentMapCapacity())
+       assert.Equal(t, sketch2.GetStreamLength(), newSk2.GetStreamLength())
 
-       mergedSketch, err := sketch.merge(sketch2)
+       mergedSketch, err := sketch.Merge(sketch2)
        assert.NoError(t, err)
-       ms, err := mergedSketch.serializeToString()
+       ms, err := mergedSketch.ToString()
        assert.NoError(t, err)
-       newMs, err := NewLongSketchFromString(ms)
+       newMs, err := NewLongsSketchFromString(ms)
        assert.NoError(t, err)
-       newSMs, err := newMs.serializeToString()
+       newSMs, err := newMs.ToString()
        assert.NoError(t, err)
        assert.Equal(t, ms, newSMs)
-       assert.Equal(t, mergedSketch.getMaximumMapCapacity(), 
newMs.getMaximumMapCapacity())
-       assert.Equal(t, mergedSketch.getCurrentMapCapacity(), 
newMs.getCurrentMapCapacity())
-       assert.Equal(t, mergedSketch.getStreamLength(), newMs.getStreamLength())
+       assert.Equal(t, mergedSketch.GetMaximumMapCapacity(), 
newMs.GetMaximumMapCapacity())
+       assert.Equal(t, mergedSketch.GetCurrentMapCapacity(), 
newMs.GetCurrentMapCapacity())
+       assert.Equal(t, mergedSketch.GetStreamLength(), newMs.GetStreamLength())
 }
 
 func TestFrequentItemsByteSerial(t *testing.T) {
-       sketch, err := NewLongSketchWithMaxMapSize(16)
+       sketch, err := NewLongsSketchWithMaxMapSize(16)
        assert.NoError(t, err)
-       byteArray0, err := sketch.toSlice()
-       newSk0, err := NewLongSketchFromSlice(byteArray0)
+       byteArray0, err := sketch.ToSlice()
+       newSk0, err := NewLongsSketchFromSlice(byteArray0)
        assert.NoError(t, err)
-       str0, err := sketch.serializeToString()
+       str0, err := sketch.ToString()
        assert.NoError(t, err)
-       newStr0, err := newSk0.serializeToString()
+       newStr0, err := newSk0.ToString()
        assert.NoError(t, err)
        assert.Equal(t, str0, newStr0)
 
-       sketch2, err := NewLongSketchWithMaxMapSize(128)
+       sketch2, err := NewLongsSketchWithMaxMapSize(128)
        assert.NoError(t, err)
        sketch.UpdateMany(10, 100)
        sketch.UpdateMany(10, 100)
@@ -112,16 +112,16 @@ func TestFrequentItemsByteSerial(t *testing.T) {
        sketch.UpdateMany(1000001, 1010230)
        sketch.UpdateMany(1000002, 1010230)
 
-       byteArray1, err := sketch.toSlice()
+       byteArray1, err := sketch.ToSlice()
        assert.NoError(t, err)
-       newSk1, err := NewLongSketchFromSlice(byteArray1)
+       newSk1, err := NewLongsSketchFromSlice(byteArray1)
        assert.NoError(t, err)
-       str1, err := sketch.serializeToString()
-       newStr1, err := newSk1.serializeToString()
+       str1, err := sketch.ToString()
+       newStr1, err := newSk1.ToString()
        assert.NoError(t, err)
        assert.Equal(t, str1, newStr1)
-       assert.Equal(t, sketch.getMaximumMapCapacity(), 
newSk1.getMaximumMapCapacity())
-       assert.Equal(t, sketch.getCurrentMapCapacity(), 
newSk1.getCurrentMapCapacity())
+       assert.Equal(t, sketch.GetMaximumMapCapacity(), 
newSk1.GetMaximumMapCapacity())
+       assert.Equal(t, sketch.GetCurrentMapCapacity(), 
newSk1.GetCurrentMapCapacity())
 
        sketch2.UpdateMany(190, 12902390)
        sketch2.UpdateMany(191, 12902390)
@@ -143,37 +143,37 @@ func TestFrequentItemsByteSerial(t *testing.T) {
        sketch2.UpdateMany(207, 12902390)
        sketch2.UpdateMany(208, 12902390)
 
-       byteArray2, err := sketch2.toSlice()
+       byteArray2, err := sketch2.ToSlice()
        assert.NoError(t, err)
-       newSk2, err := NewLongSketchFromSlice(byteArray2)
+       newSk2, err := NewLongsSketchFromSlice(byteArray2)
        assert.NoError(t, err)
-       str2, err := sketch2.serializeToString()
+       str2, err := sketch2.ToString()
        assert.NoError(t, err)
-       newStr2, err := newSk2.serializeToString()
+       newStr2, err := newSk2.ToString()
        assert.NoError(t, err)
        assert.Equal(t, str2, newStr2)
-       assert.Equal(t, sketch2.getMaximumMapCapacity(), 
newSk2.getMaximumMapCapacity())
-       assert.Equal(t, sketch2.getCurrentMapCapacity(), 
newSk2.getCurrentMapCapacity())
-       assert.Equal(t, sketch2.getStreamLength(), newSk2.getStreamLength())
+       assert.Equal(t, sketch2.GetMaximumMapCapacity(), 
newSk2.GetMaximumMapCapacity())
+       assert.Equal(t, sketch2.GetCurrentMapCapacity(), 
newSk2.GetCurrentMapCapacity())
+       assert.Equal(t, sketch2.GetStreamLength(), newSk2.GetStreamLength())
 
-       mergedSketch, err := sketch.merge(sketch2)
+       mergedSketch, err := sketch.Merge(sketch2)
        assert.NoError(t, err)
-       byteArray3, err := mergedSketch.toSlice()
+       byteArray3, err := mergedSketch.ToSlice()
        assert.NoError(t, err)
-       newSk3, err := NewLongSketchFromSlice(byteArray3)
+       newSk3, err := NewLongsSketchFromSlice(byteArray3)
        assert.NoError(t, err)
-       str3, err := mergedSketch.serializeToString()
+       str3, err := mergedSketch.ToString()
        assert.NoError(t, err)
-       newStr3, err := newSk3.serializeToString()
+       newStr3, err := newSk3.ToString()
        assert.NoError(t, err)
        assert.Equal(t, str3, newStr3)
-       assert.Equal(t, mergedSketch.getMaximumMapCapacity(), 
newSk3.getMaximumMapCapacity())
-       assert.Equal(t, mergedSketch.getCurrentMapCapacity(), 
newSk3.getCurrentMapCapacity())
-       assert.Equal(t, mergedSketch.getStreamLength(), 
newSk3.getStreamLength())
+       assert.Equal(t, mergedSketch.GetMaximumMapCapacity(), 
newSk3.GetMaximumMapCapacity())
+       assert.Equal(t, mergedSketch.GetCurrentMapCapacity(), 
newSk3.GetCurrentMapCapacity())
+       assert.Equal(t, mergedSketch.GetStreamLength(), 
newSk3.GetStreamLength())
 }
 
 func TestFrequentItemsByteResetAndEmptySerial(t *testing.T) {
-       sketch, err := NewLongSketchWithMaxMapSize(16)
+       sketch, err := NewLongsSketchWithMaxMapSize(16)
        assert.NoError(t, err)
        sketch.UpdateMany(10, 100)
        sketch.UpdateMany(10, 100)
@@ -182,22 +182,22 @@ func TestFrequentItemsByteResetAndEmptySerial(t 
*testing.T) {
        sketch.UpdateMany(1000002, 1010230)
        sketch.Reset()
 
-       byteArray0, err := sketch.toSlice()
+       byteArray0, err := sketch.ToSlice()
        assert.NoError(t, err)
-       newSk0, err := NewLongSketchFromSlice(byteArray0)
+       newSk0, err := NewLongsSketchFromSlice(byteArray0)
        assert.NoError(t, err)
-       str0, err := sketch.serializeToString()
+       str0, err := sketch.ToString()
        assert.NoError(t, err)
-       newStr0, err := newSk0.serializeToString()
+       newStr0, err := newSk0.ToString()
        assert.NoError(t, err)
        assert.Equal(t, str0, newStr0)
-       assert.Equal(t, sketch.getMaximumMapCapacity(), 
newSk0.getMaximumMapCapacity())
-       assert.Equal(t, sketch.getCurrentMapCapacity(), 
newSk0.getCurrentMapCapacity())
+       assert.Equal(t, sketch.GetMaximumMapCapacity(), 
newSk0.GetMaximumMapCapacity())
+       assert.Equal(t, sketch.GetCurrentMapCapacity(), 
newSk0.GetCurrentMapCapacity())
 }
 
 func TestFreqLongSliceSerDe(t *testing.T) {
        minSize := 1 << _LG_MIN_MAP_SIZE
-       sk1, err := NewLongSketchWithMaxMapSize(minSize)
+       sk1, err := NewLongsSketchWithMaxMapSize(minSize)
        assert.NoError(t, err)
        sk1.UpdateMany(10, 100)
        sk1.UpdateMany(10, 100)
@@ -205,9 +205,9 @@ func TestFreqLongSliceSerDe(t *testing.T) {
        sk1.UpdateMany(1000001, 1010230)
        sk1.UpdateMany(1000002, 1010230)
 
-       byteArray0, err := sk1.toSlice()
+       byteArray0, err := sk1.ToSlice()
        assert.NoError(t, err)
-       sk2, err := NewLongSketchFromSlice(byteArray0)
+       sk2, err := NewLongsSketchFromSlice(byteArray0)
        assert.NoError(t, err)
 
        checkEquality(t, sk1, sk2)
@@ -215,7 +215,7 @@ func TestFreqLongSliceSerDe(t *testing.T) {
 
 func TestFreqLongStringSerDe(t *testing.T) {
        minSize := 1 << _LG_MIN_MAP_SIZE
-       sk1, err := NewLongSketchWithMaxMapSize(minSize)
+       sk1, err := NewLongsSketchWithMaxMapSize(minSize)
        assert.NoError(t, err)
        sk1.UpdateMany(10, 100)
        sk1.UpdateMany(10, 100)
@@ -223,29 +223,29 @@ func TestFreqLongStringSerDe(t *testing.T) {
        sk1.UpdateMany(1000001, 1010230)
        sk1.UpdateMany(1000002, 1010230)
 
-       str1, err := sk1.serializeToString()
+       str1, err := sk1.ToString()
        assert.NoError(t, err)
-       sk2, err := NewLongSketchFromString(str1)
+       sk2, err := NewLongsSketchFromString(str1)
        assert.NoError(t, err)
 
        checkEquality(t, sk1, sk2)
 }
 
 func checkEquality(t *testing.T, sk1, sk2 *LongsSketch) {
-       assert.Equal(t, sk1.getNumActiveItems(), sk2.getNumActiveItems())
-       assert.Equal(t, sk1.getCurrentMapCapacity(), 
sk2.getCurrentMapCapacity())
-       assert.Equal(t, sk1.getMaximumError(), sk2.getMaximumError())
-       assert.Equal(t, sk1.getMaximumMapCapacity(), 
sk2.getMaximumMapCapacity())
-       assert.Equal(t, sk1.getStorageBytes(), sk2.getStorageBytes())
-       assert.Equal(t, sk1.getStreamLength(), sk2.getStreamLength())
-       assert.Equal(t, sk1.isEmpty(), sk2.isEmpty())
+       assert.Equal(t, sk1.GetNumActiveItems(), sk2.GetNumActiveItems())
+       assert.Equal(t, sk1.GetCurrentMapCapacity(), 
sk2.GetCurrentMapCapacity())
+       assert.Equal(t, sk1.GetMaximumError(), sk2.GetMaximumError())
+       assert.Equal(t, sk1.GetMaximumMapCapacity(), 
sk2.GetMaximumMapCapacity())
+       assert.Equal(t, sk1.GetStorageBytes(), sk2.GetStorageBytes())
+       assert.Equal(t, sk1.GetStreamLength(), sk2.GetStreamLength())
+       assert.Equal(t, sk1.IsEmpty(), sk2.IsEmpty())
 
        NFN := ErrorTypeEnum.NO_FALSE_NEGATIVES
        NFP := ErrorTypeEnum.NO_FALSE_POSITIVES
 
-       rowArr1, err := sk1.getFrequentItems(NFN)
+       rowArr1, err := sk1.GetFrequentItems(NFN)
        assert.NoError(t, err)
-       rowArr2, err := sk2.getFrequentItems(NFN)
+       rowArr2, err := sk2.GetFrequentItems(NFN)
        assert.NoError(t, err)
        assert.Equal(t, len(rowArr1), len(rowArr2))
        for i := 0; i < len(rowArr1); i++ {
@@ -254,9 +254,9 @@ func checkEquality(t *testing.T, sk1, sk2 *LongsSketch) {
                assert.Equal(t, s1, s2)
        }
 
-       rowArr1, err = sk1.getFrequentItems(NFP)
+       rowArr1, err = sk1.GetFrequentItems(NFP)
        assert.NoError(t, err)
-       rowArr2, err = sk2.getFrequentItems(NFP)
+       rowArr2, err = sk2.GetFrequentItems(NFP)
        assert.NoError(t, err)
        assert.Equal(t, len(rowArr1), len(rowArr2))
        for i := 0; i < len(rowArr1); i++ {
@@ -268,11 +268,11 @@ func checkEquality(t *testing.T, sk1, sk2 *LongsSketch) {
 
 func TestFreqLongSliceSerDeError(t *testing.T) {
        minSize := 1 << _LG_MIN_MAP_SIZE
-       sk1, err := NewLongSketchWithMaxMapSize(minSize)
+       sk1, err := NewLongsSketchWithMaxMapSize(minSize)
        assert.NoError(t, err)
        sk1.Update(1)
 
-       byteArray0, err := sk1.toSlice()
+       byteArray0, err := sk1.ToSlice()
        pre0 := binary.LittleEndian.Uint64(byteArray0)
 
        tryBadMem(t, byteArray0, _PREAMBLE_LONGS_BYTE, 2) //Corrupt
@@ -291,13 +291,13 @@ func TestFreqLongSliceSerDeError(t *testing.T) {
 
 func tryBadMem(t *testing.T, mem []byte, byteOffset, byteValue int) {
        binary.LittleEndian.PutUint64(mem[byteOffset:], uint64(byteValue))
-       _, err := NewLongSketchFromSlice(mem)
+       _, err := NewLongsSketchFromSlice(mem)
        assert.Error(t, err)
 }
 
 func TestFreqLongStringSerDeError(t *testing.T) {
-       // sk1, err := NewLongSketchWithMaxMapSize(8)
-       // str1 := sk1.serializeToString()
+       // sk1, err := NewLongsSketchWithMaxMapSize(8)
+       // str1 := sk1.ToString()
        // correct   = "1,10,2,4,0,0,0,4,";
 
        tryBadString(t, "2,10,2,4,0,0,0,4,")   //bad SerVer of 2
@@ -306,7 +306,7 @@ func TestFreqLongStringSerDeError(t *testing.T) {
 }
 
 func tryBadString(t *testing.T, badString string) {
-       _, err := NewLongSketchFromString(badString)
+       _, err := NewLongsSketchFromString(badString)
        assert.Error(t, err)
 }
 
@@ -329,27 +329,27 @@ func TestFreqLongs(t *testing.T) {
        }
 
        for h := 0; h < numSketches; h++ {
-               threshold := sketches[h].getMaximumError()
-               rows, err := 
sketches[h].getFrequentItems(ErrorTypeEnum.NO_FALSE_NEGATIVES)
+               threshold := sketches[h].GetMaximumError()
+               rows, err := 
sketches[h].GetFrequentItems(ErrorTypeEnum.NO_FALSE_NEGATIVES)
                assert.NoError(t, err)
                for i := 0; i < len(rows); i++ {
-                       assert.True(t, rows[i].getUpperBound() > threshold)
+                       assert.True(t, rows[i].GetUpperBound() > threshold)
                }
 
-               rows, err = 
sketches[h].getFrequentItems(ErrorTypeEnum.NO_FALSE_POSITIVES)
+               rows, err = 
sketches[h].GetFrequentItems(ErrorTypeEnum.NO_FALSE_POSITIVES)
                assert.NoError(t, err)
                assert.Equal(t, len(rows), 0)
                for i := 0; i < len(rows); i++ {
-                       assert.True(t, rows[i].getLowerBound() > threshold)
+                       assert.True(t, rows[i].GetLowerBound() > threshold)
                }
 
-               rows, err = 
sketches[h].getFrequentItems(ErrorTypeEnum.NO_FALSE_NEGATIVES)
+               rows, err = 
sketches[h].GetFrequentItems(ErrorTypeEnum.NO_FALSE_NEGATIVES)
        }
 }
 
 func newFrequencySketch(eps float64) (*LongsSketch, error) {
        maxMapSize := common.CeilPowerOf2(int(1.0 / (eps * 
reversePurgeLongHashMapLoadFactor)))
-       return NewLongSketchWithMaxMapSize(maxMapSize)
+       return NewLongsSketchWithMaxMapSize(maxMapSize)
 }
 
 func TestUpdateOneTime(t *testing.T) {
@@ -359,35 +359,35 @@ func TestUpdateOneTime(t *testing.T) {
        numSketches := 1
        for h := 0; h < numSketches; h++ {
                sketch, _ := newFrequencySketch(errorTolerance)
-               ub, err := sketch.getUpperBound(13)
+               ub, err := sketch.GetUpperBound(13)
                assert.NoError(t, err)
                assert.Equal(t, ub, int64(0))
-               lb, err := sketch.getLowerBound(13)
+               lb, err := sketch.GetLowerBound(13)
                assert.NoError(t, err)
                assert.Equal(t, lb, int64(0))
-               assert.Equal(t, sketch.getMaximumError(), int64(0))
-               est, err := sketch.getEstimate(13)
+               assert.Equal(t, sketch.GetMaximumError(), int64(0))
+               est, err := sketch.GetEstimate(13)
                assert.NoError(t, err)
                assert.Equal(t, est, int64(0))
                sketch.Update(13)
-               // assert.Equal(t, sketch.getEstimate(13), 1)
+               // assert.Equal(t, sketch.GetEstimate(13), 1)
        }
 }
 
 func TestGetInstanceSlice(t *testing.T) {
        sl := make([]byte, 4)
-       _, err := NewLongSketchFromSlice(sl)
+       _, err := NewLongsSketchFromSlice(sl)
        assert.Error(t, err)
 }
 
 func TestGetInstanceString(t *testing.T) {
-       _, err := NewLongSketchFromString("")
+       _, err := NewLongsSketchFromString("")
        assert.Error(t, err)
 }
 
 func TestUpdateNegative(t *testing.T) {
        minSize := 1 << _LG_MIN_MAP_SIZE
-       fls, err := NewLongSketchWithMaxMapSize(minSize)
+       fls, err := NewLongsSketchWithMaxMapSize(minSize)
        assert.NoError(t, err)
        err = fls.UpdateMany(1, 0)
        assert.NoError(t, err)
@@ -397,10 +397,10 @@ func TestUpdateNegative(t *testing.T) {
 
 func TestGetFrequentItems1(t *testing.T) {
        minSize := 1 << _LG_MIN_MAP_SIZE
-       fls, err := NewLongSketchWithMaxMapSize(minSize)
+       fls, err := NewLongsSketchWithMaxMapSize(minSize)
        assert.NoError(t, err)
        fls.Update(1)
-       rowArr, err := fls.getFrequentItems(ErrorTypeEnum.NO_FALSE_POSITIVES)
+       rowArr, err := fls.GetFrequentItems(ErrorTypeEnum.NO_FALSE_POSITIVES)
        assert.NoError(t, err)
        assert.NotEmpty(t, rowArr)
        row := rowArr[0]
@@ -417,44 +417,44 @@ func TestGetFrequentItems1(t *testing.T) {
 
 func TestGetStorageByes(t *testing.T) {
        minSize := 1 << _LG_MIN_MAP_SIZE
-       fls, err := NewLongSketchWithMaxMapSize(minSize)
+       fls, err := NewLongsSketchWithMaxMapSize(minSize)
        assert.NoError(t, err)
-       sl, err := fls.toSlice()
+       sl, err := fls.ToSlice()
        assert.NoError(t, err)
-       assert.Equal(t, len(sl), fls.getStorageBytes())
+       assert.Equal(t, len(sl), fls.GetStorageBytes())
        err = fls.Update(1)
        assert.NoError(t, err)
-       sl, err = fls.toSlice()
+       sl, err = fls.ToSlice()
        assert.NoError(t, err)
-       assert.Equal(t, len(sl), fls.getStorageBytes())
+       assert.Equal(t, len(sl), fls.GetStorageBytes())
 }
 
 func TestDeSerFromString(t *testing.T) {
        minSize := 1 << _LG_MIN_MAP_SIZE
-       fls, err := NewLongSketchWithMaxMapSize(minSize)
+       fls, err := NewLongsSketchWithMaxMapSize(minSize)
        assert.NoError(t, err)
-       str, err := fls.serializeToString()
+       str, err := fls.ToString()
        fmt.Println(str)
        assert.NoError(t, err)
        fls.Update(1)
-       str, err = fls.serializeToString()
+       str, err = fls.ToString()
        assert.NoError(t, err)
        fmt.Println(str)
 }
 
 func TestMerge(t *testing.T) {
        minSize := 1 << _LG_MIN_MAP_SIZE
-       fls1, err := NewLongSketchWithMaxMapSize(minSize)
+       fls1, err := NewLongsSketchWithMaxMapSize(minSize)
        assert.NoError(t, err)
        var fls2 *LongsSketch
        fls2 = nil
-       fle, err := fls1.merge(fls2)
+       fle, err := fls1.Merge(fls2)
        assert.NoError(t, err)
-       assert.True(t, fle.isEmpty())
+       assert.True(t, fle.IsEmpty())
 
-       fls2, err = NewLongSketchWithMaxMapSize(minSize)
+       fls2, err = NewLongsSketchWithMaxMapSize(minSize)
        assert.NoError(t, err)
-       fle, err = fls1.merge(fls2)
+       fle, err = fls1.Merge(fls2)
        assert.NoError(t, err)
 }
 
@@ -480,11 +480,11 @@ func TestSortItems(t *testing.T) {
        }
 
        for h := 0; h < numSketches; h++ {
-               threshold := sketches[h].getMaximumError()
-               rows, err := 
sketches[h].getFrequentItems(ErrorTypeEnum.NO_FALSE_NEGATIVES)
+               threshold := sketches[h].GetMaximumError()
+               rows, err := 
sketches[h].GetFrequentItems(ErrorTypeEnum.NO_FALSE_NEGATIVES)
                assert.NoError(t, err)
                for i := 0; i < len(rows); i++ {
-                       assert.True(t, rows[i].getUpperBound() > threshold)
+                       assert.True(t, rows[i].GetUpperBound() > threshold)
                }
                first := rows[0]
                anItem := first.item
@@ -514,7 +514,7 @@ func TestToString1(t *testing.T) {
 
 func printSketch(t *testing.T, size int, items []int64) {
        var sb strings.Builder
-       fls, err := NewLongSketchWithMaxMapSize(size)
+       fls, err := NewLongsSketchWithMaxMapSize(size)
        assert.NoError(t, err)
        for i := 0; i < len(items); i++ {
                fls.UpdateMany(int64(i+1), items[i])
@@ -529,7 +529,7 @@ func printSketch(t *testing.T, size int, items []int64) {
 }
 
 func printRows(t *testing.T, fls *LongsSketch, errorType ErrorType) {
-       rows, err := fls.getFrequentItems(errorType)
+       rows, err := fls.GetFrequentItems(errorType)
        assert.NoError(t, err)
        fmt.Println(errorType.Name)
        fmt.Printf("  %20s%20s%20s %s", "Est", "UB", "LB", "Item")
@@ -544,3 +544,52 @@ func printRows(t *testing.T, fls *LongsSketch, errorType 
ErrorType) {
                assert.NotEqual(t, rows[0], nullRow)
        }
 }
+
+func TestStringDeserEmptyNotCorrupt(t *testing.T) {
+       size := 1 << _LG_MIN_MAP_SIZE
+       thresh := (size * 3) / 4
+       format := "%6d%10s%s"
+       fls, err := NewLongsSketchWithMaxMapSize(size)
+       assert.NoError(t, err)
+       fmt.Printf("Sketch Size: %d\n", size)
+       for i := 0; i <= thresh; i++ {
+               err := fls.UpdateMany(int64(i+1), 1)
+               assert.NoError(t, err)
+               s, err := fls.ToString()
+               assert.NoError(t, err)
+               fmt.Printf("SER   "+format+"\n", (i + 1), fmt.Sprintf("%t : ", 
fls.IsEmpty()), s)
+               fls2, err := NewLongsSketchFromString(s)
+               assert.NoError(t, err)
+               fmt.Printf("DESER "+format+"\n", (i + 1), fmt.Sprintf("%t : ", 
fls2.IsEmpty()), s)
+       }
+}
+
+func TestStringDeserEmptyCorrupt(t *testing.T) {
+       var s strings.Builder
+       s.WriteString("1,")  //serVer
+       s.WriteString("10,") //FamID
+       s.WriteString("3,")  //lgMaxMapSz
+       s.WriteString("0,")  //Empty Flag = false ... corrupted, should be true
+       s.WriteString("7,")  //stream Len so far
+       s.WriteString("1,")  //error offset
+       s.WriteString("0,")  //numActive ...conflict with empty
+       s.WriteString("8,")  //curMapLen
+       _, err := NewLongsSketchFromString(s.String())
+       assert.Error(t, err)
+}
+
+func TestGetEpsilon(t *testing.T) {
+       eps, err := GetEpsilonLongsSketch(1024)
+       assert.NoError(t, err)
+       assert.Equal(t, eps, 3.5/1024)
+
+       _, err = GetEpsilonLongsSketch(1000)
+       assert.Error(t, err)
+}
+
+func TestGetAprioriError(t *testing.T) {
+       eps := 3.5 / 1024
+       apr, err := GetAprioriErrorLongsSketch(1024, 10_000)
+       assert.NoError(t, err)
+       assert.Equal(t, apr, eps*10_000)
+}
diff --git a/frequencies/row.go b/frequencies/row.go
index ad105ef..696193d 100644
--- a/frequencies/row.go
+++ b/frequencies/row.go
@@ -42,15 +42,15 @@ func (r *Row) String() string {
        return fmt.Sprintf("  %20d%20d%20d %d", r.est, r.ub, r.lb, r.item)
 }
 
-func (r *Row) getEstimate() int64 {
+func (r *Row) GetEstimate() int64 {
        return r.est
 }
 
-func (r *Row) getUpperBound() int64 {
+func (r *Row) GetUpperBound() int64 {
        return r.ub
 }
 
-func (r *Row) getLowerBound() int64 {
+func (r *Row) GetLowerBound() int64 {
        return r.lb
 }
 
@@ -59,15 +59,15 @@ func sortItems(sk *LongsSketch, threshold int64, errorType 
ErrorType) ([]*Row, e
        iter := sk.hashMap.iterator()
        if errorType == ErrorTypeEnum.NO_FALSE_NEGATIVES {
                for iter.next() {
-                       est, err := sk.getEstimate(iter.getKey())
+                       est, err := sk.GetEstimate(iter.getKey())
                        if err != nil {
                                return nil, err
                        }
-                       ub, err := sk.getUpperBound(iter.getKey())
+                       ub, err := sk.GetUpperBound(iter.getKey())
                        if err != nil {
                                return nil, err
                        }
-                       lb, err := sk.getLowerBound(iter.getKey())
+                       lb, err := sk.GetLowerBound(iter.getKey())
                        if err != nil {
                                return nil, err
                        }
@@ -78,15 +78,15 @@ func sortItems(sk *LongsSketch, threshold int64, errorType 
ErrorType) ([]*Row, e
                }
        } else { //NO_FALSE_POSITIVES
                for iter.next() {
-                       est, err := sk.getEstimate(iter.getKey())
+                       est, err := sk.GetEstimate(iter.getKey())
                        if err != nil {
                                return nil, err
                        }
-                       ub, err := sk.getUpperBound(iter.getKey())
+                       ub, err := sk.GetUpperBound(iter.getKey())
                        if err != nil {
                                return nil, err
                        }
-                       lb, err := sk.getLowerBound(iter.getKey())
+                       lb, err := sk.GetLowerBound(iter.getKey())
                        if err != nil {
                                return nil, err
                        }
diff --git a/frequencies/utils.go b/frequencies/utils.go
index 6af4804..2778032 100644
--- a/frequencies/utils.go
+++ b/frequencies/utils.go
@@ -17,6 +17,11 @@
 
 package frequencies
 
+import (
+       "math"
+       "math/rand"
+)
+
 const (
        _LG_MIN_MAP_SIZE = 3
        // This constant is large enough so that computing the median of 
SAMPLE_SIZE
@@ -26,6 +31,27 @@ const (
        _SAMPLE_SIZE = 1024
 )
 
+type ErrorType struct {
+       id   int
+       Name string
+}
+
+type ErrorTypes struct {
+       NO_FALSE_POSITIVES ErrorType
+       NO_FALSE_NEGATIVES ErrorType
+}
+
+var ErrorTypeEnum = &ErrorTypes{
+       NO_FALSE_POSITIVES: ErrorType{
+               id:   1,
+               Name: "NO_FALSE_POSITIVES",
+       },
+       NO_FALSE_NEGATIVES: ErrorType{
+               id:   2,
+               Name: "NO_FALSE_NEGATIVES",
+       },
+}
+
 // hash returns an index into the hash table.
 // This hash function is taken from the internals of Austin Appleby's 
MurmurHash3 algorithm.
 // It is also used by the Trove for Java libraries.
@@ -38,3 +64,10 @@ func hash(okey int64) int64 {
        key ^= key >> 33
        return int64(key)
 }
+
+func randomGeometricDist(prob float64) int64 {
+       if prob <= 0.0 || prob >= 1.0 {
+               panic("prob must be in (0, 1)")
+       }
+       return int64(1 + math.Log(rand.Float64())/math.Log(1.0-prob))
+}
diff --git a/hll/hll_sketch_serialization_test.go 
b/hll/hll_sketch_serialization_test.go
index 2341af6..94a5ff1 100644
--- a/hll/hll_sketch_serialization_test.go
+++ b/hll/hll_sketch_serialization_test.go
@@ -19,23 +19,17 @@ package hll
 
 import (
        "fmt"
+       "github.com/apache/datasketches-go/common/testutils"
        "os"
        "testing"
 
        "github.com/stretchr/testify/assert"
 )
 
-const (
-       dSketchTestGenerateGo = "DSKETCH_TEST_GENERATE_GO"
-       dSketchTestCrossJava  = "DSKETCH_TEST_CROSS_JAVA"
-       dSketchTestCrossCpp   = "DSKETCH_TEST_CROSS_CPP"
-       dSketchTestCrossGo    = "DSKETCH_TEST_CROSS_GO"
-)
-
 // Run me manually for generation
 func TestGenerateGoFiles(t *testing.T) {
-       if len(os.Getenv(dSketchTestGenerateGo)) == 0 {
-               t.Skipf("%s not set", dSketchTestGenerateGo)
+       if len(os.Getenv(testutils.DSketchTestGenerateGo)) == 0 {
+               t.Skipf("%s not set", testutils.DSketchTestGenerateGo)
        }
 
        nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
@@ -52,35 +46,35 @@ func TestGenerateGoFiles(t *testing.T) {
                        assert.NoError(t, hll6.UpdateUInt64(uint64(i)))
                        assert.NoError(t, hll8.UpdateUInt64(uint64(i)))
                }
-               err = os.MkdirAll(goPath, os.ModePerm)
+               err = os.MkdirAll(testutils.GoPath, os.ModePerm)
                assert.NoError(t, err)
 
                sl4, err := hll4.ToCompactSlice()
                assert.NoError(t, err)
-               err = os.WriteFile(fmt.Sprintf("%s/hll4_n%d_go.sk", goPath, n), 
sl4, 0644)
+               err = os.WriteFile(fmt.Sprintf("%s/hll4_n%d_go.sk", 
testutils.GoPath, n), sl4, 0644)
                assert.NoError(t, err)
 
                sl6, err := hll6.ToCompactSlice()
                assert.NoError(t, err)
-               err = os.WriteFile(fmt.Sprintf("%s/hll6_n%d_go.sk", goPath, n), 
sl6, 0644)
+               err = os.WriteFile(fmt.Sprintf("%s/hll6_n%d_go.sk", 
testutils.GoPath, n), sl6, 0644)
                assert.NoError(t, err)
 
                sl8, err := hll8.ToCompactSlice()
                assert.NoError(t, err)
-               err = os.WriteFile(fmt.Sprintf("%s/hll8_n%d_go.sk", goPath, n), 
sl8, 0644)
+               err = os.WriteFile(fmt.Sprintf("%s/hll8_n%d_go.sk", 
testutils.GoPath, n), sl8, 0644)
                assert.NoError(t, err)
        }
 }
 
 func TestJavaCompat(t *testing.T) {
-       if len(os.Getenv(dSketchTestCrossJava)) == 0 {
-               t.Skipf("%s not set", dSketchTestCrossJava)
+       if len(os.Getenv(testutils.DSketchTestCrossJava)) == 0 {
+               t.Skipf("%s not set", testutils.DSketchTestCrossJava)
        }
 
        t.Run("Java Hll4", func(t *testing.T) {
                nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
                for _, n := range nArr {
-                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll4_n%d_java.sk", javaPath, n))
+                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll4_n%d_java.sk", testutils.JavaPath, n))
                        assert.NoError(t, err)
                        sketch, err := DeserializeHllSketch(bytes, true)
                        if err != nil {
@@ -97,7 +91,7 @@ func TestJavaCompat(t *testing.T) {
        t.Run("Java Hll6", func(t *testing.T) {
                nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
                for _, n := range nArr {
-                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll6_n%d_java.sk", javaPath, n))
+                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll6_n%d_java.sk", testutils.JavaPath, n))
                        assert.NoError(t, err)
 
                        sketch, err := DeserializeHllSketch(bytes, true)
@@ -115,7 +109,7 @@ func TestJavaCompat(t *testing.T) {
        t.Run("Java Hll8", func(t *testing.T) {
                nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
                for _, n := range nArr {
-                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll8_n%d_java.sk", javaPath, n))
+                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll8_n%d_java.sk", testutils.JavaPath, n))
                        assert.NoError(t, err)
                        sketch, err := DeserializeHllSketch(bytes, true)
                        if err != nil {
@@ -131,14 +125,14 @@ func TestJavaCompat(t *testing.T) {
 }
 
 func TestCppCompat(t *testing.T) {
-       if len(os.Getenv(dSketchTestCrossCpp)) == 0 {
-               t.Skipf("%s not set", dSketchTestCrossCpp)
+       if len(os.Getenv(testutils.DSketchTestCrossCpp)) == 0 {
+               t.Skipf("%s not set", testutils.DSketchTestCrossCpp)
        }
 
        t.Run("Cpp Hll4", func(t *testing.T) {
                nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
                for _, n := range nArr {
-                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll4_n%d_cpp.sk", cppPath, n))
+                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll4_n%d_cpp.sk", testutils.CppPath, n))
                        assert.NoError(t, err)
                        sketch, err := DeserializeHllSketch(bytes, true)
                        if err != nil {
@@ -155,7 +149,7 @@ func TestCppCompat(t *testing.T) {
        t.Run("Cpp Hll6", func(t *testing.T) {
                nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
                for _, n := range nArr {
-                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll6_n%d_cpp.sk", cppPath, n))
+                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll6_n%d_cpp.sk", testutils.CppPath, n))
                        assert.NoError(t, err)
 
                        sketch, err := DeserializeHllSketch(bytes, true)
@@ -173,7 +167,7 @@ func TestCppCompat(t *testing.T) {
        t.Run("Cpp Hll8", func(t *testing.T) {
                nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
                for _, n := range nArr {
-                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll8_n%d_cpp.sk", cppPath, n))
+                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll8_n%d_cpp.sk", testutils.CppPath, n))
                        assert.NoError(t, err)
                        sketch, err := DeserializeHllSketch(bytes, true)
                        if err != nil {
@@ -189,14 +183,14 @@ func TestCppCompat(t *testing.T) {
 }
 
 func TestGoCompat(t *testing.T) {
-       if len(os.Getenv(dSketchTestCrossGo)) == 0 {
-               t.Skipf("%s not set", dSketchTestCrossGo)
+       if len(os.Getenv(testutils.DSketchTestCrossGo)) == 0 {
+               t.Skipf("%s not set", testutils.DSketchTestCrossGo)
        }
 
        t.Run("Go Hll4", func(t *testing.T) {
                nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
                for _, n := range nArr {
-                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll4_n%d_go.sk", goPath, n))
+                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll4_n%d_go.sk", testutils.GoPath, n))
                        assert.NoError(t, err)
 
                        sketch, err := DeserializeHllSketch(bytes, true)
@@ -214,7 +208,7 @@ func TestGoCompat(t *testing.T) {
        t.Run("Go Hll6", func(t *testing.T) {
                nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
                for _, n := range nArr {
-                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll6_n%d_go.sk", goPath, n))
+                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll6_n%d_go.sk", testutils.GoPath, n))
                        assert.NoError(t, err)
 
                        sketch, err := DeserializeHllSketch(bytes, true)
@@ -232,7 +226,7 @@ func TestGoCompat(t *testing.T) {
        t.Run("Go Hll8", func(t *testing.T) {
                nArr := []int{0, 1, 10, 100, 1000, 10000, 100000, 1000000}
                for _, n := range nArr {
-                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll8_n%d_go.sk", goPath, n))
+                       bytes, err := 
os.ReadFile(fmt.Sprintf("%s/hll8_n%d_go.sk", testutils.GoPath, n))
                        assert.NoError(t, err)
 
                        sketch, err := DeserializeHllSketch(bytes, true)
diff --git a/hll/hll_sketch_test.go b/hll/hll_sketch_test.go
index bb9fd02..82f7313 100644
--- a/hll/hll_sketch_test.go
+++ b/hll/hll_sketch_test.go
@@ -26,12 +26,6 @@ import (
        "github.com/stretchr/testify/assert"
 )
 
-const (
-       javaPath = "../serialization_test_data/java_generated_files"
-       cppPath  = "../serialization_test_data/cpp_generated_files"
-       goPath   = "../serialization_test_data/go_generated_files"
-)
-
 func TestMisc(t *testing.T) {
        hll, err := NewHllSketch(10, TgtHllTypeHll4)
        assert.NoError(t, err)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datasketches-go) 28/34: Add remaing tests and cleanup

Reply via email to