This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-go.git

commit 7d8bc368fc41d9d9c730c132d5ccef0321012098
Author: Pierre Lacave <[email protected]>
AuthorDate: Thu Dec 21 11:45:51 2023 +0100

    Add more test and fix sort order
---
 frequencies/error_types.go                 |  24 ++++-
 frequencies/longs_sketch.go                |  12 +++
 frequencies/longs_sketch_test.go           | 162 ++++++++++++++++++++++++++++-
 frequencies/reverse_purge_long_hash_map.go |  13 +++
 frequencies/row.go                         |  18 ++--
 5 files changed, 208 insertions(+), 21 deletions(-)

diff --git a/frequencies/error_types.go b/frequencies/error_types.go
index b649d01..730f331 100644
--- a/frequencies/error_types.go
+++ b/frequencies/error_types.go
@@ -17,9 +17,23 @@
 
 package frequencies
 
-type ErrorType = int
+type ErrorType struct {
+       id   int
+       Name string
+}
 
-const (
-       NO_FALSE_POSITIVES = ErrorType(1)
-       NO_FALSE_NEGATIVES = ErrorType(2)
-)
+type ErrorTypes struct {
+       NO_FALSE_POSITIVES ErrorType
+       NO_FALSE_NEGATIVES ErrorType
+}
+
+var ErrorTypeEnum = &ErrorTypes{
+       NO_FALSE_POSITIVES: ErrorType{
+               id:   1,
+               Name: "NO_FALSE_POSITIVES",
+       },
+       NO_FALSE_NEGATIVES: ErrorType{
+               id:   2,
+               Name: "NO_FALSE_NEGATIVES",
+       },
+}
diff --git a/frequencies/longs_sketch.go b/frequencies/longs_sketch.go
index ecb51a2..6574c33 100644
--- a/frequencies/longs_sketch.go
+++ b/frequencies/longs_sketch.go
@@ -338,6 +338,18 @@ func (s *LongsSketch) UpdateMany(item int64, count int64) 
error {
        return nil
 }
 
+func (s *LongsSketch) String() string {
+       var sb strings.Builder
+       sb.WriteString("FrequentLongsSketch:")
+       sb.WriteString("\n")
+       sb.WriteString("  Stream Length    : " + 
strconv.FormatInt(s.streamWeight, 10))
+       sb.WriteString("\n")
+       sb.WriteString("  Max Error Offset : " + strconv.FormatInt(s.offset, 
10))
+       sb.WriteString("\n")
+       sb.WriteString(s.hashMap.String())
+       return sb.String()
+}
+
 func (s *LongsSketch) merge(other *LongsSketch) (*LongsSketch, error) {
        if other == nil || other.isEmpty() {
                return s, nil
diff --git a/frequencies/longs_sketch_test.go b/frequencies/longs_sketch_test.go
index f0ec490..20c9898 100644
--- a/frequencies/longs_sketch_test.go
+++ b/frequencies/longs_sketch_test.go
@@ -19,8 +19,10 @@ package frequencies
 
 import (
        "encoding/binary"
+       "fmt"
        "github.com/apache/datasketches-go/common"
        "github.com/stretchr/testify/assert"
+       "strings"
        "testing"
 )
 
@@ -238,8 +240,8 @@ func checkEquality(t *testing.T, sk1, sk2 *LongsSketch) {
        assert.Equal(t, sk1.getStreamLength(), sk2.getStreamLength())
        assert.Equal(t, sk1.isEmpty(), sk2.isEmpty())
 
-       NFN := NO_FALSE_NEGATIVES
-       NFP := NO_FALSE_POSITIVES
+       NFN := ErrorTypeEnum.NO_FALSE_NEGATIVES
+       NFP := ErrorTypeEnum.NO_FALSE_POSITIVES
 
        rowArr1, err := sk1.getFrequentItems(NFN)
        assert.NoError(t, err)
@@ -328,20 +330,20 @@ func TestFreqLongs(t *testing.T) {
 
        for h := 0; h < numSketches; h++ {
                threshold := sketches[h].getMaximumError()
-               rows, err := sketches[h].getFrequentItems(NO_FALSE_NEGATIVES)
+               rows, err := 
sketches[h].getFrequentItems(ErrorTypeEnum.NO_FALSE_NEGATIVES)
                assert.NoError(t, err)
                for i := 0; i < len(rows); i++ {
                        assert.True(t, rows[i].getUpperBound() > threshold)
                }
 
-               rows, err = sketches[h].getFrequentItems(NO_FALSE_POSITIVES)
+               rows, err = 
sketches[h].getFrequentItems(ErrorTypeEnum.NO_FALSE_POSITIVES)
                assert.NoError(t, err)
                assert.Equal(t, len(rows), 0)
                for i := 0; i < len(rows); i++ {
                        assert.True(t, rows[i].getLowerBound() > threshold)
                }
 
-               rows, err = sketches[h].getFrequentItems(NO_FALSE_NEGATIVES)
+               rows, err = 
sketches[h].getFrequentItems(ErrorTypeEnum.NO_FALSE_NEGATIVES)
        }
 }
 
@@ -392,3 +394,153 @@ func TestUpdateNegative(t *testing.T) {
        err = fls.UpdateMany(1, -1)
        assert.Error(t, err)
 }
+
+func TestGetFrequentItems1(t *testing.T) {
+       minSize := 1 << _LG_MIN_MAP_SIZE
+       fls, err := NewLongSketchWithMaxMapSize(minSize)
+       assert.NoError(t, err)
+       fls.Update(1)
+       rowArr, err := fls.getFrequentItems(ErrorTypeEnum.NO_FALSE_POSITIVES)
+       assert.NoError(t, err)
+       assert.NotEmpty(t, rowArr)
+       row := rowArr[0]
+       assert.Equal(t, row.est, int64(1))
+       assert.Equal(t, row.item, int64(1))
+       assert.Equal(t, row.lb, int64(1))
+       assert.Equal(t, row.ub, int64(1))
+       newRow := NewRow(row.item, row.est+1, row.ub, row.lb)
+       assert.NotEqual(t, row, newRow)
+       newRow = NewRow(row.item, row.est, row.ub, row.lb)
+       assert.Equal(t, row, newRow)
+
+}
+
+func TestGetStorageByes(t *testing.T) {
+       minSize := 1 << _LG_MIN_MAP_SIZE
+       fls, err := NewLongSketchWithMaxMapSize(minSize)
+       assert.NoError(t, err)
+       sl, err := fls.toSlice()
+       assert.NoError(t, err)
+       assert.Equal(t, len(sl), fls.getStorageBytes())
+       err = fls.Update(1)
+       assert.NoError(t, err)
+       sl, err = fls.toSlice()
+       assert.NoError(t, err)
+       assert.Equal(t, len(sl), fls.getStorageBytes())
+}
+
+func TestDeSerFromString(t *testing.T) {
+       minSize := 1 << _LG_MIN_MAP_SIZE
+       fls, err := NewLongSketchWithMaxMapSize(minSize)
+       assert.NoError(t, err)
+       str, err := fls.serializeToString()
+       fmt.Println(str)
+       assert.NoError(t, err)
+       fls.Update(1)
+       str, err = fls.serializeToString()
+       assert.NoError(t, err)
+       fmt.Println(str)
+}
+
+func TestMerge(t *testing.T) {
+       minSize := 1 << _LG_MIN_MAP_SIZE
+       fls1, err := NewLongSketchWithMaxMapSize(minSize)
+       assert.NoError(t, err)
+       var fls2 *LongsSketch
+       fls2 = nil
+       fle, err := fls1.merge(fls2)
+       assert.NoError(t, err)
+       assert.True(t, fle.isEmpty())
+
+       fls2, err = NewLongSketchWithMaxMapSize(minSize)
+       assert.NoError(t, err)
+       fle, err = fls1.merge(fls2)
+       assert.NoError(t, err)
+}
+
+func TestSortItems(t *testing.T) {
+       numSketches := 1
+       n := 2222
+       errorTolerance := 1.0 / 100
+       sketchSize := common.CeilPowerOf2(int(1.0 / (errorTolerance * 
reversePurgeLongHashMapLoadFactor)))
+       fmt.Printf("sketchSize: %d\n", sketchSize)
+
+       sketches := make([]*LongsSketch, numSketches)
+       for h := 0; h < numSketches; h++ {
+               sketches[h], _ = newFrequencySketch(float64(sketchSize))
+       }
+
+       prob := .001
+       for i := 0; i < n; i++ {
+               item := randomGeometricDist(prob) + 1
+               for h := 0; h < numSketches; h++ {
+                       err := sketches[h].Update(item)
+                       assert.NoError(t, err)
+               }
+       }
+
+       for h := 0; h < numSketches; h++ {
+               threshold := sketches[h].getMaximumError()
+               rows, err := 
sketches[h].getFrequentItems(ErrorTypeEnum.NO_FALSE_NEGATIVES)
+               assert.NoError(t, err)
+               for i := 0; i < len(rows); i++ {
+                       assert.True(t, rows[i].getUpperBound() > threshold)
+               }
+               first := rows[0]
+               anItem := first.item
+               anEst := first.est
+               aLB := first.lb
+               s := first.String()
+               fmt.Println(s)
+               assert.True(t, anEst >= 0)
+               assert.True(t, aLB >= 0)
+               assert.Equal(t, anItem, anItem) //dummy test
+
+       }
+}
+
+func TestGetAndCheckPreLongs(t *testing.T) {
+       byteArr := make([]byte, 8)
+       byteArr[0] = 2
+       _, err := checkPreambleSize(byteArr)
+       assert.Error(t, err)
+}
+
+func TestToString1(t *testing.T) {
+       size := 1 << _LG_MIN_MAP_SIZE
+       printSketch(t, size, []int64{1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 5})
+       printSketch(t, size, []int64{5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1})
+}
+
+func printSketch(t *testing.T, size int, items []int64) {
+       var sb strings.Builder
+       fls, err := NewLongSketchWithMaxMapSize(size)
+       assert.NoError(t, err)
+       for i := 0; i < len(items); i++ {
+               fls.UpdateMany(int64(i+1), items[i])
+       }
+       sb.WriteString(fmt.Sprintf("Sketch Size: %d\n", size))
+       sb.WriteString(fls.String())
+       fmt.Println(sb.String())
+       printRows(t, fls, ErrorTypeEnum.NO_FALSE_NEGATIVES)
+       fmt.Println("")
+       printRows(t, fls, ErrorTypeEnum.NO_FALSE_POSITIVES)
+       fmt.Println("")
+}
+
+func printRows(t *testing.T, fls *LongsSketch, errorType ErrorType) {
+       rows, err := fls.getFrequentItems(errorType)
+       assert.NoError(t, err)
+       fmt.Println(errorType.Name)
+       fmt.Printf("  %20s%20s%20s %s", "Est", "UB", "LB", "Item")
+       fmt.Print("\n")
+       for i := 0; i < len(rows); i++ {
+               row := rows[i]
+               s2 := row.String()
+               fmt.Println(s2)
+       }
+       if len(rows) > 0 { //check equals null case
+               var nullRow *Row
+               assert.NotEqual(t, rows[0], nullRow)
+       }
+}
diff --git a/frequencies/reverse_purge_long_hash_map.go 
b/frequencies/reverse_purge_long_hash_map.go
index 03c2b16..62c83b9 100644
--- a/frequencies/reverse_purge_long_hash_map.go
+++ b/frequencies/reverse_purge_long_hash_map.go
@@ -352,6 +352,19 @@ func (s *reversePurgeLongHashMap) hashProbe(key int64) int 
{
        return probe
 }
 
+func (s *reversePurgeLongHashMap) String() string {
+       var sb strings.Builder
+       sb.WriteString("ReversePurgeLongHashMap:\n")
+       sb.WriteString(fmt.Sprintf("  %12s:%11s%20s %s\n", "Index", "States", 
"Values", "Keys"))
+       for i := 0; i < len(s.keys); i++ {
+               if s.states[i] <= 0 {
+                       continue
+               }
+               sb.WriteString(fmt.Sprintf("  %12d:%11d%20d %d\n", i, 
s.states[i], s.values[i], s.keys[i]))
+       }
+       return sb.String()
+}
+
 func newIterator(keys []int64, values []int64, states []int16, numActive int) 
*iteratorHashMap {
        stride := int(uint64(float64(len(keys))*common.InverseGolden) | 1)
        return &iteratorHashMap{
diff --git a/frequencies/row.go b/frequencies/row.go
index 21a2c81..ad105ef 100644
--- a/frequencies/row.go
+++ b/frequencies/row.go
@@ -22,10 +22,6 @@ import (
        "sort"
 )
 
-const (
-       hfmt string = "  %20s%20s%20s %s"
-)
-
 type Row struct {
        item int64
        est  int64
@@ -33,8 +29,8 @@ type Row struct {
        lb   int64
 }
 
-func NewRow(item int64, estimate int64, ub int64, lb int64) Row {
-       return Row{
+func NewRow(item int64, estimate int64, ub int64, lb int64) *Row {
+       return &Row{
                item: item,
                est:  estimate,
                ub:   ub,
@@ -43,7 +39,7 @@ func NewRow(item int64, estimate int64, ub int64, lb int64) 
Row {
 }
 
 func (r *Row) String() string {
-       return fmt.Sprintf("  %20d%20d%20d %d", r.item, r.est, r.ub, r.lb)
+       return fmt.Sprintf("  %20d%20d%20d %d", r.est, r.ub, r.lb, r.item)
 }
 
 func (r *Row) getEstimate() int64 {
@@ -61,7 +57,7 @@ func (r *Row) getLowerBound() int64 {
 func sortItems(sk *LongsSketch, threshold int64, errorType ErrorType) ([]*Row, 
error) {
        rowList := make([]*Row, 0)
        iter := sk.hashMap.iterator()
-       if errorType == NO_FALSE_NEGATIVES {
+       if errorType == ErrorTypeEnum.NO_FALSE_NEGATIVES {
                for iter.next() {
                        est, err := sk.getEstimate(iter.getKey())
                        if err != nil {
@@ -77,7 +73,7 @@ func sortItems(sk *LongsSketch, threshold int64, errorType 
ErrorType) ([]*Row, e
                        }
                        if ub >= threshold {
                                row := NewRow(iter.getKey(), est, ub, lb)
-                               rowList = append(rowList, &row)
+                               rowList = append(rowList, row)
                        }
                }
        } else { //NO_FALSE_POSITIVES
@@ -96,13 +92,13 @@ func sortItems(sk *LongsSketch, threshold int64, errorType 
ErrorType) ([]*Row, e
                        }
                        if lb >= threshold {
                                row := NewRow(iter.getKey(), est, ub, lb)
-                               rowList = append(rowList, &row)
+                               rowList = append(rowList, row)
                        }
                }
        }
 
        sort.Slice(rowList, func(i, j int) bool {
-               return rowList[i].est < rowList[j].est
+               return rowList[i].est > rowList[j].est
        })
 
        return rowList, nil


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to