This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datasketches-go.git
commit fb6cfe1e4bf77f92ae7a99d8aa37a080ed72c224 Author: Pierre Lacave <[email protected]> AuthorDate: Thu Dec 21 15:10:30 2023 +0100 error / lint cleanup --- frequencies/longs_sketch.go | 53 +++++++++++++++---------- frequencies/longs_sketch_test.go | 8 ++++ frequencies/preable_utils.go | 6 +-- frequencies/reverse_purge_long_hash_map.go | 40 ++++++++++--------- frequencies/reverse_purge_long_hash_map_test.go | 2 +- 5 files changed, 65 insertions(+), 44 deletions(-) diff --git a/frequencies/longs_sketch.go b/frequencies/longs_sketch.go index 2a0d6b5..e28cce1 100644 --- a/frequencies/longs_sketch.go +++ b/frequencies/longs_sketch.go @@ -72,7 +72,7 @@ func NewLongsSketch(lgMaxMapSize int, lgCurMapSize int) (*LongsSketch, error) { //set initial size of hash map lgMaxMapSize = max(lgMaxMapSize, _LG_MIN_MAP_SIZE) lgCurMapSize = max(lgCurMapSize, _LG_MIN_MAP_SIZE) - hashMap, err := NewReversePurgeLongHashMap(1 << lgCurMapSize) + hashMap, err := newReversePurgeLongHashMap(1 << lgCurMapSize) if err != nil { return nil, err } @@ -81,7 +81,7 @@ func NewLongsSketch(lgMaxMapSize int, lgCurMapSize int) (*LongsSketch, error) { offset := int64(0) sampleSize := min(_SAMPLE_SIZE, maxMapCap) return &LongsSketch{ - lgMaxMapSize: int(lgMaxMapSize), + lgMaxMapSize: lgMaxMapSize, curMapCap: curMapCap, offset: offset, sampleSize: sampleSize, @@ -125,17 +125,17 @@ func NewLongsSketchFromSlice(slc []byte) (*LongsSketch, error) { preLongsEq1 := preLongs == 1 preLongsEqMax := preLongs == maxPreLongs if !preLongsEq1 && !preLongsEqMax { - return nil, fmt.Errorf("Possible Corruption: PreLongs must be 1 or %d: %d", maxPreLongs, preLongs) + return nil, fmt.Errorf("possible Corruption: PreLongs must be 1 or %d: %d", maxPreLongs, preLongs) } if serVer != _SER_VER { - return nil, fmt.Errorf("Possible Corruption: Ser Ver must be %d: %d", _SER_VER, serVer) + return nil, fmt.Errorf("possible Corruption: Ser Ver must be %d: %d", _SER_VER, serVer) } actFamID := common.FamilyEnum.Frequency.Id if familyID != actFamID { - return nil, fmt.Errorf("Possible Corruption: FamilyID must be %d: %d", actFamID, familyID) + return nil, fmt.Errorf("possible Corruption: FamilyID must be %d: %d", actFamID, familyID) } if empty && !preLongsEq1 { - return nil, fmt.Errorf("Possible Corruption: Empty Flag set incorrectly: %t", preLongsEq1) + return nil, fmt.Errorf("possible Corruption: Empty Flag set incorrectly: %t", preLongsEq1) } if empty { return NewLongsSketch(lgMaxMapSize, _LG_MIN_MAP_SIZE) @@ -159,7 +159,7 @@ func NewLongsSketchFromSlice(slc []byte) (*LongsSketch, error) { countArray := make([]int64, activeItems) reqBytes := preBytes + 2*activeItems*8 //count Arr + Items Arr if len(slc) < reqBytes { - return nil, fmt.Errorf("Possible Corruption: Insufficient bytes in array: %d, %d", len(slc), reqBytes) + return nil, fmt.Errorf("possible Corruption: Insufficient bytes in array: %d, %d", len(slc), reqBytes) } for i := 0; i < activeItems; i++ { countArray[i] = int64(binary.LittleEndian.Uint64(slc[preBytes+(i<<3):])) @@ -173,10 +173,12 @@ func NewLongsSketchFromSlice(slc []byte) (*LongsSketch, error) { } // UpdateMany the sketch - for i := 0; i < activeItems; i++ { - fls.UpdateMany(itemArray[i], countArray[i]) + for i := 0; i < activeItems && err == nil; i++ { + err = fls.UpdateMany(itemArray[i], countArray[i]) + } + if err != nil { + return nil, err } - fls.streamWeight = preArr[2] //override streamWeight due to updating return fls, nil } @@ -187,7 +189,7 @@ func NewLongsSketchFromSlice(slc []byte) (*LongsSketch, error) { // str is a string representation of a sketch of this class. func NewLongsSketchFromString(str string) (*LongsSketch, error) { if len(str) < 1 { - return nil, fmt.Errorf("String is empty") + return nil, errors.New("string is empty") } // Remove trailing comma if present // as this will cause a problem with the split @@ -196,7 +198,7 @@ func NewLongsSketchFromString(str string) (*LongsSketch, error) { } tokens := strings.Split(str, ",") if len(tokens) < (strPreambleTokens + 2) { - return nil, fmt.Errorf("String not long enough: %d", len(tokens)) + return nil, fmt.Errorf("string not long enough: %d", len(tokens)) } serVe, err := strconv.ParseInt(tokens[0], 10, 32) if err != nil { @@ -235,10 +237,10 @@ func NewLongsSketchFromString(str string) (*LongsSketch, error) { //checks if serVe != _SER_VER { - return nil, fmt.Errorf("Possible Corruption: Bad SerVer: %d", serVe) + return nil, fmt.Errorf("possible Corruption: Bad SerVer: %d", serVe) } if famID != int64(common.FamilyEnum.Frequency.Id) { - return nil, fmt.Errorf("Possible Corruption: Bad Family: %d", famID) + return nil, fmt.Errorf("possible Corruption: Bad Family: %d", famID) } empty := flags > 0 if !empty && (numActive == 0) { @@ -246,7 +248,7 @@ func NewLongsSketchFromString(str string) (*LongsSketch, error) { } numTokens := int64(len(tokens)) if (2 * numActive) != (numTokens - strPreambleTokens - 2) { - return nil, fmt.Errorf("Possible Corruption: Incorrect # of tokens: %d, numActive: %d", numTokens, numActive) + return nil, fmt.Errorf("possible Corruption: Incorrect # of tokens: %d, numActive: %d", numTokens, numActive) } // if ((2 * numActive) != (numTokens - STR_PREAMBLE_TOKENS - 2)) { sk, err := NewLongsSketch(int(lgMax), int(lgCur)) @@ -422,7 +424,7 @@ func (s *LongsSketch) UpdateMany(item int64, count int64) error { return nil } if count < 0 { - return fmt.Errorf("count may not be negative") + return errors.New("count may not be negative") } s.streamWeight += count err := s.hashMap.adjustOrPutValue(item, count) @@ -434,13 +436,16 @@ func (s *LongsSketch) UpdateMany(item int64, count int64) error { // Over the threshold, we need to do something if s.hashMap.lgLength < s.lgMaxMapSize { // Below tgt size, we can grow - s.hashMap.resize(2 * len(s.hashMap.keys)) + err = s.hashMap.resize(2 * len(s.hashMap.keys)) + if err != nil { + return err + } s.curMapCap = s.hashMap.getCapacity() } else { // At tgt size, must purge s.offset += s.hashMap.purge(s.sampleSize) if s.GetNumActiveItems() > s.GetMaximumMapCapacity() { - return fmt.Errorf("purge did not reduce active items") + return errors.New("purge did not reduce active items") } } } @@ -526,12 +531,18 @@ func (s *LongsSketch) ToSlice() ([]byte, error) { } preBytes := preLongs << 3 - activeValues := s.hashMap.getActiveValues() + activeValues, err := s.hashMap.getActiveValues() + if err != nil { + return nil, err + } for i := 0; i < activeItems; i++ { binary.LittleEndian.PutUint64(outArr[preBytes+(i<<3):], uint64(activeValues[i])) } - activeKeys := s.hashMap.getActiveKeys() + activeKeys, err := s.hashMap.getActiveKeys() + if err != nil { + return nil, err + } for i := 0; i < activeItems; i++ { binary.LittleEndian.PutUint64(outArr[preBytes+((activeItems+i)<<3):], uint64(activeKeys[i])) } @@ -541,7 +552,7 @@ func (s *LongsSketch) ToSlice() ([]byte, error) { // Reset resets this sketch to a virgin state. func (s *LongsSketch) Reset() { - hasMap, _ := NewReversePurgeLongHashMap(1 << _LG_MIN_MAP_SIZE) + hasMap, _ := newReversePurgeLongHashMap(1 << _LG_MIN_MAP_SIZE) s.curMapCap = hasMap.getCapacity() s.offset = 0 s.streamWeight = 0 diff --git a/frequencies/longs_sketch_test.go b/frequencies/longs_sketch_test.go index 7d5860c..1d03280 100644 --- a/frequencies/longs_sketch_test.go +++ b/frequencies/longs_sketch_test.go @@ -593,3 +593,11 @@ func TestGetAprioriError(t *testing.T) { assert.NoError(t, err) assert.Equal(t, apr, eps*10_000) } + +func BenchmarkLongSketch(b *testing.B) { + sketch, err := NewLongsSketch(128, 8) + assert.NoError(b, err) + for i := 0; i < b.N; i++ { + sketch.Update(int64(i)) + } +} diff --git a/frequencies/preable_utils.go b/frequencies/preable_utils.go index ca3854c..3a2b0fd 100644 --- a/frequencies/preable_utils.go +++ b/frequencies/preable_utils.go @@ -19,7 +19,7 @@ package frequencies import ( "encoding/binary" - "fmt" + "errors" ) const ( @@ -42,13 +42,13 @@ const ( func checkPreambleSize(preamble []byte) (int64, error) { if len(preamble) < 8 { - return 0, fmt.Errorf("preamble is too small") + return 0, errors.New("preamble is too small") } pre0 := int64(binary.LittleEndian.Uint64(preamble)) preLongs := int(pre0 & 0x3F) required := max(preLongs<<3, 8) if len(preamble) < required { - return 0, fmt.Errorf("preamble is too small") + return 0, errors.New("preamble is too small") } return pre0, nil } diff --git a/frequencies/reverse_purge_long_hash_map.go b/frequencies/reverse_purge_long_hash_map.go index 62c83b9..3d028b6 100644 --- a/frequencies/reverse_purge_long_hash_map.go +++ b/frequencies/reverse_purge_long_hash_map.go @@ -18,6 +18,7 @@ package frequencies import ( + "errors" "fmt" "github.com/apache/datasketches-go/common" "github.com/apache/datasketches-go/thetacommon" @@ -51,12 +52,12 @@ const ( reversePurgeLongHashMapDriftLimit = 1024 //used only in stress testing ) -// NewReversePurgeLongHashMap constructs a new reversePurgeLongHashMap. +// newReversePurgeLongHashMap constructs a new reversePurgeLongHashMap. // It will create arrays of length mapSize, which must be a power of two. // This restriction was made to ensure fast hashing. // The member loadThreshold is then set to the largest value that // will not overload the hash table. -func NewReversePurgeLongHashMap(mapSize int) (*reversePurgeLongHashMap, error) { +func newReversePurgeLongHashMap(mapSize int) (*reversePurgeLongHashMap, error) { lgLength, err := common.ExactLog2(mapSize) if err != nil { return nil, fmt.Errorf("mapSize: %e", err) @@ -80,7 +81,7 @@ func (r *reversePurgeLongHashMap) get(key int64) (int64, error) { if r.keys[probe] == key { return r.values[probe], nil } - return 0, fmt.Errorf("key not found") + return 0, errors.New("key not found") } return 0, nil } @@ -106,14 +107,14 @@ func (r *reversePurgeLongHashMap) adjustOrPutValue(key int64, adjustAmount int64 probe = (probe + 1) & int64(arrayMask) drift++ if drift >= reversePurgeLongHashMapDriftLimit { - panic("drift >= driftLimit") + return errors.New("drift >= driftLimit") } } //found either an empty slot or the key if r.states[probe] == 0 { //found empty slot // adding the key and value to the table if r.numActive > r.loadThreshold { - return fmt.Errorf("numActive >= loadThreshold") + return errors.New("numActive >= loadThreshold") } r.keys[probe] = key r.values[probe] = adjustAmount @@ -121,7 +122,7 @@ func (r *reversePurgeLongHashMap) adjustOrPutValue(key int64, adjustAmount int64 r.numActive++ } else { //found the key, adjust the value if r.keys[probe] != key { - panic("keys[probe] != key") + return errors.New("keys[probe] != key") } r.values[probe] += adjustAmount } @@ -212,7 +213,7 @@ func (r *reversePurgeLongHashMap) keepOnlyPositiveCounts() { } } -func (r *reversePurgeLongHashMap) hashDelete(deleteProbe int) { +func (r *reversePurgeLongHashMap) hashDelete(deleteProbe int) error { // Looks ahead in the table to search for another item to move to this location. // If none are found, the status is changed r.states[deleteProbe] = 0 //mark as empty @@ -235,15 +236,16 @@ func (r *reversePurgeLongHashMap) hashDelete(deleteProbe int) { drift++ //only used for theoretical analysis if drift >= reversePurgeLongHashMapDriftLimit { - panic("drift >= driftLimit") + return errors.New("drift >= driftLimit") } } + return nil } func deserializeReversePurgeLongHashMapFromString(string string) (*reversePurgeLongHashMap, error) { tokens := strings.Split(string, ",") if len(tokens) < 2 { - panic("len(tokens) < 2") + return nil, errors.New("len(tokens) < 2") } numActive, err := strconv.Atoi(tokens[0]) if err != nil { @@ -253,7 +255,7 @@ func deserializeReversePurgeLongHashMapFromString(string string) (*reversePurgeL if err != nil { return nil, err } - table, err := NewReversePurgeLongHashMap(length) + table, err := newReversePurgeLongHashMap(length) if err != nil { return nil, err } @@ -280,7 +282,7 @@ func deserializeFromStringArray(tokens []string) (*reversePurgeLongHashMap, erro ignore := strPreambleTokens numActive, _ := strconv.ParseUint(tokens[ignore], 10, 32) length, _ := strconv.ParseUint(tokens[ignore+1], 10, 32) - hashMap, err := NewReversePurgeLongHashMap(int(length)) + hashMap, err := newReversePurgeLongHashMap(int(length)) if err != nil { return nil, err } @@ -303,9 +305,9 @@ func deserializeFromStringArray(tokens []string) (*reversePurgeLongHashMap, erro return hashMap, nil } -func (r *reversePurgeLongHashMap) getActiveValues() []int64 { +func (r *reversePurgeLongHashMap) getActiveValues() ([]int64, error) { if r.numActive == 0 { - return nil + return nil, nil } returnValues := make([]int64, r.numActive) j := 0 @@ -316,14 +318,14 @@ func (r *reversePurgeLongHashMap) getActiveValues() []int64 { } } if j != r.numActive { - panic("j != r.numActive") + return nil, errors.New("j != r.numActive") } - return returnValues + return returnValues, nil } -func (r *reversePurgeLongHashMap) getActiveKeys() []int64 { +func (r *reversePurgeLongHashMap) getActiveKeys() ([]int64, error) { if r.numActive == 0 { - return nil + return nil, nil } returnValues := make([]int64, r.numActive) j := 0 @@ -334,9 +336,9 @@ func (r *reversePurgeLongHashMap) getActiveKeys() []int64 { } } if j != r.numActive { - panic("j != r.numActive") + return nil, errors.New("j != r.numActive") } - return returnValues + return returnValues, nil } func (s *reversePurgeLongHashMap) iterator() *iteratorHashMap { diff --git a/frequencies/reverse_purge_long_hash_map_test.go b/frequencies/reverse_purge_long_hash_map_test.go index c157541..f6cf8aa 100644 --- a/frequencies/reverse_purge_long_hash_map_test.go +++ b/frequencies/reverse_purge_long_hash_map_test.go @@ -23,7 +23,7 @@ import ( ) func TestHashMapSerial(t *testing.T) { - mp, err := NewReversePurgeLongHashMap(8) + mp, err := newReversePurgeLongHashMap(8) assert.NoError(t, err) mp.adjustOrPutValue(10, 15) mp.adjustOrPutValue(10, 5) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
