Fengzdadi commented on code in PR #104:
URL: https://github.com/apache/datasketches-go/pull/104#discussion_r2703433246


##########
sampling/reservoir_items_sketch.go:
##########
@@ -115,6 +115,62 @@ func (s *ReservoirItemsSketch[T]) Reset() {
        s.data = s.data[:0]
 }
 
+// GetImplicitSampleWeight returns N/K when in sampling mode, or 1.0 in exact mode.
+func (s *ReservoirItemsSketch[T]) GetImplicitSampleWeight() float64 {
+       if s.n < int64(s.k) {
+               return 1.0
+       }
+       return float64(s.n) / float64(s.k)
+}
+
+// Copy returns a deep copy of the sketch.
+func (s *ReservoirItemsSketch[T]) Copy() *ReservoirItemsSketch[T] {
+       dataCopy := make([]T, len(s.data))
+       copy(dataCopy, s.data)
+       return &ReservoirItemsSketch[T]{
+               k:    s.k,
+               n:    s.n,
+               data: dataCopy,
+       }
+}
+
+// DownsampledCopy returns a copy with a reduced reservoir size.
+// If newK >= current K, returns a regular copy.
+func (s *ReservoirItemsSketch[T]) DownsampledCopy(newK int) *ReservoirItemsSketch[T] {
+       if newK >= s.k {
+               return s.Copy()
+       }
+
+       result, _ := NewReservoirItemsSketch[T](newK)
+
+       samples := s.Samples()
+       for _, item := range samples {
+               result.Update(item)
+       }
+
+       // Adjust N to preserve correct implicit weights
+       if result.n < s.n {
+               result.forceIncrementItemsSeen(s.n - result.n)
+       }
+
+       return result
+}
+
+// getValueAtPosition returns the item at the given position.
+func (s *ReservoirItemsSketch[T]) getValueAtPosition(pos int) T {
+       return s.data[pos]
+}
+
+// insertValueAtPosition replaces the item at the given position.
+func (s *ReservoirItemsSketch[T]) insertValueAtPosition(item T, pos int) {
+       s.data[pos] = item
+}
+
+// forceIncrementItemsSeen adds delta to the items seen count.
+func (s *ReservoirItemsSketch[T]) forceIncrementItemsSeen(delta int64) {

Review Comment:
   I kept `forceIncrementItemsSeen` as-is because it only bumps `n` for implicit
weights and never writes into the reservoir, so it can’t overfill capacity.
Capacity validation now happens where it matters: `DownsampledCopy` returns an
error on a bad `newK`, `UpdateFromRaw` rejects `k < 1` or a `len(items)` greater
than `k` or `n`, and `UpdateSketch` propagates those errors as well as
gadget-creation failures.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to