jmalkin commented on PR #23:
URL: 
https://github.com/apache/datasketches-python/pull/23#issuecomment-1865130666

   Created a simple, non-exhaustive test script. It uses Python iteration to 
update since we expect that vectorized updates handled in C++ will be much 
closer in performance.
   
   Script:
   ```Python
   import datasketches as ds
   import numpy as np
   import timeit
   
   # Number of times timeit executes each benchmark function.
   num_repeats = 100
   # Loop bound for the per-item update loops in the benchmark functions
   # (density_test uses n / 256 iterations instead).
   n = 2 ** 20 # num updates
   
   # default k in all cases since the test is of the interface
   
   # cpc
   def cpc_test():
     """Fill a default-constructed CPC sketch one item at a time, query it, and round-trip it through bytes."""
     sketch = ds.cpc_sketch()
     for value in range(n):
       sketch.update(value)
     sketch.get_estimate()  # return value is discarded
   
     payload = sketch.serialize()
     ds.cpc_sketch.deserialize(payload)
   
   # theta
   def theta_test():
     """Fill a default-constructed update theta sketch, query it, then serialize and deserialize its compact form."""
     sketch = ds.update_theta_sketch()
     for value in range(n):
       sketch.update(value)
     sketch.get_estimate()  # return value is discarded
   
     # Serialization goes through the compact form; deserialization targets the compact type.
     compact = sketch.compact()
     payload = compact.serialize()
     ds.compact_theta_sketch.deserialize(payload)
   
   # tuple
   def tuple_test():
     """Fill a tuple sketch (accumulator policy) with (i, i*i) pairs, query it, and round-trip its compact form."""
     policy = ds.AccumulatorPolicy()
     sketch = ds.update_tuple_sketch(policy)
     for key in range(n):
       sketch.update(key, key * key)
     sketch.get_estimate()  # return value is discarded
   
     # A separate PyLongsSerDe instance is created for each direction, as the summaries need a serde.
     compact = sketch.compact()
     payload = compact.serialize(ds.PyLongsSerDe())
     ds.compact_tuple_sketch.deserialize(payload, ds.PyLongsSerDe())
   
   # kll
   def kll_test():
     """Fill a default-constructed KLL ints sketch, run a quantile and a rank query, then serialize and deserialize."""
     sketch = ds.kll_ints_sketch()
     for value in range(n):
       sketch.update(value)
     sketch.get_quantile(0.5)
     midpoint = int(n / 2)
     sketch.get_rank(midpoint)
   
     payload = sketch.serialize()
     # NOTE(review): the bytes come from kll_ints_sketch but are read back via
     # kll_items_sketch with PyIntsSerDe -- confirm this cross-type round trip is
     # intended rather than kll_ints_sketch.deserialize(payload).
     ds.kll_items_sketch.deserialize(payload, ds.PyIntsSerDe())
   
   
   # ebpps
   def ebpps_test():
     """Fill an EBPPS sketch built with argument 100, iterate it into a list, and round-trip it through bytes."""
     sketch = ds.ebpps_sketch(100)
     for value in range(n):
       sketch.update(value)
     # Materialize the sketch's iteration into a list; the list itself is not used afterwards.
     samples = list(sketch)
   
     payload = sketch.serialize(ds.PyIntsSerDe())
     ds.ebpps_sketch.deserialize(payload, ds.PyIntsSerDe())
   
   
   # density
   def density_test():
     """Feed random 3-dimensional points into a Gaussian-kernel density sketch, then serialize and deserialize it."""
     dim = 3
     num_points = int(n / 256)  # far fewer updates than the other benchmarks
     sketch = ds.density_sketch(k=50, dim=dim, kernel=ds.GaussianKernel())
     for _ in range(num_points):
       point = np.random.randn(dim)
       sketch.update(point)
   
     payload = sketch.serialize()
     ds.density_sketch.deserialize(payload, ds.GaussianKernel())
   
   
   
   # Run every benchmark num_repeats times under timeit and print the value
   # timeit returns for each, in the same order and format as before.
   benchmarks = (
     ('CPC', cpc_test),
     ('Theta', theta_test),
     ('Tuple', tuple_test),
     ('KLL', kll_test),
     ('EBPPS', ebpps_test),
     ('Density', density_test),
   )
   
   for label, bench in benchmarks:
     elapsed = timeit.timeit(lambda: bench(), number = num_repeats)
     print(f'{label}: {elapsed:.4f}')
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to