jmalkin commented on PR #23:
URL:
https://github.com/apache/datasketches-python/pull/23#issuecomment-1865130666
Created a simple, non-exhaustive test script. It uses Python iteration to
update since we expect that vectorized updates handled in C++ will be much
closer in performance.
Script:
```Python
import datasketches as ds
import numpy as np
import timeit
num_repeats = 100  # how many times timeit runs each test function
n = 1 << 20  # num updates per test run (2 ** 20)
# Every sketch keeps its default k: the test targets the interface, not tuning.
# cpc
def cpc_test():
    """Exercise a CPC sketch: n single-item updates, an estimate, and a round trip.

    The integers 0..n-1 are pushed through the Python-level ``update`` call
    one at a time, then the sketch is queried, serialized and deserialized.
    Nothing is returned; the function exists only to be timed.
    """
    sketch = ds.cpc_sketch()
    for value in range(n):
        sketch.update(value)
    sketch.get_estimate()
    # Round-trip through bytes so both serialization directions are exercised.
    payload = sketch.serialize()
    ds.cpc_sketch.deserialize(payload)
# theta
def theta_test():
    """Exercise a theta sketch: n single-item updates, an estimate, and a round trip.

    The update sketch is compacted before serialization, and the bytes are
    read back through ``compact_theta_sketch.deserialize``. Nothing is
    returned; the function exists only to be timed.
    """
    sketch = ds.update_theta_sketch()
    for value in range(n):
        sketch.update(value)
    sketch.get_estimate()
    compact = sketch.compact()
    payload = compact.serialize()
    ds.compact_theta_sketch.deserialize(payload)
# tuple
def tuple_test():
    """Exercise a tuple sketch: n keyed updates, an estimate, and a round trip.

    Each key ``i`` in 0..n-1 is updated with the summary value ``i * i``
    under an ``AccumulatorPolicy``. The compacted sketch is serialized and
    deserialized with a ``PyLongsSerDe``. Nothing is returned; the function
    exists only to be timed.
    """
    sketch = ds.update_tuple_sketch(ds.AccumulatorPolicy())
    for key in range(n):
        sketch.update(key, key * key)
    sketch.get_estimate()
    compact = sketch.compact()
    # A fresh SerDe instance is built for each direction, as in the original.
    payload = compact.serialize(ds.PyLongsSerDe())
    ds.compact_tuple_sketch.deserialize(payload, ds.PyLongsSerDe())
# kll
def kll_test():
    """Exercise a KLL sketch: n single-item updates, two queries, and a round trip.

    The integers 0..n-1 are fed to a ``kll_ints_sketch``; the median quantile
    and the rank of ``n / 2`` are queried, and the sketch is serialized.
    Nothing is returned; the function exists only to be timed.
    """
    sketch = ds.kll_ints_sketch()
    for value in range(n):
        sketch.update(value)
    sketch.get_quantile(0.5)
    midpoint = n // 2
    sketch.get_rank(midpoint)
    payload = sketch.serialize()
    # NOTE(review): the bytes come from kll_ints_sketch but are read back via
    # kll_items_sketch with a PyIntsSerDe, unlike the other tests, which
    # deserialize with the matching class. Presumably intentional, to exercise
    # the Python SerDe path -- confirm, or switch to kll_ints_sketch.deserialize.
    ds.kll_items_sketch.deserialize(payload, ds.PyIntsSerDe())
# ebpps
def ebpps_test():
    """Exercise an EBPPS sketch: n single-item updates, iteration, and a round trip.

    The sketch is constructed with argument 100, fed the integers 0..n-1,
    iterated into a list, then serialized and deserialized with a
    ``PyIntsSerDe``. Nothing is returned; the function exists only to be timed.
    """
    sketch = ds.ebpps_sketch(100)
    for value in range(n):
        sketch.update(value)
    # The list is never read afterwards; building it is the work being timed.
    samples = list(sketch)
    payload = sketch.serialize(ds.PyIntsSerDe())
    ds.ebpps_sketch.deserialize(payload, ds.PyIntsSerDe())
# density
def density_test():
    """Exercise a density sketch: n / 256 vector updates and a round trip.

    A sketch with k=50 over 3-dimensional points and a Gaussian kernel is
    updated with freshly drawn ``np.random.randn`` vectors, then serialized
    and deserialized. Nothing is returned; the function exists only to be
    timed.
    """
    dim = 3
    sketch = ds.density_sketch(k=50, dim=dim, kernel=ds.GaussianKernel())
    # Far fewer updates than the other tests: n / 256 rather than n.
    for _ in range(n // 256):
        point = np.random.randn(dim)
        sketch.update(point)
    payload = sketch.serialize()
    ds.density_sketch.deserialize(payload, ds.GaussianKernel())
# Run every test num_repeats times and print the total elapsed seconds for
# each, in the same order and format as one timeit/print pair per sketch.
for label, scenario in (
    ('CPC', cpc_test),
    ('Theta', theta_test),
    ('Tuple', tuple_test),
    ('KLL', kll_test),
    ('EBPPS', ebpps_test),
    ('Density', density_test),
):
    elapsed = timeit.timeit(scenario, number=num_repeats)
    print(f'{label}: {elapsed:.4f}')
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]