sxjscience commented on a change in pull request #9747: Add contrib.rand_log_uniform URL: https://github.com/apache/incubator-mxnet/pull/9747#discussion_r167739501
########## File path: python/mxnet/ndarray/contrib.py ########## @@ -18,9 +18,76 @@ # coding: utf-8 # pylint: disable=wildcard-import, unused-wildcard-import """Contrib NDArray API of MXNet.""" +import math +from ..context import current_context +from ..random import uniform try: from .gen_contrib import * except ImportError: pass -__all__ = [] +__all__ = ["rand_log_uniform"] + +def rand_log_uniform(true_classes, num_sampled, range_max, ctx=None): + """Draw random samples from an approximately log-uniform or Zipfian distribution. + + This operation randomly samples *num_sampled* candidates from the range of integers [0, range_max). + The elements of sampled_candidates are drawn with replacement from the base distribution. + + The base distribution for this operator is an approximately log-uniform or Zipfian distribution: + + P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1) + + This sampler is useful when the true classes approximately follow such a distribution. + For example, if the classes represent words in a lexicon sorted in decreasing order of \ + frequency. If your classes are not ordered by decreasing frequency, do not use this op. + + Additionally, it also returns the number of times each of the \ + true classes and the sampled classes is expected to occur. + + Parameters + ---------- + true_classes : NDArray + A 1-D NDArray of the target classes. + num_sampled: int + The number of classes to randomly sample. + range_max: int + The number of possible classes. + ctx : Context + Device context of output. Default is current context. Overridden by + `mu.context` when `mu` is an NDArray. + + Returns + ------- + list of NDArrays + A 1-D `int64` `NDArray` for sampled candidate classes, a 1-D `float64` `NDArray` for \ + the expected count for true classes, and a 1-D `float64` `NDArray` for the \ + expected count for sampled classes. 
+ + Examples + -------- + >>> true_cls = mx.nd.array([3]) + >>> samples, exp_count_true, exp_count_sample = mx.nd.contrib.rand_log_uniform(true_cls, 4, 5) + >>> samples + [1 3 3 3] + <NDArray 4 @cpu(0)> + >>> exp_count_true + [ 0.12453879] + <NDArray 1 @cpu(0)> + >>> exp_count_sample + [ 0.22629439 0.12453879 0.12453879 0.12453879] + <NDArray 4 @cpu(0)> + """ + if ctx is None: + ctx = current_context() + log_range = math.log(range_max + 1) + rand = uniform(0, log_range, shape=(num_sampled,), dtype='float64', ctx=ctx) + # make sure sampled_classes are in the range of [0, range_max) + sampled_classes = (rand.exp() - 1).astype('int64') % range_max + + true_classes = true_classes.as_in_context(ctx).astype('float64') + expected_count_true = ((true_classes + 2.0) / (true_classes + 1.0)).log() / log_range + # cast sampled classes to fp64 to avoid integer division + sampled_cls_fp64 = sampled_classes.astype('float64') + expected_count_sampled = ((sampled_cls_fp64 + 2.0) / (sampled_cls_fp64 + 1.0)).log() / log_range + return [sampled_classes, expected_count_true, expected_count_sampled] Review comment: No need to return a list here. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services