Alexey Kopytov has proposed merging lp:~vadim-tk/sysbench/zipf-distribution 
into lp:sysbench.

Requested reviews:
  Alexey Kopytov (akopytov)

For more details, see:
https://code.launchpad.net/~vadim-tk/sysbench/zipf-distribution/+merge/104775
-- 
https://code.launchpad.net/~vadim-tk/sysbench/zipf-distribution/+merge/104775
Your team sysbench-developers is subscribed to branch lp:sysbench.
=== modified file 'sysbench/sysbench.c'
--- sysbench/sysbench.c	2012-03-21 08:20:02 +0000
+++ sysbench/sysbench.c	2012-05-04 16:56:19 +0000
@@ -81,7 +81,8 @@
 {
   DIST_TYPE_UNIFORM,
   DIST_TYPE_GAUSSIAN,
-  DIST_TYPE_SPECIAL
+  DIST_TYPE_SPECIAL,
+  DIST_TYPE_ZIPF
 } rand_dist_t;
 
 /* Event queue data type for the tx-rate mode */
@@ -99,6 +100,12 @@
 static unsigned int rand_res;
 static int rand_seed; /* optional seed set on the command line */
 
+/* parameters for zipf distribution */
+static double zipf_theta; /* parameter theta */
+static unsigned int zipf_nitems = 0; /* number of items to choose from */
+static double zipf_zetan; /* precalculated ZetaN, based on nitems */
+static double zipf_zeta2; /* precalculated Zeta2, based on theta */
+
 /* Random seed used to generate unique random numbers */
 static unsigned long long rnd_seed;
 /* Mutex to protect random seed */
@@ -131,13 +138,14 @@
   {"help", "print help and exit", SB_ARG_TYPE_FLAG, NULL},
   {"version", "print version and exit", SB_ARG_TYPE_FLAG, "off"},
   {"rand-init", "initialize random number generator", SB_ARG_TYPE_FLAG, "off"},
-  {"rand-type", "random numbers distribution {uniform,gaussian,special}", SB_ARG_TYPE_STRING,
+  {"rand-type", "random numbers distribution {uniform,gaussian,special,zipf}", SB_ARG_TYPE_STRING,
    "special"},
   {"rand-spec-iter", "number of iterations used for numbers generation", SB_ARG_TYPE_INT, "12"},
   {"rand-spec-pct", "percentage of values to be treated as 'special' (for special distribution)",
    SB_ARG_TYPE_INT, "1"},
   {"rand-spec-res", "percentage of 'special' values to use (for special distribution)",
    SB_ARG_TYPE_INT, "75"},
+  {"rand-zipf-t", "parameter theta for zipf distibution", SB_ARG_TYPE_FLOAT, "1.16"},
   {"rand-seed", "seed for random number generator, ignored when 0", SB_ARG_TYPE_INT, "0"},
   {NULL, NULL, SB_ARG_TYPE_NULL, NULL}
 };
@@ -1020,6 +1028,11 @@
     rand_type = DIST_TYPE_SPECIAL;
     rand_func = &sb_rand_special;
   }
+  else if (!strcmp(s, "zipf"))
+  {
+    rand_type = DIST_TYPE_ZIPF;
+    rand_func = &sb_rand_zipf;
+  }
   else
   {
     log_text(LOG_FATAL, "Invalid random numbers distribution: %s.", s);
@@ -1030,6 +1043,9 @@
   rand_pct = sb_get_value_int("rand-spec-pct");
   rand_res = sb_get_value_int("rand-spec-res");
 
+  zipf_theta  = sb_get_value_float("rand-zipf-t");
+  zipf_zeta2 = sb_rand_zeta(2., zipf_theta);
+
   sb_globals.tx_rate = sb_get_value_int("tx-rate");
   sb_globals.report_interval = sb_get_value_int("report-interval");
 
@@ -1226,6 +1242,54 @@
   return a + sum / rand_iter;
 }
 
+/* zipf distribution */
+
+/* Helper: returns the sum of (1/k)^theta for k = 1..n (0.0 when n < 1) */
+double sb_rand_zeta(int n, double theta)
+{
+  double sum = 0.0;
+  int k = 0;
+
+  while (++k <= n)
+    sum += pow(1. / (double) k, theta);
+  return sum;
+}
+
+
+/* Return a Zipf-distributed integer from [a, b], with rank 1 mapped to a.
+   The result is clamped to b: if sb_rnd() returns SB_MAX_RND (or eta is NaN
+   for n == 2) the unclamped formula yields b + 1 or an undefined conversion. */
+int sb_rand_zipf(int a, int b)
+{
+  double alpha, eta, rand_uni, rand_z, scaled;
+  unsigned int n, val;
+
+  n = b - a + 1;
+  /* we pre-cache zipf_zetan, as calculation is slow */
+  if (n != zipf_nitems)
+  {
+    zipf_zetan = sb_rand_zeta(n, zipf_theta);
+    zipf_nitems = n;
+  }
+
+  rand_uni = (double) sb_rnd() / (double) SB_MAX_RND;
+  rand_z = rand_uni * zipf_zetan;
+  if (rand_z < 1.)
+    val = 1;
+  else if (rand_z < (1. + pow(0.5, zipf_theta)))
+    val = (n > 1) ? 2 : 1;
+  else
+  {
+    alpha = 1. / (1. - zipf_theta);
+    eta = (1. - pow(2./n, 1. - zipf_theta)) / (1. - zipf_zeta2/zipf_zetan);
+    scaled = n * pow(eta*rand_uni - eta + 1., alpha);
+    /* a NaN compares false here and therefore also selects n */
+    val = (scaled < n) ? 1 + (unsigned int) scaled : n;
+  }
+
+  return a + val - 1;
+}
+
 /* 'special' distribution */
 
 int sb_rand_special(int a, int b)

=== modified file 'sysbench/sysbench.h'
--- sysbench/sysbench.h	2012-03-18 22:35:16 +0000
+++ sysbench/sysbench.h	2012-05-04 16:56:19 +0000
@@ -227,6 +227,8 @@
 int sb_rand_uniform(int, int);
 int sb_rand_gaussian(int, int);
 int sb_rand_special(int, int);
+int sb_rand_zipf(int, int);
+double sb_rand_zeta(int n, double theta);
 int sb_rand_uniq(int a, int b);
 void sb_rand_str(const char *, char *);
 

_______________________________________________
Mailing list: https://launchpad.net/~sysbench-developers
Post to     : [email protected]
Unsubscribe : https://launchpad.net/~sysbench-developers
More help   : https://help.launchpad.net/ListHelp

Reply via email to