Repository: hbase Updated Branches: refs/heads/branch-2 0d0c33040 -> 9329a18c2
HBASE-17849 PE tool random read is not totally random (Ram) Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/9329a18c Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/9329a18c Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/9329a18c Branch: refs/heads/branch-2 Commit: 9329a18c2d6942cb32fa2b30211abedd6bab014a Parents: 0d0c330 Author: Ramkrishna <ramkrishna.s.vasude...@intel.com> Authored: Wed Jun 7 11:31:30 2017 +0530 Committer: Ramkrishna <ramkrishna.s.vasude...@intel.com> Committed: Wed Jun 7 11:31:30 2017 +0530 ---------------------------------------------------------------------- .../hadoop/hbase/PerformanceEvaluation.java | 36 +++++++++++++------- .../hadoop/hbase/TestPerformanceEvaluation.java | 32 +++++++++++++++++ 2 files changed, 56 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/9329a18c/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java index d0b7319..2c5cb65 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java @@ -124,6 +124,8 @@ import com.codahale.metrics.UniformReservoir; */ @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) public class PerformanceEvaluation extends Configured implements Tool { + static final String RANDOM_SEEK_SCAN = "randomSeekScan"; + static final String RANDOM_READ = "randomRead"; private static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName()); private static final ObjectMapper MAPPER = new ObjectMapper(); static { @@ -151,9 +153,9 @@ public class PerformanceEvaluation extends Configured implements Tool { private static final Path PERF_EVAL_DIR = new Path("performance_evaluation"); static { - addCommandDescriptor(RandomReadTest.class, "randomRead", + addCommandDescriptor(RandomReadTest.class, RANDOM_READ, "Run random read test"); - addCommandDescriptor(RandomSeekScanTest.class, "randomSeekScan", + addCommandDescriptor(RandomSeekScanTest.class, RANDOM_SEEK_SCAN, "Run random seek and scan 100 test"); addCommandDescriptor(RandomScanWithRange10Test.class, "scanRange10", "Run random seek scan with both start and stop row (max 10 rows)"); @@ -1769,7 +1771,11 @@ public class PerformanceEvaluation extends Configured implements Tool { } static byte [] getRandomRow(final Random random, final int totalRows) { - return format(random.nextInt(Integer.MAX_VALUE) % totalRows); + return format(generateRandomRow(random, totalRows)); + } + + static int generateRandomRow(final Random random, final int totalRows) { + return random.nextInt(Integer.MAX_VALUE) % totalRows; } static RunResult runOneClient(final Class<? extends Test> cmd, Configuration conf, Connection con, @@ -1872,9 +1878,15 @@ public class PerformanceEvaluation extends Configured implements Tool { System.err.println("Table Creation / Write Tests:"); System.err.println(" table Alternate table name. Default: 'TestTable'"); System.err.println(" rows Rows each client runs. Default: " - + DEFAULT_OPTS.getPerClientRunRows()); - System.err.println(" size Total size in GiB. Mutually exclusive with --rows. " + - "Default: 1.0."); + + DEFAULT_OPTS.getPerClientRunRows() + + ". In case of randomReads and randomSeekScans this could" + + " be specified along with --size to specify the number of rows to be scanned within" + + " the total range specified by the size."); + System.err.println( + " size Total size in GiB. Mutually exclusive with --rows for writes and scans" + + ". But for randomReads and randomSeekScans when you use size with --rows you could" + + " use size to specify the end range and --rows" + + " specifies the number of rows within that range. " + "Default: 1.0."); System.err.println(" compress Compression type to use (GZ, LZO, ...). Default: 'NONE'"); System.err.println(" flushCommits Used to determine if the test should flush the table. " + "Default: false"); @@ -2193,11 +2205,6 @@ public class PerformanceEvaluation extends Configured implements Tool { } catch (NoSuchElementException | NumberFormatException e) { throw new IllegalArgumentException("Command " + cmd + " does not have threads number", e); } - if (opts.size != DEFAULT_OPTS.size && - opts.perClientRunRows != DEFAULT_OPTS.perClientRunRows) { - throw new IllegalArgumentException(rows + " and " + size + - " are mutually exclusive options"); - } opts = calculateRowsAndSize(opts); break; } else { @@ -2214,7 +2221,12 @@ public class PerformanceEvaluation extends Configured implements Tool { static TestOptions calculateRowsAndSize(final TestOptions opts) { int rowsPerGB = getRowsPerGB(opts); - if (opts.size != DEFAULT_OPTS.size) { + if ((opts.getCmdName() != null + && (opts.getCmdName().equals(RANDOM_READ) || opts.getCmdName().equals(RANDOM_SEEK_SCAN))) + && opts.size != DEFAULT_OPTS.size + && opts.perClientRunRows != DEFAULT_OPTS.perClientRunRows) { + opts.totalRows = (int) opts.size * rowsPerGB; + } else if (opts.size != DEFAULT_OPTS.size) { // total size in GB specified opts.totalRows = (int) opts.size * rowsPerGB; opts.perClientRunRows = opts.totalRows / opts.numClientThreads; http://git-wip-us.apache.org/repos/asf/hbase/blob/9329a18c/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java index cb7fdad..86a3d3f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPerformanceEvaluation.java @@ -27,6 +27,7 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.util.NoSuchElementException; import java.util.Queue; +import java.util.Random; import java.util.LinkedList; import org.apache.hadoop.fs.FSDataInputStream; @@ -119,6 +120,37 @@ public class TestPerformanceEvaluation { } @Test + public void testRandomReadCalculation() { + TestOptions opts = new PerformanceEvaluation.TestOptions(); + opts = PerformanceEvaluation.calculateRowsAndSize(opts); + int rows = opts.getPerClientRunRows(); + // Default row count + final int defaultPerClientRunRows = 1024 * 1024; + assertEquals(defaultPerClientRunRows, rows); + // If size is 2G, then twice the row count. + opts.setSize(2.0f); + opts.setPerClientRunRows(1000); + opts.setCmdName(PerformanceEvaluation.RANDOM_READ); + opts = PerformanceEvaluation.calculateRowsAndSize(opts); + assertEquals(1000, opts.getPerClientRunRows()); + // If two clients, then they get half the rows each. + opts.setNumClientThreads(2); + opts = PerformanceEvaluation.calculateRowsAndSize(opts); + assertEquals(1000, opts.getPerClientRunRows()); + Random random = new Random(); + // assuming we will get one before this loop expires + boolean foundValue = false; + for (int i = 0; i < 10000000; i++) { + int randomRow = PerformanceEvaluation.generateRandomRow(random, opts.totalRows); + if (randomRow > 1000) { + foundValue = true; + break; + } + } + assertTrue("We need to get a value more than 1000", foundValue); + } + + @Test public void testZipfian() throws NoSuchMethodException, SecurityException, InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException {