[GitHub] [kafka] jsancio commented on a change in pull request #10324: MINOR: Add a few more benchmark for the timeline map

2021-03-15 Thread GitBox


jsancio commented on a change in pull request #10324:
URL: https://github.com/apache/kafka/pull/10324#discussion_r594806269



##
File path: 
jmh-benchmarks/src/main/java/org/apache/kafka/jmh/timeline/TimelineHashMapBenchmark.java
##
@@ -87,4 +189,129 @@
 }
 return map;
 }
+
+@Benchmark
+public Map testUpdateEntriesInHashMap(HashMapInput input) 
{
+for (Integer key : input.keys) {
+input.map.put(key, String.valueOf(key));
+}
+return input.map;
+}
+
+@Benchmark
+public scala.collection.Map 
testUpdateEntriesInImmutableMap(ImmutableMapInput input) {
+scala.collection.immutable.HashMap map = input.map;
+for (Integer key : input.keys) {
+map = map.updated(key, String.valueOf(key));
+}
+return map;
+}
+
+@Benchmark
+public Map 
testUpdateEntriesInTimelineMap(TimelineMapInput input) {
+for (Integer key : input.keys) {
+input.map.put(key, String.valueOf(key));
+}
+return input.map;
+}
+
+@Benchmark
+public Map 
testUpdateEntriesWithSnapshots(TimelineMapInput input) {
+long epoch = 0;
+int j = 0;
+for (Integer key : input.keys) {
+if (j > 1_000) {
+input.snapshotRegistry.deleteSnapshotsUpTo(epoch - 10_000);
+input.snapshotRegistry.createSnapshot(epoch);
+j = 0;
+} else {
+j++;
+}
+input.map.put(key, String.valueOf(key));
+epoch++;
+}
+return input.map;
+}
+
+@Benchmark
+public Map testRemoveEntriesInHashMap(HashMapInput input) 
{
+for (Integer key : input.keys) {
+input.map.remove(key);
+}
+return input.map;
+}
+
+@Benchmark
+public scala.collection.Map 
testRemoveEntriesInImmutableMap(ImmutableMapInput input) {
+scala.collection.immutable.HashMap map = input.map;
+for (Integer key : input.keys) {
+map = map.removed(key);
+}
+return map;
+}
+
+@Benchmark
+public Map 
testRemoveEntriesInTimelineMap(TimelineMapInput input) {
+for (Integer key : input.keys) {
+input.map.remove(key);
+}
+return input.map;
+}
+
+@Benchmark
+public Map 
testRemoveEntriesWithSnapshots(TimelineMapInput input) {
+long epoch = 0;
+int j = 0;
+for (Integer key : input.keys) {
+if (j > 1_000) {
+input.snapshotRegistry.deleteSnapshotsUpTo(epoch - 10_000);
+input.snapshotRegistry.createSnapshot(epoch);
+j = 0;
+} else {
+j++;
+}
+input.map.remove(key, String.valueOf(key));
+epoch++;
+}
+return input.map;
+}
+
+@Benchmark
+public int testIterateEntriesInHashMap(HashMapInput input) {
+int count = 0;
+for (HashMap.Entry entry : input.map.entrySet()) {
+count++;
+}
+return count;
+}
+
+@Benchmark
+public int testIterateEntriesInImmutableMap(ImmutableMapInput input) {
+int count = 0;
+scala.collection.Iterator> iterator = 
input.map.iterator();
+while (iterator.hasNext()) {
+iterator.next();
+count++;
+}
+return count;
+}
+
+@Benchmark
+public int testIterateEntriesWithSnapshots(TimelineMapSnapshotInput input) 
{
+int count = 0;
+for (TimelineHashMap.Entry entry : 
input.map.entrySet(input.epoch)) {

Review comment:
   @cmccabe It looks like this benchmark fails with the following 
exception. Any idea on what's the issue?
   ```
   java.lang.ArrayIndexOutOfBoundsException: Index 1024 out of bounds for 
length 1024
   at 
org.apache.kafka.timeline.BaseHashTable.unpackSlot(BaseHashTable.java:210)
   at 
org.apache.kafka.timeline.SnapshottableHashTable$HistoricalIterator.hasNext(SnapshottableHashTable.java:255)
   at 
org.apache.kafka.timeline.TimelineHashMap$EntryIterator.hasNext(TimelineHashMap.java:359)
   at 
org.apache.kafka.jmh.timeline.TimelineHashMapBenchmark.testIterateEntriesWithSnapshots(TimelineHashMapBenchmark.java:303)
   at 
org.apache.kafka.jmh.timeline.jmh_generated.TimelineHashMapBenchmark_testIterateEntriesWithSnapshots_jmhTest.testIterateEntriesWithSnapshots_avgt_jmhStub(TimelineHashMapBenchmark_testIterateEntriesWithSnapshots_jmhTest.java:204)
   at 
org.apache.kafka.jmh.timeline.jmh_generated.TimelineHashMapBenchmark_testIterateEntriesWithSnapshots_jmhTest.testIterateEntriesWithSnapshots_AverageTime(TimelineHashMapBenchmark_testIterateEntriesWithSnapshots_jmhTest.java:162)
   at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
   at 
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(Nati

[GitHub] [kafka] jsancio commented on a change in pull request #10324: MINOR: Add a few more benchmark for the timeline map

2021-03-16 Thread GitBox


jsancio commented on a change in pull request #10324:
URL: https://github.com/apache/kafka/pull/10324#discussion_r595464237



##
File path: 
jmh-benchmarks/src/main/java/org/apache/kafka/jmh/timeline/TimelineHashMapBenchmark.java
##
@@ -44,33 +49,126 @@
 public class TimelineHashMapBenchmark {
 private final static int NUM_ENTRIES = 1_000_000;
 
+@State(Scope.Thread)
+public static class HashMapInput {
+public HashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+map = new HashMap<>(keys.size());
+for (Integer key : keys) {
+map.put(key, String.valueOf(key));
+}
+
+Collections.shuffle(keys);
+}
+}
+
+@State(Scope.Thread)
+public static class ImmutableMapInput {
+scala.collection.immutable.HashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+map = new scala.collection.immutable.HashMap<>();
+for (Integer key : keys) {
+map = map.updated(key, String.valueOf(key));
+}
+
+Collections.shuffle(keys);
+}
+}
+
+@State(Scope.Thread)
+public static class TimelineMapInput {
+public SnapshotRegistry snapshotRegistry;
+public TimelineHashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+snapshotRegistry = new SnapshotRegistry(new LogContext());
+map = new TimelineHashMap<>(snapshotRegistry, keys.size());
+
+for (Integer key : keys) {
+map.put(key, String.valueOf(key));
+}
+
+Collections.shuffle(keys);
+}
+}
+
+@State(Scope.Thread)
+public static class TimelineMapSnapshotInput {
+public SnapshotRegistry snapshotRegistry;
+public TimelineHashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+snapshotRegistry = new SnapshotRegistry(new LogContext());
+map = new TimelineHashMap<>(snapshotRegistry, keys.size());
+
+for (Integer key : keys) {
+map.put(key, String.valueOf(key));
+}
+
+int count = 0;
+for (Integer key : keys) {
+if (count % 1_000 == 0) {
+snapshotRegistry.deleteSnapshotsUpTo(count - 10_000);
+snapshotRegistry.createSnapshot(count);
+}
+map.put(key, String.valueOf(key));
+count++;
+}
+
+Collections.shuffle(keys);
+}
+}
+
+
 @Benchmark
 public Map testAddEntriesInHashMap() {
-HashMap map = new HashMap<>(NUM_ENTRIES);
+HashMap map = new HashMap<>();
 for (int i = 0; i < NUM_ENTRIES; i++) {
 int key = (int) (0xffffffffL & ((i * 2862933555777941757L) + 
3037000493L));
 map.put(key, String.valueOf(key));
 }
+
+return map;
+}
+
+@Benchmark
+public scala.collection.immutable.HashMap 
testAddEntriesInImmutableMap() {
+scala.collection.immutable.HashMap map = new 
scala.collection.immutable.HashMap<>();
+for (int i = 0; i < NUM_ENTRIES; i++) {
+int key = (int) (0xffffffffL & ((i * 2862933555777941757L) + 
3037000493L));
+map = map.updated(key, String.valueOf(key));
+}
+
 return map;
 }
 
 @Benchmark
 public Map testAddEntriesInTimelineMap() {
 SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new 
LogContext());
-TimelineHashMap map =
-new TimelineHashMap<>(snapshotRegistry, NUM_ENTRIES);
+TimelineHashMap map = new 
TimelineHashMap<>(snapshotRegistry, 16);
 for (int i = 0; i < NUM_ENTRIES; i++) {
 int key = (int) (0xffffffffL & ((i * 2862933555777941757L) + 
3037000493L));

Review comment:
   I think this is an algorithm for generating pseudo-random numbers. I 
think it relates to https://nuclear.llnl.gov/CNP/rng/rngman/node4.html.
   
   If this is true, let me fix the expression as it is supposed to multiply by 
`key` not `i`.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] jsancio commented on a change in pull request #10324: MINOR: Add a few more benchmark for the timeline map

2021-03-16 Thread GitBox


jsancio commented on a change in pull request #10324:
URL: https://github.com/apache/kafka/pull/10324#discussion_r595483066



##
File path: 
jmh-benchmarks/src/main/java/org/apache/kafka/jmh/timeline/TimelineHashMapBenchmark.java
##
@@ -44,33 +49,126 @@
 public class TimelineHashMapBenchmark {
 private final static int NUM_ENTRIES = 1_000_000;
 
+@State(Scope.Thread)
+public static class HashMapInput {
+public HashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+map = new HashMap<>(keys.size());
+for (Integer key : keys) {
+map.put(key, String.valueOf(key));
+}
+
+Collections.shuffle(keys);
+}
+}
+
+@State(Scope.Thread)
+public static class ImmutableMapInput {
+scala.collection.immutable.HashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+map = new scala.collection.immutable.HashMap<>();
+for (Integer key : keys) {
+map = map.updated(key, String.valueOf(key));
+}
+
+Collections.shuffle(keys);
+}
+}
+
+@State(Scope.Thread)
+public static class TimelineMapInput {
+public SnapshotRegistry snapshotRegistry;
+public TimelineHashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+snapshotRegistry = new SnapshotRegistry(new LogContext());
+map = new TimelineHashMap<>(snapshotRegistry, keys.size());
+
+for (Integer key : keys) {
+map.put(key, String.valueOf(key));
+}
+
+Collections.shuffle(keys);
+}
+}
+
+@State(Scope.Thread)
+public static class TimelineMapSnapshotInput {
+public SnapshotRegistry snapshotRegistry;
+public TimelineHashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+snapshotRegistry = new SnapshotRegistry(new LogContext());
+map = new TimelineHashMap<>(snapshotRegistry, keys.size());
+
+for (Integer key : keys) {
+map.put(key, String.valueOf(key));
+}
+
+int count = 0;
+for (Integer key : keys) {
+if (count % 1_000 == 0) {
+snapshotRegistry.deleteSnapshotsUpTo(count - 10_000);
+snapshotRegistry.createSnapshot(count);
+}
+map.put(key, String.valueOf(key));
+count++;
+}
+
+Collections.shuffle(keys);
+}
+}
+
+
 @Benchmark
 public Map testAddEntriesInHashMap() {
-HashMap map = new HashMap<>(NUM_ENTRIES);
+HashMap map = new HashMap<>();
 for (int i = 0; i < NUM_ENTRIES; i++) {
 int key = (int) (0xffffffffL & ((i * 2862933555777941757L) + 
3037000493L));
 map.put(key, String.valueOf(key));
 }
+
+return map;
+}
+
+@Benchmark
+public scala.collection.immutable.HashMap 
testAddEntriesInImmutableMap() {
+scala.collection.immutable.HashMap map = new 
scala.collection.immutable.HashMap<>();
+for (int i = 0; i < NUM_ENTRIES; i++) {
+int key = (int) (0xffffffffL & ((i * 2862933555777941757L) + 
3037000493L));
+map = map.updated(key, String.valueOf(key));

Review comment:
   Good catch. It looks like we were mostly measuring the cost of converting an int to a 
String!





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [kafka] jsancio commented on a change in pull request #10324: MINOR: Add a few more benchmark for the timeline map

2021-03-16 Thread GitBox


jsancio commented on a change in pull request #10324:
URL: https://github.com/apache/kafka/pull/10324#discussion_r595670643



##
File path: 
jmh-benchmarks/src/main/java/org/apache/kafka/jmh/timeline/TimelineHashMapBenchmark.java
##
@@ -44,33 +49,126 @@
 public class TimelineHashMapBenchmark {
 private final static int NUM_ENTRIES = 1_000_000;
 
+@State(Scope.Thread)
+public static class HashMapInput {
+public HashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+map = new HashMap<>(keys.size());
+for (Integer key : keys) {
+map.put(key, String.valueOf(key));
+}
+
+Collections.shuffle(keys);
+}
+}
+
+@State(Scope.Thread)
+public static class ImmutableMapInput {
+scala.collection.immutable.HashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+map = new scala.collection.immutable.HashMap<>();
+for (Integer key : keys) {
+map = map.updated(key, String.valueOf(key));
+}
+
+Collections.shuffle(keys);
+}
+}
+
+@State(Scope.Thread)
+public static class TimelineMapInput {
+public SnapshotRegistry snapshotRegistry;
+public TimelineHashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+snapshotRegistry = new SnapshotRegistry(new LogContext());
+map = new TimelineHashMap<>(snapshotRegistry, keys.size());
+
+for (Integer key : keys) {
+map.put(key, String.valueOf(key));
+}
+
+Collections.shuffle(keys);
+}
+}
+
+@State(Scope.Thread)
+public static class TimelineMapSnapshotInput {
+public SnapshotRegistry snapshotRegistry;
+public TimelineHashMap map;
+public final List keys = createKeys(NUM_ENTRIES);
+
+@Setup(Level.Invocation)
+public void setup() {
+snapshotRegistry = new SnapshotRegistry(new LogContext());
+map = new TimelineHashMap<>(snapshotRegistry, keys.size());
+
+for (Integer key : keys) {
+map.put(key, String.valueOf(key));
+}
+
+int count = 0;
+for (Integer key : keys) {
+if (count % 1_000 == 0) {
+snapshotRegistry.deleteSnapshotsUpTo(count - 10_000);
+snapshotRegistry.createSnapshot(count);
+}
+map.put(key, String.valueOf(key));
+count++;
+}
+
+Collections.shuffle(keys);
+}
+}
+
+
 @Benchmark
 public Map testAddEntriesInHashMap() {
-HashMap map = new HashMap<>(NUM_ENTRIES);
+HashMap map = new HashMap<>();
 for (int i = 0; i < NUM_ENTRIES; i++) {
 int key = (int) (0xffffffffL & ((i * 2862933555777941757L) + 
3037000493L));
 map.put(key, String.valueOf(key));
 }
+
+return map;
+}
+
+@Benchmark
+public scala.collection.immutable.HashMap 
testAddEntriesInImmutableMap() {
+scala.collection.immutable.HashMap map = new 
scala.collection.immutable.HashMap<>();
+for (int i = 0; i < NUM_ENTRIES; i++) {
+int key = (int) (0xffffffffL & ((i * 2862933555777941757L) + 
3037000493L));
+map = map.updated(key, String.valueOf(key));
+}
+
 return map;
 }
 
 @Benchmark
 public Map testAddEntriesInTimelineMap() {
 SnapshotRegistry snapshotRegistry = new SnapshotRegistry(new 
LogContext());
-TimelineHashMap map =
-new TimelineHashMap<>(snapshotRegistry, NUM_ENTRIES);
+TimelineHashMap map = new 
TimelineHashMap<>(snapshotRegistry, 16);
 for (int i = 0; i < NUM_ENTRIES; i++) {
 int key = (int) (0xffffffffL & ((i * 2862933555777941757L) + 
3037000493L));

Review comment:
   Done.





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org