markrmiller commented on a change in pull request #214:
URL: https://github.com/apache/solr/pull/214#discussion_r683080790



##########
File path: solr/benchmark/src/java/org/apache/solr/bench/DocMaker.java
##########
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.bench;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Queue;
+import java.util.Random;
+import java.util.SplittableRandom;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.commons.lang3.Validate;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.TestUtil;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.SolrNamedThreadFactory;
+import org.apache.solr.common.util.SuppressForbidden;
+
+/**
+ * A tool to generate controlled random data for a benchmark. {@link SolrInputDocument}s are created
+ * based on supplied FieldDef definitions.
+ *
+ * <p>You can call getDocument to build and retrieve one {@link SolrInputDocument} at a time, or you
+ * can call {@link #preGenerateDocs} to generate the given number of documents in RAM, and then
+ * retrieve them via {@link #getGeneratedDocsIterator}.
+ */
+public class DocMaker {
+
+  private Queue<SolrInputDocument> docs = new ConcurrentLinkedQueue<>();
+
+  private final Map<String, FieldDef> fields = new HashMap<>();
+
+  private static final AtomicInteger ID = new AtomicInteger();
+
+  private ExecutorService executorService;
+
+  private SplittableRandom threadRandom;
+
+  public DocMaker() {
+    Long seed = Long.getLong("randomSeed");
+    if (seed != null) {
+      threadRandom = new SplittableRandom(seed);
+    } else {
+      threadRandom = new SplittableRandom();
+    }
+  }
+
+  @SuppressForbidden(reason = "This module does not need to deal with logging context")
+  public void preGenerateDocs(int numDocs) throws InterruptedException {
+    MiniClusterState.log("preGenerateDocs " + numDocs + " ...");
+
+    executorService =
+        Executors.newFixedThreadPool(
+            Runtime.getRuntime().availableProcessors() + 1,
+            new SolrNamedThreadFactory("SolrJMH DocMaker"));
+
+    for (int i = 0; i < numDocs; i++) {
+      executorService.submit(
+          () -> {
+            try {
+              SolrInputDocument doc = getDocument();
+              docs.add(doc);
+            } catch (Exception e) {
+              executorService.shutdownNow();
+              throw new RuntimeException(e);
+            }
+          });
+    }
+
+    executorService.shutdown();
+    boolean result = executorService.awaitTermination(10, TimeUnit.MINUTES);
+    if (!result) {
+      throw new RuntimeException("Timeout waiting for doc adds to finish");
+    }
+    MiniClusterState.log(
+        "done preGenerateDocs docs="
+            + docs.size()
+            + " ram="
+            + RamUsageEstimator.humanReadableUnits(RamUsageEstimator.sizeOfObject(docs)));
+
+    if (numDocs != docs.size()) {
+      throw new IllegalStateException("numDocs != " + docs.size());
+    }
+  }
+
+  public Iterator<SolrInputDocument> getGeneratedDocsIterator() {
+    return docs.iterator();
+  }
+
+  public SolrInputDocument getDocument() {
+    SolrInputDocument doc = new SolrInputDocument();
+
+    for (Map.Entry<String, FieldDef> entry : fields.entrySet()) {
+      doc.addField(entry.getKey(), getValue(entry.getValue()));
+    }
+
+    return doc;
+  }
+
+  public void addField(String name, FieldDef.FieldDefBuilder builder) {
+    fields.put(name, builder.build());
+  }
+
+  private Object getValue(FieldDef fieldDef) {
+    switch (fieldDef.getContent()) {
+      case UNIQUE_INT:
+        return ID.incrementAndGet();
+      case INTEGER:
+        if (fieldDef.getMaxCardinality() > 0) {
+          long start = fieldDef.getCardinalityStart();
+          long seed = nextLong(start, start + fieldDef.getMaxCardinality(), threadRandom.split());
+          return nextInt(0, Integer.MAX_VALUE, new SplittableRandom(seed));
+        }
+
+        return ThreadLocalRandom.current().nextInt(Integer.MAX_VALUE);
+      case ALPHEBETIC:
+        return getString(fieldDef, value -> getAlphabeticString(fieldDef));
+      case UNICODE:
+        return getString(fieldDef, value -> getUnicodeString(fieldDef));
+      default:
+        throw new UnsupportedOperationException(
+            "Unsupported content type type=" + fieldDef.getContent());
+    }
+  }
+
+  private String getString(FieldDef fieldDef, StringSupplier supplier) {
+    if (fieldDef.getNumTokens() > 1 || fieldDef.getMaxNumTokens() > 1) {
+      StringBuilder sb =
+          new StringBuilder(
+              fieldDef.getNumTokens()
+                  * (Math.max(fieldDef.getLength(), fieldDef.getMaxLength()) + 1));
+      SplittableRandom random = threadRandom.split();
+      for (int i = 0;
+          i
+              < (fieldDef.getMaxNumTokens() > 1
+                  ? random.nextInt(1, fieldDef.getMaxNumTokens())
+                  : fieldDef.getNumTokens());
+          i++) {
+        if (i > 0) {
+          sb.append(' ');
+        }
+        sb.append(supplier.getString(fieldDef));
+      }
+      return sb.toString();
+    }
+    return supplier.getString(fieldDef);
+  }
+
+  private String getUnicodeString(FieldDef fieldDef) {
+    try {
+      if (fieldDef.getMaxCardinality() > 0) {
+        long start = fieldDef.getCardinalityStart();
+        long seed = nextLong(start, start + fieldDef.getMaxCardinality(), threadRandom.split());
+        if (fieldDef.getLength() > -1) {
+          return TestUtil.randomRealisticUnicodeString(
+              new Random(seed), fieldDef.getLength(), fieldDef.getLength());
+        } else {
+          return TestUtil.randomRealisticUnicodeString(
+              new Random(seed), 1, fieldDef.getMaxLength());
+        }
+      }
+
+      if (fieldDef.getLength() > -1) {
+        return TestUtil.randomRealisticUnicodeString(
+            ThreadLocalRandom.current(), fieldDef.getLength(), fieldDef.getLength());
+      } else {
+        return TestUtil.randomRealisticUnicodeString(
+            ThreadLocalRandom.current(), 1, fieldDef.getMaxLength());

Review comment:
       @sonatype-lift ignore

##########
File path: solr/benchmark/src/java/org/apache/solr/bench/FieldDef.java
##########
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.bench;
+
+import java.util.concurrent.ThreadLocalRandom;
+import org.apache.solr.common.SolrInputDocument;
+
+/**
+ * Provides the definition for a randomly generated field in a {@link SolrInputDocument} created by
+ * a {@link DocMaker}.
+ */
+public class FieldDef {
+  public static final int DEFAULT_MAX_LENGTH = 64;
+
+  private DocMaker.Content content;
+  private int numTokens = 1;
+  private int maxNumTokens = -1;
+  private int maxCardinality = -1;
+  private int maxLength = -1;
+  private int length = -1;
+  private long cardinalityStart;
+
+  public int getNumTokens() {
+    return numTokens;
+  }
+
+  public int getMaxNumTokens() {
+    return maxNumTokens;
+  }
+
+  public int getMaxCardinality() {
+    return maxCardinality;
+  }
+
+  public long getCardinalityStart() {
+    return cardinalityStart;
+  }
+
+  public int getMaxLength() {
+    return maxLength;
+  }
+
+  public int getLength() {
+    return length;
+  }
+
+  public DocMaker.Content getContent() {
+    return content;
+  }
+
+  public static final class FieldDefBuilder {
+
+    private DocMaker.Content content;
+    private int numTokens = 1;
+    private int maxNumTokens = -1;
+    private int maxCardinality = -1;
+    private int maxLength = -1;
+    private int length = -1;
+    private long cardinalityStart;
+
+    private FieldDefBuilder() {}
+
+    public static FieldDefBuilder aFieldDef() {
+      return new FieldDefBuilder();
+    }
+
+    public FieldDefBuilder withContent(DocMaker.Content content) {
+      this.content = content;
+      return this;
+    }
+
+    public FieldDefBuilder withTokenCount(int numTokens) {
+      if (numTokens > 1 && content == DocMaker.Content.UNIQUE_INT) {
+        throw new UnsupportedOperationException(
+            "UNIQUE_INT content type cannot be used with token count > 1");
+      }
+      if (maxCardinality > 1) {
+        throw new UnsupportedOperationException(
+            "tokenCount cannot be used with maxCardinality > 0");
+      }
+      this.numTokens = numTokens;
+      return this;
+    }
+
+    public FieldDefBuilder withMaxTokenCount(int maxNumTokens) {
+      if (numTokens > 1 && content == DocMaker.Content.UNIQUE_INT) {
+        throw new UnsupportedOperationException(
+            "UNIQUE_INT content type cannot be used with token count > 1");
+      }
+      if (maxCardinality > 1) {
+        throw new UnsupportedOperationException(
+            "maxNumTokens cannot be used with maxCardinality > 0");
+      }
+      this.maxNumTokens = maxNumTokens;
+      return this;
+    }
+
+    public FieldDefBuilder withMaxCardinality(int maxCardinality) {
+      if (numTokens > 1) {
+        throw new UnsupportedOperationException(
+            "maxCardinality cannot be used with token count > 1");
+      }
+      this.maxCardinality = maxCardinality;
+      this.cardinalityStart =
+          ThreadLocalRandom.current().nextLong(0, Long.MAX_VALUE - maxCardinality);

Review comment:
       @sonatype-lift ignore




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to