paulirwin commented on issue #933:
URL: https://github.com/apache/lucenenet/issues/933#issuecomment-2018531222

   I created a reproduction of the original benchmark using JMH. Forgive my 
quick and dirty Java port 😄. Please review it and let me know if I made any 
mistakes.
   
   Create a project using the JMH Maven archetype, replace the generated 
benchmark code with the code below, and add the Maven dependencies listed 
after it.
   
   ```java
   package org.example;
   
   import org.apache.commons.io.FileUtils;
   import org.apache.lucene.analysis.Analyzer;
   import org.apache.lucene.analysis.standard.StandardAnalyzer;
   import org.apache.lucene.document.Document;
   import org.apache.lucene.document.Field;
   import org.apache.lucene.document.StringField;
   import org.apache.lucene.document.TextField;
   import org.apache.lucene.facet.FacetField;
   import org.apache.lucene.facet.FacetsConfig;
   import org.apache.lucene.facet.taxonomy.SearcherTaxonomyManager;
   import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
   import org.apache.lucene.index.IndexWriter;
   import org.apache.lucene.index.IndexWriterConfig;
   import org.apache.lucene.index.Term;
   import org.apache.lucene.search.SearcherFactory;
   import org.apache.lucene.store.Directory;
   import org.apache.lucene.store.MMapDirectory;
   import org.apache.lucene.util.Version;
   import org.openjdk.jmh.annotations.*;
   
   import java.io.File;
   import java.io.IOException;
   import java.nio.file.Files;
   import java.nio.file.Paths;
   import java.util.concurrent.ThreadLocalRandom;
   
   @State(Scope.Benchmark)
   @BenchmarkMode(Mode.AverageTime)
   public class MyBenchmark {
   
       private DirectoryTaxonomyWriter taxonomyWriter;
       private IndexWriter indexWriter;
       private Document[] documents;
       private FacetsConfig facetsConfig;
       private SearcherTaxonomyManager searcherManager;
   
       @Setup
       public void setup() {
           if (Files.exists(Paths.get("test_index"))) {
               try {
                   FileUtils.deleteDirectory(new File("test_index"));
               } catch (IOException e) {
                   e.printStackTrace();
               }
           }
   
           if (Files.exists(Paths.get("test_facets"))) {
               try {
                   FileUtils.deleteDirectory(new File("test_facets"));
               } catch (IOException e) {
                   e.printStackTrace();
               }
           }
   
           Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
   
           try {
               Directory luceneDirectory = new MMapDirectory(new 
File("test_index"));
               indexWriter = new IndexWriter(luceneDirectory, new 
IndexWriterConfig(Version.LUCENE_48, analyzer));
               taxonomyWriter = new DirectoryTaxonomyWriter(new 
MMapDirectory(new File("test_facets")));
               searcherManager = new SearcherTaxonomyManager(indexWriter, true, 
new SearcherFactory(), taxonomyWriter);
           } catch (IOException e) {
               throw new RuntimeException(e);
           }
   
           facetsConfig = new FacetsConfig();
           facetsConfig.setRequireDimCount("track_id", true);
   
           documents = new Document[N];
           for (int i = 0; i < N; i++)
           {
               String facet = generateRandomString(5);
               documents[i] = new Document();
               documents[i].add(new StringField("_id", Integer.toString(i), 
Field.Store.YES));
               documents[i].add(new TextField("content", 
generateRandomString(10), Field.Store.YES));
               documents[i].add(new FacetField("track_id", facet));
           }
       }
   
       @Param({"25"})
       public int N;
   
       @Benchmark
       public void indexDocumentsBenchmark() {
           for (int i = 0; i < documents.length; ++i)
           {
               try {
                   Document taxonomyDocument = 
facetsConfig.build(taxonomyWriter, documents[i]);
                   indexWriter.updateDocument(new Term("_id", 
Integer.toString(i)), taxonomyDocument);
                   searcherManager.maybeRefresh(); // maybe refresh causing 
dramatic performance drop on .NET 8.0
               } catch (IOException e) {
                   throw new RuntimeException(e);
               }
           }
       }
   
       private static String generateRandomString(int length)
       {
           // more spaces added on purpose
           final String chars = 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789         ";
           char[] stringChars = new char[length];
   
           for (int i = 0; i < length; i++)
           {
               stringChars[i] = 
chars.charAt(ThreadLocalRandom.current().nextInt(chars.length()));
           }
   
           return new String(stringChars);
       }
   }
   
   ```
   
   additional mvn deps:
   ```xml
   <!-- Lucene 4.8.0 artifacts; the benchmark code targets Version.LUCENE_48. -->
   <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-core</artifactId>
       <version>4.8.0</version>
   </dependency>
   <!-- Needed for the org.apache.lucene.facet.* imports in the benchmark. -->
   <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-facet</artifactId>
       <version>4.8.0</version>
   </dependency>
   <!-- NOTE(review): presumably supplies StandardAnalyzer; confirm. -->
   <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-analyzers-common</artifactId>
       <version>4.8.0</version>
   </dependency>
   <!-- Needed for org.apache.commons.io.FileUtils, used to delete old index directories. -->
   <dependency>
       <groupId>commons-io</groupId>
       <artifactId>commons-io</artifactId>
       <version>2.15.1</version>
   </dependency>
   ```
   
   I ran it against JDK 11, 17, and 21 by passing the path of the JVM to use 
in the `-jvm` arg, e.g. `java -jar ./target/benchmarks.jar -jvm 
/opt/homebrew/Cellar/openjdk/21.0.2/bin/java`
   
   Java 21:
   ```
   Benchmark                            (N)  Mode  Cnt  Score   Error  Units
   MyBenchmark.indexDocumentsBenchmark   25  avgt   25  0.068 ± 0.004   s/op
   ```
   
   Java 17:
   ```
   Benchmark                            (N)  Mode  Cnt  Score   Error  Units
   MyBenchmark.indexDocumentsBenchmark   25  avgt   25  0.062 ± 0.002   s/op
   ```
   
   Java 11:
   ```
   Benchmark                            (N)  Mode  Cnt  Score   Error  Units
   MyBenchmark.indexDocumentsBenchmark   25  avgt   25  0.061 ± 0.001   s/op
   ```
   
   The results are on the order of ~60-70 ms (see [my N=25 results above from 
master](https://github.com/apache/lucenenet/issues/933#issuecomment-2016528219)),
 with no significant difference between Java versions. This implies to me that 
Java behaves similarly to .NET <= 7, so we should consider a 
lucenenet-specific fix in the core library for this. Unless I've done 
something wrong above...


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to