paulirwin commented on issue #933:
URL: https://github.com/apache/lucenenet/issues/933#issuecomment-2018531222
I created a reproduction of the original benchmark using jmh. Forgive my
quick and dirty Java port 😄 Please review and let me know if I made any
mistakes.
Make a project using the JMH Maven archetype, replace the benchmark code with
the code below, and add the Maven dependencies listed after it:
```java
package org.example;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.SearcherTaxonomyManager;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.Version;
import org.openjdk.jmh.annotations.*;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.concurrent.ThreadLocalRandom;
@State(Scope.Benchmark)
@BenchmarkMode(Mode.AverageTime)
public class MyBenchmark {
private DirectoryTaxonomyWriter taxonomyWriter;
private IndexWriter indexWriter;
private Document[] documents;
private FacetsConfig facetsConfig;
private SearcherTaxonomyManager searcherManager;
@Setup
public void setup() {
if (Files.exists(Paths.get("test_index"))) {
try {
FileUtils.deleteDirectory(new File("test_index"));
} catch (IOException e) {
e.printStackTrace();
}
}
if (Files.exists(Paths.get("test_facets"))) {
try {
FileUtils.deleteDirectory(new File("test_facets"));
} catch (IOException e) {
e.printStackTrace();
}
}
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
try {
Directory luceneDirectory = new MMapDirectory(new
File("test_index"));
indexWriter = new IndexWriter(luceneDirectory, new
IndexWriterConfig(Version.LUCENE_48, analyzer));
taxonomyWriter = new DirectoryTaxonomyWriter(new
MMapDirectory(new File("test_facets")));
searcherManager = new SearcherTaxonomyManager(indexWriter, true,
new SearcherFactory(), taxonomyWriter);
} catch (IOException e) {
throw new RuntimeException(e);
}
facetsConfig = new FacetsConfig();
facetsConfig.setRequireDimCount("track_id", true);
documents = new Document[N];
for (int i = 0; i < N; i++)
{
String facet = generateRandomString(5);
documents[i] = new Document();
documents[i].add(new StringField("_id", Integer.toString(i),
Field.Store.YES));
documents[i].add(new TextField("content",
generateRandomString(10), Field.Store.YES));
documents[i].add(new FacetField("track_id", facet));
}
}
@Param({"25"})
public int N;
@Benchmark
public void indexDocumentsBenchmark() {
for (int i = 0; i < documents.length; ++i)
{
try {
Document taxonomyDocument =
facetsConfig.build(taxonomyWriter, documents[i]);
indexWriter.updateDocument(new Term("_id",
Integer.toString(i)), taxonomyDocument);
searcherManager.maybeRefresh(); // maybe refresh causing
dramatic performance drop on .NET 8.0
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
private static String generateRandomString(int length)
{
// more spaces added on purpose
final String chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 ";
char[] stringChars = new char[length];
for (int i = 0; i < length; i++)
{
stringChars[i] =
chars.charAt(ThreadLocalRandom.current().nextInt(chars.length()));
}
return new String(stringChars);
}
}
```
additional mvn deps:
```xml
<!-- Lucene core: supplies the org.apache.lucene.index/search/store/document/util imports -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.8.0</version>
</dependency>
<!-- Lucene facets: supplies the org.apache.lucene.facet imports (FacetField, FacetsConfig, taxonomy) -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-facet</artifactId>
<version>4.8.0</version>
</dependency>
<!-- Lucene analyzers: supplies org.apache.lucene.analysis.standard.StandardAnalyzer -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.8.0</version>
</dependency>
<!-- Commons IO: supplies org.apache.commons.io.FileUtils, used to delete the old index directories -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.15.1</version>
</dependency>
```
I ran it against JDK 11, 17, and 21 by passing the path of the JVM to use
via the `-jvm` arg, e.g. `java -jar ./target/benchmarks.jar -jvm
/opt/homebrew/Cellar/openjdk/21.0.2/bin/java`
Java 21:
```
Benchmark (N) Mode Cnt Score Error Units
MyBenchmark.indexDocumentsBenchmark 25 avgt 25 0.068 ± 0.004 s/op
```
Java 17:
```
Benchmark (N) Mode Cnt Score Error Units
MyBenchmark.indexDocumentsBenchmark 25 avgt 25 0.062 ± 0.002 s/op
```
Java 11:
```
Benchmark (N) Mode Cnt Score Error Units
MyBenchmark.indexDocumentsBenchmark 25 avgt 25 0.061 ± 0.001 s/op
```
The results on the order of ~60-70ms (see [my N=25 results above from
master](https://github.com/apache/lucenenet/issues/933#issuecomment-2016528219)),
with no significant difference between Java versions, imply to me that Java
behaves similarly to .NET <= 7, so we should consider a core library fix for
this that is lucenenet specific. Unless I've done something wrong above...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]