Jan Martin Keil created JENA-2325:
-------------------------------------
Summary: Performance optimization for SPARQL Path Alternative
Key: JENA-2325
URL: https://issues.apache.org/jira/browse/JENA-2325
Project: Apache Jena
Issue Type: Improvement
Reporter: Jan Martin Keil
The execution times of equivalent SPARQL queries using [Path
Alternative|https://www.w3.org/TR/sparql11-query/#rPathAlternative] or
[UNION|https://www.w3.org/TR/sparql11-query/#rGroupOrUnionGraphPattern] differ
considerably. Example:
{code:java}
import org.apache.jena.query.Query;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QueryFactory;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.ResourceFactory;
import org.junit.jupiter.api.Test;
public class PathAlternativeVsUnion {
@Test
public void compare() {
Model model = ModelFactory.createDefaultModel();
int scale = 100000;
Property property1 =
ResourceFactory.createProperty("http://example.org/property1");
Property property2 =
ResourceFactory.createProperty("http://example.org/property2");
for (int i = 0; i < scale; i++) {
model.createResource("http://example.org/r" +
i).addProperty(property1,
ResourceFactory.createResource("http://example.org/r" + (scale + i)));
model.createResource("http://example.org/r" + (scale *
2 + i)).addProperty(property2,
ResourceFactory.createResource("http://example.org/r" + (scale * 3 + i)));
}
Query pathAlternativeQuery = QueryFactory.create("SELECT *
WHERE {?a <" + property1 + ">|<" + property2 + "> ?b}");
Query unionQuery = QueryFactory
.create("SELECT * WHERE {{?a <" + property1 +
"> ?b } UNION { ?a <" + property2 + "> ?b}}");
// warm up pathAlternativeQuery
QueryExecutionFactory.create(pathAlternativeQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
// measure pathAlternativeQuery
long start = System.currentTimeMillis();
QueryExecutionFactory.create(pathAlternativeQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
long finish = System.currentTimeMillis();
System.out.println("Time pathAlternativeQuery: " + (finish -
start) + " ms");
// warm up unionQuery
QueryExecutionFactory.create(unionQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
// measure unionQuery
start = System.currentTimeMillis();
QueryExecutionFactory.create(unionQuery,
model).execSelect().forEachRemaining(qs -> {
/* consume but do nothing */});
finish = System.currentTimeMillis();
System.out.println("Time unionQuery: " + (finish -
start) + " ms");
}
}
{code}
Result:
{code}
Time pathAlternativeQuery: 10940 ms
Time unionQuery: 145 ms
{code}
Would it be possible to add an automatic execution plan optimization for this
case to Apache Jena?
--
This message was sent by Atlassian Jira
(v8.20.7#820007)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]