This is an automated email from the ASF dual-hosted git repository. dkuppitz pushed a commit to branch TINKERPOP-1882 in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
commit 5c9326ff6c9626603a0c29d51d2ad603874c2bde Author: Daniel Kuppitz <daniel_kupp...@hotmail.com> AuthorDate: Sat Jan 12 19:50:28 2019 -0700 TINKERPOP-1882 Implemented `EarlyLimitStrategy`. --- CHANGELOG.asciidoc | 1 + docs/src/upgrade/release-3.3.x.asciidoc | 12 ++ .../tinkerpop/gremlin/jsr223/CoreImports.java | 2 + .../process/traversal/TraversalStrategies.java | 2 + .../strategy/optimization/EarlyLimitStrategy.java | 143 +++++++++++++++++++++ .../strategy/optimization/LazyBarrierStrategy.java | 3 +- .../structure/io/graphson/GraphSONModule.java | 5 + .../gremlin/structure/io/gryo/GryoVersion.java | 7 +- .../optimization/EarlyLimitStrategyTest.java | 103 +++++++++++++++ .../Strategy/Optimization/EarlyLimitStrategy.cs | 32 +++++ .../jython/gremlin_python/process/strategies.py | 3 + .../gremlin/process/ProcessComputerSuite.java | 4 +- .../gremlin/process/ProcessStandardSuite.java | 4 +- .../EarlyLimitStrategyProcessTest.java | 101 +++++++++++++++ .../tinkergraph/structure/TinkerGraphPlayTest.java | 97 ++------------ 15 files changed, 431 insertions(+), 88 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 6d8f4e8..4d14176 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -29,6 +29,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima * Added `globalFunctionCacheEnabled` override to `SessionOpProcessor` configuration. * Fixed a concurrency issue in `TraverserSet` * Fixed a bug in `InlineFilterStrategy` that mixed up and's and or's when folding merging conditions together. +* Implemented `EarlyLimitStrategy` which is supposed to significantly reduce backend operations for queries that use `range()`. [[release-3-3-5]] === TinkerPop 3.3.5 (Release Date: January 2, 2019) diff --git a/docs/src/upgrade/release-3.3.x.asciidoc b/docs/src/upgrade/release-3.3.x.asciidoc index a7f3768..236ec5b 100644 --- a/docs/src/upgrade/release-3.3.x.asciidoc +++ b/docs/src/upgrade/release-3.3.x.asciidoc @@ -91,6 +91,18 @@ now fully configurable via the `GroovyCompilerGremlinPlugin.classMapCacheSpecifi See: link:https://issues.apache.org/jira/browse/TINKERPOP-2038[TINKERPOP-2038], link:http://tinkerpop.apache.org/docs/3.3.5/reference/#gremlin-server-cache[Reference Documentation - Cache Management] +==== RangeStep Optimizing Strategy + +A new strategy named `EarlyLimitStrategy` was added. The strategy will try to find a better spot for any `RangeStep`, +which is as early as possible in the traversal. If possible it will also merge multiple `RangeStep`s into a single one +by recalculating the range for the first step and removing the second. If it turns out that the merge of two steps won't +produce a valid range (an empty result), then the `EarlyLimitStrategy` will remove the `RangeStep`s and insert a `NoneStep` +instead. + +This strategy is particularly useful when a provider implementation generates the queries to the underlying database. By +making sure that the ranges are applied as early as possible, we can ensure that the underlying database is only asked +for the least amount of data necessary to continue the traversal evaluation. + === Upgrading for Providers ==== Graph Database Providers diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/jsr223/CoreImports.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/jsr223/CoreImports.java index 712cf01..576d0de 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/jsr223/CoreImports.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/jsr223/CoreImports.java @@ -76,6 +76,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.Subgra import org.apache.tinkerpop.gremlin.process.traversal.strategy.finalization.MatchAlgorithmStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.finalization.ProfileStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.AdjacentToIncidentStrategy; +import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.EarlyLimitStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.FilterRankingStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IdentityRemovalStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IncidentToAdjacentStrategy; @@ -232,6 +233,7 @@ public final class CoreImports { CLASS_IMPORTS.add(IdentityRemovalStrategy.class); CLASS_IMPORTS.add(IncidentToAdjacentStrategy.class); CLASS_IMPORTS.add(MatchPredicateStrategy.class); + CLASS_IMPORTS.add(EarlyLimitStrategy.class); CLASS_IMPORTS.add(OrderLimitStrategy.class); CLASS_IMPORTS.add(PathProcessorStrategy.class); CLASS_IMPORTS.add(CountStrategy.class); diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalStrategies.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalStrategies.java index 5e93345..e802304 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalStrategies.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalStrategies.java @@ -26,6 +26,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.Connec import org.apache.tinkerpop.gremlin.process.traversal.strategy.finalization.ProfileStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.AdjacentToIncidentStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.CountStrategy; +import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.EarlyLimitStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.FilterRankingStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IncidentToAdjacentStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.InlineFilterStrategy; @@ -213,6 +214,7 @@ public interface TraversalStrategies extends Serializable, Cloneable { final TraversalStrategies graphStrategies = new DefaultTraversalStrategies(); graphStrategies.addStrategies( ConnectiveStrategy.instance(), + EarlyLimitStrategy.instance(), InlineFilterStrategy.instance(), IncidentToAdjacentStrategy.instance(), AdjacentToIncidentStrategy.instance(), diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategy.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategy.java new file mode 100644 index 0000000..64e0415 --- /dev/null +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategy.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization; + +import org.apache.tinkerpop.gremlin.process.traversal.Step; +import org.apache.tinkerpop.gremlin.process.traversal.Traversal; +import org.apache.tinkerpop.gremlin.process.traversal.TraversalStrategy; +import org.apache.tinkerpop.gremlin.process.traversal.step.SideEffectCapable; +import org.apache.tinkerpop.gremlin.process.traversal.step.filter.NoneStep; +import org.apache.tinkerpop.gremlin.process.traversal.step.filter.RangeGlobalStep; +import org.apache.tinkerpop.gremlin.process.traversal.step.map.MapStep; +import org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.ProfileSideEffectStep; +import org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.SideEffectCapStep; +import org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.SideEffectStep; +import org.apache.tinkerpop.gremlin.process.traversal.strategy.AbstractTraversalStrategy; +import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper; + +import java.util.List; + +/** + * This strategy looks for {@link RangeGlobalStep}'s that can be moved further left in the traversal and thus be applied + * applied earlier. It will also try to merge multiple {@link RangeGlobalStep}'s into one. + * If the logical consequence of one or multiple {@link RangeGlobalStep}'s is an empty result, the strategy will remove + * as many steps as possible and add a {@link NoneStep} instead. + * + * @author Daniel Kuppitz (http://gremlin.guru) + * @example <pre> + * __.out().valueMap().limit(5) // becomes __.out().limit(5).valueMap() + * __.outE().range(2, 10).valueMap().limit(5) // becomes __.outE().range(2, 7).valueMap() + * __.outE().limit(5).valueMap().range(2, -1) // becomes __.outE().range(2, 5).valueMap() + * __.outE().limit(5).valueMap().range(5, 10) // becomes __.outE().none() + * __.outE().limit(5).valueMap().range(5, 10).cap("a") // becomes __.outE().none().cap("a") + * </pre> + */ +public final class EarlyLimitStrategy + extends AbstractTraversalStrategy<TraversalStrategy.OptimizationStrategy> + implements TraversalStrategy.OptimizationStrategy { + + private static final EarlyLimitStrategy INSTANCE = new EarlyLimitStrategy(); + + private EarlyLimitStrategy() { + } + + @SuppressWarnings("unchecked") + @Override + public void apply(final Traversal.Admin<?, ?> traversal) { + + final List<Step> steps = traversal.getSteps(); + Step insertAfter = null; + boolean merge = false; + for (int i = 0, j = steps.size(); i < j; i++) { + final Step step = steps.get(i); + if (step instanceof RangeGlobalStep) { + if (insertAfter != null) { + // RangeStep was found, move it to the earliest possible step or merge it with a + // previous RangeStep; keep the RangeStep's labels at its preceding step + TraversalHelper.copyLabels(step, step.getPreviousStep(), true); + insertAfter = moveRangeStep((RangeGlobalStep) step, insertAfter, traversal, merge); + if (insertAfter instanceof NoneStep) { + // any step besides a SideEffectCapStep after a NoneStep would be pointless + final int noneStepIndex = TraversalHelper.stepIndex(insertAfter, traversal); + for (i = j - 2; i > noneStepIndex; i--) { + if (!(steps.get(i) instanceof SideEffectCapStep) && !(steps.get(i) instanceof ProfileSideEffectStep)) { + traversal.removeStep(i); + } + } + break; + } + j = steps.size(); + } + } else if (!(step instanceof MapStep || step instanceof SideEffectStep)) { + // remember the last step that can be used to move any RangeStep to + // any RangeStep can be moved in front of all its preceding map- and sideEffect-steps + insertAfter = step; + merge = true; + } else if (step instanceof SideEffectCapable) { + // if there's any SideEffectCapable step along the way, RangeSteps cannot be merged as this could + // change the final traversal's internal memory + merge = false; + } + } + } + + @SuppressWarnings("unchecked") + private Step moveRangeStep(final RangeGlobalStep step, final Step insertAfter, final Traversal.Admin<?, ?> traversal, + final boolean merge) { + final Step rangeStep; + boolean remove = true; + if (insertAfter instanceof RangeGlobalStep) { + // there's a previous RangeStep which might affect the effective range of the current RangeStep + // recompute this step's low and high; if the result is still a valid range, create a new RangeStep, + // otherwise a NoneStep + final RangeGlobalStep other = (RangeGlobalStep) insertAfter; + final long low = other.getLowRange() + step.getLowRange(); + if (other.getHighRange() == -1L) { + rangeStep = new RangeGlobalStep(traversal, low, other.getLowRange() + step.getHighRange()); + } else if (step.getHighRange() == -1L) { + final long high = other.getHighRange() - other.getLowRange() - step.getLowRange() + low; + if (low < high) { + rangeStep = new RangeGlobalStep(traversal, low, high); + } else { + rangeStep = new NoneStep<>(traversal); + } + } else { + final long high = Math.min(other.getLowRange() + step.getHighRange(), other.getHighRange()); + rangeStep = high > low ? new RangeGlobalStep(traversal, low, high) : new NoneStep<>(traversal); + } + remove = merge; + TraversalHelper.replaceStep(merge ? insertAfter : step, rangeStep, traversal); + } else if (!step.getPreviousStep().equals(insertAfter, true)) { + // move the RangeStep behind the earliest possible map- or sideEffect-step + rangeStep = step.clone(); + TraversalHelper.insertAfterStep(rangeStep, insertAfter, traversal); + } else { + // no change if the earliest possible step to insert the RangeStep after is + // already the current step's previous step + return step; + } + if (remove) traversal.removeStep(step); + return rangeStep; + } + + public static EarlyLimitStrategy instance() { + return INSTANCE; + } +} diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/LazyBarrierStrategy.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/LazyBarrierStrategy.java index 927d6c9..1313d3e 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/LazyBarrierStrategy.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/LazyBarrierStrategy.java @@ -54,7 +54,8 @@ public final class LazyBarrierStrategy extends AbstractTraversalStrategy<Travers AdjacentToIncidentStrategy.class, FilterRankingStrategy.class, InlineFilterStrategy.class, - MatchPredicateStrategy.class)); + MatchPredicateStrategy.class, + EarlyLimitStrategy.class)); private static final int BIG_START_SIZE = 5; protected static final int MAX_BARRIER_SIZE = 2500; diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/graphson/GraphSONModule.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/graphson/GraphSONModule.java index 876d6d2..1078a6b 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/graphson/GraphSONModule.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/graphson/GraphSONModule.java @@ -41,6 +41,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.Partit import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.SubgraphStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.finalization.MatchAlgorithmStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.AdjacentToIncidentStrategy; +import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.EarlyLimitStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.FilterRankingStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IdentityRemovalStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IncidentToAdjacentStrategy; @@ -181,6 +182,7 @@ abstract class GraphSONModule extends TinkerPopJacksonModule { LambdaRestrictionStrategy.class, ReadOnlyStrategy.class, StandardVerificationStrategy.class, + EarlyLimitStrategy.class, // GraphFilterStrategy.class, VertexProgramStrategy.class @@ -297,6 +299,7 @@ abstract class GraphSONModule extends TinkerPopJacksonModule { LambdaRestrictionStrategy.class, ReadOnlyStrategy.class, StandardVerificationStrategy.class, + EarlyLimitStrategy.class, // GraphFilterStrategy.class, VertexProgramStrategy.class @@ -396,6 +399,7 @@ abstract class GraphSONModule extends TinkerPopJacksonModule { LambdaRestrictionStrategy.class, ReadOnlyStrategy.class, StandardVerificationStrategy.class, + EarlyLimitStrategy.class, // GraphFilterStrategy.class, VertexProgramStrategy.class @@ -504,6 +508,7 @@ abstract class GraphSONModule extends TinkerPopJacksonModule { LambdaRestrictionStrategy.class, ReadOnlyStrategy.class, StandardVerificationStrategy.class, + EarlyLimitStrategy.class, // GraphFilterStrategy.class, VertexProgramStrategy.class diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoVersion.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoVersion.java index 459c04f..d88a8c0 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoVersion.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoVersion.java @@ -51,6 +51,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.Partit import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.SubgraphStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.finalization.MatchAlgorithmStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.AdjacentToIncidentStrategy; +import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.EarlyLimitStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.FilterRankingStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IdentityRemovalStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IncidentToAdjacentStrategy; @@ -235,7 +236,7 @@ public enum GryoVersion { add(GryoTypeReg.of(Collections.singleton(null).getClass(), 54)); add(GryoTypeReg.of(Collections.singletonList(null).getClass(), 24)); add(GryoTypeReg.of(Collections.singletonMap(null, null).getClass(), 23)); - add(GryoTypeReg.of(Types.COLLECTIONS_SYNCHRONIZED_MAP, 185, new UtilSerializers.SynchronizedMapSerializer())); // ***LAST ID*** + add(GryoTypeReg.of(Types.COLLECTIONS_SYNCHRONIZED_MAP, 185, new UtilSerializers.SynchronizedMapSerializer())); add(GryoTypeReg.of(Contains.class, 49)); add(GryoTypeReg.of(Currency.class, 40)); add(GryoTypeReg.of(Date.class, 38)); @@ -335,6 +336,7 @@ public enum GryoVersion { add(GryoTypeReg.of(GraphFilterStrategy.class, 157)); add(GryoTypeReg.of(LambdaRestrictionStrategy.class, 158)); add(GryoTypeReg.of(ReadOnlyStrategy.class, 159)); + add(GryoTypeReg.of(EarlyLimitStrategy.class, 186)); // ***LAST ID*** add(GryoTypeReg.of(MatchStep.CountMatchAlgorithm.class, 160)); add(GryoTypeReg.of(MatchStep.GreedyMatchAlgorithm.class, 164)); @@ -412,7 +414,7 @@ public enum GryoVersion { add(GryoTypeReg.of(Collections.singleton(null).getClass(), 54)); add(GryoTypeReg.of(Collections.singletonList(null).getClass(), 24)); add(GryoTypeReg.of(Collections.singletonMap(null, null).getClass(), 23)); - add(GryoTypeReg.of(Types.COLLECTIONS_SYNCHRONIZED_MAP, 185, new UtilSerializers.SynchronizedMapSerializer())); // ***LAST ID*** + add(GryoTypeReg.of(Types.COLLECTIONS_SYNCHRONIZED_MAP, 185, new UtilSerializers.SynchronizedMapSerializer())); add(GryoTypeReg.of(Contains.class, 49)); add(GryoTypeReg.of(Currency.class, 40)); add(GryoTypeReg.of(Date.class, 38)); @@ -553,6 +555,7 @@ public enum GryoVersion { add(GryoTypeReg.of(GraphFilterStrategy.class, 157)); add(GryoTypeReg.of(LambdaRestrictionStrategy.class, 158)); add(GryoTypeReg.of(ReadOnlyStrategy.class, 159)); + add(GryoTypeReg.of(EarlyLimitStrategy.class, 186)); // ***LAST ID*** add(GryoTypeReg.of(MatchStep.CountMatchAlgorithm.class, 160)); add(GryoTypeReg.of(MatchStep.GreedyMatchAlgorithm.class, 167)); // skip 171, 172 to sync with tp33 diff --git a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategyTest.java b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategyTest.java new file mode 100644 index 0000000..d2b1c0f --- /dev/null +++ b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategyTest.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization; + +import org.apache.tinkerpop.gremlin.process.traversal.Traversal; +import org.apache.tinkerpop.gremlin.process.traversal.TraversalStrategies; +import org.apache.tinkerpop.gremlin.process.traversal.TraversalStrategy; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.process.traversal.strategy.finalization.ProfileStrategy; +import org.apache.tinkerpop.gremlin.process.traversal.util.DefaultTraversalStrategies; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; + +import static org.junit.Assert.assertEquals; + +/** + * @author Daniel Kuppitz (http://gremlin.guru) + */ +@RunWith(Parameterized.class) +public class EarlyLimitStrategyTest { + + @Parameterized.Parameter() + public Traversal original; + + @Parameterized.Parameter(value = 1) + public Traversal optimized; + + @Parameterized.Parameter(value = 2) + public Collection<TraversalStrategy> otherStrategies; + + @Test + public void doTest() { + final TraversalStrategies strategies = new DefaultTraversalStrategies(); + strategies.addStrategies(EarlyLimitStrategy.instance()); + for (final TraversalStrategy strategy : this.otherStrategies) { + strategies.addStrategies(strategy); + if (strategy instanceof ProfileStrategy) { + final TraversalStrategies os = new DefaultTraversalStrategies(); + os.addStrategies(ProfileStrategy.instance()); + this.optimized.asAdmin().setStrategies(os); + this.optimized.asAdmin().applyStrategies(); + } + } + this.original.asAdmin().setStrategies(strategies); + this.original.asAdmin().applyStrategies(); + assertEquals(this.optimized, this.original); + } + + @Parameterized.Parameters(name = "{0}") + public static Iterable<Object> generateTestParameters() { + return Arrays.asList(new Object[][]{ + {__.out().valueMap().limit(1), __.out().limit(1).valueMap(), Collections.emptyList()}, + {__.out().limit(5).valueMap().range(5, 10), __.start().out().none(), Collections.emptyList()}, + {__.out().limit(5).valueMap().range(6, 10), __.start().out().none(), Collections.emptyList()}, + {__.V().out().valueMap().limit(1), __.V().out().limit(1).valueMap(), Collections.singleton(LazyBarrierStrategy.instance())}, + {__.out().out().limit(1).in().in(), __.out().out().limit(1).in().barrier(LazyBarrierStrategy.MAX_BARRIER_SIZE).in(), Collections.singleton(LazyBarrierStrategy.instance())}, + {__.out().has("name","marko").limit(1).in().in(), __.out().has("name","marko").limit(1).in().in(), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).limit(1), __.out().limit(1).map(__.identity()).map(__.identity()), Collections.singleton(LazyBarrierStrategy.instance())}, + {__.out().map(__.identity()).map(__.identity()).limit(1).as("a"), __.out().limit(1).map(__.identity()).map(__.identity()).as("a"), Collections.singleton(LazyBarrierStrategy.instance())}, + {__.out().map(__.identity()).map(__.identity()).limit(2).out().map(__.identity()).map(__.identity()).limit(1), __.out().limit(2).map(__.identity()).map(__.identity()).out().limit(1).map(__.identity()).map(__.identity()), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).limit(2).map(__.identity()).map(__.identity()).limit(1), __.out().limit(1).map(__.identity()).map(__.identity()).map(__.identity()).map(__.identity()), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).range(5, 20).map(__.identity()).map(__.identity()).range(5, 10), __.out().range(10, 15).map(__.identity()).map(__.identity()).map(__.identity()).map(__.identity()), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).range(50, 100).map(__.identity()).map(__.identity()).range(10, 50), __.out().range(60, 100).map(__.identity()).map(__.identity()).map(__.identity()).map(__.identity()), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).range(50, 100).map(__.identity()).map(__.identity()).range(10, 60), __.out().range(60, 100).map(__.identity()).map(__.identity()).map(__.identity()).map(__.identity()), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).range(50, -1).map(__.identity()).map(__.identity()).range(10, 60), __.out().range(60, 110).map(__.identity()).map(__.identity()).map(__.identity()).map(__.identity()), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).range(50, 100).map(__.identity()).map(__.identity()).range(10, -1), __.out().range(60, 100).map(__.identity()).map(__.identity()).map(__.identity()).map(__.identity()), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).range(50, 100).as("a").map(__.identity()).map(__.identity()).range(10, -1).as("b"), __.out().range(60, 100).map(__.identity()).map(__.identity()).as("a").map(__.identity()).map(__.identity()).as("b"), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).range(50, 100).map(__.identity()).map(__.identity()).range(50, -1), __.out().none(), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).range(50, 100).map(__.identity()).map(__.identity()).range(60, -1), __.out().none(), Collections.emptyList()}, + {__.out().map(__.identity()).map(__.identity()).range(50, 100).as("a").map(__.identity()).map(__.identity()).range(60, -1).as("b"), __.out().none(), Collections.emptyList()}, + {__.out().range(50, 100).store("a").range(50, -1), __.out().range(50, 100).store("a").none(), Collections.emptyList()}, + {__.out().range(50, 100).store("a").range(50, -1).cap("a"), ((GraphTraversal) __.out().range(50, 100).store("a").none()).cap("a"), Collections.emptyList()}, + {__.out().range(50, 100).map(__.identity()).range(50, -1).profile(), __.out().none().profile(), Collections.singleton(ProfileStrategy.instance())}, + {__.out().store("a").limit(10), __.out().limit(10).store("a"), Collections.emptyList()}, + {__.out().aggregate("a").limit(10), __.out().aggregate("a").limit(10), Collections.emptyList()}, + {__.V().branch(__.label()).option("person", __.out("knows").valueMap().limit(1)).option("software", __.out("created").valueMap().limit(2).fold()), + __.V().branch(__.label()).option("person", __.out("knows").limit(1).valueMap()).option("software", __.out("created").limit(2).valueMap().fold()), Collections.emptyList()} + }); + } +} diff --git a/gremlin-dotnet/src/Gremlin.Net/Process/Traversal/Strategy/Optimization/EarlyLimitStrategy.cs b/gremlin-dotnet/src/Gremlin.Net/Process/Traversal/Strategy/Optimization/EarlyLimitStrategy.cs new file mode 100644 index 0000000..0831c92 --- /dev/null +++ b/gremlin-dotnet/src/Gremlin.Net/Process/Traversal/Strategy/Optimization/EarlyLimitStrategy.cs @@ -0,0 +1,32 @@ +#region License + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#endregion + +namespace Gremlin.Net.Process.Traversal.Strategy.Optimization +{ + /// <summary> + /// Moves <c>Range()</c> steps as far left as possible in order to to reduce backend operations. + /// </summary> + public class EarlyLimitStrategy : AbstractTraversalStrategy + { + } +} diff --git a/gremlin-python/src/main/jython/gremlin_python/process/strategies.py b/gremlin-python/src/main/jython/gremlin_python/process/strategies.py index f5ba9fb..6bb9ab9 100644 --- a/gremlin-python/src/main/jython/gremlin_python/process/strategies.py +++ b/gremlin-python/src/main/jython/gremlin_python/process/strategies.py @@ -168,6 +168,9 @@ class GraphFilterStrategy(TraversalStrategy): def __init__(self): TraversalStrategy.__init__(self) +class EarlyLimitStrategy(TraversalStrategy): + def __init__(self): + TraversalStrategy.__init__(self) ########################### # VERIFICATION STRATEGIES # diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/ProcessComputerSuite.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/ProcessComputerSuite.java index b224c8b..de53d87 100644 --- a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/ProcessComputerSuite.java +++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/ProcessComputerSuite.java @@ -88,6 +88,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.SubgraphTe import org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.TreeTest; import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.SubgraphStrategyProcessTest; import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.TranslationStrategyProcessTest; +import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.EarlyLimitStrategyProcessTest; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IncidentToAdjacentStrategyProcessTest; import org.apache.tinkerpop.gremlin.process.traversal.strategy.verification.ReadOnlyStrategyProcessTest; import org.apache.tinkerpop.gremlin.structure.Graph; @@ -202,7 +203,8 @@ public class ProcessComputerSuite extends AbstractGremlinSuite { SubgraphStrategyProcessTest.class, // optimizations - IncidentToAdjacentStrategyProcessTest.class + IncidentToAdjacentStrategyProcessTest.class, + EarlyLimitStrategyProcessTest.class }; /** diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/ProcessStandardSuite.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/ProcessStandardSuite.java index f7c19ac..16c0d0d 100644 --- a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/ProcessStandardSuite.java +++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/ProcessStandardSuite.java @@ -84,6 +84,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.EventS import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.PartitionStrategyProcessTest; import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.SubgraphStrategyProcessTest; import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.TranslationStrategyProcessTest; +import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.EarlyLimitStrategyProcessTest; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IncidentToAdjacentStrategyProcessTest; import org.apache.tinkerpop.gremlin.process.traversal.strategy.verification.ReadOnlyStrategyProcessTest; import org.apache.tinkerpop.gremlin.structure.Graph; @@ -187,7 +188,8 @@ public class ProcessStandardSuite extends AbstractGremlinSuite { SubgraphStrategyProcessTest.class, // optimizations - IncidentToAdjacentStrategyProcessTest.class + IncidentToAdjacentStrategyProcessTest.class, + EarlyLimitStrategyProcessTest.class }; /** diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategyProcessTest.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategyProcessTest.java new file mode 100644 index 0000000..4770fd7 --- /dev/null +++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategyProcessTest.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization; + +import org.apache.tinkerpop.gremlin.LoadGraphWith; +import org.apache.tinkerpop.gremlin.process.AbstractGremlinProcessTest; +import org.apache.tinkerpop.gremlin.process.GremlinProcessRunner; +import org.apache.tinkerpop.gremlin.process.remote.traversal.strategy.decoration.RemoteStrategy; +import org.apache.tinkerpop.gremlin.process.traversal.Order; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.process.traversal.step.filter.RangeGlobalStep; +import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalMetrics; +import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.junit.Test; +import org.junit.runner.RunWith; + +import java.util.List; +import java.util.Map; + +import static org.apache.tinkerpop.gremlin.LoadGraphWith.GraphData.GRATEFUL; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * @author Daniel Kuppitz (http://gremlin.guru) + */ +@RunWith(GremlinProcessRunner.class) +public class EarlyLimitStrategyProcessTest extends AbstractGremlinProcessTest { + + @Test + @LoadGraphWith(GRATEFUL) + public void shouldHandleRangeSteps() throws Exception { + + final GraphTraversal<Vertex, Map<String, List<String>>> t = + g.V().has("artist", "name", "Bob_Dylan") + .in("sungBy").as("a") + .repeat(__.out("followedBy") + .order() + .by(Order.shuffle) + .simplePath() + .from("a")) + .until(__.out("writtenBy").has("name", "Johnny_Cash")) + .limit(1).as("b") + .repeat(__.out() + .order() + .by(Order.shuffle).as("c") + .simplePath() + .from("b") + .to("c")) + .until(__.out("sungBy").has("name", "Grateful_Dead")) + .limit(5).as("d") + .path() + .from("a") + .limit(1).as("e") + .unfold(). + <List<String>>project("song", "artists") + .by("name") + .by(__.coalesce( + __.out("sungBy", "writtenBy").dedup().values("name"), + __.constant("Unknown")) + .fold()); + + final GraphTraversal pt = t.asAdmin().clone().profile(); + final List<Map<String, List<String>>> result = t.toList(); + final TraversalMetrics metrics = (TraversalMetrics) pt.next(); + + assertEquals(7, result.size()); + + if (t.asAdmin().getStrategies().toList().stream() + .anyMatch(s -> s instanceof EarlyLimitStrategy || s instanceof RemoteStrategy)) { + assertEquals(9, metrics.getMetrics().size()); + assertTrue(metrics.getMetrics(4).getName().endsWith("@[d]")); + assertEquals("RangeGlobalStep(0,1)", metrics.getMetrics(5).getName()); + assertEquals("PathStep@[e]", metrics.getMetrics(6).getName()); + assertTrue(metrics.getMetrics(6).getCounts().values().stream().noneMatch(x -> x != 1L)); + } else { + assertEquals(10, metrics.getMetrics().size()); + assertEquals("RangeGlobalStep(0,5)@[d]", metrics.getMetrics(5).getName()); + assertEquals("PathStep`", metrics.getMetrics(6).getName()); + assertEquals("RangeGlobalStep(0,1)@[e]", metrics.getMetrics(7).getName()); + assertTrue(metrics.getMetrics(6).getCounts().values().stream().allMatch(x -> x != 1L)); + } + } +} \ No newline at end of file diff --git a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphPlayTest.java b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphPlayTest.java index e0018fc..c278e89 100644 --- a/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphPlayTest.java +++ b/tinkergraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraphPlayTest.java @@ -19,6 +19,7 @@ package org.apache.tinkerpop.gremlin.tinkergraph.structure; import org.apache.tinkerpop.gremlin.process.computer.Computer; +import org.apache.tinkerpop.gremlin.process.traversal.Order; import org.apache.tinkerpop.gremlin.process.traversal.P; import org.apache.tinkerpop.gremlin.process.traversal.Pop; import org.apache.tinkerpop.gremlin.process.traversal.Traversal; @@ -26,6 +27,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.Traverser; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.EarlyLimitStrategy; import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.PathRetractionStrategy; import org.apache.tinkerpop.gremlin.structure.*; import org.apache.tinkerpop.gremlin.structure.io.graphml.GraphMLIo; @@ -122,90 +124,19 @@ public class TinkerGraphPlayTest { @Ignore public void testPlayDK() throws Exception { - final Map<String, String> aliases = new HashMap<>(); - aliases.put("marko","okram"); - final GraphTraversalSource g = TinkerFactory.createModern().traversal(); - /*g.withSideEffect("a", aliases).V().hasLabel("person"). - values("name").as("n"). - optional(select("a").select(select("n"))). - forEachRemaining(System.out::println);*/ - - // shortest path lengths (by summed weight) - g.withSack(0.0).V().has("person", "name", "marko"). - repeat(__.bothE(). - sack(sum). - by("weight"). - otherV(). - group("m"). - by(). - by(sack().min()).as("x"). - // where(P.eq("m")).by(sack()).by(select(select("x"))). // could be that easy, but "x" is unknown here - filter(project("s","x"). - by(sack()). - by(select("m").select(select("x"))). - where("s", P.eq("x"))). - group("p"). - by(). - by(project("path","length"). - by(path().by("name").by("weight")). - by(sack())) - ). - cap("p").unfold(). - group(). - by(select(Column.keys).values("name")). - by(Column.values).next().entrySet(). - forEach(System.out::println); - - System.out.println("---"); - - // longest path lengths (by summed weight) - g.withSack(0.0).V().has("person", "name", "marko"). - repeat(__.bothE().simplePath(). - sack(sum). - by("weight"). - otherV(). - group("m"). - by(). - by(sack().max()).as("x"). - filter(project("s","x"). - by(sack()). - by(select("m").select(select("x"))). - where("s", P.eq("x"))). - group("p"). - by(). - by(project("path","length"). - by(path().by("name").by("weight")). - by(sack())) - ). - cap("p").unfold(). - group(). - by(select(Column.keys).values("name")). - by(Column.values).next().entrySet(). - forEach(System.out::println); - - System.out.println("---"); + Graph graph = TinkerGraph.open(); + graph.io(GraphMLIo.build()).readGraph("../data/grateful-dead.xml"); - // all shortest paths (by summed weight) - g.withSack(0.0).V().as("a"). - repeat(__.bothE(). - sack(sum). - by("weight"). - otherV().as("b"). - group("m"). - by(select("a","b").by("name")). - by(sack().min()). - filter(project("s","x"). - by(sack()). - by(select("m").select(select("a", "b").by("name"))). - where("s", P.eq("x"))). - group("p"). - by(select("a","b").by("name")). - by(map(union(path().by("name").by("weight"), sack()).fold())) - ). - cap("p").unfold(). - order(). - by(select(Column.keys).select("a")). - by(select(Column.keys).select("b")). + GraphTraversalSource g = graph.traversal();//.withoutStrategies(EarlyLimitStrategy.class); + g.V().has("name", "Bob_Dylan").in("sungBy").as("a"). + repeat(out().order().by(Order.shuffle).simplePath().from("a")). + until(out("writtenBy").has("name", "Johnny_Cash")).limit(1).as("b"). + repeat(out().order().by(Order.shuffle).as("c").simplePath().from("b").to("c")). + until(out("sungBy").has("name", "Grateful_Dead")).limit(1). + path().from("a").unfold(). + <List<String>>project("song", "artists"). + by("name"). + by(__.coalesce(out("sungBy", "writtenBy").dedup().values("name"), constant("Unknown")).fold()). forEachRemaining(System.out::println); }