Repository: tinkerpop Updated Branches: refs/heads/master a2c8a8329 -> a93ffcc54
Found a bug in the PathRetractStrategy that occurs when the match() select() keepLabels do not include the match dedupLabels that were backpropagated into the MatchStep via MatchPredicateStrategy. This caused a 'label could not be found' exception. Also, inlined various code blocks so we don't just generate lists and call stacks on heavily used code sections in MatchStep (e.g. generateRemainingTraversals()). By doing that, a pretty significant performance gain was seen on a fat example travrsal I'm using -- 200ms to 160ms. CTR. Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/a93ffcc5 Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/a93ffcc5 Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/a93ffcc5 Branch: refs/heads/master Commit: a93ffcc5416399523e50b9560af10dd210fdf4bf Parents: a2c8a83 Author: Marko A. Rodriguez <okramma...@gmail.com> Authored: Tue Jul 12 07:47:06 2016 -0600 Committer: Marko A. Rodriguez <okramma...@gmail.com> Committed: Tue Jul 12 07:47:21 2016 -0600 ---------------------------------------------------------------------- .../process/traversal/step/map/MatchStep.java | 68 ++++++++++---------- .../traversal/step/map/MatchStepTest.java | 43 ------------- .../traversal/step/map/GroovyMatchTest.groovy | 7 +- .../process/traversal/step/map/MatchTest.java | 21 ++++++ 4 files changed, 61 insertions(+), 78 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/a93ffcc5/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchStep.java ---------------------------------------------------------------------- diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchStep.java index e1f6d8f..768d906 100644 --- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchStep.java +++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchStep.java @@ -80,6 +80,7 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> private final String computedStartLabel; private MatchAlgorithm matchAlgorithm; private Class<? extends MatchAlgorithm> matchAlgorithmClass = CountMatchAlgorithm.class; // default is CountMatchAlgorithm (use MatchAlgorithmStrategy to change) + private final Map<String, Set<String>> referencedLabelsMap = new HashMap<>(); // memoization of referenced labels for MatchEndSteps (Map<startStepId, referencedLabels>) private Set<List<Object>> dedups = null; private Set<String> dedupLabels = null; @@ -182,6 +183,8 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> @Override public void setKeepLabels(Set<String> labels) { this.keepLabels = labels; + if (null != this.dedupLabels) + this.keepLabels.addAll(this.dedupLabels); } @Override @@ -258,6 +261,8 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> if (!labels.isEmpty()) { this.dedups = new HashSet<>(); this.dedupLabels = new HashSet<>(labels); + if (null != this.keepLabels) + this.keepLabels.addAll(this.dedupLabels); } } @@ -333,6 +338,16 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> return false; } + private void generatedReferencedLabelsMap() { + for (final Traversal.Admin<?, ?> traversal : this.matchTraversals) { + final Set<String> referencedLabels = new HashSet<>(); + for (final Step<?, ?> step : traversal.getSteps()) { + referencedLabels.addAll(PathUtil.getReferencedLabels(step)); + } + this.referencedLabelsMap.put(traversal.getStartStep().getId(), referencedLabels); + } + } + private final TraverserSet standardAlgorithmBarrier = new TraverserSet(); @Override @@ -341,6 +356,7 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> if (this.first) { this.first = false; + this.generatedReferencedLabelsMap(); this.initializeMatchAlgorithm(TraversalEngine.Type.STANDARD); } else if (this.standardAlgorithmBarrier.isEmpty()) { for (final Traversal.Admin<?, ?> matchTraversal : this.matchTraversals) { @@ -385,6 +401,7 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> while (true) { if (this.first) { this.first = false; + this.generatedReferencedLabelsMap(); this.initializeMatchAlgorithm(TraversalEngine.Type.COMPUTER); } final Traverser.Admin traverser = this.starts.next(); @@ -419,16 +436,6 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> } } - protected List<Traversal.Admin<?, ?>> getRemainingTraversals(final Traverser.Admin<?> traverser) { - final Set<String> tags = traverser.getTags(); - final List<Traversal.Admin<?, ?>> remainingTraversals = new ArrayList<>(); - for (final Traversal.Admin<?, ?> matchTraversal : matchTraversals) { - if (!tags.contains(matchTraversal.getStartStep().getId())) - remainingTraversals.add(matchTraversal); - } - return remainingTraversals; - } - @Override public int hashCode() { int result = super.hashCode() ^ this.connective.hashCode(); @@ -443,17 +450,13 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> return this.getSelfAndChildRequirements(TraverserRequirement.LABELED_PATH, TraverserRequirement.SIDE_EFFECTS); } - @Override - protected Traverser.Admin<Map<String, E>> processNextStart() throws NoSuchElementException { - return super.processNextStart(); - } - ////////////////////////////// public static final class MatchStartStep extends AbstractStep<Object, Object> implements Scoping { private final String selectKey; private Set<String> scopeKeys = null; + private MatchStep<?, ?> parent = null; public MatchStartStep(final Traversal.Admin traversal, final String selectKey) { super(traversal); @@ -462,8 +465,11 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> @Override protected Traverser.Admin<Object> processNextStart() throws NoSuchElementException { + if (null == this.parent) + this.parent = (MatchStep<?, ?>) this.getTraversal().getParent(); + final Traverser.Admin<Object> traverser = this.starts.next(); - ((MatchStep<?, ?>) this.getTraversal().getParent()).getMatchAlgorithm().recordStart(traverser, this.getTraversal()); + this.parent.getMatchAlgorithm().recordStart(traverser, this.getTraversal()); // TODO: sideEffect check? return null == this.selectKey ? traverser : traverser.split(traverser.path().get(Pop.last, this.selectKey), this); } @@ -502,33 +508,27 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> public static final class MatchEndStep extends EndStep<Object> { private final String matchKey; - // memoization of referenced labels Map<startStepId, referencedLabels> - private final Map<String, Set<String>> referencedLabelsMap = new HashMap<>(); + private final Set<String> matchKeyCollection; private MatchStep<?, ?> parent = null; public MatchEndStep(final Traversal.Admin traversal, final String matchKey) { super(traversal); this.matchKey = matchKey; + this.matchKeyCollection = Collections.singleton(this.matchKey); } private <S> Traverser.Admin<S> retractUnnecessaryLabels(final Traverser.Admin<S> traverser) { if (null == this.parent.getKeepLabels()) return traverser; - final Set<String> keepers = new HashSet<>(this.parent.getKeepLabels()); - for (final Traversal.Admin<?, ?> traversal : this.parent.getRemainingTraversals(traverser)) { - Set<String> referencedLabels = this.referencedLabelsMap.get(traversal.getStartStep().getId()); - if (null == referencedLabels) { - referencedLabels = new HashSet<>(); - for (final Step<?, ?> step : traversal.getSteps()) { - referencedLabels.addAll(PathUtil.getReferencedLabels(step)); - } - this.referencedLabelsMap.put(traversal.getStartStep().getId(), referencedLabels); + final Set<String> tags = traverser.getTags(); + for (final Traversal.Admin<?, ?> matchTraversal : this.parent.matchTraversals) { // get remaining traversal patterns for the traverser + if (!tags.contains(matchTraversal.getStartStep().getId())) { + keepers.addAll(this.parent.referencedLabelsMap.get(matchTraversal.getStartStep().getId())); // get the reference labels required for those remaining traversals } - keepers.addAll(referencedLabels); } - return PathProcessor.processTraverserPathLabels(traverser, keepers); + return PathProcessor.processTraverserPathLabels(traverser, keepers); // remove all reference labels that are no longer required } @Override @@ -540,8 +540,8 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> final Traverser.Admin traverser = this.starts.next(); // no end label if (null == this.matchKey) { - if (this.traverserStepIdAndLabelsSetByChild) - traverser.setStepId(this.parent.getId()); + //if (this.traverserStepIdAndLabelsSetByChild) -- traverser equality is based on stepId, lets ensure they are all at the parent + traverser.setStepId(this.parent.getId()); this.parent.getMatchAlgorithm().recordEnd(traverser, this.getTraversal()); return this.retractUnnecessaryLabels(traverser); } @@ -549,9 +549,9 @@ public final class MatchStep<S, E> extends ComputerAwareStep<S, Map<String, E>> // path check final Path path = traverser.path(); if (!path.hasLabel(this.matchKey) || traverser.get().equals(path.get(Pop.last, this.matchKey))) { - if (this.traverserStepIdAndLabelsSetByChild) - traverser.setStepId(this.parent.getId()); - traverser.addLabels(Collections.singleton(this.matchKey)); + //if (this.traverserStepIdAndLabelsSetByChild) -- traverser equality is based on stepId and thus, lets ensure they are all at the parent + traverser.setStepId(this.parent.getId()); + traverser.addLabels(this.matchKeyCollection); this.parent.getMatchAlgorithm().recordEnd(traverser, this.getTraversal()); return this.retractUnnecessaryLabels(traverser); } http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/a93ffcc5/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchStepTest.java ---------------------------------------------------------------------- diff --git a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchStepTest.java b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchStepTest.java index 201207b..df85f16 100644 --- a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchStepTest.java +++ b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchStepTest.java @@ -21,7 +21,6 @@ package org.apache.tinkerpop.gremlin.process.traversal.step.map; import org.apache.tinkerpop.gremlin.process.traversal.P; import org.apache.tinkerpop.gremlin.process.traversal.Traversal; import org.apache.tinkerpop.gremlin.process.traversal.TraversalEngine; -import org.apache.tinkerpop.gremlin.process.traversal.TraversalStrategies; import org.apache.tinkerpop.gremlin.process.traversal.Traverser; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; import org.apache.tinkerpop.gremlin.process.traversal.step.StepTest; @@ -30,12 +29,9 @@ import org.apache.tinkerpop.gremlin.process.traversal.step.filter.ConnectiveStep import org.apache.tinkerpop.gremlin.process.traversal.step.filter.WherePredicateStep; import org.apache.tinkerpop.gremlin.process.traversal.step.filter.WhereTraversalStep; import org.apache.tinkerpop.gremlin.process.traversal.step.util.EmptyStep; -import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.PathRetractionStrategy; import org.apache.tinkerpop.gremlin.process.traversal.traverser.B_LP_O_P_S_SE_SL_TraverserGenerator; import org.apache.tinkerpop.gremlin.process.traversal.traverser.util.EmptyTraverser; -import org.apache.tinkerpop.gremlin.process.traversal.util.DefaultTraversalStrategies; import org.apache.tinkerpop.gremlin.structure.T; -import org.apache.tinkerpop.gremlin.structure.Vertex; import org.junit.Test; import java.util.Arrays; @@ -423,43 +419,4 @@ public class MatchStepTest extends StepTest { assertEquals("a", MatchStep.Helper.computeStartLabel(((MatchStep<?, ?>) traversal.getStartStep()).getGlobalChildren())); } - @Test - public void testGetRemainingTraversals() { - Traverser.Admin traverser = B_LP_O_P_S_SE_SL_TraverserGenerator.instance().generate(1, EmptyStep.instance(), 1l); - traverser.addLabels(Collections.singleton("a")); - - Traversal<Object, Vertex> mTraversal1 = as("a").out().as("b"); - Traversal<Object, Vertex> mTraversal2 = as("b").out().as("c"); - Traversal<Object, Vertex> mTraversal3 = as("a").out().as("d"); - Traversal<Object, Vertex> mTraversal4 = as("c").out().as("e"); - Traversal<Object, Vertex> mTraversal5 = as("c").out().as("f"); - - - Traversal.Admin<?, ?> traversal = __.match( - mTraversal1, - mTraversal2, - mTraversal3, - mTraversal4, - mTraversal5).asAdmin(); - - final TraversalStrategies strategies = new DefaultTraversalStrategies(); - strategies.addStrategies(PathRetractionStrategy.instance()); - traversal.asAdmin().setStrategies(strategies); - traversal.asAdmin().applyStrategies(); - - MatchStep matchStep = (MatchStep) traversal.getStartStep(); - assertEquals(new HashSet<>(Arrays.asList("a", "b", "c", "d", "e", "f")), matchStep.getKeepLabels()); - - traverser.getTags().add(mTraversal1.asAdmin().getStartStep().getId()); - traverser.setStepId(mTraversal2.asAdmin().getStartStep().getId()); - - List<Traversal.Admin<?, ?>> remainingTraversals = matchStep.getRemainingTraversals(traverser); - assertEquals(Arrays.asList(mTraversal2, mTraversal3, mTraversal4, mTraversal5), remainingTraversals); - - traverser.getTags().add(mTraversal2.asAdmin().getStartStep().getId()); - traverser.setStepId(mTraversal3.asAdmin().getStartStep().getId()); - - remainingTraversals = matchStep.getRemainingTraversals(traverser); - assertEquals(Arrays.asList(mTraversal3, mTraversal4, mTraversal5), remainingTraversals); - } } http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/a93ffcc5/gremlin-groovy-test/src/main/groovy/org/apache/tinkerpop/gremlin/process/traversal/step/map/GroovyMatchTest.groovy ---------------------------------------------------------------------- diff --git a/gremlin-groovy-test/src/main/groovy/org/apache/tinkerpop/gremlin/process/traversal/step/map/GroovyMatchTest.groovy b/gremlin-groovy-test/src/main/groovy/org/apache/tinkerpop/gremlin/process/traversal/step/map/GroovyMatchTest.groovy index 2dd2f03..b35b763 100644 --- a/gremlin-groovy-test/src/main/groovy/org/apache/tinkerpop/gremlin/process/traversal/step/map/GroovyMatchTest.groovy +++ b/gremlin-groovy-test/src/main/groovy/org/apache/tinkerpop/gremlin/process/traversal/step/map/GroovyMatchTest.groovy @@ -351,7 +351,12 @@ public abstract class GroovyMatchTest { @Override public Traversal<Vertex, Long> get_g_V_matchXa_knows_count_bX_selectXbX() { - new ScriptTraversal<>(g, "gremlin-groovy", "g.V().match(__.as('a').out('knows').count().as('b')).select('b')") + new ScriptTraversal<>(g, "gremlin-groovy", "g.V.match(__.as('a').out('knows').count.as('b')).select('b')") + } + + @Override + public Traversal<Vertex, String> get_g_V_matchXa_knows_b__b_created_c__a_created_cX_dedupXa_b_cX_selectXaX_byXnameX() { + new ScriptTraversal<>(g, "gremlin-groovy", "g.V.match(__.as('a').out('knows').as('b'), __.as('b').out('created').as('c'), __.as('a').out('created').as('c')).dedup('a', 'b', 'c').select('a').by('name')") } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/a93ffcc5/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchTest.java ---------------------------------------------------------------------- diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchTest.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchTest.java index 2c4789e..508968a 100644 --- a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchTest.java +++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MatchTest.java @@ -151,6 +151,9 @@ public abstract class MatchTest extends AbstractGremlinProcessTest { // reducing barrier on lazy standard shouldn't yield an empty barrier public abstract Traversal<Vertex, Long> get_g_V_matchXa_knows_count_bX_selectXbX(); + // verifying keep labels and dedup labels interactions + public abstract Traversal<Vertex, String> get_g_V_matchXa_knows_b__b_created_c__a_created_cX_dedupXa_b_cX_selectXaX_byXnameX(); + @Test @LoadGraphWith(MODERN) public void g_V_valueMap_matchXa_selectXnameX_bX() { @@ -533,6 +536,16 @@ public abstract class MatchTest extends AbstractGremlinProcessTest { assertFalse(traversal.hasNext()); } + @Test + @LoadGraphWith(MODERN) + public void g_V_matchXa_knows_b__b_created_c__a_created_cX_dedupXa_b_cX_selectXaX_byXnameX() { + final Traversal<Vertex, String> traversal = get_g_V_matchXa_knows_b__b_created_c__a_created_cX_dedupXa_b_cX_selectXaX_byXnameX(); + printTraversalForm(traversal); + assertEquals("marko", traversal.next()); + assertFalse(traversal.hasNext()); + } + + public static class GreedyMatchTraversals extends Traversals { @Before public void setupTest() { @@ -802,5 +815,13 @@ public abstract class MatchTest extends AbstractGremlinProcessTest { public Traversal<Vertex, Long> get_g_V_matchXa_knows_count_bX_selectXbX() { return g.V().match(as("a").out("knows").count().as("b")).select("b"); } + + @Override + public Traversal<Vertex, String> get_g_V_matchXa_knows_b__b_created_c__a_created_cX_dedupXa_b_cX_selectXaX_byXnameX() { + return g.V().match( + as("a").out("knows").as("b"), + as("b").out("created").as("c"), + as("a").out("created").as("c")).dedup("a", "b", "c").<String>select("a").by("name"); + } } }