http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/FunctionGenerator.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/FunctionGenerator.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/FunctionGenerator.java new file mode 100644 index 0000000..1a93d0f --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/FunctionGenerator.java @@ -0,0 +1,326 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import java.util.List; + +import org.apache.atlas.gremlin.GremlinExpressionFactory; +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.ClosureExpression; +import org.apache.atlas.groovy.ClosureExpression.VariableDeclaration; +import org.apache.atlas.groovy.FunctionCallExpression; +import org.apache.atlas.groovy.GroovyExpression; +import org.apache.atlas.groovy.IdentifierExpression; + +/** + * Extracts common expressions from an or-containing expression + * into functions. These expressions would otherwise be duplicated + * as part of expanding the "or". Doing this shortens the overall length + * of the Gremlin script so we can maximize query performance. + * + */ +public class FunctionGenerator implements CallHierarchyVisitor { + + //Function length constants. + //These assume we won't reach more than 9 function definition. Even if we do, this is still + //a reasonable approximation. + private static final int INITIAL_FUNCTION_DEF_LENGTH = "def f1={};".length(); + private final int functionDefLength; + private static final int FUNCTION_CALL_OVERHEAD = "f1()".length(); + + /** + * The expression that should be the first (deepest) expression + * in the body of the next generated function. As we go up the + * expression tree in the post visit, this is updated based on the + * expressions we see. During the post visits, if it is null, + * the body expression is set to the expression we're visiting. + * As we go up the tree, it is nulled out if we create a function + * or encounter an or expression. This guarantees that the + * next function body will not contain any or expressions + * and that it will not have expressions that are already + * part of some other function. + */ + private GroovyExpression nextFunctionBodyStart; + + /** + * The number of times expressions will be duplicated. + */ + private int scaleFactor = 1; + + private final OptimizationContext context; + + /** + * The current depth in the expression tree. + */ + private int depth = 0; + + /** + * The name of the last function that was generated. If set, + * we can safely update this function instead of creating a new one. + */ + private String currentFunctionName; + + /** + * The updated expression we will pass back to the caller. + */ + private GroovyExpression newRootExpression; + + private final GremlinExpressionFactory factory; + + public FunctionGenerator(GremlinExpressionFactory factory, OptimizationContext context) { + this.context = context; + this.factory = factory; + functionDefLength = ("def f1={" + factory.getTraversalExpressionClass() + " x->};").length(); + } + + @Override + public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) { + depth++; + if (IsOr.INSTANCE.apply(expr)) { + FunctionCallExpression functionCall = (FunctionCallExpression) expr; + scaleFactor *= functionCall.getArguments().size(); + } + if (newRootExpression == null) { + newRootExpression = expr; + } + + return true; + } + + @Override + public void visitNonFunctionCaller(GroovyExpression expr) { + if (nextFunctionBodyStart == null) { + nextFunctionBodyStart = expr; + } + + } + + @Override + public void visitNullCaller() { + //nothing to do + } + + @Override + public boolean postVisitFunctionCaller(AbstractFunctionExpression expr) { + boolean isRootExpr = depth == 1; + visitParentExpression(expr); + + //The root expression has no parent. To simplify the logic, we create + //a dummy expression so it does have a parent, then call visitParentExpression again + //to examine the root expression. + if (isRootExpr) { + FunctionCallExpression dummyParent = new FunctionCallExpression(expr, "dummy"); + visitParentExpression(dummyParent); + newRootExpression = dummyParent.getCaller(); + } + + depth--; + return true; + } + + /** + * Checks to see if the *caller* of this expression should become part + * of a function. If so, either a new function is created, or the + * expression becomes part of the last function we created. + * + * @param parentExpr + */ + private void visitParentExpression(AbstractFunctionExpression parentExpr) { + + if (nextFunctionBodyStart == null) { + nextFunctionBodyStart = parentExpr; + } + + if (currentFunctionName != null) { + updateCurrentFunction(parentExpr); + } else { + createFunctionIfNeeded(parentExpr); + } + + if (GremlinQueryOptimizer.isOrExpression(parentExpr)) { + //reset + currentFunctionName = null; + //don't include 'or' in generated functions + nextFunctionBodyStart = null; + } + + } + + /** + * Creates a function whose body goes from the child of parentExpr + * up to (and including) the functionBodyEndExpr. + * @param parentExpr + */ + private void createFunctionIfNeeded(AbstractFunctionExpression parentExpr) { + GroovyExpression potentialFunctionBody = parentExpr.getCaller(); + + if (creatingFunctionShortensGremlin(potentialFunctionBody)) { + GroovyExpression functionCall = null; + + if (nextFunctionBodyStart instanceof AbstractFunctionExpression) { + //The function body start is a a function call. In this + //case, we generate a function that takes one argument, which + //is a graph traversal. We have an expression tree that + //looks kind of like the following: + // + // parentExpr + // / + // / caller + // |/_ + // potentialFunctionBody + // / + // / caller + // |/_ + // ... + // / + // / caller + // |/_ + // nextFunctionBodyStart + // / + // / caller + // |/_ + // oldCaller + // + // + // Note that potentialFunctionBody and nextFunctionBodyStart + // could be the same expression. Let's say that the next + // function name is f1 + // + // We reshuffle these expressions to the following: + // + // parentExpr + // / + // / caller + // |/_ + // f1(oldCaller) + // + // + // potentialFunctionBody <- body of new function "f1(GraphTraversal x)" + // / + // / caller + // |/_ + // ... + // / + // / caller + // |/_ + // nextFunctionBodyStart + // / + // / caller + // |/_ + // x + // + // As an example, suppose parentExpr is g.V().or(x,y).has(a).has(b).has(c) + // where has(a) is nextFunctionBodyStart. + // + // We generate a function f1 = { GraphTraversal x -> x.has(a).has(b) } + // parentExpr would become : f1(g.V().or(x,y)).has(c) + + AbstractFunctionExpression nextFunctionBodyStartFunction= + (AbstractFunctionExpression) nextFunctionBodyStart; + String variableName = "x"; + IdentifierExpression var = new IdentifierExpression(variableName); + GroovyExpression oldCaller = nextFunctionBodyStartFunction.getCaller(); + nextFunctionBodyStartFunction.setCaller(var); + + currentFunctionName = context.addFunctionDefinition(new VariableDeclaration(factory.getTraversalExpressionClass(), "x"), + potentialFunctionBody); + functionCall = new FunctionCallExpression(potentialFunctionBody.getType(), + currentFunctionName, oldCaller); + + } else { + //The function body start is a not a function call. In this + //case, we generate a function that takes no arguments. + + // As an example, suppose parentExpr is g.V().has(a).has(b).has(c) + // where g is nextFunctionBodyStart. + // + // We generate a function f1 = { g.V().has(a).has(b) } + // parentExpr would become : f1().has(c) + + currentFunctionName = context.addFunctionDefinition(null, potentialFunctionBody); + functionCall = new FunctionCallExpression(potentialFunctionBody.getType(), currentFunctionName); + } + + //functionBodyEnd is now part of a function definition, don't propagate it + nextFunctionBodyStart = null; + parentExpr.setCaller(functionCall); + } + } + + /** + * Adds the caller of parentExpr to the current body of the last + * function that was created. + * + * @param parentExpr + */ + private void updateCurrentFunction(AbstractFunctionExpression parentExpr) { + GroovyExpression expr = parentExpr.getCaller(); + if (expr instanceof AbstractFunctionExpression) { + AbstractFunctionExpression exprAsFunction = (AbstractFunctionExpression) expr; + GroovyExpression exprCaller = exprAsFunction.getCaller(); + parentExpr.setCaller(exprCaller); + updateCurrentFunctionDefintion(exprAsFunction); + } + } + + private void updateCurrentFunctionDefintion(AbstractFunctionExpression exprToAdd) { + ClosureExpression functionBodyClosure = context.getUserDefinedFunctionBody(currentFunctionName); + if (functionBodyClosure == null) { + throw new IllegalStateException("User-defined function " + currentFunctionName + " not found!"); + } + List<GroovyExpression> exprs = functionBodyClosure.getStatements(); + GroovyExpression currentFunctionBody = exprs.get(exprs.size() - 1); + //Update the expression so it is called by the current return + //value of the function. + exprToAdd.setCaller(currentFunctionBody); + functionBodyClosure.replaceStatement(exprs.size() - 1, exprToAdd); + } + + //Determines if extracting this expression into a function will shorten + //the overall length of the Groovy script. + private boolean creatingFunctionShortensGremlin(GroovyExpression headExpr) { + int tailLength = getTailLength(); + int length = headExpr.toString().length() - tailLength; + + int overhead = 0; + if (nextFunctionBodyStart instanceof AbstractFunctionExpression) { + overhead = functionDefLength; + } else { + overhead = INITIAL_FUNCTION_DEF_LENGTH; + } + overhead += FUNCTION_CALL_OVERHEAD * scaleFactor; + //length * scaleFactor = space taken by having the expression be inlined [scaleFactor] times + //overhead + length = space taken by the function definition and its calls + return length * scaleFactor > overhead + length; + } + + private int getTailLength() { + if (nextFunctionBodyStart == null) { + return 0; + } + if (!(nextFunctionBodyStart instanceof AbstractFunctionExpression)) { + return 0; + } + AbstractFunctionExpression bodyEndAsFunction = (AbstractFunctionExpression) nextFunctionBodyStart; + return bodyEndAsFunction.getCaller().toString().length(); + } + + public GroovyExpression getNewRootExpression() { + return newRootExpression; + } +}
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/GremlinOptimization.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/GremlinOptimization.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/GremlinOptimization.java new file mode 100644 index 0000000..bfa45af --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/GremlinOptimization.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import org.apache.atlas.groovy.GroovyExpression; + +/** + * An optimization that can be applied to a gremlin query. + */ +public interface GremlinOptimization { + + /** + * Whether or not this optimization should be applied to the given expression + * @param expr + * @param contxt + * @return + */ + boolean appliesTo(GroovyExpression expr, OptimizationContext contxt); + /** + * Whether or not GremlinQueryOptimizer should call this optimization recursively + * on the updated children. + */ + boolean isApplyRecursively(); + + /** + * Applies the optimization. + * + * @param expr + * @param context + * @return the optimized expression + */ + GroovyExpression apply(GroovyExpression expr, OptimizationContext context); +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/GremlinQueryOptimizer.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/GremlinQueryOptimizer.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/GremlinQueryOptimizer.java new file mode 100644 index 0000000..a0c08fd --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/GremlinQueryOptimizer.java @@ -0,0 +1,262 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.atlas.gremlin.GremlinExpressionFactory; +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.GroovyExpression; +import org.apache.atlas.groovy.StatementListExpression; +import org.apache.atlas.groovy.TraversalStepType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + + + +/** + * Optimizer for gremlin queries. This class provides a framework for applying optimizations + * to gremlin queries. Each optimization is implemented as a class that implements {@link GremlinOptimization}. + * + * The GremlinQueryOptimizer is the entry point for applying these optimizations. + * + * + */ +public final class GremlinQueryOptimizer { + + private static final Logger LOGGER = LoggerFactory.getLogger(GremlinQueryOptimizer.class); + + + private final List<GremlinOptimization> optimizations = new ArrayList<>(); + + //Allows expression factory to be substituted in unit tests. + private static volatile GremlinExpressionFactory FACTORY = GremlinExpressionFactory.INSTANCE; + + private static volatile GremlinQueryOptimizer INSTANCE = null; + + private GremlinQueryOptimizer() { + + } + + private void addOptimization(GremlinOptimization opt) { + optimizations.add(opt); + } + + public static GremlinQueryOptimizer getInstance() { + if(INSTANCE == null) { + synchronized(GremlinQueryOptimizer.class) { + if(INSTANCE == null) { + GremlinQueryOptimizer createdInstance = new GremlinQueryOptimizer(); + //The order here is important. If there is an "or" nested within an "and", + //that will not be found if ExpandOrsOptimization runs before ExpandAndsOptimization. + createdInstance.addOptimization(new ExpandAndsOptimization(FACTORY)); + createdInstance.addOptimization(new ExpandOrsOptimization(FACTORY)); + INSTANCE = createdInstance; + } + } + } + return INSTANCE; + } + + /** + * For testing only + */ + @VisibleForTesting + public static void setExpressionFactory(GremlinExpressionFactory factory) { + GremlinQueryOptimizer.FACTORY = factory; + } + + /** + * For testing only + */ + @VisibleForTesting + public static void reset() { + INSTANCE = null; + } + + /** + * Optimizes the provided groovy expression. Note that the optimization + * is a <i>destructive</i> process. The source GroovyExpression will be + * modified as part of the optimization process. This is done to avoid + * expensive copying operations where possible. + * + * @param source what to optimize + * @return the optimized query + */ + public GroovyExpression optimize(GroovyExpression source) { + LOGGER.debug("Optimizing gremlin query: " + source); + OptimizationContext context = new OptimizationContext(); + GroovyExpression updatedExpression = source; + for (GremlinOptimization opt : optimizations) { + updatedExpression = optimize(updatedExpression, opt, context); + LOGGER.debug("After "+ opt.getClass().getSimpleName() + ", query = " + updatedExpression); + } + + StatementListExpression result = new StatementListExpression(); + result.addStatements(context.getInitialStatements()); + result.addStatement(updatedExpression); + LOGGER.debug("Final optimized query: " + result.toString()); + return result; + } + + /** + * Optimizes the expression using the given optimization + * @param source + * @param optimization + * @param context + * @return + */ + private GroovyExpression optimize(GroovyExpression source, GremlinOptimization optimization, + OptimizationContext context) { + GroovyExpression result = source; + if (optimization.appliesTo(source, context)) { + //Apply the optimization to the expression. + result = optimization.apply(source, context); + } + if (optimization.isApplyRecursively()) { + //Visit the children, update result with the optimized + //children. + List<GroovyExpression> updatedChildren = new ArrayList<>(); + boolean changed = false; + for (GroovyExpression child : result.getChildren()) { + //Recursively optimize this child. + GroovyExpression updatedChild = optimize(child, optimization, context); + changed |= updatedChild != child; + updatedChildren.add(updatedChild); + } + if (changed) { + //TBD - Can we update in place rather than making a copy? + result = result.copy(updatedChildren); + } + } + return result; + } + + /** + * Visits all expressions in the call hierarchy of an expression. For example, + * in the expression g.V().has('x','y'), the order would be + * <ol> + * <li>pre-visit has('x','y')</li> + * <li>pre-visit V()</li> + * <li>visit g (non-function caller)</li> + * <li>post-visit V()</li> + * <li>post-visit has('x','y')</li> + * </ol> + * @param expr + * @param visitor + */ + public static void visitCallHierarchy(GroovyExpression expr, CallHierarchyVisitor visitor) { + + if (expr == null) { + visitor.visitNullCaller(); + return; + } + if (expr instanceof AbstractFunctionExpression) { + AbstractFunctionExpression functionCall = (AbstractFunctionExpression)expr; + if (!visitor.preVisitFunctionCaller(functionCall)) { + return; + } + GroovyExpression caller = functionCall.getCaller(); + visitCallHierarchy(caller, visitor); + if (!visitor.postVisitFunctionCaller(functionCall)) { + return; + } + } else { + visitor.visitNonFunctionCaller(expr); + } + } + + /** + * Determines if the given expression is an "or" expression. + * @param expr + * @return + */ + public static boolean isOrExpression(GroovyExpression expr) { + return IsOr.INSTANCE.apply(expr); + } + + /** + * Determines whether the given expression can safely + * be pulled out of an and/or expression. + * + * @param expr an argument to an and or or function + * @return + */ + public static boolean isExtractable(GroovyExpression expr) { + + HasForbiddenType hasForbiddenTypePredicate = new HasForbiddenType(FACTORY); + + //alias could conflict with alias in parent traversal + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.SIDE_EFFECT); + + //inlining out(), in() steps will change the result of calls after the and/or() + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.FLAT_MAP_TO_ELEMENTS); + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.FLAT_MAP_TO_VALUES); + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.BARRIER); + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.MAP_TO_ELEMENT); + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.MAP_TO_VALUE); + + //caller expects to be able to continue the traversal. We can't end it + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.END); + + + //we can't inline child traversals + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.SOURCE); + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.START); + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.SIDE_EFFECT); + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.NONE); + hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.BRANCH); + + ExpressionFinder forbiddenExpressionFinder = new ExpressionFinder(hasForbiddenTypePredicate); + GremlinQueryOptimizer.visitCallHierarchy(expr, forbiddenExpressionFinder); + return ! forbiddenExpressionFinder.isExpressionFound(); + } + + /** + * Recursively copies and follows the caller hierarchy of the expression until we come + * to a function call with a null caller. The caller of that expression is set + * to newLeaf. + * + * @param expr + * @param newLeaf + * @return the updated (/copied) expression + */ + public static GroovyExpression copyWithNewLeafNode(AbstractFunctionExpression expr, GroovyExpression newLeaf) { + + + AbstractFunctionExpression result = (AbstractFunctionExpression)expr.copy(); + + //remove leading anonymous traversal expression, if there is one + if(FACTORY.isLeafAnonymousTraversalExpression(expr)) { + result = (AbstractFunctionExpression)newLeaf; + } else { + GroovyExpression newCaller = null; + if (expr.getCaller() == null) { + newCaller = newLeaf; + } else { + newCaller = copyWithNewLeafNode((AbstractFunctionExpression)result.getCaller(), newLeaf); + } + result.setCaller(newCaller); + } + return result; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/HasForbiddenType.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/HasForbiddenType.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/HasForbiddenType.java new file mode 100644 index 0000000..3fb9faa --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/HasForbiddenType.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import java.util.HashSet; +import java.util.Set; +import com.google.common.base.Function; + +import org.apache.atlas.gremlin.GremlinExpressionFactory; +import org.apache.atlas.groovy.GroovyExpression; +import org.apache.atlas.groovy.TraversalStepType; + +/** + * Function that tests whether the expression is an 'or' + * graph traversal function. + */ +public final class HasForbiddenType implements Function<GroovyExpression, Boolean> { + + private Set<TraversalStepType> forbiddenTypes = new HashSet<>(); + private final GremlinExpressionFactory factory; + + public HasForbiddenType(GremlinExpressionFactory factory) { + this.factory = factory; + } + + public void addForbiddenType(TraversalStepType type) { + forbiddenTypes.add(type); + } + + @Override + public Boolean apply(GroovyExpression expr) { + if(factory.isLeafAnonymousTraversalExpression(expr)) { + return false; + } + return forbiddenTypes.contains(expr.getType()); + } +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/IsOr.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/IsOr.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/IsOr.java new file mode 100644 index 0000000..ab74087 --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/IsOr.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import com.google.common.base.Function; + +import org.apache.atlas.groovy.FunctionCallExpression; +import org.apache.atlas.groovy.GroovyExpression; +import org.apache.atlas.groovy.TraversalStepType; + +/** + * Function that tests whether the expression is an 'or' + * graph traversal function. + */ +public final class IsOr implements Function<GroovyExpression, Boolean> { + + public static final IsOr INSTANCE = new IsOr(); + + private IsOr() { + } + + @Override + public Boolean apply(GroovyExpression expr) { + if (!(expr instanceof FunctionCallExpression)) { + return false; + } + if (expr.getType() != TraversalStepType.FILTER) { + return false; + } + FunctionCallExpression functionCall = (FunctionCallExpression)expr; + return functionCall.getFunctionName().equals("or"); + } +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/IsOrParent.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/IsOrParent.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/IsOrParent.java new file mode 100644 index 0000000..72085d0 --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/IsOrParent.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import com.google.common.base.Function; + +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.FunctionCallExpression; +import org.apache.atlas.groovy.GroovyExpression; +import org.apache.atlas.groovy.TraversalStepType; + +/** + * Matches an expression that gets called after calling or(). For example, + * in g.V().or(x,y).toList(), "toList()" is the "or parent", so calling + * "apply()" on this expression would return true and calling it on all + * the other ones would return false. + */ +public final class IsOrParent implements Function<GroovyExpression, Boolean> { + + public static final IsOrParent INSTANCE = new IsOrParent(); + + private IsOrParent() { + + } + + @Override + public Boolean apply(GroovyExpression expr) { + if (!(expr instanceof AbstractFunctionExpression)) { + return false; + } + AbstractFunctionExpression functionCall = (AbstractFunctionExpression)expr; + GroovyExpression target = functionCall.getCaller(); + + if (!(target instanceof FunctionCallExpression)) { + return false; + } + + if (target.getType() != TraversalStepType.FILTER) { + return false; + } + + FunctionCallExpression targetFunction = (FunctionCallExpression)target; + return targetFunction.getFunctionName().equals("or"); + } +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/OptimizationContext.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/OptimizationContext.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/OptimizationContext.java new file mode 100644 index 0000000..86c8b98 --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/OptimizationContext.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.ClosureExpression; +import org.apache.atlas.groovy.ClosureExpression.VariableDeclaration; +import org.apache.atlas.groovy.GroovyExpression; +import org.apache.atlas.groovy.IdentifierExpression; +import org.apache.atlas.groovy.ListExpression; +import org.apache.atlas.groovy.TypeCoersionExpression; +import org.apache.atlas.groovy.VariableAssignmentExpression; + +/** + * Maintains state information during gremlin optimization. + */ +public class OptimizationContext { + + private static final String TMP_ALIAS_NAME = "__tmp"; + private static final String FINAL_ALIAS_NAME = "__res"; + private static final String RESULT_VARIABLE = "r"; + private final List<GroovyExpression> initialStatements = new ArrayList<>(); + private GroovyExpression resultExpression = getResultVariable(); + private int counter = 1; + private final Map<String, ClosureExpression> functionBodies = new HashMap<>(); + private AbstractFunctionExpression rangeExpression; + + public OptimizationContext() { + + } + + /** + * @return + */ + public List<GroovyExpression> getInitialStatements() { + return initialStatements; + } + + public void prependStatement(GroovyExpression expr) { + initialStatements.add(0, expr); + } + + public String getUniqueFunctionName() { + return "f" + (counter++); + } + + + public GroovyExpression getDefineResultVariableStmt() { + GroovyExpression castExpression = new TypeCoersionExpression(new ListExpression(), "Set"); + GroovyExpression resultVarDef = new VariableAssignmentExpression(RESULT_VARIABLE, castExpression); + return resultVarDef; + + } + public void setResultExpression(GroovyExpression expr) { + resultExpression = expr; + } + + public GroovyExpression getResultExpression() { + return resultExpression; + } + + public GroovyExpression getResultVariable() { + return new IdentifierExpression(RESULT_VARIABLE); + } + + public ClosureExpression getUserDefinedFunctionBody(String functionName) { + return functionBodies.get(functionName); + } + + public String addFunctionDefinition(VariableDeclaration decl, GroovyExpression body) { + String functionName = getUniqueFunctionName(); + List<VariableDeclaration> decls = (decl == null) ? Collections.<VariableDeclaration>emptyList() : Collections.singletonList(decl); + ClosureExpression bodyClosure = new ClosureExpression(body, decls); + VariableAssignmentExpression expr = new VariableAssignmentExpression(functionName, bodyClosure); + initialStatements.add(expr); + functionBodies.put(functionName, bodyClosure); + return functionName; + } + + public String getFinalAliasName() { + return FINAL_ALIAS_NAME; + } + + public String getTempAliasName() { + return TMP_ALIAS_NAME; + } + + public void setRangeExpression(AbstractFunctionExpression rangeExpression) { + this.rangeExpression = rangeExpression; + } + + public AbstractFunctionExpression getRangeExpression() { + return rangeExpression; + } +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/OrderFinder.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/OrderFinder.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/OrderFinder.java new file mode 100644 index 0000000..792fc52 --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/OrderFinder.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import org.apache.atlas.gremlin.GremlinExpressionFactory; +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.GroovyExpression; + + +/** + * Finds order expression in the call hierarchy. + * + */ +public class OrderFinder implements CallHierarchyVisitor { + + private boolean hasOrderExpression; + private GremlinExpressionFactory gremlinFactory; + + public OrderFinder(GremlinExpressionFactory gremlinFactory) { + this.gremlinFactory = gremlinFactory; + } + + @Override + public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) { + + return true; + } + + @Override + public void visitNonFunctionCaller(GroovyExpression expr) { + } + + @Override + public void visitNullCaller() { + } + + @Override + public boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall) { + + if (gremlinFactory.isOrderExpression(functionCall)) { + hasOrderExpression = true; + return false; + } + return true; + } + + + public boolean hasOrderExpression() { + + return hasOrderExpression; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/PathExpressionFinder.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/PathExpressionFinder.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/PathExpressionFinder.java new file mode 100644 index 0000000..0e9070d --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/PathExpressionFinder.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.FunctionCallExpression; +import org.apache.atlas.groovy.GroovyExpression; + +/** + * Determines whether an expression contains a path() function. + */ +public class PathExpressionFinder implements CallHierarchyVisitor { + + private boolean found = false; + + @Override + public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) { + if(expr instanceof FunctionCallExpression) { + found = ((FunctionCallExpression)expr).getFunctionName().equals("path"); + if(found) { + return false; + } + } + return true; + } + + @Override + public void visitNonFunctionCaller(GroovyExpression expr) { + + } + + @Override + public void visitNullCaller() { + + } + + public boolean isPathExpressionFound() { + return found; + } + + @Override + public boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall) { + + return false; + } +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/RangeFinder.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/RangeFinder.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/RangeFinder.java new file mode 100644 index 0000000..fa8ca85 --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/RangeFinder.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.atlas.gremlin.GremlinExpressionFactory; +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.GroovyExpression; + + +/** + * Finds all range expressions in the call hierarchy. + * + */ +public class RangeFinder implements CallHierarchyVisitor { + + private List<AbstractFunctionExpression> rangeExpressions = new ArrayList<>(); + private GremlinExpressionFactory factory; + + public RangeFinder(GremlinExpressionFactory factory) { + this.factory = factory; + } + + @Override + public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) { + + return true; + } + + @Override + public void visitNonFunctionCaller(GroovyExpression expr) { + } + + @Override + public void visitNullCaller() { + } + + @Override + public boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall) { + + if (factory.isRangeExpression(functionCall)) { + rangeExpressions.add(functionCall); + } + return true; + } + + public List<AbstractFunctionExpression> getRangeExpressions() { + return rangeExpressions; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/SplitPointFinder.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/SplitPointFinder.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/SplitPointFinder.java new file mode 100644 index 0000000..f0295e7 --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/SplitPointFinder.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.atlas.gremlin.GremlinExpressionFactory; +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.FunctionCallExpression; +import org.apache.atlas.groovy.GroovyExpression; +import org.apache.atlas.groovy.TraversalStepType; + + +/** + * This class finds the first place in the expression where the value of the + * traverser is changed from being a vertex to being something else. This is + * important in the "or" optimization logic, since the union operation must be + * done on *vertices* in order to preserve the semantics of the query. In addition, + * expressions that have side effects must be moved as well, so that those + * side effects will be available to the steps that need them. + */ +public class SplitPointFinder implements CallHierarchyVisitor { + + //Any steps that change the traverser value to something that is not a vertex or edge + //must be included here, so that the union created by ExpandOrsOptimization + //is done over vertices/edges. + private static final Set<TraversalStepType> TYPES_REQUIRED_IN_RESULT_EXPRESSION = new HashSet<>( + Arrays.asList( + TraversalStepType.BARRIER, + TraversalStepType.BRANCH, + TraversalStepType.SIDE_EFFECT, + TraversalStepType.MAP_TO_VALUE, + TraversalStepType.FLAT_MAP_TO_VALUES, + TraversalStepType.END, + TraversalStepType.NONE)); + + private final Set<String> requiredAliases = new HashSet<>(); + + //Exceptions to the requirement that all expressions with a type + //in the above list must be in the result expression. If the + //function name is in this list, it is ok for that expression + //to not be in the result expression. This mechanism allows + //aliases to remain outside the result expression. Other + //exceptions may be found in the future. + private static final Map<TraversalStepType, WhiteList> WHITE_LISTS = new HashMap<>(); + static { + WHITE_LISTS.put(TraversalStepType.SIDE_EFFECT, new WhiteList("as")); + } + + private final GremlinExpressionFactory factory; + + public SplitPointFinder(GremlinExpressionFactory factory) { + this.factory = factory; + } + + /** + * Represents a set of function names. + */ + private static final class WhiteList { + private Set<String> allowedFunctionNames = new HashSet<>(); + public WhiteList(String... names) { + for(String name : names) { + allowedFunctionNames.add(name); + } + } + public boolean contains(String name) { + return allowedFunctionNames.contains(name); + } + } + + private AbstractFunctionExpression splitPoint; + + @Override + public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) { + requiredAliases.addAll(factory.getAliasesRequiredByExpression(expr)); + return true; + } + + @Override + public void visitNonFunctionCaller(GroovyExpression expr) { + + } + + @Override + public void visitNullCaller() { + + } + + public AbstractFunctionExpression getSplitPoint() { + return splitPoint; + } + + @Override + public boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall) { + String aliasName = factory.getAliasNameIfRelevant(functionCall); + if (splitPoint == null) { + + boolean required = isRequiredAlias(aliasName) || + isRequiredInResultExpression(functionCall); + if (required) { + splitPoint = functionCall; + } + } + removeSeenAlias(aliasName); + + return true; + } + + private void removeSeenAlias(String aliasName) { + if(aliasName != null) { + requiredAliases.remove(aliasName); + } + } + + private boolean isRequiredAlias(String aliasName) { + if(aliasName != null) { + return requiredAliases.contains(aliasName); + } + return false; + } + + private boolean isRequiredInResultExpression(AbstractFunctionExpression expr) { + + TraversalStepType type = expr.getType(); + if (!TYPES_REQUIRED_IN_RESULT_EXPRESSION.contains(type)) { + return false; + } + + if(expr instanceof FunctionCallExpression) { + FunctionCallExpression functionCall = (FunctionCallExpression)expr; + //check if the white list permits this function call. If there is + //no white list, all expressions with the current step type must go in the + //result expression. + WhiteList whiteList = WHITE_LISTS.get(type); + if(whiteList != null && whiteList.contains(functionCall.getFunctionName())) { + return false; + } + } + return true; + + } +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/UpdatedExpressions.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/UpdatedExpressions.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/UpdatedExpressions.java new file mode 100644 index 0000000..06351ea --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/UpdatedExpressions.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.atlas.groovy.GroovyExpression; + +/** + * Represents a list of updated expressions. + */ +public class UpdatedExpressions { + + private List<List<GroovyExpression>> updatedChildren = new ArrayList<>(); + private boolean changed = false; + + public UpdatedExpressions(boolean changed, List<List<GroovyExpression>> updatedChildren) { + this.changed = changed; + this.updatedChildren = updatedChildren; + } + + public List<List<GroovyExpression>> getUpdatedChildren() { + return updatedChildren; + } + + public boolean hasChanges() { + return changed; + } +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/repository/graph/GraphBackedMetadataRepository.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/repository/graph/GraphBackedMetadataRepository.java b/repository/src/main/java/org/apache/atlas/repository/graph/GraphBackedMetadataRepository.java index be02891..6608551 100755 --- a/repository/src/main/java/org/apache/atlas/repository/graph/GraphBackedMetadataRepository.java +++ b/repository/src/main/java/org/apache/atlas/repository/graph/GraphBackedMetadataRepository.java @@ -67,19 +67,26 @@ public class GraphBackedMetadataRepository implements MetadataRepository { private static final GraphHelper graphHelper = GraphHelper.getInstance(); - private final AtlasGraph graph; - private DeleteHandler deleteHandler; - private GraphToTypedInstanceMapper graphToInstanceMapper; + private final IAtlasGraphProvider graphProvider; + private final GraphToTypedInstanceMapper graphToInstanceMapper; @Inject public GraphBackedMetadataRepository(DeleteHandler deleteHandler) { - this.graph = AtlasGraphProvider.getGraphInstance(); - graphToInstanceMapper = new GraphToTypedInstanceMapper(graph); + this.graphProvider = new AtlasGraphProvider(); + this.graphToInstanceMapper = new GraphToTypedInstanceMapper(graphProvider); this.deleteHandler = deleteHandler; } + //for testing only + public GraphBackedMetadataRepository(IAtlasGraphProvider graphProvider, DeleteHandler deleteHandler) { + this.graphProvider = graphProvider; + this.graphToInstanceMapper = new GraphToTypedInstanceMapper(graphProvider); + this.deleteHandler = deleteHandler; + } + + public GraphToTypedInstanceMapper getGraphToInstanceMapper() { return graphToInstanceMapper; } @@ -194,7 +201,7 @@ public class GraphBackedMetadataRepository implements MetadataRepository { LOG.debug("Retrieving entity list for type={}", entityType); } - AtlasGraphQuery query = graph.query().has(Constants.ENTITY_TYPE_PROPERTY_KEY, entityType); + AtlasGraphQuery query = getGraph().query().has(Constants.ENTITY_TYPE_PROPERTY_KEY, entityType); Iterator<AtlasVertex> results = query.vertices().iterator(); if (!results.hasNext()) { return Collections.emptyList(); @@ -429,7 +436,7 @@ public class GraphBackedMetadataRepository implements MetadataRepository { requestContext.getDeletedEntityIds()); } - public AtlasGraph getGraph() { - return AtlasGraphProvider.getGraphInstance(); + public AtlasGraph getGraph() throws RepositoryException { + return graphProvider.get(); } } http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/repository/graph/GraphToTypedInstanceMapper.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/repository/graph/GraphToTypedInstanceMapper.java b/repository/src/main/java/org/apache/atlas/repository/graph/GraphToTypedInstanceMapper.java index 7b2b753..38a553a 100644 --- a/repository/src/main/java/org/apache/atlas/repository/graph/GraphToTypedInstanceMapper.java +++ b/repository/src/main/java/org/apache/atlas/repository/graph/GraphToTypedInstanceMapper.java @@ -19,6 +19,7 @@ package org.apache.atlas.repository.graph; import com.google.inject.Singleton; import org.apache.atlas.AtlasException; +import org.apache.atlas.repository.RepositoryException; import org.apache.atlas.repository.Constants; import org.apache.atlas.repository.graphdb.AtlasEdge; import org.apache.atlas.repository.graphdb.AtlasEdgeDirection; @@ -59,10 +60,10 @@ public final class GraphToTypedInstanceMapper { private static TypeSystem typeSystem = TypeSystem.getInstance(); private static final GraphHelper graphHelper = GraphHelper.getInstance(); - private AtlasGraph graph; + private final IAtlasGraphProvider graphProvider; - public GraphToTypedInstanceMapper(AtlasGraph graph) { - this.graph = graph; + public GraphToTypedInstanceMapper(IAtlasGraphProvider graphProvider) { + this.graphProvider = graphProvider; } public ITypedReferenceableInstance mapGraphToTypedInstance(String guid, AtlasVertex instanceVertex) @@ -407,7 +408,7 @@ public final class GraphToTypedInstanceMapper { public ITypedInstance getReferredEntity(String edgeId, IDataType<?> referredType) throws AtlasException { - final AtlasEdge edge = graph.getEdge(edgeId); + final AtlasEdge edge = getGraph().getEdge(edgeId); if (edge != null) { final AtlasVertex referredVertex = edge.getInVertex(); if (referredVertex != null) { @@ -433,5 +434,9 @@ public final class GraphToTypedInstanceMapper { } return null; } + + private AtlasGraph getGraph() throws RepositoryException { + return graphProvider.get(); + } } http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasEntityGraphDiscoveryV1.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasEntityGraphDiscoveryV1.java b/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasEntityGraphDiscoveryV1.java index 2848a20..0e1d9e6 100644 --- a/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasEntityGraphDiscoveryV1.java +++ b/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasEntityGraphDiscoveryV1.java @@ -17,8 +17,14 @@ */ package org.apache.atlas.repository.store.graph.v1; -import atlas.shaded.hbase.guava.common.annotations.VisibleForTesting; -import com.google.inject.Provider; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + import org.apache.atlas.AtlasErrorCode; import org.apache.atlas.exception.AtlasBaseException; import org.apache.atlas.model.TypeCategory; @@ -26,8 +32,8 @@ import org.apache.atlas.model.instance.AtlasEntity; import org.apache.atlas.model.instance.AtlasObjectId; import org.apache.atlas.model.instance.AtlasStruct; import org.apache.atlas.model.typedef.AtlasStructDef.AtlasAttributeDef; -import org.apache.atlas.repository.store.graph.EntityGraphDiscoveryContext; import org.apache.atlas.repository.store.graph.EntityGraphDiscovery; +import org.apache.atlas.repository.store.graph.EntityGraphDiscoveryContext; import org.apache.atlas.repository.store.graph.EntityResolver; import org.apache.atlas.type.AtlasArrayType; import org.apache.atlas.type.AtlasEntityType; @@ -36,14 +42,9 @@ import org.apache.atlas.type.AtlasStructType; import org.apache.atlas.type.AtlasType; import org.apache.atlas.type.AtlasTypeRegistry; -import javax.inject.Inject; -import java.util.Collection; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import com.google.common.annotations.VisibleForTesting; +import com.google.inject.Inject; +import com.google.inject.Provider; public class AtlasEntityGraphDiscoveryV1 implements EntityGraphDiscovery { http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasEntityStoreV1.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasEntityStoreV1.java b/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasEntityStoreV1.java index 1590aee..4c79cef 100644 --- a/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasEntityStoreV1.java +++ b/repository/src/main/java/org/apache/atlas/repository/store/graph/v1/AtlasEntityStoreV1.java @@ -18,8 +18,9 @@ package org.apache.atlas.repository.store.graph.v1; -import atlas.shaded.hbase.guava.common.annotations.VisibleForTesting; -import com.google.inject.Inject; +import java.util.ArrayList; +import java.util.List; + import org.apache.atlas.AtlasErrorCode; import org.apache.atlas.GraphTransaction; import org.apache.atlas.RequestContextV1; @@ -34,15 +35,17 @@ import org.apache.atlas.model.instance.EntityMutationResponse; import org.apache.atlas.model.instance.EntityMutations; import org.apache.atlas.repository.graphdb.AtlasVertex; import org.apache.atlas.repository.store.graph.AtlasEntityStore; -import org.apache.atlas.repository.store.graph.EntityGraphDiscoveryContext; import org.apache.atlas.repository.store.graph.EntityGraphDiscovery; +import org.apache.atlas.repository.store.graph.EntityGraphDiscoveryContext; import org.apache.atlas.type.AtlasEntityType; import org.apache.atlas.type.AtlasTypeRegistry; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.List; +import com.google.common.annotations.VisibleForTesting; +import com.google.inject.Inject; + + public class AtlasEntityStoreV1 implements AtlasEntityStore { http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/util/AtlasRepositoryConfiguration.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/util/AtlasRepositoryConfiguration.java b/repository/src/main/java/org/apache/atlas/util/AtlasRepositoryConfiguration.java index a04dd95..aab6ee1 100644 --- a/repository/src/main/java/org/apache/atlas/util/AtlasRepositoryConfiguration.java +++ b/repository/src/main/java/org/apache/atlas/util/AtlasRepositoryConfiguration.java @@ -24,9 +24,11 @@ import org.apache.atlas.ApplicationProperties; import org.apache.atlas.AtlasException; import org.apache.atlas.repository.audit.EntityAuditRepository; import org.apache.atlas.repository.audit.HBaseBasedAuditRepository; +import org.apache.atlas.repository.graph.AtlasGraphProvider; import org.apache.atlas.repository.graph.DeleteHandler; import org.apache.atlas.repository.graph.SoftDeleteHandler; import org.apache.atlas.repository.graphdb.GraphDatabase; +import org.apache.atlas.repository.graphdb.GremlinVersion; import org.apache.atlas.repository.store.graph.v1.DeleteHandlerV1; import org.apache.atlas.repository.store.graph.v1.SoftDeleteHandlerV1; import org.apache.atlas.typesystem.types.cache.DefaultTypeCache; @@ -137,7 +139,6 @@ public class AtlasRepositoryConfiguration { } } - private static final String GRAPH_DATABASE_IMPLEMENTATION_PROPERTY = "atlas.graphdb.backend"; private static final String DEFAULT_GRAPH_DATABASE_IMPLEMENTATION_CLASS = "org.apache.atlas.repository.graphdb.titan0.Titan0GraphDatabase"; @@ -153,6 +154,22 @@ public class AtlasRepositoryConfiguration { } /** + * This optimization is configurable as a fail-safe in case issues are found + * with the optimizer in production systems. + */ + public static final String GREMLIN_OPTIMIZER_ENABLED_PROPERTY = "atlas.query.gremlinOptimizerEnabled"; + private static final boolean DEFAULT_GREMLIN_OPTIMZER_ENABLED = true; + + public static boolean isGremlinOptimizerEnabled() { + try { + return ApplicationProperties.get().getBoolean(GREMLIN_OPTIMIZER_ENABLED_PROPERTY, DEFAULT_GREMLIN_OPTIMZER_ENABLED); + } catch (AtlasException e) { + LOG.error("Could not determine value of " + GREMLIN_OPTIMIZER_ENABLED_PROPERTY + ". Defaulting to " + DEFAULT_GREMLIN_OPTIMZER_ENABLED, e); + return DEFAULT_GREMLIN_OPTIMZER_ENABLED; + } + } + + /** * Get the list of operations which are configured to be skipped from auditing * Valid format is HttpMethod:URL eg: GET:Version * @return list of string http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala ---------------------------------------------------------------------- diff --git a/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala b/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala old mode 100755 new mode 100644 index f7ba71a..2863aca --- a/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala +++ b/repository/src/main/scala/org/apache/atlas/query/GremlinQuery.scala @@ -32,15 +32,19 @@ import scala.collection.JavaConversions.bufferAsJavaList import scala.collection.mutable import scala.collection.mutable.ArrayBuffer + import org.apache.atlas.gremlin.GremlinExpressionFactory +import org.apache.atlas.gremlin.optimizer.GremlinQueryOptimizer import org.apache.atlas.groovy.CastExpression -import org.apache.atlas.groovy.CodeBlockExpression +import org.apache.atlas.groovy.ClosureExpression +import org.apache.atlas.groovy.LabeledExpression import org.apache.atlas.groovy.FunctionCallExpression import org.apache.atlas.groovy.GroovyExpression import org.apache.atlas.groovy.GroovyGenerationContext import org.apache.atlas.groovy.IdentifierExpression import org.apache.atlas.groovy.ListExpression import org.apache.atlas.groovy.LiteralExpression +import org.apache.atlas.groovy.TraversalStepType import org.apache.atlas.query.Expressions.AliasExpression import org.apache.atlas.query.Expressions.ArithmeticExpression import org.apache.atlas.query.Expressions.BackReference @@ -78,7 +82,10 @@ import org.apache.atlas.query.Expressions.MaxExpression import org.apache.atlas.query.Expressions.MinExpression import org.apache.atlas.query.Expressions.SumExpression import org.apache.atlas.query.Expressions.CountExpression + +import org.apache.atlas.util.AtlasRepositoryConfiguration import java.util.HashSet + trait IntSequence { def next: Int } @@ -120,6 +127,69 @@ trait SelectExpressionHandling { } } + // Removes back references in comparison expressions that are + // right after an alias expression. + // + //For example: + // .as('x').and(select('x').has(y),...) is changed to + // .as('x').and(has(y),...) + // + //This allows the "has" to be extracted out of the and/or by + //the GremlinQueryOptimizer so the index can be used to evaluate + //the predicate. + + val RemoveUnneededBackReferences : PartialFunction[Expression, Expression] = { + + case filterExpr@FilterExpression(aliasExpr@AliasExpression(_,aliasName), filterChild) => { + val updatedChild = removeUnneededBackReferences(filterChild, aliasName) + val changed = !(updatedChild eq filterChild) + if(changed) { + FilterExpression(aliasExpr, updatedChild) + } + else { + filterExpr + } + + } + case x => x + } + def removeUnneededBackReferences(expr: Expression, outerAlias: String) : Expression = expr match { + case logicalExpr@LogicalExpression(logicalOp,children) => { + var changed : Boolean = false; + val updatedChildren : List[Expression] = children.map { child => + val updatedChild = removeUnneededBackReferences(child, outerAlias); + changed |= ! (updatedChild eq child); + updatedChild + } + if(changed) { + LogicalExpression(logicalOp,updatedChildren) + } + else { + logicalExpr + } + } + case comparisonExpr@ComparisonExpression(_,_,_) => { + var changed = false + val updatedLeft = removeUnneededBackReferences(comparisonExpr.left, outerAlias); + changed |= !( updatedLeft eq comparisonExpr.left); + + val updatedRight = removeUnneededBackReferences(comparisonExpr.right, outerAlias); + changed |= !(updatedRight eq comparisonExpr.right); + + if (changed) { + ComparisonExpression(comparisonExpr.symbol, updatedLeft, updatedRight) + } else { + comparisonExpr + } + } + case FieldExpression(fieldName, fieldInfo, Some(br @ BackReference(brAlias, _, _))) if outerAlias.equals(brAlias) => { + //Remove the back reference, since the thing it references is right in front + //of the comparison expression we're in + FieldExpression(fieldName, fieldInfo, None) + } + case x => x + } + //in groupby, convert alias expressions defined in the group by child to BackReferences //in the groupby list and selectList. val AddBackReferencesToGroupBy : PartialFunction[Expression, Expression] = { @@ -456,7 +526,10 @@ class GremlinTranslator(expr: Expression, return translateLiteralValue(l.dataType, l); } case list: ListLiteral[_] => { - val values : java.util.List[GroovyExpression] = translateList(list.rawValue, true); //why hard coded + //Here, we are creating a Groovy list literal expression ([value1, value2, value3]). Because + //of this, any gremlin query expressions within the list must start with an anonymous traversal. + //We set 'inClosure' to true in this case to make that happen. + val values : java.util.List[GroovyExpression] = translateList(list.rawValue, true); return new ListExpression(values); } case in@TraitInstanceExpression(child) => { @@ -493,7 +566,7 @@ class GremlinTranslator(expr: Expression, case limitOffset@LimitExpression(child, limit, offset) => { val childExpr = genQuery(parent, child, inClosure); val totalResultRows = limit.value + offset.value; - return GremlinExpressionFactory.INSTANCE.generateLimitExpression(childExpr, offset.value, totalResultRows); + return GremlinExpressionFactory.INSTANCE.generateRangeExpression(childExpr, offset.value, totalResultRows); } case count@CountExpression() => { val listExpr = GremlinExpressionFactory.INSTANCE.getClosureArgumentValue(); @@ -621,8 +694,7 @@ class GremlinTranslator(expr: Expression, def genFullQuery(expr: Expression, hasSelect: Boolean): String = { - var q : GroovyExpression = new FunctionCallExpression(new IdentifierExpression("g"),"V"); - + var q : GroovyExpression = new FunctionCallExpression(TraversalStepType.START, new IdentifierExpression(TraversalStepType.SOURCE, "g"),"V"); val debug:Boolean = false if(gPersistenceBehavior.addGraphVertexPrefix(preStatements)) { @@ -631,15 +703,23 @@ class GremlinTranslator(expr: Expression, q = genQuery(q, expr, false) - q = new FunctionCallExpression(q, "toList"); + q = GremlinExpressionFactory.INSTANCE.generateToListExpression(q); q = gPersistenceBehavior.getGraph().addOutputTransformationPredicate(q, hasSelect, expr.isInstanceOf[PathExpression]); - var overallExpression = new CodeBlockExpression(); - overallExpression.addStatements(preStatements); - overallExpression.addStatement(q) - overallExpression.addStatements(postStatements); - var qryStr = generateGremlin(overallExpression); + if(AtlasRepositoryConfiguration.isGremlinOptimizerEnabled()) { + q = GremlinQueryOptimizer.getInstance().optimize(q); + } + + val closureExpression = new ClosureExpression(); + + closureExpression.addStatements(preStatements); + closureExpression.addStatement(q) + closureExpression.addStatements(postStatements); + + val overallExpression = new LabeledExpression("L", closureExpression); + + val qryStr = generateGremlin(overallExpression); if(debug) { println(" query " + qryStr) @@ -666,6 +746,7 @@ class GremlinTranslator(expr: Expression, e1 = e1.transformUp(addAliasToLoopInput()) e1 = e1.transformUp(instanceClauseToTop(e1)) e1 = e1.transformUp(traitClauseWithInstanceForTop(e1)) + e1 = e1.transformUp(RemoveUnneededBackReferences) //Following code extracts the select expressions from expression tree. http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java ---------------------------------------------------------------------- diff --git a/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java b/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java index f2ca6a8..d447c2d 100755 --- a/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java +++ b/repository/src/test/java/org/apache/atlas/discovery/GraphBackedDiscoveryServiceTest.java @@ -508,6 +508,9 @@ public class GraphBackedDiscoveryServiceTest extends BaseRepositoryTest { {"from hive_db limit 3 offset 1", 2}, {"hive_db", 3}, {"hive_db where hive_db.name=\"Reporting\"", 1}, + {"hive_db where hive_db.name=\"Reporting\" or hive_db.name=\"Sales\" or hive_db.name=\"Logging\" limit 1 offset 1", 1}, + {"hive_db where hive_db.name=\"Reporting\" or hive_db.name=\"Sales\" or hive_db.name=\"Logging\" limit 1 offset 2", 1}, + {"hive_db where hive_db.name=\"Reporting\" or hive_db.name=\"Sales\" or hive_db.name=\"Logging\" limit 2 offset 1", 2}, {"hive_db where hive_db.name=\"Reporting\" limit 10 ", 1}, {"hive_db hive_db.name = \"Reporting\"", 1}, {"hive_db where hive_db.name=\"Reporting\" select name, owner", 1},
