http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java b/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java index e862769..add7e07 100644 --- a/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java +++ b/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java @@ -19,21 +19,25 @@ package org.apache.atlas.gremlin; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import org.apache.atlas.AtlasException; +import org.apache.atlas.groovy.AbstractFunctionExpression; import org.apache.atlas.groovy.CastExpression; import org.apache.atlas.groovy.ClosureExpression; import org.apache.atlas.groovy.ComparisonExpression; +import org.apache.atlas.groovy.ComparisonExpression.ComparisonOperator; import org.apache.atlas.groovy.ComparisonOperatorExpression; import org.apache.atlas.groovy.FunctionCallExpression; import org.apache.atlas.groovy.GroovyExpression; import org.apache.atlas.groovy.IdentifierExpression; import org.apache.atlas.groovy.LiteralExpression; import org.apache.atlas.groovy.LogicalExpression; -import org.apache.atlas.groovy.TypeCoersionExpression; -import org.apache.atlas.groovy.ComparisonExpression.ComparisonOperator; import org.apache.atlas.groovy.LogicalExpression.LogicalOperator; +import org.apache.atlas.groovy.TernaryOperatorExpression; +import org.apache.atlas.groovy.TraversalStepType; +import org.apache.atlas.groovy.TypeCoersionExpression; import org.apache.atlas.query.GraphPersistenceStrategies; import org.apache.atlas.query.TypeUtils.FieldInfo; import org.apache.atlas.repository.graph.AtlasGraphProvider; @@ -69,27 +73,14 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { private static final String REPEAT_METHOD = "repeat"; private static final String RANGE_METHOD = "range"; private static final String LAST_METHOD = "last"; - private static final String WHERE_METHOD = "where"; private static final String TO_STRING_METHOD = "toString"; + private static final GroovyExpression EMPTY_STRING_EXPRESSION = new LiteralExpression(""); + @Override public GroovyExpression generateLogicalExpression(GroovyExpression parent, String operator, - List<GroovyExpression> operands) { - if (operands.size() == 1) { - // gremlin 3 treats one element expressions as 'false'. Avoid - // creating a boolean expression in this case. Inline the expression - // note: we can't simply omit it, since it will cause us to traverse - // the edge! - // use 'where' instead - GroovyExpression expr = operands.get(0); - // if child is a back expression, that expression becomes an - // argument to where - return new FunctionCallExpression(parent, WHERE_METHOD, expr); - } else { - // Gremlin 3 does not support _() syntax - // - return new FunctionCallExpression(parent, operator, operands); - } + List<GroovyExpression> operands) { + return new FunctionCallExpression(TraversalStepType.FILTER, parent, operator, operands); } @Override @@ -97,20 +88,20 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { if (inSelect) { return getFieldInSelect(); } else { - return new FunctionCallExpression(parent, SELECT_METHOD, new LiteralExpression(alias)); + return new FunctionCallExpression(TraversalStepType.MAP_TO_ELEMENT, parent, SELECT_METHOD, new LiteralExpression(alias)); } } @Override public GroovyExpression typeTestExpression(GraphPersistenceStrategies s, String typeName, GroovyExpression itRef) { - LiteralExpression typeAttrExpr = new LiteralExpression(s.typeAttributeName()); LiteralExpression superTypeAttrExpr = new LiteralExpression(s.superTypeAttributeName()); LiteralExpression typeNameExpr = new LiteralExpression(typeName); - - FunctionCallExpression result = new FunctionCallExpression(HAS_METHOD, typeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr)); - result = new FunctionCallExpression(result, "or"); - result = new FunctionCallExpression(HAS_METHOD, superTypeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr)); + LiteralExpression typeAttrExpr = new LiteralExpression(s.typeAttributeName()); + FunctionCallExpression result = new FunctionCallExpression(TraversalStepType.FILTER, HAS_METHOD, typeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr)); + result = new FunctionCallExpression(TraversalStepType.FILTER, result, "or"); + result = new FunctionCallExpression(TraversalStepType.FILTER, result, HAS_METHOD, superTypeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr)); return result; + } @Override @@ -118,41 +109,48 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { GroovyExpression emitExpr = generateLoopEmitExpression(s, dataType); - GroovyExpression result = new FunctionCallExpression(parent, REPEAT_METHOD, loopExpr); + GroovyExpression result = new FunctionCallExpression(TraversalStepType.BRANCH, parent, REPEAT_METHOD, loopExpr); if (times != null) { GroovyExpression timesExpr = new LiteralExpression(times); - result = new FunctionCallExpression(result, TIMES_METHOD, timesExpr); + result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, TIMES_METHOD, timesExpr); } - result = new FunctionCallExpression(result, EMIT_METHOD, emitExpr); + result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, EMIT_METHOD, emitExpr); return result; } @Override public GroovyExpression getLoopExpressionParent(GroovyExpression inputQry) { - return new IdentifierExpression("__"); + GroovyExpression curTraversal = getAnonymousTraversalStartExpression(); + return curTraversal; + } + + private IdentifierExpression getAnonymousTraversalStartExpression() { + return new IdentifierExpression(TraversalStepType.START, "__"); } @Override public GroovyExpression generateSelectExpression(GroovyExpression parent, List<LiteralExpression> sourceNames, List<GroovyExpression> srcExprs) { - FunctionCallExpression result = new FunctionCallExpression(parent, SELECT_METHOD, sourceNames); + FunctionCallExpression result = new FunctionCallExpression(TraversalStepType.MAP_TO_VALUE, parent, SELECT_METHOD, sourceNames); for (GroovyExpression expr : srcExprs) { GroovyExpression closure = new ClosureExpression(expr); GroovyExpression castClosure = new TypeCoersionExpression(closure, FUNCTION_CLASS); - result = new FunctionCallExpression(result, BY_METHOD, castClosure); + result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, BY_METHOD, castClosure); } return result; } @Override public GroovyExpression generateFieldExpression(GroovyExpression parent, FieldInfo fInfo, - String propertyName, boolean inSelect) { + String propertyName, boolean inSelect) { AttributeInfo attrInfo = fInfo.attrInfo(); IDataType attrType = attrInfo.dataType(); GroovyExpression propertyNameExpr = new LiteralExpression(propertyName); + //Whether it is the user or shared graph does not matter here, since we're + //just getting the conversion expression. Ideally that would be moved someplace else. AtlasGraph graph = AtlasGraphProvider.getGraphInstance(); if (inSelect) { @@ -161,13 +159,13 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { return graph.generatePersisentToLogicalConversionExpression(expr, attrType); } else { - GroovyExpression unmapped = new FunctionCallExpression(parent, VALUES_METHOD, propertyNameExpr); + GroovyExpression unmapped = new FunctionCallExpression(TraversalStepType.FLAT_MAP_TO_VALUES, parent, VALUES_METHOD, propertyNameExpr); if (graph.isPropertyValueConversionNeeded(attrType)) { GroovyExpression toConvert = new FunctionCallExpression(getItVariable(), GET_METHOD); GroovyExpression conversionFunction = graph.generatePersisentToLogicalConversionExpression(toConvert, attrType); - return new FunctionCallExpression(unmapped, MAP_METHOD, new ClosureExpression(conversionFunction)); + return new FunctionCallExpression(TraversalStepType.MAP_TO_VALUE, unmapped, MAP_METHOD, new ClosureExpression(conversionFunction)); } else { return unmapped; } @@ -238,15 +236,15 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { valueMatchesExpr); GroovyExpression filterFunction = new ClosureExpression(filterCondition); - return new FunctionCallExpression(parent, FILTER_METHOD, filterFunction); + return new FunctionCallExpression(TraversalStepType.FILTER, parent, FILTER_METHOD, filterFunction); } else { GroovyExpression valueMatches = new FunctionCallExpression(getComparisonFunction(symbol), requiredValue); - return new FunctionCallExpression(parent, HAS_METHOD, propertNameExpr, valueMatches); + return new FunctionCallExpression(TraversalStepType.FILTER, parent, HAS_METHOD, propertNameExpr, valueMatches); } } @Override - protected GroovyExpression initialExpression(GraphPersistenceStrategies s, GroovyExpression varExpr) { + protected GroovyExpression initialExpression(GroovyExpression varExpr, GraphPersistenceStrategies s) { // this bit of groovy magic converts the set of vertices in varName into // a String containing the ids of all the vertices. This becomes the @@ -255,15 +253,61 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { // _() // s"g.V(${varName}.collect{it.id()} as String[])" - GroovyExpression gExpr = getGraph(); + GroovyExpression gExpr = getGraphExpression(); GroovyExpression varRefExpr = new TypeCoersionExpression(varExpr, OBJECT_ARRAY_CLASS); - FunctionCallExpression expr = new FunctionCallExpression(gExpr, V_METHOD, varRefExpr); + GroovyExpression matchingVerticesExpr = new FunctionCallExpression(TraversalStepType.START, gExpr, V_METHOD, varRefExpr); + GroovyExpression isEmpty = new FunctionCallExpression(varExpr, "isEmpty"); + GroovyExpression emptyGraph = getEmptyTraversalExpression(); + + GroovyExpression expr = new TernaryOperatorExpression(isEmpty, emptyGraph, matchingVerticesExpr); + return s.addInitialQueryCondition(expr); } + private GroovyExpression getEmptyTraversalExpression() { + GroovyExpression emptyGraph = new FunctionCallExpression(TraversalStepType.START, getGraphExpression(), V_METHOD, EMPTY_STRING_EXPRESSION); + return emptyGraph; + } + + @Override + public GroovyExpression generateRangeExpression(GroovyExpression parent, int startIndex, int endIndex) { + //treat as barrier step, since limits need to be applied globally (even though it + //is technically a filter step) + return new FunctionCallExpression(TraversalStepType.BARRIER, parent, RANGE_METHOD, new LiteralExpression(startIndex), new LiteralExpression(endIndex)); + } + + @Override + public boolean isRangeExpression(GroovyExpression expr) { + + return (expr instanceof FunctionCallExpression && ((FunctionCallExpression)expr).getFunctionName().equals(RANGE_METHOD)); + } + + @Override + public int[] getRangeParameters(AbstractFunctionExpression expr) { + + if (isRangeExpression(expr)) { + FunctionCallExpression rangeExpression = (FunctionCallExpression) expr; + List<GroovyExpression> arguments = rangeExpression.getArguments(); + int startIndex = (int)((LiteralExpression)arguments.get(0)).getValue(); + int endIndex = (int)((LiteralExpression)arguments.get(1)).getValue(); + return new int[]{startIndex, endIndex}; + } + else { + return null; + } + } + @Override - public GroovyExpression generateLimitExpression(GroovyExpression parent, int offset, int totalRows) { - return new FunctionCallExpression(parent, RANGE_METHOD, new LiteralExpression(offset), new LiteralExpression(totalRows)); + public void setRangeParameters(GroovyExpression expr, int startIndex, int endIndex) { + + if (isRangeExpression(expr)) { + FunctionCallExpression rangeExpression = (FunctionCallExpression) expr; + rangeExpression.setArgument(0, new LiteralExpression(Integer.valueOf(startIndex))); + rangeExpression.setArgument(1, new LiteralExpression(Integer.valueOf(endIndex))); + } + else { + throw new IllegalArgumentException(expr + " is not a valid range expression"); + } } @Override @@ -295,7 +339,8 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { comparisonExpr = new ComparisonOperatorExpression(bCompExpr, aCompExpr); } ClosureExpression comparisonFunction = new ClosureExpression(comparisonExpr, "a", "b"); - return new FunctionCallExpression(new FunctionCallExpression(parent, ORDER_METHOD), BY_METHOD, orderByClause, comparisonFunction); + FunctionCallExpression orderCall = new FunctionCallExpression(TraversalStepType.BARRIER, parent, ORDER_METHOD); + return new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, orderCall, BY_METHOD, orderByClause, comparisonFunction); } @Override @@ -327,10 +372,10 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { public GroovyExpression generateGroupByExpression(GroovyExpression parent, GroovyExpression groupByExpression, GroovyExpression aggregationFunction) { - GroovyExpression result = new FunctionCallExpression(parent, "group"); + GroovyExpression result = new FunctionCallExpression(TraversalStepType.BARRIER, parent, "group"); GroovyExpression groupByClosureExpr = new TypeCoersionExpression(new ClosureExpression(groupByExpression), "Function"); - result = new FunctionCallExpression(result, "by", groupByClosureExpr); - result = new FunctionCallExpression(result, "toList"); + result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, "by", groupByClosureExpr); + result = new FunctionCallExpression(TraversalStepType.END, result, "toList"); GroovyExpression mapValuesClosure = new ClosureExpression(new FunctionCallExpression(new CastExpression(getItVariable(), "Map"), "values")); @@ -347,8 +392,55 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { result = new FunctionCallExpression(result, "collect", aggregrationFunctionClosure); return result; } + + @Override + public GroovyExpression generateSeededTraversalExpresssion(boolean isMap, GroovyExpression valueCollection) { + GroovyExpression coersedExpression = new TypeCoersionExpression(valueCollection, isMap ? "Map[]" : "Vertex[]"); + if(isMap) { + + return new FunctionCallExpression(TraversalStepType.START, "__", coersedExpression); + } + else { + //We cannot always use an anonymous traversal because that breaks repeat steps + return new FunctionCallExpression(TraversalStepType.START, getEmptyTraversalExpression(), "inject", coersedExpression); + } + } + + @Override public GroovyExpression getGroupBySelectFieldParent() { return null; } -} + @Override + public String getTraversalExpressionClass() { + return "GraphTraversal"; + } + + @Override + public boolean isSelectGeneratesMap(int aliasCount) { + //in Gremlin 3, you only get a map if there is more than 1 alias. + return aliasCount > 1; + } + + @Override + public GroovyExpression generateMapExpression(GroovyExpression parent, ClosureExpression closureExpression) { + return new FunctionCallExpression(TraversalStepType.MAP_TO_ELEMENT, parent, "map", closureExpression); + } + + @Override + public GroovyExpression generateGetSelectedValueExpression(LiteralExpression key, + GroovyExpression rowMapExpr) { + rowMapExpr = new CastExpression(rowMapExpr, "Map"); + GroovyExpression getExpr = new FunctionCallExpression(rowMapExpr, "get", key); + return getExpr; + } + + @Override + public GroovyExpression getCurrentTraverserObject(GroovyExpression traverser) { + return new FunctionCallExpression(traverser, "get"); + } + + public List<String> getAliasesRequiredByExpression(GroovyExpression expr) { + return Collections.emptyList(); + } +}
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java b/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java index 6c326b2..c2fdf09 100644 --- a/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java +++ b/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java @@ -17,8 +17,14 @@ */ package org.apache.atlas.gremlin; -import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + import org.apache.atlas.AtlasException; +import org.apache.atlas.groovy.AbstractFunctionExpression; import org.apache.atlas.groovy.ArithmeticExpression; import org.apache.atlas.groovy.ArithmeticExpression.ArithmeticOperator; import org.apache.atlas.groovy.CastExpression; @@ -29,6 +35,7 @@ import org.apache.atlas.groovy.GroovyExpression; import org.apache.atlas.groovy.IdentifierExpression; import org.apache.atlas.groovy.ListExpression; import org.apache.atlas.groovy.LiteralExpression; +import org.apache.atlas.groovy.TraversalStepType; import org.apache.atlas.groovy.TypeCoersionExpression; import org.apache.atlas.groovy.VariableAssignmentExpression; import org.apache.atlas.query.GraphPersistenceStrategies; @@ -40,13 +47,9 @@ import org.apache.atlas.repository.graphdb.GremlinVersion; import org.apache.atlas.typesystem.types.IDataType; import org.apache.atlas.typesystem.types.TypeSystem; import org.apache.atlas.typesystem.types.cache.TypeCache.TYPE_FILTER; +import org.apache.atlas.util.AtlasRepositoryConfiguration; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import com.google.common.collect.ImmutableList; /** * Factory to generate Groovy expressions representing Gremlin syntax that that @@ -58,7 +61,8 @@ public abstract class GremlinExpressionFactory { private static final String G_VARIABLE = "g"; private static final String IT_VARIABLE = "it"; - private static final String SET_CLASS = "Set"; + protected static final String SET_CLASS = "Set"; + private static final String OBJECT_FIELD = "object"; @@ -66,18 +70,25 @@ public abstract class GremlinExpressionFactory { protected static final String FILTER_METHOD = "filter"; private static final String PATH_METHOD = "path"; private static final String AS_METHOD = "as"; - private static final String FILL_METHOD = "fill"; private static final String IN_OPERATOR = "in"; protected static final String HAS_METHOD = "has"; protected static final String TO_LOWER_CASE_METHOD = "toLowerCase"; protected static final String SELECT_METHOD = "select"; protected static final String ORDER_METHOD = "order"; + protected static final String FILL_METHOD = "fill"; public static final GremlinExpressionFactory INSTANCE = AtlasGraphProvider.getGraphInstance() .getSupportedGremlinVersion() == GremlinVersion.THREE ? new Gremlin3ExpressionFactory() : new Gremlin2ExpressionFactory(); /** + * Returns the unqualified name of the class used in this version of gremlin to + * represent Gremlin queries as they are being generated. + * @return + */ + public abstract String getTraversalExpressionClass(); + + /** * Gets the expression to use as the parent when translating the loop * expression in a loop * @@ -172,14 +183,40 @@ public abstract class GremlinExpressionFactory { String propertyName, String symbol, GroovyExpression requiredValue, FieldInfo fInfo) throws AtlasException; /** - * Generates a limit expression + * Generates a range expression * * @param parent - * @param offset - * @param totalRows + * @param startIndex + * @param endIndex * @return */ - public abstract GroovyExpression generateLimitExpression(GroovyExpression parent, int offset, int totalRows); + public abstract GroovyExpression generateRangeExpression(GroovyExpression parent, int startIndex, int endIndex); + + /** + * Determines if the specified expression is a range method call. + * + * @param expr + * @return + */ + public abstract boolean isRangeExpression(GroovyExpression expr); + + /** + * Set the start index and end index of a range expression + * + * @param expr + * @param startIndex + * @param endIndex + */ + public abstract void setRangeParameters(GroovyExpression expr, int startIndex, int endIndex); + + /** + * If the specified function expression is a range expression, returns the start and end index parameters + * otherwise returns null. + * + * @param expr + * @return int array with two elements - element 0 is start index, element 1 is end index + */ + public abstract int[] getRangeParameters(AbstractFunctionExpression expr); /** * Generates an order by expression @@ -193,6 +230,22 @@ public abstract class GremlinExpressionFactory { List<GroovyExpression> translatedOrderBy, boolean isAscending); /** + * Determines if specified expression is an order method call + * + * @param expr + * @return + */ + public boolean isOrderExpression(GroovyExpression expr) { + if (expr instanceof FunctionCallExpression) { + FunctionCallExpression functionCallExpression = (FunctionCallExpression) expr; + if (functionCallExpression.getFunctionName().equals(ORDER_METHOD)) { + return true; + } + } + return false; + } + + /** * Returns the Groovy expressions that should be used as the parents when * translating an order by expression. This is needed because Gremlin 2 and * 3 handle order by expressions very differently. @@ -207,6 +260,17 @@ public abstract class GremlinExpressionFactory { */ public abstract GroovyExpression getAnonymousTraversalExpression(); + public boolean isLeafAnonymousTraversalExpression(GroovyExpression expr) { + if(!(expr instanceof FunctionCallExpression)) { + return false; + } + FunctionCallExpression functionCallExpr = (FunctionCallExpression)expr; + if(functionCallExpr.getCaller() != null) { + return false; + } + return functionCallExpr.getFunctionName().equals("_") & functionCallExpr.getArguments().size() == 0; + } + /** * Returns an expression representing * @@ -216,11 +280,11 @@ public abstract class GremlinExpressionFactory { /** * Generates the expression the serves as the root of the Gremlin query. - * @param s * @param varExpr variable containing the vertices to traverse * @return */ - protected abstract GroovyExpression initialExpression(GraphPersistenceStrategies s, GroovyExpression varExpr); + protected abstract GroovyExpression initialExpression(GroovyExpression varExpr, GraphPersistenceStrategies s); + /** * Generates an expression that tests whether the vertex represented by the 'toTest' @@ -236,19 +300,37 @@ public abstract class GremlinExpressionFactory { GroovyExpression vertexExpr); /** + /** * Generates a sequence of groovy expressions that filter the vertices to only * those that match the specified type. If GraphPersistenceStrategies.collectTypeInstancesIntoVar() - * is set, the vertices are put into a variable whose name is geneated from the specified IntSequence. - * The last item in the result will be a graph traversal restricted to only the matching vertices. + * is set and the gremlin optimizer is disabled, the vertices are put into a variable whose name is generated + * from the specified IntSequence. The last item in the result will be a graph traversal restricted to only + * the matching vertices. */ public List<GroovyExpression> generateTypeTestExpression(GraphPersistenceStrategies s, GroovyExpression parent, String typeName, IntSequence intSeq) throws AtlasException { - if (s.filterBySubTypes()) { - return typeTestExpressionUsingInFilter(s, parent, typeName); - } else if (s.collectTypeInstancesIntoVar()) { - return typeTestExpressionMultiStep(s, typeName, intSeq); - } else { - return typeTestExpressionUsingFilter(s, parent, typeName); + + if(AtlasRepositoryConfiguration.isGremlinOptimizerEnabled()) { + GroovyExpression superTypeAttributeNameExpr = new LiteralExpression(s.superTypeAttributeName()); + GroovyExpression typeNameExpr = new LiteralExpression(typeName); + GroovyExpression superTypeMatchesExpr = new FunctionCallExpression(TraversalStepType.FILTER, HAS_METHOD, superTypeAttributeNameExpr, + typeNameExpr); + + GroovyExpression typeAttributeNameExpr = new LiteralExpression(s.typeAttributeName()); + + GroovyExpression typeMatchesExpr = new FunctionCallExpression(TraversalStepType.FILTER, HAS_METHOD, typeAttributeNameExpr, + typeNameExpr); + GroovyExpression result = new FunctionCallExpression(TraversalStepType.FILTER, parent, "or", typeMatchesExpr, superTypeMatchesExpr); + return Collections.singletonList(result); + } + else { + if (s.filterBySubTypes()) { + return typeTestExpressionUsingInFilter(s, parent, typeName); + } else if (s.collectTypeInstancesIntoVar()) { + return typeTestExpressionMultiStep(s, typeName, intSeq); + } else { + return typeTestExpressionUsingFilter(s, parent, typeName); + } } } @@ -285,7 +367,7 @@ public abstract class GremlinExpressionFactory { result.add(newSetVar(varName)); result.add(fillVarWithTypeInstances(s, typeName, varName)); result.add(fillVarWithSubTypeInstances(s, typeName, varName)); - result.add(initialExpression(s, varExpr)); + result.add(initialExpression(varExpr, s)); return result; } @@ -324,7 +406,6 @@ public abstract class GremlinExpressionFactory { return Collections.singletonList(filterExpr); } - /** * Generates an expression which checks whether the vertices in the query have * a field with the given name. @@ -334,7 +415,7 @@ public abstract class GremlinExpressionFactory { * @return */ public GroovyExpression generateUnaryHasExpression(GroovyExpression parent, String fieldName) { - return new FunctionCallExpression(parent, HAS_METHOD, new LiteralExpression(fieldName)); + return new FunctionCallExpression(TraversalStepType.FILTER, parent, HAS_METHOD, new LiteralExpression(fieldName)); } /** @@ -344,7 +425,7 @@ public abstract class GremlinExpressionFactory { * @return */ public GroovyExpression generatePathExpression(GroovyExpression parent) { - return new FunctionCallExpression(parent, PATH_METHOD); + return new FunctionCallExpression(TraversalStepType.MAP_TO_VALUE, parent, PATH_METHOD); } /** @@ -365,7 +446,7 @@ public abstract class GremlinExpressionFactory { * @return */ public GroovyExpression generateAliasExpression(GroovyExpression parent, String alias) { - return new FunctionCallExpression(parent, AS_METHOD, new LiteralExpression(alias)); + return new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, parent, AS_METHOD, new LiteralExpression(alias)); } /** @@ -377,19 +458,19 @@ public abstract class GremlinExpressionFactory { * @return */ public GroovyExpression generateAdjacentVerticesExpression(GroovyExpression parent, AtlasEdgeDirection dir) { - return new FunctionCallExpression(parent, getGremlinFunctionName(dir)); + return new FunctionCallExpression(TraversalStepType.FLAT_MAP_TO_ELEMENTS, parent, getGremlinFunctionName(dir)); } private String getGremlinFunctionName(AtlasEdgeDirection dir) { switch(dir) { - case IN: - return "in"; - case OUT: - return "out"; - case BOTH: - return "both"; - default: - throw new RuntimeException("Unknown Atlas Edge Direction: " + dir); + case IN: + return "in"; + case OUT: + return "out"; + case BOTH: + return "both"; + default: + throw new RuntimeException("Unknown Atlas Edge Direction: " + dir); } } @@ -403,7 +484,7 @@ public abstract class GremlinExpressionFactory { */ public GroovyExpression generateAdjacentVerticesExpression(GroovyExpression parent, AtlasEdgeDirection dir, String label) { - return new FunctionCallExpression(parent, getGremlinFunctionName(dir), new LiteralExpression(label)); + return new FunctionCallExpression(TraversalStepType.FLAT_MAP_TO_ELEMENTS, parent, getGremlinFunctionName(dir), new LiteralExpression(label)); } /** @@ -423,17 +504,19 @@ public abstract class GremlinExpressionFactory { } protected GroovyExpression getAllVerticesExpr() { - GroovyExpression gExpr = getGraph(); - return new FunctionCallExpression(gExpr, V_METHOD); + GroovyExpression gExpr = getGraphExpression(); + return new FunctionCallExpression(TraversalStepType.START, gExpr, V_METHOD); } - protected IdentifierExpression getGraph() { - return new IdentifierExpression(G_VARIABLE); + protected IdentifierExpression getGraphExpression() { + return new IdentifierExpression(TraversalStepType.SOURCE, G_VARIABLE); } + protected GroovyExpression getCurrentObjectExpression() { return new FieldExpression(getItVariable(), OBJECT_FIELD); } + //assumes cast already performed public GroovyExpression generateCountExpression(GroovyExpression itExpr) { GroovyExpression collectionExpr = new CastExpression(itExpr,"Collection"); @@ -454,11 +537,9 @@ public abstract class GremlinExpressionFactory { private GroovyExpression getAggregrationExpression(GroovyExpression itExpr, GroovyExpression mapFunction, String functionName) { - GroovyExpression collectionExpr = new CastExpression(itExpr, - "Collection"); + GroovyExpression collectionExpr = new CastExpression(itExpr,"Collection"); ClosureExpression collectFunction = new ClosureExpression(mapFunction); - GroovyExpression transformedList = new FunctionCallExpression( - collectionExpr, "collect", collectFunction); + GroovyExpression transformedList = new FunctionCallExpression(collectionExpr, "collect", collectFunction); return new FunctionCallExpression(transformedList, functionName); } @@ -466,5 +547,106 @@ public abstract class GremlinExpressionFactory { return getItVariable(); } + /** + * Specifies the parent to use when translating the select list in + * a group by statement. + * + * @return + */ public abstract GroovyExpression getGroupBySelectFieldParent(); + + public GroovyExpression generateFillExpression(GroovyExpression parent, GroovyExpression variable) { + return new FunctionCallExpression(TraversalStepType.END,parent , "fill", variable); + } + + /** + * Generates an anonymous graph traversal initialized with the specified value. In Gremlin 3, we need + * to use a different syntax for this when the object is a map, so that information needs to be provided + * to this method so that the correct syntax is used. + * + * @param isMap true if the value contains Map instances, false if it contains Vertex instances + * @param valueCollection the source objects to start the traversal from. + */ + public abstract GroovyExpression generateSeededTraversalExpresssion(boolean isMap, GroovyExpression valueCollection); + + /** + * Returns the current value of the traverser. This is used when generating closure expressions that + * need to operate on the current value in the graph graversal. + * + * @param traverser + * @return + */ + public abstract GroovyExpression getCurrentTraverserObject(GroovyExpression traverser); + + /** + * Generates an expression that transforms the current value of the traverser by + * applying the function specified + * + * @param parent + * @param closureExpression + * @return + */ + public abstract GroovyExpression generateMapExpression(GroovyExpression parent, ClosureExpression closureExpression); + + /** + * Returns whether a select statement generates a map (or Gremlin 2 "Row") when it contains the specified + * number of aliases. + * + */ + public abstract boolean isSelectGeneratesMap(int aliasCount); + + /** + * Generates an expression to get the value of the value from the row map + * generated by select() with the specified key. + * + */ + public abstract GroovyExpression generateGetSelectedValueExpression(LiteralExpression key, + GroovyExpression rowMapExpr); + + public GroovyExpression removeExtraMapFromPathInResult(GroovyExpression parent) { + GroovyExpression listItem = getItVariable(); + GroovyExpression tailExpr = new FunctionCallExpression(listItem, "tail"); + return new FunctionCallExpression(parent, "collect", new ClosureExpression(tailExpr)); + + } + + /** + * Generates a toList expression to execute the gremlin query and + * store the result in a new list. + * + * @param expr + * @return + */ + public GroovyExpression generateToListExpression(GroovyExpression expr) { + return new FunctionCallExpression(TraversalStepType.END, expr, "toList"); + } + + /** + * Finds aliases that absolutely must be brought along with this expression into + * the output expression and cannot just be recreated there. For example, in the + * Gremlin 2 loop expression, the loop semantics break of the alias is simply recreated + * in the output expression. + * @param expr + * @return + */ + public abstract List<String> getAliasesRequiredByExpression(GroovyExpression expr); + + + /** + * Checks if the given expression is an alias expression, and if so + * returns the alias from the expression. Otherwise, null is + * returned. + */ + public String getAliasNameIfRelevant(GroovyExpression expr) { + if(!(expr instanceof FunctionCallExpression)) { + return null; + } + FunctionCallExpression fc = (FunctionCallExpression)expr; + if(! fc.getFunctionName().equals(AS_METHOD)) { + return null; + } + LiteralExpression aliasName = (LiteralExpression)fc.getArguments().get(0); + return aliasName.getValue().toString(); + + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java new file mode 100644 index 0000000..3e6c39a --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.FunctionCallExpression; +import org.apache.atlas.groovy.GroovyExpression; +import org.apache.atlas.groovy.LiteralExpression; +import org.apache.atlas.groovy.TraversalStepType; + +/** + * Finds all aliases in the expression. + */ +public class AliasFinder implements CallHierarchyVisitor { + + private List<LiteralExpression> foundAliases = new ArrayList<>(); + + //Whether a final alias is needed. A final alias is needed + //if there are transformation steps after the last alias in + //the expression. We initialize this to false since a final + //alias is not needed if there are no aliases. + private boolean finalAliasNeeded = false; + + @Override + public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) { + return true; + } + + @Override + public void visitNonFunctionCaller(GroovyExpression expr) { + + } + + @Override + public void visitNullCaller() { + + } + + private static final Set<TraversalStepType> TRANSFORMATION_STEP_TYPES = new HashSet<>(Arrays.asList( + TraversalStepType.MAP_TO_ELEMENT, + TraversalStepType.MAP_TO_VALUE, + TraversalStepType.FLAT_MAP_TO_ELEMENTS, + TraversalStepType.FLAT_MAP_TO_VALUES, + TraversalStepType.BARRIER, + TraversalStepType.NONE)); + + + @Override + public boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall) { + + if (functionCall instanceof FunctionCallExpression) { + FunctionCallExpression expr = (FunctionCallExpression)functionCall; + if (expr.getType() == TraversalStepType.SIDE_EFFECT && expr.getFunctionName().equals("as")) { + //We found an alias. This is currently the last expression we've seen + //in our traversal back up the expression tree, so at this point a final + //alias is not needed. + LiteralExpression aliasNameExpr = (LiteralExpression)expr.getArguments().get(0); + foundAliases.add(aliasNameExpr); + finalAliasNeeded=false; + } + } + + if(TRANSFORMATION_STEP_TYPES.contains(functionCall.getType())) { + //This step changes the value of the traverser. Now, a final alias + //needs to be added. + if(!foundAliases.isEmpty()) { + finalAliasNeeded = true; + } + } + + return true; + } + + public List<LiteralExpression> getAliases() { + return foundAliases; + } + + public boolean isFinalAliasNeeded() { + + return finalAliasNeeded; + } +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java new file mode 100644 index 0000000..6089353 --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.GroovyExpression; + +/** + * Call back interface for visiting the call hierarchy of a function call. + */ +public interface CallHierarchyVisitor { + + /** + * Visits a function expression before the visit to its caller. + * + * @param expr + * + * @return false to terminate the recursion + */ + boolean preVisitFunctionCaller(AbstractFunctionExpression expr); + + /** + * Called when a caller that is not an instance of + * AbstractFunctionExpression is found. This indicates that the deepest + * point in the call hierarchy has been reached. + * + * + */ + void visitNonFunctionCaller(GroovyExpression expr); + + /** + * Called when a null caller is found (this happens for static/user-defined + * functions). This indicates that the deepest point in the call hierarchy + * has been reached. + * + */ + void visitNullCaller(); + + /** + * Visits a function expression after the visit to its caller. + * + * @param expr + * + * @return false to terminate the recursion + */ + boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall); +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java new file mode 100644 index 0000000..7cf9711 --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.atlas.gremlin.GremlinExpressionFactory; +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.FunctionCallExpression; +import org.apache.atlas.groovy.GroovyExpression; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Optimizer that pulls has expressions out of an 'and' expression. + * + * For example: + * + * g.V().and(has('x'),has('y') + * + * is optimized to: + * + * g.V().has('x').has('y') + * + * There are certain cases where it is not safe to move an expression out + * of the 'and'. For example, in the expression + * + * g.V().and(has('x').out('y'),has('z')) + * + * has('x').out('y') cannot be moved out of the 'and', since it changes the value of the traverser. + * + * At this time, the ExpandAndsOptimizer is not able to handle this scenario, so we don't extract + * that expression. In this case, the result is: + * + * g.V().has('z').and(has('x').out('y')) + * + * The optimizer will call ExpandAndsOptimization recursively on the children, so + * there is no need to recursively update the children here. + * + * @param expr + * @param context + * @return the expressions that should be unioned together to get the query result + */ +public class ExpandAndsOptimization implements GremlinOptimization { + + private static final Logger logger_ = LoggerFactory.getLogger(ExpandAndsOptimization.class); + + + private final GremlinExpressionFactory factory; + + public ExpandAndsOptimization(GremlinExpressionFactory factory) { + this.factory = factory; + } + + @Override + public boolean appliesTo(GroovyExpression expr, OptimizationContext contxt) { + return expr instanceof FunctionCallExpression && ((FunctionCallExpression)expr).getFunctionName().equals("and"); + } + + /** + * Expands the given and expression. There is no need to recursively + * expand the children here. This method is called recursively by + * GremlinQueryOptimier on the children. + * + */ + @Override + public GroovyExpression apply(GroovyExpression expr, OptimizationContext context) { + + FunctionCallExpression exprAsFunction = (FunctionCallExpression)expr; + GroovyExpression result = exprAsFunction.getCaller(); + + List<GroovyExpression> nonExtractableArguments = new ArrayList<>(); + for(GroovyExpression argument : exprAsFunction.getArguments()) { + + if (GremlinQueryOptimizer.isExtractable(argument)) { + //Set the caller of the deepest expression in the call hierarchy + //of the argument to point to the current result. + //For example, if result is "g.V()" and the updatedArgument is "has('x').has('y')", + //updatedArgument would be a tree like this: + // + // has('y') + // / + // / caller + // |/_ + // has('x') + // / + // / caller + // |/_ + // (null) + // + //We would set the caller of has('x') to be g.V(), so result would become g.V().has('x').has('y'). + // + // Note: This operation is currently done by making a copy of the argument tree. That should + // be changed. + result = GremlinQueryOptimizer.copyWithNewLeafNode( + (AbstractFunctionExpression) argument, result); + } else { + logger_.warn("Found non-extractable argument '{}' in the 'and' expression '{}'",argument.toString(), expr.toString()); + nonExtractableArguments.add(argument); + } + } + + if (!nonExtractableArguments.isEmpty()) { + //add a final 'and' call with the arguments that could not be extracted + result = factory.generateLogicalExpression(result, "and", nonExtractableArguments); + } + return result; + } + + @Override + public boolean isApplyRecursively() { + return true; + } +} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java new file mode 100644 index 0000000..ba6059a --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java @@ -0,0 +1,584 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import org.apache.atlas.gremlin.GremlinExpressionFactory; +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.ClosureExpression; +import org.apache.atlas.groovy.FunctionCallExpression; +import org.apache.atlas.groovy.GroovyExpression; +import org.apache.atlas.groovy.LiteralExpression; +import org.apache.atlas.groovy.StatementListExpression; +import org.apache.atlas.groovy.TraversalStepType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Lists; + + + +/** + * Optimization that removes 'or' expressions from a graph traversal when possible + * and replaces them with separate calls that are combined using a logical union operation. + * Unfortunately, Titan does not use indices when executing the child graph traversals associated + * with an 'or' call. In order to make the index be used, we split queries with + * or expressions into multiple queries. These queries are executed individually, + * using indices, and then the results are combined back together. Here is a + * simple example to illustrate this: + * + * <h4>Original Query</h4> + * + * <pre> + * g.V().or(has('name','Fred'),has('age','17')) + * </pre> + * + *<h4>Optimized Query</h4> + * + * <pre> + * def r = [] as Set; + * g.V().has('name','Fred').fill(r); + * g.V().has('age','17').fill(r); + * r; + * </pre> + * + * Here, we introduce an intermediate variable "r" which is declared as a Set. The Set is performing + * the union for us. If there are vertices that happen to both have "Fred" as the name and "17" as the age, + * the Set will prevent the second query execution from adding a duplicate vertex to the result. Recall that + * in Groovy scripts, the last expression is the one that will be returned back to the caller. We refer to + * that expression is the "result expression". For this example, the result expression is simply "r", which + * contains the vertices that matched the query. + * <p/> + * If the query does any kind of transformation of the vertices to produce the query result, that needs + * to be done in the result expression. To understand why that is, let's take a look at another example: + * + * <h4>Original Query</h4> + * + * <pre> + * g.V().or(has('name','Fred'),has('age','17')).as('person').select('person').by('gender') + * </pre> + * + * <h4>Incorrect Optimized Query</h4> + * + * <pre> + * def r = [] as Set; + * g.V().has('name','Fred').as('person').select('person').by('gender').fill(r) + * g.V().has('age','17').as('person').select('person').by('gender').fill(r) + * r; + * </pre> + * + * The problem with this query is that now 'r' contains Strings (the gender of the person). Suppose + * that there is one person named Fred and there are 3 people whose age is 17 (let's say Fred's age is 16). + * The original query would have produced 4 rows, one corresponding to each of those people. The new + * query would produce at most 2 rows - one for 'male' and one for 'female'. This is happening because + * we are now performing the union on the Strings, not on the vertices. To fix this, we need to split + * the original query and put the end portion into the result expression: + * + * <h4>Correct Optimized Query</h4> + * + * <pre> + * def r = [] as Set; + * g.V().has('name','Fred').fill(r) + * g.V().has('age','17').fill(r) + * __.inject(r as Object[]).as('person').select('person').by('gender') + * </pre> + * + * The logic for doing this splitting is described in more detail in + * {@link #moveTransformationsToResultExpression(GroovyExpression, OptimizationContext)}. + * <p/> + * There is one more problematic case that this optimizer is able to handle. Let's look at the following example: + * + * <h4>Original Query</h4> + * + * <pre> + * g.V().or(has('type','Person'),has('superType','Person')).as('x').has('qualifiedName','Fred').as('y').select('x','y').by('name').by('name') + * </pre> + * + * Queries of this form appear often when translating DSL queries. + * + * If we were to optimize this query using the logic described above, we would get something like this: + * + * <h4>Incorrect Optimized Query</h4> + * + * <pre> + * def r = [] as Set; + * g.V().has('type','Person').fill(r); + * g.V().has('superType','Person').fill(r); + * __.inject(r as Object[]).as('x').has('qualifiedName','Fred').as('y').select('x','y'); + * </pre> + * + * While not strictly incorrect, this query will not perform well since the index on qualifiedName will + * not be used. In order for that index to be used, the 'has' expression needs to be part of the original + * query. However, if we do that alone, the query will be broken, since the select + * will now refer to an undefined label: + * + * <h4>Incorrect Optimized Query</h4> + * + * <pre> + * def r = [] as Set; + * g.V().has('type','Person').as('x').has('qualifiedName','Fred').fill(r); + * g.V().has('superType','Person').as('x').has('qualifiedName','Fred').fill(r); + * __.inject(r as Object[]).as('y').select('x','y') + * </pre> + * + * To fix this, we need to save the values of the aliased vertices in the original + * query, and create labels in the result expression that refer to them. We do this + * as follows: + * + * <h4>Correct Optimized Query</h4> + * + * <pre> + * def r = [] as Set; + * g.V().has('type','Person').as('x').has('qualifiedName','Fred').as('y').select('x','y').fill(r); + * g.V().has('superType','Person').as('x').has('qualifiedName','Fred').select('x','y').fill(r); + * __.inject(r as Object[]).as('__tmp').map({((Map)it.get()).get('x')}).as('x').select('__tmp').map({((Map)it.get()).get('x')}).as('y').select('x','y').by('name').by('name') + * </pre> + * + * This is not pretty, but is the best solution we've found so far for supporting expressions that contain aliases in this optimization. + * What ends up happening is that r gets populated with alias->Vertex maps. In the result expression, we make 'x' point + * to a step where the value in the traverser is the vertex for 'x', and we do the same thing for y. The <code>select('_tmp')</code> step in the middle restores the value of + * the traverser back to the map. + * <p/> + * The one known issue with the alias rearrangement is that it breaks loop expressions. As a result, expressions containing loops are currently excluded + * from this optimization. + * + * ExpandOrsOptimization expands the entire expression tree recursively, so it is not invoked + * recursively by GremlinQueryOptimizer. + * + */ +public class ExpandOrsOptimization implements GremlinOptimization { + + private static final Logger logger_ = LoggerFactory.getLogger(ExpandOrsOptimization.class); + + private final GremlinExpressionFactory factory; + + public ExpandOrsOptimization(GremlinExpressionFactory factory) { + this.factory = factory; + } + + @Override + public boolean appliesTo(GroovyExpression expr, OptimizationContext contxt) { + + ExpressionFinder finder = new ExpressionFinder(IsOr.INSTANCE); + GremlinQueryOptimizer.visitCallHierarchy(expr, finder); + return finder.isExpressionFound(); + } + + @Override + public GroovyExpression apply(GroovyExpression expr, OptimizationContext context) { + + setupRangeOptimization(expr, context); + GroovyExpression traveralExpression = moveTransformationsToResultExpression(expr, context); + + FunctionGenerator functionGenerator = new FunctionGenerator(factory, context); + GremlinQueryOptimizer.visitCallHierarchy(traveralExpression, functionGenerator); + traveralExpression = functionGenerator.getNewRootExpression(); + List<GroovyExpression> bodyExpressions = expandOrs(traveralExpression, context); + + + //Adds a statement to define the result variable 'v' in the + //groovy script. The variable is declared as a Set. The type + //of the objects in the Set depend on the number of aliases in the Groovy + // expression: + // - 0 or 1 alias : Vertex + // - multiple aliases: Map<String,Vertex> + StatementListExpression result = new StatementListExpression(); + context.prependStatement(context.getDefineResultVariableStmt()); + + + for (GroovyExpression bodyExpression : bodyExpressions) { + result.addStatement(bodyExpression); + } + result.addStatement(context.getResultExpression()); + return result; + } + + private void setupRangeOptimization(GroovyExpression expr, OptimizationContext context) { + + // Find any range expressions in the expression tree. + RangeFinder rangeFinder = new RangeFinder(factory); + GremlinQueryOptimizer.visitCallHierarchy(expr, rangeFinder); + List<AbstractFunctionExpression> rangeExpressions = rangeFinder.getRangeExpressions(); + if (rangeExpressions.size() == 1) { + OrderFinder orderFinder = new OrderFinder(factory); + GremlinQueryOptimizer.visitCallHierarchy(expr, orderFinder); + if (!orderFinder.hasOrderExpression()) { + // If there is one range expression and no order expression in the unoptimized gremlin, + // save the range parameters to use for adding a range expression to + // each expanded "or" expression result, such that it will only contain the specified range of vertices. + // For now, apply this optimization only if the range start index is zero. + AbstractFunctionExpression rangeExpression = rangeExpressions.get(0); + int[] rangeParameters = factory.getRangeParameters(rangeExpression); + if (rangeParameters[0] == 0) { + context.setRangeExpression(rangeExpression); + } + } + } + } + + private GroovyExpression moveTransformationsToResultExpression(GroovyExpression expr, OptimizationContext context) { + GroovyExpression traveralExpression = expr; + + // Determine the 'split point'. This is the expression that will become + // the deepest function call in the result expression. If a split + // point is found, its caller is changed. The new caller is + // set to the graph traversal expression in the result expression. + // The original caller becomes the new traversal expression that + // will be carried through the rest of the 'or' expansion processing. + // + // Example: g.V().has('x').as('x').select('x') + // Here, select('x') is the split expression + // so : + // 1) the result expression in OptimizationContext becomes [base result expression].select('x') + // 2) we return g.V().has('x').as('x') + + SplitPointFinder finder = new SplitPointFinder(factory); + GremlinQueryOptimizer.visitCallHierarchy(traveralExpression, finder); + AbstractFunctionExpression splitPoint = finder.getSplitPoint(); + + + List<LiteralExpression> aliases = new ArrayList<>(); + + //If we're not splitting the query, there is no need to save/restore + //the aliases. + if(splitPoint != null) { + + traveralExpression = splitPoint.getCaller(); + + AliasFinder aliasFinder = new AliasFinder(); + GremlinQueryOptimizer.visitCallHierarchy(traveralExpression, aliasFinder); + aliases.addAll(aliasFinder.getAliases()); + if(aliasFinder.isFinalAliasNeeded()) { + //The last alias in the expression does not capture the final vertex in the traverser, + //so we need to create an alias to record that. + traveralExpression = factory.generateAliasExpression(traveralExpression, context.getFinalAliasName()); + aliases.add(new LiteralExpression(context.getFinalAliasName())); + } + + GroovyExpression resultExpr = getBaseResultExpression(context, aliases); + splitPoint.setCaller(resultExpr); + expr = removeMapFromPathsIfNeeded(expr, aliases); + context.setResultExpression(expr); + } + + //Add expression(s) to the end of the traversal expression to add the vertices + //that were found into the intermediate variable ('r') + traveralExpression = addCallToUpdateResultVariable(traveralExpression, aliases, context); + return traveralExpression; + } + + private GroovyExpression removeMapFromPathsIfNeeded(GroovyExpression expr, List<LiteralExpression> aliases) { + if(aliases.size() > 0 && factory.isSelectGeneratesMap(aliases.size())) { + PathExpressionFinder pathExprFinder = new PathExpressionFinder(); + GremlinQueryOptimizer.visitCallHierarchy(expr, pathExprFinder); + boolean hasPath = pathExprFinder.isPathExpressionFound(); + if(hasPath) { + //the path will now start with the map that we added. That is an artifact + //of the optimization process and must be removed. + if(expr.getType() != TraversalStepType.END && expr.getType() != TraversalStepType.NONE) { + //we're still in the pipeline, need to execute the query before we can + //modify the result + expr = factory.generateToListExpression(expr); + } + expr = factory.removeExtraMapFromPathInResult(expr); + } + + } + return expr; + } + + /** + * This method adds steps to the end of the initial traversal to add the vertices + * that were found into an intermediate variable (defined as a Set). If there is one alias, + * this set will contain the vertices associated with that Alias. If there are multiple + * aliases, the values in the set will be alias->vertex maps that have the vertex + * associated with the alias for each result. + + * @param expr + * @param aliasNames + * @param context + * @return + */ + private GroovyExpression addCallToUpdateResultVariable(GroovyExpression expr,List<LiteralExpression> aliasNames, OptimizationContext context) { + + GroovyExpression result = expr; + // If there is one range expression in the unoptimized gremlin, + // add a range expression here so that the intermediate variable will only contain + // the specified range of vertices. + AbstractFunctionExpression rangeExpression = context.getRangeExpression(); + if (rangeExpression != null) { + int[] rangeParameters = factory.getRangeParameters(rangeExpression); + result = factory.generateRangeExpression(result, rangeParameters[0], rangeParameters[1]); + } + if( ! aliasNames.isEmpty()) { + result = factory.generateSelectExpression(result, aliasNames, Collections.<GroovyExpression>emptyList()); + } + return factory.generateFillExpression(result, context.getResultVariable()); + } + + /** + * Recursively traverses the given expression, expanding or expressions + * wherever they are found. + * + * @param expr + * @param context + * @return expressions that should be unioned together to get the query result + */ + private List<GroovyExpression> expandOrs(GroovyExpression expr, OptimizationContext context) { + + if (GremlinQueryOptimizer.isOrExpression(expr)) { + return expandOrFunction(expr, context); + } + return processOtherExpression(expr, context); + } + + /** + * This method takes an 'or' expression and expands it into multiple expressions. + * + * For example: + * + * g.V().or(has('x'),has('y') + * + * is expanded to: + * + * g.V().has('x') + * g.V().has('y') + * + * There are certain cases where it is not safe to move an expression out + * of the 'or'. For example, in the expression + * + * g.V().or(has('x').out('y'),has('z')) + * + * has('x').out('y') cannot be moved out of the 'or', since it changes the value of the traverser. + * + * At this time, the ExpandOrsOptimizer is not able to handle this scenario, so we don't remove + * that expression. In cases like this, a final expression is created that ors together + * all of the expressions that could not be extracted. In this case that would be: + * + * g.V().has('z') + * g.V().or(has('y').out('z')) + * + * This processing is done recursively. + * + * + * @param expr + * @param context + * @return the expressions that should be unioned together to get the query result + */ + private List<GroovyExpression> expandOrFunction(GroovyExpression expr, OptimizationContext context) { + FunctionCallExpression functionCall = (FunctionCallExpression) expr; + GroovyExpression caller = functionCall.getCaller(); + List<GroovyExpression> updatedCallers = null; + if (caller != null) { + updatedCallers = expandOrs(caller, context); + } else { + updatedCallers = Collections.singletonList(null); + } + UpdatedExpressions newArguments = getUpdatedChildren(functionCall.getArguments(), context); + List<GroovyExpression> allUpdatedArguments = new ArrayList<>(); + for (List<GroovyExpression> exprs : newArguments.getUpdatedChildren()) { + allUpdatedArguments.addAll(exprs); + } + List<AbstractFunctionExpression> extractableArguments = new ArrayList<>(); + List<GroovyExpression> nonExtractableArguments = new ArrayList<>(); + for (GroovyExpression argument : allUpdatedArguments) { + + if (GremlinQueryOptimizer.isExtractable(argument)) { + extractableArguments.add((AbstractFunctionExpression) argument); + } else { + logger_.warn("Found non-extractable argument '{}; in the 'or' expression '{}'",argument.toString(), expr.toString()); + nonExtractableArguments.add(argument); + } + } + + List<GroovyExpression> result = new ArrayList<>(); + for (GroovyExpression updatedCaller : updatedCallers) { + + for (AbstractFunctionExpression arg : extractableArguments) { + GroovyExpression updated = GremlinQueryOptimizer.copyWithNewLeafNode(arg, updatedCaller); + result.add(updated); + } + if (!nonExtractableArguments.isEmpty()) { + result.add(factory.generateLogicalExpression(updatedCaller, "or", nonExtractableArguments)); + } + + } + return result; + } + + private UpdatedExpressions getUpdatedChildren(List<GroovyExpression> children, OptimizationContext context) { + List<List<GroovyExpression>> updatedChildren = new ArrayList<>(); + boolean changed = false; + for (GroovyExpression child : children) { + List<GroovyExpression> childChoices = expandOrs(child, context); + if (childChoices.size() != 1 || childChoices.iterator().next() != child) { + changed = true; + } + updatedChildren.add(childChoices); + } + return new UpdatedExpressions(changed, updatedChildren); + } + + private UpdatedExpressions getUpdatedChildren(GroovyExpression expr, OptimizationContext context) { + return getUpdatedChildren(expr.getChildren(), context); + } + + /** + * This is called when we encounter an expression that is not an "or", for example an "and" expressio. For these + * expressions, we process the children and create copies with the cartesian product of the updated + * arguments. + * + * Example: + * + * g.V().and(or(has('x),has('y'), or(has('a'),has('b'))) + * + * Here, we have an "and" expression with two children: + * + * 1) or(has('x),has('y') + * 2) or(has('a'),has('b')) + * + * We first process these children. They each yield 2 expressions: + * + * 1 -> [ has('x'), has('y') ] + * 2 -> [ has('a'), has('b') ] + * + * The cartesian product of these gives this: + * + * [ has('x'), has('a') ] + * [ has('x'), has('b') ] + * [ has('y'), has('a') ] + * [ has('y'), has('b') ] + * + * So the overall result is: + * + * g.V().and(has('x'), has('a')) + * g.V().and(has('x'), has('b')) + * g.V().and(has('y'), has('a')) + * g.V().and(has('y'), has('b')) + * + * + * @param source + * @param context + * @return expressions that should be unioned together to get the query result + */ + private List<GroovyExpression> processOtherExpression(GroovyExpression source, OptimizationContext context) { + UpdatedExpressions updatedChildren = getUpdatedChildren(source, context); + if (!updatedChildren.hasChanges()) { + return Collections.singletonList(source); + } + List<GroovyExpression> result = new ArrayList<GroovyExpression>(); + + //The updated children list we get back has the possible values for each child + //in the expression. We compute a cartesian product to get all possible + //combinations of child values. + List<List<GroovyExpression>> updateChildLists = Lists.cartesianProduct(updatedChildren.getUpdatedChildren()); + + for (List<GroovyExpression> updatedChildList : updateChildLists) { + result.add(source.copy(updatedChildList)); + } + return result; + } + + @Override + public boolean isApplyRecursively() { + return false; + } + + /** + * + * This method creates a base result expression that recreates the state of the + * graph traverser at start of the result expression to what it would have been + * if we had been executing one Gremlin query (instead of many and doing a union). + * + * To do this, we start with an anonymous graph traversal that will iterate + * through the values in the intermediate Set that was created. We then need + * to set things up so that the aliases that were in the original gremlin query + * refer to steps with the correct traverser value. + * + * The way we do this depends on the number of aliases. If there are 0 or 1 alias, + * the intermediate variable already contains Vertices, so we just create the alias. + * + * If there are multiple aliases, the intermediate variable contains a String->Vertex + * map. We first create a temporary alias that refers to that map. For each alias, + * we use a MapStep to map the map to the Vertex for that alias. We then add back + * the alias, making it refer to the MapStep. Between the alias restorations, we restore the + * traverser object back to the map. + * + * @param context + * @param aliases + * @return + */ + private GroovyExpression getBaseResultExpression(OptimizationContext context, + List<LiteralExpression> aliases) { + + //Start with an anonymous traversal that gets its objects from the intermediate result variable. + GroovyExpression parent = factory.generateSeededTraversalExpresssion(aliases.size() > 1, context.getResultVariable()); + + if(aliases.isEmpty()) { + return parent; + } + + //The expression we will return. + GroovyExpression result = parent; + + //We use a temporary alias to save/restore the original value of the traverser + //at the start of the query. We do this so we can set the value of the traverser + //back to being the map after we retrieve each alias. If there is only one + //alias, the save/restore is not needed, so there is no need to create this alias. + if(aliases.size() > 1) { + + result = factory.generateAliasExpression(result, context.getTempAliasName()); + } + + Iterator<LiteralExpression> it = aliases.iterator(); + while(it.hasNext()) { + LiteralExpression curAlias = it.next(); + //A map is only generated by Gremlin when there is more than one alias. When there is only one + //alias, the intermediate variable will directly contain the vertices.` + if(factory.isSelectGeneratesMap(aliases.size())) { + //Since there is more than one alias, the current traverser object is an alias->vertex + //map. We use a MapStep to map that map to the Vertex for the current alias. This sets + //the current traverser object to that Vertex. We do this by defining the closure we + //pass to the MapStep call [map].get(aliasName) where [map] is the expression + //that refers to the map. + + GroovyExpression rowMapExpr = factory.getCurrentTraverserObject(factory.getClosureArgumentValue()); + GroovyExpression getExpr = factory.generateGetSelectedValueExpression(curAlias, rowMapExpr); + result = factory.generateMapExpression(result, new ClosureExpression(getExpr)); + } + + //Create alias that points to the previous step. The traverser value at that step + //is the Vertex associated with this alias. + result = factory.generateAliasExpression(result, curAlias.getValue().toString()); + if(it.hasNext()) { + //Restore the current value of the traverser back to the current alias->vertex map + result = factory.generateBackReferenceExpression(result, false, context.getTempAliasName()); + } + } + return result; + } + + + + +} + http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/62a05c97/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java new file mode 100644 index 0000000..2721049 --- /dev/null +++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.atlas.gremlin.optimizer; + +import com.google.common.base.Function; + +import org.apache.atlas.groovy.AbstractFunctionExpression; +import org.apache.atlas.groovy.GroovyExpression; + +/** + * Call hierarchy visitor that checks if an expression + * matching the specified criteria is present + * in the call hierarch. + */ +public class ExpressionFinder implements CallHierarchyVisitor { + + private final Function<GroovyExpression, Boolean> predicate; + private boolean expressionFound = false; + + public ExpressionFinder(Function<GroovyExpression, Boolean> predicate) { + this.predicate = predicate; + } + @Override + public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) { + if (predicate.apply(expr)) { + expressionFound = true; + return false; + } + return true; + } + + @Override + public void visitNonFunctionCaller(GroovyExpression expr) { + if (predicate.apply(expr)) { + expressionFound = true; + } + } + + @Override + public void visitNullCaller() { + //nothing to do + } + + @Override + public boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall) { + //nothing to do + return true; + } + + public boolean isExpressionFound() { + return expressionFound; + } + +}
