http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/5adca841/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java b/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java index add7e07..e862769 100644 --- a/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java +++ b/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java @@ -19,25 +19,21 @@ package org.apache.atlas.gremlin; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import org.apache.atlas.AtlasException; -import org.apache.atlas.groovy.AbstractFunctionExpression; import org.apache.atlas.groovy.CastExpression; import org.apache.atlas.groovy.ClosureExpression; import org.apache.atlas.groovy.ComparisonExpression; -import org.apache.atlas.groovy.ComparisonExpression.ComparisonOperator; import org.apache.atlas.groovy.ComparisonOperatorExpression; import org.apache.atlas.groovy.FunctionCallExpression; import org.apache.atlas.groovy.GroovyExpression; import org.apache.atlas.groovy.IdentifierExpression; import org.apache.atlas.groovy.LiteralExpression; import org.apache.atlas.groovy.LogicalExpression; -import org.apache.atlas.groovy.LogicalExpression.LogicalOperator; -import org.apache.atlas.groovy.TernaryOperatorExpression; -import org.apache.atlas.groovy.TraversalStepType; import org.apache.atlas.groovy.TypeCoersionExpression; +import org.apache.atlas.groovy.ComparisonExpression.ComparisonOperator; +import org.apache.atlas.groovy.LogicalExpression.LogicalOperator; import org.apache.atlas.query.GraphPersistenceStrategies; import org.apache.atlas.query.TypeUtils.FieldInfo; import org.apache.atlas.repository.graph.AtlasGraphProvider; @@ -73,14 +69,27 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { private static final String REPEAT_METHOD = "repeat"; private static final String RANGE_METHOD = "range"; private static final String LAST_METHOD = "last"; + private static final String WHERE_METHOD = "where"; private static final String TO_STRING_METHOD = "toString"; - private static final GroovyExpression EMPTY_STRING_EXPRESSION = new LiteralExpression(""); - @Override public GroovyExpression generateLogicalExpression(GroovyExpression parent, String operator, - List<GroovyExpression> operands) { - return new FunctionCallExpression(TraversalStepType.FILTER, parent, operator, operands); + List<GroovyExpression> operands) { + if (operands.size() == 1) { + // gremlin 3 treats one element expressions as 'false'. Avoid + // creating a boolean expression in this case. Inline the expression + // note: we can't simply omit it, since it will cause us to traverse + // the edge! + // use 'where' instead + GroovyExpression expr = operands.get(0); + // if child is a back expression, that expression becomes an + // argument to where + return new FunctionCallExpression(parent, WHERE_METHOD, expr); + } else { + // Gremlin 3 does not support _() syntax + // + return new FunctionCallExpression(parent, operator, operands); + } } @Override @@ -88,20 +97,20 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { if (inSelect) { return getFieldInSelect(); } else { - return new FunctionCallExpression(TraversalStepType.MAP_TO_ELEMENT, parent, SELECT_METHOD, new LiteralExpression(alias)); + return new FunctionCallExpression(parent, SELECT_METHOD, new LiteralExpression(alias)); } } @Override public GroovyExpression typeTestExpression(GraphPersistenceStrategies s, String typeName, GroovyExpression itRef) { + LiteralExpression typeAttrExpr = new LiteralExpression(s.typeAttributeName()); LiteralExpression superTypeAttrExpr = new LiteralExpression(s.superTypeAttributeName()); LiteralExpression typeNameExpr = new LiteralExpression(typeName); - LiteralExpression typeAttrExpr = new LiteralExpression(s.typeAttributeName()); - FunctionCallExpression result = new FunctionCallExpression(TraversalStepType.FILTER, HAS_METHOD, typeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr)); - result = new FunctionCallExpression(TraversalStepType.FILTER, result, "or"); - result = new FunctionCallExpression(TraversalStepType.FILTER, result, HAS_METHOD, superTypeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr)); - return result; + FunctionCallExpression result = new FunctionCallExpression(HAS_METHOD, typeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr)); + result = new FunctionCallExpression(result, "or"); + result = new FunctionCallExpression(HAS_METHOD, superTypeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr)); + return result; } @Override @@ -109,48 +118,41 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { GroovyExpression emitExpr = generateLoopEmitExpression(s, dataType); - GroovyExpression result = new FunctionCallExpression(TraversalStepType.BRANCH, parent, REPEAT_METHOD, loopExpr); + GroovyExpression result = new FunctionCallExpression(parent, REPEAT_METHOD, loopExpr); if (times != null) { GroovyExpression timesExpr = new LiteralExpression(times); - result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, TIMES_METHOD, timesExpr); + result = new FunctionCallExpression(result, TIMES_METHOD, timesExpr); } - result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, EMIT_METHOD, emitExpr); + result = new FunctionCallExpression(result, EMIT_METHOD, emitExpr); return result; } @Override public GroovyExpression getLoopExpressionParent(GroovyExpression inputQry) { - GroovyExpression curTraversal = getAnonymousTraversalStartExpression(); - return curTraversal; - } - - private IdentifierExpression getAnonymousTraversalStartExpression() { - return new IdentifierExpression(TraversalStepType.START, "__"); + return new IdentifierExpression("__"); } @Override public GroovyExpression generateSelectExpression(GroovyExpression parent, List<LiteralExpression> sourceNames, List<GroovyExpression> srcExprs) { - FunctionCallExpression result = new FunctionCallExpression(TraversalStepType.MAP_TO_VALUE, parent, SELECT_METHOD, sourceNames); + FunctionCallExpression result = new FunctionCallExpression(parent, SELECT_METHOD, sourceNames); for (GroovyExpression expr : srcExprs) { GroovyExpression closure = new ClosureExpression(expr); GroovyExpression castClosure = new TypeCoersionExpression(closure, FUNCTION_CLASS); - result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, BY_METHOD, castClosure); + result = new FunctionCallExpression(result, BY_METHOD, castClosure); } return result; } @Override public GroovyExpression generateFieldExpression(GroovyExpression parent, FieldInfo fInfo, - String propertyName, boolean inSelect) { + String propertyName, boolean inSelect) { AttributeInfo attrInfo = fInfo.attrInfo(); IDataType attrType = attrInfo.dataType(); GroovyExpression propertyNameExpr = new LiteralExpression(propertyName); - //Whether it is the user or shared graph does not matter here, since we're - //just getting the conversion expression. Ideally that would be moved someplace else. AtlasGraph graph = AtlasGraphProvider.getGraphInstance(); if (inSelect) { @@ -159,13 +161,13 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { return graph.generatePersisentToLogicalConversionExpression(expr, attrType); } else { - GroovyExpression unmapped = new FunctionCallExpression(TraversalStepType.FLAT_MAP_TO_VALUES, parent, VALUES_METHOD, propertyNameExpr); + GroovyExpression unmapped = new FunctionCallExpression(parent, VALUES_METHOD, propertyNameExpr); if (graph.isPropertyValueConversionNeeded(attrType)) { GroovyExpression toConvert = new FunctionCallExpression(getItVariable(), GET_METHOD); GroovyExpression conversionFunction = graph.generatePersisentToLogicalConversionExpression(toConvert, attrType); - return new FunctionCallExpression(TraversalStepType.MAP_TO_VALUE, unmapped, MAP_METHOD, new ClosureExpression(conversionFunction)); + return new FunctionCallExpression(unmapped, MAP_METHOD, new ClosureExpression(conversionFunction)); } else { return unmapped; } @@ -236,15 +238,15 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { valueMatchesExpr); GroovyExpression filterFunction = new ClosureExpression(filterCondition); - return new FunctionCallExpression(TraversalStepType.FILTER, parent, FILTER_METHOD, filterFunction); + return new FunctionCallExpression(parent, FILTER_METHOD, filterFunction); } else { GroovyExpression valueMatches = new FunctionCallExpression(getComparisonFunction(symbol), requiredValue); - return new FunctionCallExpression(TraversalStepType.FILTER, parent, HAS_METHOD, propertNameExpr, valueMatches); + return new FunctionCallExpression(parent, HAS_METHOD, propertNameExpr, valueMatches); } } @Override - protected GroovyExpression initialExpression(GroovyExpression varExpr, GraphPersistenceStrategies s) { + protected GroovyExpression initialExpression(GraphPersistenceStrategies s, GroovyExpression varExpr) { // this bit of groovy magic converts the set of vertices in varName into // a String containing the ids of all the vertices. This becomes the @@ -253,61 +255,15 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { // _() // s"g.V(${varName}.collect{it.id()} as String[])" - GroovyExpression gExpr = getGraphExpression(); + GroovyExpression gExpr = getGraph(); GroovyExpression varRefExpr = new TypeCoersionExpression(varExpr, OBJECT_ARRAY_CLASS); - GroovyExpression matchingVerticesExpr = new FunctionCallExpression(TraversalStepType.START, gExpr, V_METHOD, varRefExpr); - GroovyExpression isEmpty = new FunctionCallExpression(varExpr, "isEmpty"); - GroovyExpression emptyGraph = getEmptyTraversalExpression(); - - GroovyExpression expr = new TernaryOperatorExpression(isEmpty, emptyGraph, matchingVerticesExpr); - + FunctionCallExpression expr = new FunctionCallExpression(gExpr, V_METHOD, varRefExpr); return s.addInitialQueryCondition(expr); } - private GroovyExpression getEmptyTraversalExpression() { - GroovyExpression emptyGraph = new FunctionCallExpression(TraversalStepType.START, getGraphExpression(), V_METHOD, EMPTY_STRING_EXPRESSION); - return emptyGraph; - } - - @Override - public GroovyExpression generateRangeExpression(GroovyExpression parent, int startIndex, int endIndex) { - //treat as barrier step, since limits need to be applied globally (even though it - //is technically a filter step) - return new FunctionCallExpression(TraversalStepType.BARRIER, parent, RANGE_METHOD, new LiteralExpression(startIndex), new LiteralExpression(endIndex)); - } - - @Override - public boolean isRangeExpression(GroovyExpression expr) { - - return (expr instanceof FunctionCallExpression && ((FunctionCallExpression)expr).getFunctionName().equals(RANGE_METHOD)); - } - - @Override - public int[] getRangeParameters(AbstractFunctionExpression expr) { - - if (isRangeExpression(expr)) { - FunctionCallExpression rangeExpression = (FunctionCallExpression) expr; - List<GroovyExpression> arguments = rangeExpression.getArguments(); - int startIndex = (int)((LiteralExpression)arguments.get(0)).getValue(); - int endIndex = (int)((LiteralExpression)arguments.get(1)).getValue(); - return new int[]{startIndex, endIndex}; - } - else { - return null; - } - } - @Override - public void setRangeParameters(GroovyExpression expr, int startIndex, int endIndex) { - - if (isRangeExpression(expr)) { - FunctionCallExpression rangeExpression = (FunctionCallExpression) expr; - rangeExpression.setArgument(0, new LiteralExpression(Integer.valueOf(startIndex))); - rangeExpression.setArgument(1, new LiteralExpression(Integer.valueOf(endIndex))); - } - else { - throw new IllegalArgumentException(expr + " is not a valid range expression"); - } + public GroovyExpression generateLimitExpression(GroovyExpression parent, int offset, int totalRows) { + return new FunctionCallExpression(parent, RANGE_METHOD, new LiteralExpression(offset), new LiteralExpression(totalRows)); } @Override @@ -339,8 +295,7 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { comparisonExpr = new ComparisonOperatorExpression(bCompExpr, aCompExpr); } ClosureExpression comparisonFunction = new ClosureExpression(comparisonExpr, "a", "b"); - FunctionCallExpression orderCall = new FunctionCallExpression(TraversalStepType.BARRIER, parent, ORDER_METHOD); - return new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, orderCall, BY_METHOD, orderByClause, comparisonFunction); + return new FunctionCallExpression(new FunctionCallExpression(parent, ORDER_METHOD), BY_METHOD, orderByClause, comparisonFunction); } @Override @@ -372,10 +327,10 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { public GroovyExpression generateGroupByExpression(GroovyExpression parent, GroovyExpression groupByExpression, GroovyExpression aggregationFunction) { - GroovyExpression result = new FunctionCallExpression(TraversalStepType.BARRIER, parent, "group"); + GroovyExpression result = new FunctionCallExpression(parent, "group"); GroovyExpression groupByClosureExpr = new TypeCoersionExpression(new ClosureExpression(groupByExpression), "Function"); - result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, "by", groupByClosureExpr); - result = new FunctionCallExpression(TraversalStepType.END, result, "toList"); + result = new FunctionCallExpression(result, "by", groupByClosureExpr); + result = new FunctionCallExpression(result, "toList"); GroovyExpression mapValuesClosure = new ClosureExpression(new FunctionCallExpression(new CastExpression(getItVariable(), "Map"), "values")); @@ -392,55 +347,8 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory { result = new FunctionCallExpression(result, "collect", aggregrationFunctionClosure); return result; } - - @Override - public GroovyExpression generateSeededTraversalExpresssion(boolean isMap, GroovyExpression valueCollection) { - GroovyExpression coersedExpression = new TypeCoersionExpression(valueCollection, isMap ? "Map[]" : "Vertex[]"); - if(isMap) { - - return new FunctionCallExpression(TraversalStepType.START, "__", coersedExpression); - } - else { - //We cannot always use an anonymous traversal because that breaks repeat steps - return new FunctionCallExpression(TraversalStepType.START, getEmptyTraversalExpression(), "inject", coersedExpression); - } - } - - @Override public GroovyExpression getGroupBySelectFieldParent() { return null; } - - @Override - public String getTraversalExpressionClass() { - return "GraphTraversal"; - } - - @Override - public boolean isSelectGeneratesMap(int aliasCount) { - //in Gremlin 3, you only get a map if there is more than 1 alias. - return aliasCount > 1; - } - - @Override - public GroovyExpression generateMapExpression(GroovyExpression parent, ClosureExpression closureExpression) { - return new FunctionCallExpression(TraversalStepType.MAP_TO_ELEMENT, parent, "map", closureExpression); - } - - @Override - public GroovyExpression generateGetSelectedValueExpression(LiteralExpression key, - GroovyExpression rowMapExpr) { - rowMapExpr = new CastExpression(rowMapExpr, "Map"); - GroovyExpression getExpr = new FunctionCallExpression(rowMapExpr, "get", key); - return getExpr; - } - - @Override - public GroovyExpression getCurrentTraverserObject(GroovyExpression traverser) { - return new FunctionCallExpression(traverser, "get"); - } - - public List<String> getAliasesRequiredByExpression(GroovyExpression expr) { - return Collections.emptyList(); - } } +
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/5adca841/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java b/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java index c2fdf09..6c326b2 100644 --- a/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java +++ b/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java @@ -17,14 +17,8 @@ */ package org.apache.atlas.gremlin; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - +import com.google.common.collect.ImmutableList; import org.apache.atlas.AtlasException; -import org.apache.atlas.groovy.AbstractFunctionExpression; import org.apache.atlas.groovy.ArithmeticExpression; import org.apache.atlas.groovy.ArithmeticExpression.ArithmeticOperator; import org.apache.atlas.groovy.CastExpression; @@ -35,7 +29,6 @@ import org.apache.atlas.groovy.GroovyExpression; import org.apache.atlas.groovy.IdentifierExpression; import org.apache.atlas.groovy.ListExpression; import org.apache.atlas.groovy.LiteralExpression; -import org.apache.atlas.groovy.TraversalStepType; import org.apache.atlas.groovy.TypeCoersionExpression; import org.apache.atlas.groovy.VariableAssignmentExpression; import org.apache.atlas.query.GraphPersistenceStrategies; @@ -47,9 +40,13 @@ import org.apache.atlas.repository.graphdb.GremlinVersion; import org.apache.atlas.typesystem.types.IDataType; import org.apache.atlas.typesystem.types.TypeSystem; import org.apache.atlas.typesystem.types.cache.TypeCache.TYPE_FILTER; -import org.apache.atlas.util.AtlasRepositoryConfiguration; -import com.google.common.collect.ImmutableList; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * Factory to generate Groovy expressions representing Gremlin syntax that that @@ -61,8 +58,7 @@ public abstract class GremlinExpressionFactory { private static final String G_VARIABLE = "g"; private static final String IT_VARIABLE = "it"; - protected static final String SET_CLASS = "Set"; - + private static final String SET_CLASS = "Set"; private static final String OBJECT_FIELD = "object"; @@ -70,25 +66,18 @@ public abstract class GremlinExpressionFactory { protected static final String FILTER_METHOD = "filter"; private static final String PATH_METHOD = "path"; private static final String AS_METHOD = "as"; + private static final String FILL_METHOD = "fill"; private static final String IN_OPERATOR = "in"; protected static final String HAS_METHOD = "has"; protected static final String TO_LOWER_CASE_METHOD = "toLowerCase"; protected static final String SELECT_METHOD = "select"; protected static final String ORDER_METHOD = "order"; - protected static final String FILL_METHOD = "fill"; public static final GremlinExpressionFactory INSTANCE = AtlasGraphProvider.getGraphInstance() .getSupportedGremlinVersion() == GremlinVersion.THREE ? new Gremlin3ExpressionFactory() : new Gremlin2ExpressionFactory(); /** - * Returns the unqualified name of the class used in this version of gremlin to - * represent Gremlin queries as they are being generated. - * @return - */ - public abstract String getTraversalExpressionClass(); - - /** * Gets the expression to use as the parent when translating the loop * expression in a loop * @@ -183,40 +172,14 @@ public abstract class GremlinExpressionFactory { String propertyName, String symbol, GroovyExpression requiredValue, FieldInfo fInfo) throws AtlasException; /** - * Generates a range expression + * Generates a limit expression * * @param parent - * @param startIndex - * @param endIndex + * @param offset + * @param totalRows * @return */ - public abstract GroovyExpression generateRangeExpression(GroovyExpression parent, int startIndex, int endIndex); - - /** - * Determines if the specified expression is a range method call. - * - * @param expr - * @return - */ - public abstract boolean isRangeExpression(GroovyExpression expr); - - /** - * Set the start index and end index of a range expression - * - * @param expr - * @param startIndex - * @param endIndex - */ - public abstract void setRangeParameters(GroovyExpression expr, int startIndex, int endIndex); - - /** - * If the specified function expression is a range expression, returns the start and end index parameters - * otherwise returns null. - * - * @param expr - * @return int array with two elements - element 0 is start index, element 1 is end index - */ - public abstract int[] getRangeParameters(AbstractFunctionExpression expr); + public abstract GroovyExpression generateLimitExpression(GroovyExpression parent, int offset, int totalRows); /** * Generates an order by expression @@ -230,22 +193,6 @@ public abstract class GremlinExpressionFactory { List<GroovyExpression> translatedOrderBy, boolean isAscending); /** - * Determines if specified expression is an order method call - * - * @param expr - * @return - */ - public boolean isOrderExpression(GroovyExpression expr) { - if (expr instanceof FunctionCallExpression) { - FunctionCallExpression functionCallExpression = (FunctionCallExpression) expr; - if (functionCallExpression.getFunctionName().equals(ORDER_METHOD)) { - return true; - } - } - return false; - } - - /** * Returns the Groovy expressions that should be used as the parents when * translating an order by expression. This is needed because Gremlin 2 and * 3 handle order by expressions very differently. @@ -260,17 +207,6 @@ public abstract class GremlinExpressionFactory { */ public abstract GroovyExpression getAnonymousTraversalExpression(); - public boolean isLeafAnonymousTraversalExpression(GroovyExpression expr) { - if(!(expr instanceof FunctionCallExpression)) { - return false; - } - FunctionCallExpression functionCallExpr = (FunctionCallExpression)expr; - if(functionCallExpr.getCaller() != null) { - return false; - } - return functionCallExpr.getFunctionName().equals("_") & functionCallExpr.getArguments().size() == 0; - } - /** * Returns an expression representing * @@ -280,11 +216,11 @@ public abstract class GremlinExpressionFactory { /** * Generates the expression the serves as the root of the Gremlin query. + * @param s * @param varExpr variable containing the vertices to traverse * @return */ - protected abstract GroovyExpression initialExpression(GroovyExpression varExpr, GraphPersistenceStrategies s); - + protected abstract GroovyExpression initialExpression(GraphPersistenceStrategies s, GroovyExpression varExpr); /** * Generates an expression that tests whether the vertex represented by the 'toTest' @@ -300,37 +236,19 @@ public abstract class GremlinExpressionFactory { GroovyExpression vertexExpr); /** - /** * Generates a sequence of groovy expressions that filter the vertices to only * those that match the specified type. If GraphPersistenceStrategies.collectTypeInstancesIntoVar() - * is set and the gremlin optimizer is disabled, the vertices are put into a variable whose name is generated - * from the specified IntSequence. The last item in the result will be a graph traversal restricted to only - * the matching vertices. + * is set, the vertices are put into a variable whose name is geneated from the specified IntSequence. + * The last item in the result will be a graph traversal restricted to only the matching vertices. */ public List<GroovyExpression> generateTypeTestExpression(GraphPersistenceStrategies s, GroovyExpression parent, String typeName, IntSequence intSeq) throws AtlasException { - - if(AtlasRepositoryConfiguration.isGremlinOptimizerEnabled()) { - GroovyExpression superTypeAttributeNameExpr = new LiteralExpression(s.superTypeAttributeName()); - GroovyExpression typeNameExpr = new LiteralExpression(typeName); - GroovyExpression superTypeMatchesExpr = new FunctionCallExpression(TraversalStepType.FILTER, HAS_METHOD, superTypeAttributeNameExpr, - typeNameExpr); - - GroovyExpression typeAttributeNameExpr = new LiteralExpression(s.typeAttributeName()); - - GroovyExpression typeMatchesExpr = new FunctionCallExpression(TraversalStepType.FILTER, HAS_METHOD, typeAttributeNameExpr, - typeNameExpr); - GroovyExpression result = new FunctionCallExpression(TraversalStepType.FILTER, parent, "or", typeMatchesExpr, superTypeMatchesExpr); - return Collections.singletonList(result); - } - else { - if (s.filterBySubTypes()) { - return typeTestExpressionUsingInFilter(s, parent, typeName); - } else if (s.collectTypeInstancesIntoVar()) { - return typeTestExpressionMultiStep(s, typeName, intSeq); - } else { - return typeTestExpressionUsingFilter(s, parent, typeName); - } + if (s.filterBySubTypes()) { + return typeTestExpressionUsingInFilter(s, parent, typeName); + } else if (s.collectTypeInstancesIntoVar()) { + return typeTestExpressionMultiStep(s, typeName, intSeq); + } else { + return typeTestExpressionUsingFilter(s, parent, typeName); } } @@ -367,7 +285,7 @@ public abstract class GremlinExpressionFactory { result.add(newSetVar(varName)); result.add(fillVarWithTypeInstances(s, typeName, varName)); result.add(fillVarWithSubTypeInstances(s, typeName, varName)); - result.add(initialExpression(varExpr, s)); + result.add(initialExpression(s, varExpr)); return result; } @@ -406,6 +324,7 @@ public abstract class GremlinExpressionFactory { return Collections.singletonList(filterExpr); } + /** * Generates an expression which checks whether the vertices in the query have * a field with the given name. @@ -415,7 +334,7 @@ public abstract class GremlinExpressionFactory { * @return */ public GroovyExpression generateUnaryHasExpression(GroovyExpression parent, String fieldName) { - return new FunctionCallExpression(TraversalStepType.FILTER, parent, HAS_METHOD, new LiteralExpression(fieldName)); + return new FunctionCallExpression(parent, HAS_METHOD, new LiteralExpression(fieldName)); } /** @@ -425,7 +344,7 @@ public abstract class GremlinExpressionFactory { * @return */ public GroovyExpression generatePathExpression(GroovyExpression parent) { - return new FunctionCallExpression(TraversalStepType.MAP_TO_VALUE, parent, PATH_METHOD); + return new FunctionCallExpression(parent, PATH_METHOD); } /** @@ -446,7 +365,7 @@ public abstract class GremlinExpressionFactory { * @return */ public GroovyExpression generateAliasExpression(GroovyExpression parent, String alias) { - return new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, parent, AS_METHOD, new LiteralExpression(alias)); + return new FunctionCallExpression(parent, AS_METHOD, new LiteralExpression(alias)); } /** @@ -458,19 +377,19 @@ public abstract class GremlinExpressionFactory { * @return */ public GroovyExpression generateAdjacentVerticesExpression(GroovyExpression parent, AtlasEdgeDirection dir) { - return new FunctionCallExpression(TraversalStepType.FLAT_MAP_TO_ELEMENTS, parent, getGremlinFunctionName(dir)); + return new FunctionCallExpression(parent, getGremlinFunctionName(dir)); } private String getGremlinFunctionName(AtlasEdgeDirection dir) { switch(dir) { - case IN: - return "in"; - case OUT: - return "out"; - case BOTH: - return "both"; - default: - throw new RuntimeException("Unknown Atlas Edge Direction: " + dir); + case IN: + return "in"; + case OUT: + return "out"; + case BOTH: + return "both"; + default: + throw new RuntimeException("Unknown Atlas Edge Direction: " + dir); } } @@ -484,7 +403,7 @@ public abstract class GremlinExpressionFactory { */ public GroovyExpression generateAdjacentVerticesExpression(GroovyExpression parent, AtlasEdgeDirection dir, String label) { - return new FunctionCallExpression(TraversalStepType.FLAT_MAP_TO_ELEMENTS, parent, getGremlinFunctionName(dir), new LiteralExpression(label)); + return new FunctionCallExpression(parent, getGremlinFunctionName(dir), new LiteralExpression(label)); } /** @@ -504,19 +423,17 @@ public abstract class GremlinExpressionFactory { } protected GroovyExpression getAllVerticesExpr() { - GroovyExpression gExpr = getGraphExpression(); - return new FunctionCallExpression(TraversalStepType.START, gExpr, V_METHOD); + GroovyExpression gExpr = getGraph(); + return new FunctionCallExpression(gExpr, V_METHOD); } - protected IdentifierExpression getGraphExpression() { - return new IdentifierExpression(TraversalStepType.SOURCE, G_VARIABLE); + protected IdentifierExpression getGraph() { + return new IdentifierExpression(G_VARIABLE); } - protected GroovyExpression getCurrentObjectExpression() { return new FieldExpression(getItVariable(), OBJECT_FIELD); } - //assumes cast already performed public GroovyExpression generateCountExpression(GroovyExpression itExpr) { GroovyExpression collectionExpr = new CastExpression(itExpr,"Collection"); @@ -537,9 +454,11 @@ public abstract class GremlinExpressionFactory { private GroovyExpression getAggregrationExpression(GroovyExpression itExpr, GroovyExpression mapFunction, String functionName) { - GroovyExpression collectionExpr = new CastExpression(itExpr,"Collection"); + GroovyExpression collectionExpr = new CastExpression(itExpr, + "Collection"); ClosureExpression collectFunction = new ClosureExpression(mapFunction); - GroovyExpression transformedList = new FunctionCallExpression(collectionExpr, "collect", collectFunction); + GroovyExpression transformedList = new FunctionCallExpression( + collectionExpr, "collect", collectFunction); return new FunctionCallExpression(transformedList, functionName); } @@ -547,106 +466,5 @@ public abstract class GremlinExpressionFactory { return getItVariable(); } - /** - * Specifies the parent to use when translating the select list in - * a group by statement. - * - * @return - */ public abstract GroovyExpression getGroupBySelectFieldParent(); - - public GroovyExpression generateFillExpression(GroovyExpression parent, GroovyExpression variable) { - return new FunctionCallExpression(TraversalStepType.END,parent , "fill", variable); - } - - /** - * Generates an anonymous graph traversal initialized with the specified value. In Gremlin 3, we need - * to use a different syntax for this when the object is a map, so that information needs to be provided - * to this method so that the correct syntax is used. - * - * @param isMap true if the value contains Map instances, false if it contains Vertex instances - * @param valueCollection the source objects to start the traversal from. - */ - public abstract GroovyExpression generateSeededTraversalExpresssion(boolean isMap, GroovyExpression valueCollection); - - /** - * Returns the current value of the traverser. This is used when generating closure expressions that - * need to operate on the current value in the graph graversal. - * - * @param traverser - * @return - */ - public abstract GroovyExpression getCurrentTraverserObject(GroovyExpression traverser); - - /** - * Generates an expression that transforms the current value of the traverser by - * applying the function specified - * - * @param parent - * @param closureExpression - * @return - */ - public abstract GroovyExpression generateMapExpression(GroovyExpression parent, ClosureExpression closureExpression); - - /** - * Returns whether a select statement generates a map (or Gremlin 2 "Row") when it contains the specified - * number of aliases. - * - */ - public abstract boolean isSelectGeneratesMap(int aliasCount); - - /** - * Generates an expression to get the value of the value from the row map - * generated by select() with the specified key. - * - */ - public abstract GroovyExpression generateGetSelectedValueExpression(LiteralExpression key, - GroovyExpression rowMapExpr); - - public GroovyExpression removeExtraMapFromPathInResult(GroovyExpression parent) { - GroovyExpression listItem = getItVariable(); - GroovyExpression tailExpr = new FunctionCallExpression(listItem, "tail"); - return new FunctionCallExpression(parent, "collect", new ClosureExpression(tailExpr)); - - } - - /** - * Generates a toList expression to execute the gremlin query and - * store the result in a new list. - * - * @param expr - * @return - */ - public GroovyExpression generateToListExpression(GroovyExpression expr) { - return new FunctionCallExpression(TraversalStepType.END, expr, "toList"); - } - - /** - * Finds aliases that absolutely must be brought along with this expression into - * the output expression and cannot just be recreated there. For example, in the - * Gremlin 2 loop expression, the loop semantics break of the alias is simply recreated - * in the output expression. - * @param expr - * @return - */ - public abstract List<String> getAliasesRequiredByExpression(GroovyExpression expr); - - - /** - * Checks if the given expression is an alias expression, and if so - * returns the alias from the expression. Otherwise, null is - * returned. - */ - public String getAliasNameIfRelevant(GroovyExpression expr) { - if(!(expr instanceof FunctionCallExpression)) { - return null; - } - FunctionCallExpression fc = (FunctionCallExpression)expr; - if(! fc.getFunctionName().equals(AS_METHOD)) { - return null; - } - LiteralExpression aliasName = (LiteralExpression)fc.getArguments().get(0); - return aliasName.getValue().toString(); - - } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/5adca841/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java deleted file mode 100644 index 3e6c39a..0000000 --- a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java +++ /dev/null @@ -1,103 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.atlas.gremlin.optimizer; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.apache.atlas.groovy.AbstractFunctionExpression; -import org.apache.atlas.groovy.FunctionCallExpression; -import org.apache.atlas.groovy.GroovyExpression; -import org.apache.atlas.groovy.LiteralExpression; -import org.apache.atlas.groovy.TraversalStepType; - -/** - * Finds all aliases in the expression. - */ -public class AliasFinder implements CallHierarchyVisitor { - - private List<LiteralExpression> foundAliases = new ArrayList<>(); - - //Whether a final alias is needed. A final alias is needed - //if there are transformation steps after the last alias in - //the expression. We initialize this to false since a final - //alias is not needed if there are no aliases. - private boolean finalAliasNeeded = false; - - @Override - public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) { - return true; - } - - @Override - public void visitNonFunctionCaller(GroovyExpression expr) { - - } - - @Override - public void visitNullCaller() { - - } - - private static final Set<TraversalStepType> TRANSFORMATION_STEP_TYPES = new HashSet<>(Arrays.asList( - TraversalStepType.MAP_TO_ELEMENT, - TraversalStepType.MAP_TO_VALUE, - TraversalStepType.FLAT_MAP_TO_ELEMENTS, - TraversalStepType.FLAT_MAP_TO_VALUES, - TraversalStepType.BARRIER, - TraversalStepType.NONE)); - - - @Override - public boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall) { - - if (functionCall instanceof FunctionCallExpression) { - FunctionCallExpression expr = (FunctionCallExpression)functionCall; - if (expr.getType() == TraversalStepType.SIDE_EFFECT && expr.getFunctionName().equals("as")) { - //We found an alias. This is currently the last expression we've seen - //in our traversal back up the expression tree, so at this point a final - //alias is not needed. - LiteralExpression aliasNameExpr = (LiteralExpression)expr.getArguments().get(0); - foundAliases.add(aliasNameExpr); - finalAliasNeeded=false; - } - } - - if(TRANSFORMATION_STEP_TYPES.contains(functionCall.getType())) { - //This step changes the value of the traverser. Now, a final alias - //needs to be added. - if(!foundAliases.isEmpty()) { - finalAliasNeeded = true; - } - } - - return true; - } - - public List<LiteralExpression> getAliases() { - return foundAliases; - } - - public boolean isFinalAliasNeeded() { - - return finalAliasNeeded; - } -} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/5adca841/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java deleted file mode 100644 index 6089353..0000000 --- a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.atlas.gremlin.optimizer; - -import org.apache.atlas.groovy.AbstractFunctionExpression; -import org.apache.atlas.groovy.GroovyExpression; - -/** - * Call back interface for visiting the call hierarchy of a function call. - */ -public interface CallHierarchyVisitor { - - /** - * Visits a function expression before the visit to its caller. - * - * @param expr - * - * @return false to terminate the recursion - */ - boolean preVisitFunctionCaller(AbstractFunctionExpression expr); - - /** - * Called when a caller that is not an instance of - * AbstractFunctionExpression is found. This indicates that the deepest - * point in the call hierarchy has been reached. - * - * - */ - void visitNonFunctionCaller(GroovyExpression expr); - - /** - * Called when a null caller is found (this happens for static/user-defined - * functions). This indicates that the deepest point in the call hierarchy - * has been reached. - * - */ - void visitNullCaller(); - - /** - * Visits a function expression after the visit to its caller. - * - * @param expr - * - * @return false to terminate the recursion - */ - boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall); -} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/5adca841/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java deleted file mode 100644 index 7cf9711..0000000 --- a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java +++ /dev/null @@ -1,130 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.atlas.gremlin.optimizer; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.atlas.gremlin.GremlinExpressionFactory; -import org.apache.atlas.groovy.AbstractFunctionExpression; -import org.apache.atlas.groovy.FunctionCallExpression; -import org.apache.atlas.groovy.GroovyExpression; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Optimizer that pulls has expressions out of an 'and' expression. - * - * For example: - * - * g.V().and(has('x'),has('y') - * - * is optimized to: - * - * g.V().has('x').has('y') - * - * There are certain cases where it is not safe to move an expression out - * of the 'and'. For example, in the expression - * - * g.V().and(has('x').out('y'),has('z')) - * - * has('x').out('y') cannot be moved out of the 'and', since it changes the value of the traverser. - * - * At this time, the ExpandAndsOptimizer is not able to handle this scenario, so we don't extract - * that expression. In this case, the result is: - * - * g.V().has('z').and(has('x').out('y')) - * - * The optimizer will call ExpandAndsOptimization recursively on the children, so - * there is no need to recursively update the children here. - * - * @param expr - * @param context - * @return the expressions that should be unioned together to get the query result - */ -public class ExpandAndsOptimization implements GremlinOptimization { - - private static final Logger logger_ = LoggerFactory.getLogger(ExpandAndsOptimization.class); - - - private final GremlinExpressionFactory factory; - - public ExpandAndsOptimization(GremlinExpressionFactory factory) { - this.factory = factory; - } - - @Override - public boolean appliesTo(GroovyExpression expr, OptimizationContext contxt) { - return expr instanceof FunctionCallExpression && ((FunctionCallExpression)expr).getFunctionName().equals("and"); - } - - /** - * Expands the given and expression. There is no need to recursively - * expand the children here. This method is called recursively by - * GremlinQueryOptimier on the children. - * - */ - @Override - public GroovyExpression apply(GroovyExpression expr, OptimizationContext context) { - - FunctionCallExpression exprAsFunction = (FunctionCallExpression)expr; - GroovyExpression result = exprAsFunction.getCaller(); - - List<GroovyExpression> nonExtractableArguments = new ArrayList<>(); - for(GroovyExpression argument : exprAsFunction.getArguments()) { - - if (GremlinQueryOptimizer.isExtractable(argument)) { - //Set the caller of the deepest expression in the call hierarchy - //of the argument to point to the current result. - //For example, if result is "g.V()" and the updatedArgument is "has('x').has('y')", - //updatedArgument would be a tree like this: - // - // has('y') - // / - // / caller - // |/_ - // has('x') - // / - // / caller - // |/_ - // (null) - // - //We would set the caller of has('x') to be g.V(), so result would become g.V().has('x').has('y'). - // - // Note: This operation is currently done by making a copy of the argument tree. That should - // be changed. - result = GremlinQueryOptimizer.copyWithNewLeafNode( - (AbstractFunctionExpression) argument, result); - } else { - logger_.warn("Found non-extractable argument '{}' in the 'and' expression '{}'",argument.toString(), expr.toString()); - nonExtractableArguments.add(argument); - } - } - - if (!nonExtractableArguments.isEmpty()) { - //add a final 'and' call with the arguments that could not be extracted - result = factory.generateLogicalExpression(result, "and", nonExtractableArguments); - } - return result; - } - - @Override - public boolean isApplyRecursively() { - return true; - } -} http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/5adca841/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java deleted file mode 100644 index ba6059a..0000000 --- a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java +++ /dev/null @@ -1,584 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.atlas.gremlin.optimizer; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; - -import org.apache.atlas.gremlin.GremlinExpressionFactory; -import org.apache.atlas.groovy.AbstractFunctionExpression; -import org.apache.atlas.groovy.ClosureExpression; -import org.apache.atlas.groovy.FunctionCallExpression; -import org.apache.atlas.groovy.GroovyExpression; -import org.apache.atlas.groovy.LiteralExpression; -import org.apache.atlas.groovy.StatementListExpression; -import org.apache.atlas.groovy.TraversalStepType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.Lists; - - - -/** - * Optimization that removes 'or' expressions from a graph traversal when possible - * and replaces them with separate calls that are combined using a logical union operation. - * Unfortunately, Titan does not use indices when executing the child graph traversals associated - * with an 'or' call. In order to make the index be used, we split queries with - * or expressions into multiple queries. These queries are executed individually, - * using indices, and then the results are combined back together. Here is a - * simple example to illustrate this: - * - * <h4>Original Query</h4> - * - * <pre> - * g.V().or(has('name','Fred'),has('age','17')) - * </pre> - * - *<h4>Optimized Query</h4> - * - * <pre> - * def r = [] as Set; - * g.V().has('name','Fred').fill(r); - * g.V().has('age','17').fill(r); - * r; - * </pre> - * - * Here, we introduce an intermediate variable "r" which is declared as a Set. The Set is performing - * the union for us. If there are vertices that happen to both have "Fred" as the name and "17" as the age, - * the Set will prevent the second query execution from adding a duplicate vertex to the result. Recall that - * in Groovy scripts, the last expression is the one that will be returned back to the caller. We refer to - * that expression is the "result expression". For this example, the result expression is simply "r", which - * contains the vertices that matched the query. - * <p/> - * If the query does any kind of transformation of the vertices to produce the query result, that needs - * to be done in the result expression. To understand why that is, let's take a look at another example: - * - * <h4>Original Query</h4> - * - * <pre> - * g.V().or(has('name','Fred'),has('age','17')).as('person').select('person').by('gender') - * </pre> - * - * <h4>Incorrect Optimized Query</h4> - * - * <pre> - * def r = [] as Set; - * g.V().has('name','Fred').as('person').select('person').by('gender').fill(r) - * g.V().has('age','17').as('person').select('person').by('gender').fill(r) - * r; - * </pre> - * - * The problem with this query is that now 'r' contains Strings (the gender of the person). Suppose - * that there is one person named Fred and there are 3 people whose age is 17 (let's say Fred's age is 16). - * The original query would have produced 4 rows, one corresponding to each of those people. The new - * query would produce at most 2 rows - one for 'male' and one for 'female'. This is happening because - * we are now performing the union on the Strings, not on the vertices. To fix this, we need to split - * the original query and put the end portion into the result expression: - * - * <h4>Correct Optimized Query</h4> - * - * <pre> - * def r = [] as Set; - * g.V().has('name','Fred').fill(r) - * g.V().has('age','17').fill(r) - * __.inject(r as Object[]).as('person').select('person').by('gender') - * </pre> - * - * The logic for doing this splitting is described in more detail in - * {@link #moveTransformationsToResultExpression(GroovyExpression, OptimizationContext)}. - * <p/> - * There is one more problematic case that this optimizer is able to handle. Let's look at the following example: - * - * <h4>Original Query</h4> - * - * <pre> - * g.V().or(has('type','Person'),has('superType','Person')).as('x').has('qualifiedName','Fred').as('y').select('x','y').by('name').by('name') - * </pre> - * - * Queries of this form appear often when translating DSL queries. - * - * If we were to optimize this query using the logic described above, we would get something like this: - * - * <h4>Incorrect Optimized Query</h4> - * - * <pre> - * def r = [] as Set; - * g.V().has('type','Person').fill(r); - * g.V().has('superType','Person').fill(r); - * __.inject(r as Object[]).as('x').has('qualifiedName','Fred').as('y').select('x','y'); - * </pre> - * - * While not strictly incorrect, this query will not perform well since the index on qualifiedName will - * not be used. In order for that index to be used, the 'has' expression needs to be part of the original - * query. However, if we do that alone, the query will be broken, since the select - * will now refer to an undefined label: - * - * <h4>Incorrect Optimized Query</h4> - * - * <pre> - * def r = [] as Set; - * g.V().has('type','Person').as('x').has('qualifiedName','Fred').fill(r); - * g.V().has('superType','Person').as('x').has('qualifiedName','Fred').fill(r); - * __.inject(r as Object[]).as('y').select('x','y') - * </pre> - * - * To fix this, we need to save the values of the aliased vertices in the original - * query, and create labels in the result expression that refer to them. We do this - * as follows: - * - * <h4>Correct Optimized Query</h4> - * - * <pre> - * def r = [] as Set; - * g.V().has('type','Person').as('x').has('qualifiedName','Fred').as('y').select('x','y').fill(r); - * g.V().has('superType','Person').as('x').has('qualifiedName','Fred').select('x','y').fill(r); - * __.inject(r as Object[]).as('__tmp').map({((Map)it.get()).get('x')}).as('x').select('__tmp').map({((Map)it.get()).get('x')}).as('y').select('x','y').by('name').by('name') - * </pre> - * - * This is not pretty, but is the best solution we've found so far for supporting expressions that contain aliases in this optimization. - * What ends up happening is that r gets populated with alias->Vertex maps. In the result expression, we make 'x' point - * to a step where the value in the traverser is the vertex for 'x', and we do the same thing for y. The <code>select('_tmp')</code> step in the middle restores the value of - * the traverser back to the map. - * <p/> - * The one known issue with the alias rearrangement is that it breaks loop expressions. As a result, expressions containing loops are currently excluded - * from this optimization. - * - * ExpandOrsOptimization expands the entire expression tree recursively, so it is not invoked - * recursively by GremlinQueryOptimizer. - * - */ -public class ExpandOrsOptimization implements GremlinOptimization { - - private static final Logger logger_ = LoggerFactory.getLogger(ExpandOrsOptimization.class); - - private final GremlinExpressionFactory factory; - - public ExpandOrsOptimization(GremlinExpressionFactory factory) { - this.factory = factory; - } - - @Override - public boolean appliesTo(GroovyExpression expr, OptimizationContext contxt) { - - ExpressionFinder finder = new ExpressionFinder(IsOr.INSTANCE); - GremlinQueryOptimizer.visitCallHierarchy(expr, finder); - return finder.isExpressionFound(); - } - - @Override - public GroovyExpression apply(GroovyExpression expr, OptimizationContext context) { - - setupRangeOptimization(expr, context); - GroovyExpression traveralExpression = moveTransformationsToResultExpression(expr, context); - - FunctionGenerator functionGenerator = new FunctionGenerator(factory, context); - GremlinQueryOptimizer.visitCallHierarchy(traveralExpression, functionGenerator); - traveralExpression = functionGenerator.getNewRootExpression(); - List<GroovyExpression> bodyExpressions = expandOrs(traveralExpression, context); - - - //Adds a statement to define the result variable 'v' in the - //groovy script. The variable is declared as a Set. The type - //of the objects in the Set depend on the number of aliases in the Groovy - // expression: - // - 0 or 1 alias : Vertex - // - multiple aliases: Map<String,Vertex> - StatementListExpression result = new StatementListExpression(); - context.prependStatement(context.getDefineResultVariableStmt()); - - - for (GroovyExpression bodyExpression : bodyExpressions) { - result.addStatement(bodyExpression); - } - result.addStatement(context.getResultExpression()); - return result; - } - - private void setupRangeOptimization(GroovyExpression expr, OptimizationContext context) { - - // Find any range expressions in the expression tree. - RangeFinder rangeFinder = new RangeFinder(factory); - GremlinQueryOptimizer.visitCallHierarchy(expr, rangeFinder); - List<AbstractFunctionExpression> rangeExpressions = rangeFinder.getRangeExpressions(); - if (rangeExpressions.size() == 1) { - OrderFinder orderFinder = new OrderFinder(factory); - GremlinQueryOptimizer.visitCallHierarchy(expr, orderFinder); - if (!orderFinder.hasOrderExpression()) { - // If there is one range expression and no order expression in the unoptimized gremlin, - // save the range parameters to use for adding a range expression to - // each expanded "or" expression result, such that it will only contain the specified range of vertices. - // For now, apply this optimization only if the range start index is zero. - AbstractFunctionExpression rangeExpression = rangeExpressions.get(0); - int[] rangeParameters = factory.getRangeParameters(rangeExpression); - if (rangeParameters[0] == 0) { - context.setRangeExpression(rangeExpression); - } - } - } - } - - private GroovyExpression moveTransformationsToResultExpression(GroovyExpression expr, OptimizationContext context) { - GroovyExpression traveralExpression = expr; - - // Determine the 'split point'. This is the expression that will become - // the deepest function call in the result expression. If a split - // point is found, its caller is changed. The new caller is - // set to the graph traversal expression in the result expression. - // The original caller becomes the new traversal expression that - // will be carried through the rest of the 'or' expansion processing. - // - // Example: g.V().has('x').as('x').select('x') - // Here, select('x') is the split expression - // so : - // 1) the result expression in OptimizationContext becomes [base result expression].select('x') - // 2) we return g.V().has('x').as('x') - - SplitPointFinder finder = new SplitPointFinder(factory); - GremlinQueryOptimizer.visitCallHierarchy(traveralExpression, finder); - AbstractFunctionExpression splitPoint = finder.getSplitPoint(); - - - List<LiteralExpression> aliases = new ArrayList<>(); - - //If we're not splitting the query, there is no need to save/restore - //the aliases. - if(splitPoint != null) { - - traveralExpression = splitPoint.getCaller(); - - AliasFinder aliasFinder = new AliasFinder(); - GremlinQueryOptimizer.visitCallHierarchy(traveralExpression, aliasFinder); - aliases.addAll(aliasFinder.getAliases()); - if(aliasFinder.isFinalAliasNeeded()) { - //The last alias in the expression does not capture the final vertex in the traverser, - //so we need to create an alias to record that. - traveralExpression = factory.generateAliasExpression(traveralExpression, context.getFinalAliasName()); - aliases.add(new LiteralExpression(context.getFinalAliasName())); - } - - GroovyExpression resultExpr = getBaseResultExpression(context, aliases); - splitPoint.setCaller(resultExpr); - expr = removeMapFromPathsIfNeeded(expr, aliases); - context.setResultExpression(expr); - } - - //Add expression(s) to the end of the traversal expression to add the vertices - //that were found into the intermediate variable ('r') - traveralExpression = addCallToUpdateResultVariable(traveralExpression, aliases, context); - return traveralExpression; - } - - private GroovyExpression removeMapFromPathsIfNeeded(GroovyExpression expr, List<LiteralExpression> aliases) { - if(aliases.size() > 0 && factory.isSelectGeneratesMap(aliases.size())) { - PathExpressionFinder pathExprFinder = new PathExpressionFinder(); - GremlinQueryOptimizer.visitCallHierarchy(expr, pathExprFinder); - boolean hasPath = pathExprFinder.isPathExpressionFound(); - if(hasPath) { - //the path will now start with the map that we added. That is an artifact - //of the optimization process and must be removed. - if(expr.getType() != TraversalStepType.END && expr.getType() != TraversalStepType.NONE) { - //we're still in the pipeline, need to execute the query before we can - //modify the result - expr = factory.generateToListExpression(expr); - } - expr = factory.removeExtraMapFromPathInResult(expr); - } - - } - return expr; - } - - /** - * This method adds steps to the end of the initial traversal to add the vertices - * that were found into an intermediate variable (defined as a Set). If there is one alias, - * this set will contain the vertices associated with that Alias. If there are multiple - * aliases, the values in the set will be alias->vertex maps that have the vertex - * associated with the alias for each result. - - * @param expr - * @param aliasNames - * @param context - * @return - */ - private GroovyExpression addCallToUpdateResultVariable(GroovyExpression expr,List<LiteralExpression> aliasNames, OptimizationContext context) { - - GroovyExpression result = expr; - // If there is one range expression in the unoptimized gremlin, - // add a range expression here so that the intermediate variable will only contain - // the specified range of vertices. - AbstractFunctionExpression rangeExpression = context.getRangeExpression(); - if (rangeExpression != null) { - int[] rangeParameters = factory.getRangeParameters(rangeExpression); - result = factory.generateRangeExpression(result, rangeParameters[0], rangeParameters[1]); - } - if( ! aliasNames.isEmpty()) { - result = factory.generateSelectExpression(result, aliasNames, Collections.<GroovyExpression>emptyList()); - } - return factory.generateFillExpression(result, context.getResultVariable()); - } - - /** - * Recursively traverses the given expression, expanding or expressions - * wherever they are found. - * - * @param expr - * @param context - * @return expressions that should be unioned together to get the query result - */ - private List<GroovyExpression> expandOrs(GroovyExpression expr, OptimizationContext context) { - - if (GremlinQueryOptimizer.isOrExpression(expr)) { - return expandOrFunction(expr, context); - } - return processOtherExpression(expr, context); - } - - /** - * This method takes an 'or' expression and expands it into multiple expressions. - * - * For example: - * - * g.V().or(has('x'),has('y') - * - * is expanded to: - * - * g.V().has('x') - * g.V().has('y') - * - * There are certain cases where it is not safe to move an expression out - * of the 'or'. For example, in the expression - * - * g.V().or(has('x').out('y'),has('z')) - * - * has('x').out('y') cannot be moved out of the 'or', since it changes the value of the traverser. - * - * At this time, the ExpandOrsOptimizer is not able to handle this scenario, so we don't remove - * that expression. In cases like this, a final expression is created that ors together - * all of the expressions that could not be extracted. In this case that would be: - * - * g.V().has('z') - * g.V().or(has('y').out('z')) - * - * This processing is done recursively. - * - * - * @param expr - * @param context - * @return the expressions that should be unioned together to get the query result - */ - private List<GroovyExpression> expandOrFunction(GroovyExpression expr, OptimizationContext context) { - FunctionCallExpression functionCall = (FunctionCallExpression) expr; - GroovyExpression caller = functionCall.getCaller(); - List<GroovyExpression> updatedCallers = null; - if (caller != null) { - updatedCallers = expandOrs(caller, context); - } else { - updatedCallers = Collections.singletonList(null); - } - UpdatedExpressions newArguments = getUpdatedChildren(functionCall.getArguments(), context); - List<GroovyExpression> allUpdatedArguments = new ArrayList<>(); - for (List<GroovyExpression> exprs : newArguments.getUpdatedChildren()) { - allUpdatedArguments.addAll(exprs); - } - List<AbstractFunctionExpression> extractableArguments = new ArrayList<>(); - List<GroovyExpression> nonExtractableArguments = new ArrayList<>(); - for (GroovyExpression argument : allUpdatedArguments) { - - if (GremlinQueryOptimizer.isExtractable(argument)) { - extractableArguments.add((AbstractFunctionExpression) argument); - } else { - logger_.warn("Found non-extractable argument '{}; in the 'or' expression '{}'",argument.toString(), expr.toString()); - nonExtractableArguments.add(argument); - } - } - - List<GroovyExpression> result = new ArrayList<>(); - for (GroovyExpression updatedCaller : updatedCallers) { - - for (AbstractFunctionExpression arg : extractableArguments) { - GroovyExpression updated = GremlinQueryOptimizer.copyWithNewLeafNode(arg, updatedCaller); - result.add(updated); - } - if (!nonExtractableArguments.isEmpty()) { - result.add(factory.generateLogicalExpression(updatedCaller, "or", nonExtractableArguments)); - } - - } - return result; - } - - private UpdatedExpressions getUpdatedChildren(List<GroovyExpression> children, OptimizationContext context) { - List<List<GroovyExpression>> updatedChildren = new ArrayList<>(); - boolean changed = false; - for (GroovyExpression child : children) { - List<GroovyExpression> childChoices = expandOrs(child, context); - if (childChoices.size() != 1 || childChoices.iterator().next() != child) { - changed = true; - } - updatedChildren.add(childChoices); - } - return new UpdatedExpressions(changed, updatedChildren); - } - - private UpdatedExpressions getUpdatedChildren(GroovyExpression expr, OptimizationContext context) { - return getUpdatedChildren(expr.getChildren(), context); - } - - /** - * This is called when we encounter an expression that is not an "or", for example an "and" expressio. For these - * expressions, we process the children and create copies with the cartesian product of the updated - * arguments. - * - * Example: - * - * g.V().and(or(has('x),has('y'), or(has('a'),has('b'))) - * - * Here, we have an "and" expression with two children: - * - * 1) or(has('x),has('y') - * 2) or(has('a'),has('b')) - * - * We first process these children. They each yield 2 expressions: - * - * 1 -> [ has('x'), has('y') ] - * 2 -> [ has('a'), has('b') ] - * - * The cartesian product of these gives this: - * - * [ has('x'), has('a') ] - * [ has('x'), has('b') ] - * [ has('y'), has('a') ] - * [ has('y'), has('b') ] - * - * So the overall result is: - * - * g.V().and(has('x'), has('a')) - * g.V().and(has('x'), has('b')) - * g.V().and(has('y'), has('a')) - * g.V().and(has('y'), has('b')) - * - * - * @param source - * @param context - * @return expressions that should be unioned together to get the query result - */ - private List<GroovyExpression> processOtherExpression(GroovyExpression source, OptimizationContext context) { - UpdatedExpressions updatedChildren = getUpdatedChildren(source, context); - if (!updatedChildren.hasChanges()) { - return Collections.singletonList(source); - } - List<GroovyExpression> result = new ArrayList<GroovyExpression>(); - - //The updated children list we get back has the possible values for each child - //in the expression. We compute a cartesian product to get all possible - //combinations of child values. - List<List<GroovyExpression>> updateChildLists = Lists.cartesianProduct(updatedChildren.getUpdatedChildren()); - - for (List<GroovyExpression> updatedChildList : updateChildLists) { - result.add(source.copy(updatedChildList)); - } - return result; - } - - @Override - public boolean isApplyRecursively() { - return false; - } - - /** - * - * This method creates a base result expression that recreates the state of the - * graph traverser at start of the result expression to what it would have been - * if we had been executing one Gremlin query (instead of many and doing a union). - * - * To do this, we start with an anonymous graph traversal that will iterate - * through the values in the intermediate Set that was created. We then need - * to set things up so that the aliases that were in the original gremlin query - * refer to steps with the correct traverser value. - * - * The way we do this depends on the number of aliases. If there are 0 or 1 alias, - * the intermediate variable already contains Vertices, so we just create the alias. - * - * If there are multiple aliases, the intermediate variable contains a String->Vertex - * map. We first create a temporary alias that refers to that map. For each alias, - * we use a MapStep to map the map to the Vertex for that alias. We then add back - * the alias, making it refer to the MapStep. Between the alias restorations, we restore the - * traverser object back to the map. - * - * @param context - * @param aliases - * @return - */ - private GroovyExpression getBaseResultExpression(OptimizationContext context, - List<LiteralExpression> aliases) { - - //Start with an anonymous traversal that gets its objects from the intermediate result variable. - GroovyExpression parent = factory.generateSeededTraversalExpresssion(aliases.size() > 1, context.getResultVariable()); - - if(aliases.isEmpty()) { - return parent; - } - - //The expression we will return. - GroovyExpression result = parent; - - //We use a temporary alias to save/restore the original value of the traverser - //at the start of the query. We do this so we can set the value of the traverser - //back to being the map after we retrieve each alias. If there is only one - //alias, the save/restore is not needed, so there is no need to create this alias. - if(aliases.size() > 1) { - - result = factory.generateAliasExpression(result, context.getTempAliasName()); - } - - Iterator<LiteralExpression> it = aliases.iterator(); - while(it.hasNext()) { - LiteralExpression curAlias = it.next(); - //A map is only generated by Gremlin when there is more than one alias. When there is only one - //alias, the intermediate variable will directly contain the vertices.` - if(factory.isSelectGeneratesMap(aliases.size())) { - //Since there is more than one alias, the current traverser object is an alias->vertex - //map. We use a MapStep to map that map to the Vertex for the current alias. This sets - //the current traverser object to that Vertex. We do this by defining the closure we - //pass to the MapStep call [map].get(aliasName) where [map] is the expression - //that refers to the map. - - GroovyExpression rowMapExpr = factory.getCurrentTraverserObject(factory.getClosureArgumentValue()); - GroovyExpression getExpr = factory.generateGetSelectedValueExpression(curAlias, rowMapExpr); - result = factory.generateMapExpression(result, new ClosureExpression(getExpr)); - } - - //Create alias that points to the previous step. The traverser value at that step - //is the Vertex associated with this alias. - result = factory.generateAliasExpression(result, curAlias.getValue().toString()); - if(it.hasNext()) { - //Restore the current value of the traverser back to the current alias->vertex map - result = factory.generateBackReferenceExpression(result, false, context.getTempAliasName()); - } - } - return result; - } - - - - -} - http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/5adca841/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java ---------------------------------------------------------------------- diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java deleted file mode 100644 index 8997a70..0000000 --- a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.atlas.gremlin.optimizer; - -import java.util.function.Function; - -import org.apache.atlas.groovy.AbstractFunctionExpression; -import org.apache.atlas.groovy.GroovyExpression; - -/** - * Call hierarchy visitor that checks if an expression - * matching the specified criteria is present - * in the call hierarch. - */ -public class ExpressionFinder implements CallHierarchyVisitor { - - private final Function<GroovyExpression, Boolean> predicate; - private boolean expressionFound = false; - - public ExpressionFinder(Function<GroovyExpression, Boolean> predicate) { - this.predicate = predicate; - } - @Override - public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) { - if (predicate.apply(expr)) { - expressionFound = true; - return false; - } - return true; - } - - @Override - public void visitNonFunctionCaller(GroovyExpression expr) { - if (predicate.apply(expr)) { - expressionFound = true; - } - } - - @Override - public void visitNullCaller() { - //nothing to do - } - - @Override - public boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall) { - //nothing to do - return true; - } - - public boolean isExpressionFound() { - return expressionFound; - } - -}
