[GitHub] [drill] vvysotskyi commented on a diff in pull request #2638: DRILL-8136: Overhaul implict type casting logic

GitBox Wed, 14 Sep 2022 10:46:31 -0700


vvysotskyi commented on code in PR #2638:
URL: https://github.com/apache/drill/pull/2638#discussion_r971097106



##########
exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java:
##########
@@ -21,148 +21,211 @@
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeSet;
 
 import org.apache.drill.common.types.TypeProtos.MinorType;
+import 
org.apache.drill.shaded.guava.com.google.common.graph.ImmutableValueGraph;
+import org.apache.drill.shaded.guava.com.google.common.graph.ValueGraphBuilder;
 
 public class ResolverTypePrecedence {
 
+  // A weighted directed graph that represents the cost of casting between
+  // pairs of data types. The edge weights represent casting preferences and
+  // it is important to note that only some of these prefereces can be
+  // understood in terms of factors like computational cost or loss of
+  // precision. The others are derived from the expected behaviour of the query
+  // engine in the face of various data types and queries as expressed by the
+  // test suite.
+  //
+  // Bear in mind that this class only establishes which casts will be tried
+  // automatically and how they're ranked. See
+  // {@link org.apache.drill.exec.resolver.TypeCastRules} for listings of which
+  // casts are possible at all.
+  public static final ImmutableValueGraph<MinorType, Float> CAST_GRAPH = 
ValueGraphBuilder
+    .directed()
+    .<MinorType, Float>immutable()
+
+    // null type source vertex (null is castable to any type)
+    .putEdgeValue(MinorType.NULL, MinorType.VARCHAR, 1f)
+    .putEdgeValue(MinorType.NULL, MinorType.BIT, 2f)
+    .putEdgeValue(MinorType.NULL, MinorType.INT, 3f)
+    .putEdgeValue(MinorType.NULL, MinorType.FLOAT4, 4f)
+    .putEdgeValue(MinorType.NULL, MinorType.DECIMAL9, 5f)
+    .putEdgeValue(MinorType.NULL, MinorType.DATE, 6f)
+    .putEdgeValue(MinorType.NULL, MinorType.INTERVALDAY, 7f)
+    .putEdgeValue(MinorType.NULL, MinorType.MONEY, 8f)
+    .putEdgeValue(MinorType.NULL, MinorType.DICT, 9f)
+
+    // bit conversions
+    // prefer to cast VARCHAR to BIT than BIT to numerics
+    .putEdgeValue(MinorType.BIT, MinorType.TINYINT, 10f)
+    .putEdgeValue(MinorType.BIT, MinorType.UINT1, 10f)
+
+    // unsigned int widening
+    .putEdgeValue(MinorType.UINT1, MinorType.UINT2, 1f)
+    .putEdgeValue(MinorType.UINT2, MinorType.UINT4, 1f)
+    .putEdgeValue(MinorType.UINT4, MinorType.UINT8, 1f)
+    .putEdgeValue(MinorType.UINT8, MinorType.VARDECIMAL, 1f)
+    // unsigned int conversions
+    // prefer to cast UINTs to BIGINT over FLOAT4
+    .putEdgeValue(MinorType.UINT4, MinorType.BIGINT, 1f)
+    .putEdgeValue(MinorType.UINT4, MinorType.FLOAT4, 2f)
+    .putEdgeValue(MinorType.UINT8, MinorType.FLOAT4, 2f)
+
+    // int widening
+    .putEdgeValue(MinorType.TINYINT, MinorType.SMALLINT, 1f)
+    .putEdgeValue(MinorType.SMALLINT, MinorType.INT, 1f)
+    .putEdgeValue(MinorType.INT, MinorType.BIGINT, 1f)
+    // int conversions
+    .putEdgeValue(MinorType.BIGINT, MinorType.FLOAT4, 1f)
+    // prefer to cast INTs to FLOATs over VARDECIMALs
+    .putEdgeValue(MinorType.BIGINT, MinorType.VARDECIMAL, 2f)
+
+    // float widening
+    .putEdgeValue(MinorType.FLOAT4, MinorType.FLOAT8, 1f)
+    // float conversion
+    // FLOATs are not currently castable to VARDECIMAL (see TypeCastRules)
+    // but it is not possible to avoid some path between them here since
+    // FLOATs must ultimately be implcitly castable to VARCHAR, and VARCHAR
+    // to VARDECIMAL.
+    // prefer the cast in the opposite direction
+    .putEdgeValue(MinorType.FLOAT8, MinorType.VARDECIMAL, 20f)
+
+    // decimal widening
+    .putEdgeValue(MinorType.DECIMAL9, MinorType.DECIMAL18, 1f)
+    .putEdgeValue(MinorType.DECIMAL18, MinorType.DECIMAL28DENSE, 1f)
+    .putEdgeValue(MinorType.DECIMAL28DENSE, MinorType.DECIMAL28SPARSE, 1f)
+    .putEdgeValue(MinorType.DECIMAL28SPARSE, MinorType.DECIMAL38DENSE, 1f)
+    .putEdgeValue(MinorType.DECIMAL38DENSE, MinorType.DECIMAL38SPARSE, 1f)
+    .putEdgeValue(MinorType.DECIMAL38SPARSE, MinorType.VARDECIMAL, 1f)
+    .putEdgeValue(MinorType.MONEY, MinorType.VARDECIMAL, 1f)
+    // decimal conversions
+    // prefer to cast INTs to VARDECIMALs over VARDECIMALs to FLOATs
+    .putEdgeValue(MinorType.VARDECIMAL, MinorType.FLOAT4, 10f)
+    // prefer the casts in the opposite directions
+    .putEdgeValue(MinorType.VARDECIMAL, MinorType.INT, 11f)
+    .putEdgeValue(MinorType.VARDECIMAL, MinorType.VARCHAR, 12f)
+
+    // interval widening
+    .putEdgeValue(MinorType.INTERVALDAY, MinorType.INTERVALYEAR, 1f)
+    .putEdgeValue(MinorType.INTERVALYEAR, MinorType.INTERVAL, 1f)
+    // interval conversions
+    // prefer the cast in the opposite direction
+    .putEdgeValue(MinorType.INTERVAL, MinorType.VARCHAR, 10f)
+
+    // dict widening
+    .putEdgeValue(MinorType.DICT, MinorType.MAP, 1f)
+
+    // timestamp widening
+    .putEdgeValue(MinorType.DATE, MinorType.TIMESTAMP, 1f)
+    .putEdgeValue(MinorType.TIMESTAMP, MinorType.TIMESTAMPTZ, 1f)
+    .putEdgeValue(MinorType.TIME, MinorType.TIMETZ, 1f)
+    // timestamp conversions
+    // TIMESTAMP casting preference: DATE > TIME > VARCHAR
+    // prefer the casts in the opposite directions
+    .putEdgeValue(MinorType.TIMESTAMP, MinorType.DATE, 10f)
+    .putEdgeValue(MinorType.TIMESTAMP, MinorType.TIME, 11f)
+    .putEdgeValue(MinorType.TIMESTAMPTZ, MinorType.VARCHAR, 20f)
+    .putEdgeValue(MinorType.TIMETZ, MinorType.VARCHAR, 20f)
+
+    // char and binary widening
+    .putEdgeValue(MinorType.FIXEDCHAR, MinorType.VARCHAR, 1f)
+    .putEdgeValue(MinorType.FIXEDBINARY, MinorType.VARBINARY, 1f)
+    // char and binary conversions
+    .putEdgeValue(MinorType.VARCHAR, MinorType.INT, 1f)
+    .putEdgeValue(MinorType.VARCHAR, MinorType.FLOAT4, 2f)
+    .putEdgeValue(MinorType.VARCHAR, MinorType.VARDECIMAL, 3f)
+    .putEdgeValue(MinorType.VARCHAR, MinorType.TIMESTAMP, 4f)
+    .putEdgeValue(MinorType.VARCHAR, MinorType.INTERVALDAY, 5f)
+    .putEdgeValue(MinorType.VARCHAR, MinorType.BIT, 6f)
+    .putEdgeValue(MinorType.VARCHAR, MinorType.VARBINARY, 7f)
+    .putEdgeValue(MinorType.VARBINARY, MinorType.VARCHAR, 1f)
+
+    // union type sink vertex
+    .putEdgeValue(MinorType.LIST, MinorType.UNION, 1f)
+    .putEdgeValue(MinorType.MAP, MinorType.UNION, 1f)
+    .putEdgeValue(MinorType.VARBINARY, MinorType.UNION, 1f)
+
+    .build();
+
+  /**
+   * Searches the implicit casting graph for the path of least total cost using
+   * Dijkstra's algorithm.

Review Comment:
   Huh, it is good that we don't have negative distances, so it can be used.



##########
exec/java-exec/src/main/java/org/apache/drill/exec/resolver/ResolverTypePrecedence.java:
##########
@@ -21,148 +21,211 @@
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeSet;
 
 import org.apache.drill.common.types.TypeProtos.MinorType;
+import 
org.apache.drill.shaded.guava.com.google.common.graph.ImmutableValueGraph;
+import org.apache.drill.shaded.guava.com.google.common.graph.ValueGraphBuilder;
 
 public class ResolverTypePrecedence {
 
+  // A weighted directed graph that represents the cost of casting between
+  // pairs of data types. The edge weights represent casting preferences and
+  // it is important to note that only some of these prefereces can be
+  // understood in terms of factors like computational cost or loss of
+  // precision. The others are derived from the expected behaviour of the query
+  // engine in the face of various data types and queries as expressed by the
+  // test suite.
+  //
+  // Bear in mind that this class only establishes which casts will be tried
+  // automatically and how they're ranked. See
+  // {@link org.apache.drill.exec.resolver.TypeCastRules} for listings of which
+  // casts are possible at all.
+  public static final ImmutableValueGraph<MinorType, Float> CAST_GRAPH = 
ValueGraphBuilder

Review Comment:
   Thanks for using ValueGraph, code with it looks cleaner!



##########
exec/java-exec/src/main/java/org/apache/drill/exec/resolver/DefaultFunctionResolver.java:
##########
@@ -58,27 +56,20 @@ public DrillFuncHolder getBestMatch(List<DrillFuncHolder> 
methods, FunctionCall
       }
     }
 
-    if (bestcost < 0) {
+    if (bestcost == Float.POSITIVE_INFINITY) {
       //did not find a matched func implementation, either w/ or w/o implicit 
casts
       //TODO: raise exception here?
       return null;
-    } else {
-      if (AssertionUtil.isAssertionsEnabled() && bestMatchAlternatives.size() 
> 0) {
-        /*
-         * There are other alternatives to the best match function which could 
have been selected
-         * Log the possible functions and the chose implementation and raise 
an exception
-         */
-        logger.error("Chosen function impl: " + bestmatch.toString());
-
-        // printing the possible matches
-        logger.error("Printing all the possible functions that could have 
matched: ");
-        for (DrillFuncHolder holder: bestMatchAlternatives) {
-          logger.error(holder.toString());
-        }
+    }
+    if (bestMatchAlternatives.size() > 0) {

Review Comment:
   Drill should throw an error for this case because it means that a randomly 
chosen function could be selected and return inconsistent results.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@drill.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

[GitHub] [drill] vvysotskyi commented on a diff in pull request #2638: DRILL-8136: Overhaul implict type casting logic

Reply via email to