[jira] [Commented] (DRILL-6381) Add capability to do index based planning and execution

ASF GitHub Bot (JIRA) Tue, 18 Sep 2018 16:06:29 -0700


    [ 
https://issues.apache.org/jira/browse/DRILL-6381?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16619859#comment-16619859
 ]


ASF GitHub Bot commented on DRILL-6381:
---------------------------------------

gparai commented on a change in pull request #1466: DRILL-6381: Add support for 
index based planning and execution
URL: https://github.com/apache/drill/pull/1466#discussion_r218588653
 
 

 ##########
 File path: 
contrib/format-maprdb/src/main/java/org/apache/drill/exec/planner/index/MapRDBIndexDiscover.java
 ##########
 @@ -0,0 +1,374 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.planner.index;
+
+import com.google.common.collect.Maps;
+import com.mapr.db.Admin;
+import com.mapr.db.MapRDB;
+import com.mapr.db.exceptions.DBException;
+import com.mapr.db.index.IndexDesc;
+import com.mapr.db.index.IndexDesc.MissingAndNullOrdering;
+import com.mapr.db.index.IndexFieldDesc;
+import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.CommonTokenStream;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelFieldCollation.NullDirection;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.parser.ExprLexer;
+import org.apache.drill.common.expression.parser.ExprParser;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.physical.base.AbstractDbGroupScan;
+import org.apache.drill.exec.physical.base.GroupScan;
+import org.apache.drill.exec.planner.common.DrillScanRelBase;
+import org.apache.drill.exec.planner.logical.DrillTable;
+import org.apache.drill.exec.planner.physical.ScanPrel;
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.exec.store.dfs.FileSelection;
+import org.apache.drill.exec.store.dfs.FileSystemPlugin;
+import org.apache.drill.exec.store.mapr.db.MapRDBFormatMatcher;
+import org.apache.drill.exec.store.mapr.db.MapRDBFormatPlugin;
+import org.apache.drill.exec.store.mapr.db.MapRDBGroupScan;
+import org.apache.drill.exec.store.mapr.db.json.FieldPathHelper;
+import org.apache.drill.exec.util.ImpersonationUtil;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.ojai.FieldPath;
+
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class MapRDBIndexDiscover extends IndexDiscoverBase implements 
IndexDiscover {
+
+  static final String DEFAULT_STRING_CAST_LEN_STR = "256";
+
+  public MapRDBIndexDiscover(GroupScan inScan, DrillScanRelBase scanRel) {
+    super((AbstractDbGroupScan) inScan, scanRel);
+  }
+
+  public MapRDBIndexDiscover(GroupScan inScan, ScanPrel scanRel) {
+    super((AbstractDbGroupScan) inScan, scanRel);
+  }
+
+  @Override
+  public IndexCollection getTableIndex(String tableName) {
+    //return getTableIndexFromCommandLine(tableName);
+    return getTableIndexFromMFS(tableName);
+  }
+
+  /**
+   *
+   * @param tableName
+   * @return
+   */
+  private IndexCollection getTableIndexFromMFS(String tableName) {
+    try {
+      Set<DrillIndexDescriptor> idxSet = new HashSet<>();
+      Collection<IndexDesc> indexes = admin().getTableIndexes(new 
Path(tableName));
+      if (indexes.size() == 0 ) {
+        logger.error("No index returned from Admin.getTableIndexes for table 
{}", tableName);
+        return null;
+      }
+      for (IndexDesc idx : indexes) {
+        DrillIndexDescriptor hbaseIdx = buildIndexDescriptor(tableName, idx);
+        if (hbaseIdx == null) {
+          //not able to build a valid index based on the index info from MFS
+          logger.error("Not able to build index for {}", idx.toString());
+          continue;
+        }
+        idxSet.add(hbaseIdx);
+      }
+      if (idxSet.size() == 0) {
+        logger.error("No index found for table {}.", tableName);
+        return null;
+      }
+      return new DrillIndexCollection(getOriginalScanRel(), idxSet);
+    } catch (DBException ex) {
+      logger.error("Could not get table index from File system.", ex);
+    }
+    catch(InvalidIndexDefinitionException ex) {
+      logger.error("Invalid index definition detected.", ex);
+    }
+    return null;
+  }
+
+  FileSelection deriveFSSelection(DrillFileSystem fs, IndexDescriptor idxDesc) 
throws IOException {
+    String tableName = idxDesc.getTableName();
+    String[] tablePath = tableName.split("/");
+    String tableParent = tableName.substring(0, tableName.lastIndexOf("/"));
+
+    return FileSelection.create(fs, tableParent, tablePath[tablePath.length - 
1], false);
+  }
+
+  @Override
+  public DrillTable getNativeDrillTable(IndexDescriptor idxDescriptor) {
+
+    try {
+      final AbstractDbGroupScan origScan = getOriginalScan();
+      if (!(origScan instanceof MapRDBGroupScan)) {
+        return null;
+      }
+      MapRDBFormatPlugin maprFormatPlugin = ((MapRDBGroupScan) 
origScan).getFormatPlugin();
+      FileSystemPlugin fsPlugin = (FileSystemPlugin) (((MapRDBGroupScan) 
origScan).getStoragePlugin());
+
+      DrillFileSystem fs = 
ImpersonationUtil.createFileSystem(origScan.getUserName(), 
fsPlugin.getFsConf());
+      MapRDBFormatMatcher matcher = (MapRDBFormatMatcher) 
(maprFormatPlugin.getMatcher());
+      FileSelection fsSelection = deriveFSSelection(fs, idxDescriptor);
+      return matcher.isReadableIndex(fs, fsSelection, fsPlugin, 
fsPlugin.getName(),
+          origScan.getUserName(), idxDescriptor);
+
+    } catch (Exception e) {
+      logger.error("Failed to get native DrillTable.", e);
+    }
+    return null;
+  }
+
+  private SchemaPath fieldName2SchemaPath(String fieldName) {
+    if (fieldName.contains(":")) {
+      fieldName = fieldName.split(":")[1];
+    }
+    if (fieldName.contains(".")) {
+      return 
FieldPathHelper.fieldPath2SchemaPath(FieldPath.parseFrom(fieldName));
+    }
+    return SchemaPath.getSimplePath(fieldName);
+  }
+
+  String getDrillTypeStr(String maprdbTypeStr) {
+    String typeStr = maprdbTypeStr.toUpperCase();
+    String[] typeTokens = typeStr.split("[)(]");
+    String typeData = DEFAULT_STRING_CAST_LEN_STR;
+    if(typeTokens.length > 1) {
+      typeStr = typeTokens[0];
+      typeData = typeTokens[1];
+    }
+    switch(typeStr){
+      case "STRING":
+        // set default width since it is not specified
+        return "VARCHAR("+typeData+")";
+      case "LONG":
+        return "BIGINT";
+      case "INT":
+      case "INTEGER":
+        return "INT";
+      case "FLOAT":
+        return "FLOAT4";
+      case "DOUBLE":
+        return "FLOAT8";
+      case "INTERVAL_YEAR_MONTH":
+        return "INTERVALYEAR";
+      case "INTERVAL_DAY_TIME":
+        return "INTERVALDAY";
+      case "BOOLEAN":
+        return "BIT";
+      case "BINARY":
+        return "VARBINARY";
+      case "ANY":
+      case "DECIMAL":
+        return null;
+      default: return typeStr;
+    }
+
+  }
+
+  TypeProtos.MajorType getDrillType(String typeStr) {
+    switch(typeStr){
+      case "VARCHAR":
+      case "CHAR":
+      case "STRING":
+        // set default width since it is not specified
+        return
+            Types.required(TypeProtos.MinorType.VARCHAR).toBuilder().setWidth(
+                
getOriginalScanRel().getCluster().getTypeFactory().createSqlType(SqlTypeName.VARCHAR).getPrecision()).build();
+      case "LONG":
+      case "BIGINT":
+        return Types.required(TypeProtos.MinorType.BIGINT);
+      case "INT":
+      case "INTEGER":
+        return Types.required(TypeProtos.MinorType.INT);
+      case "FLOAT":
+        return Types.required(TypeProtos.MinorType.FLOAT4);
+      case "DOUBLE":
+        return Types.required(TypeProtos.MinorType.FLOAT8);
+      case "INTERVAL_YEAR_MONTH":
+        return Types.required(TypeProtos.MinorType.INTERVALYEAR);
+      case "INTERVAL_DAY_TIME":
+        return Types.required(TypeProtos.MinorType.INTERVALDAY);
+      case "BOOLEAN":
+        return Types.required(TypeProtos.MinorType.BIT);
+      case "BINARY":
+        return 
Types.required(TypeProtos.MinorType.VARBINARY).toBuilder().build();
+      case "ANY":
+      case "DECIMAL":
+        return null;
+      default: return Types.required(TypeProtos.MinorType.valueOf(typeStr));
+    }
+  }
+
+  private LogicalExpression castFunctionSQLSyntax(String field, String type) 
throws InvalidIndexDefinitionException {
+    //get castTypeStr so we can construct SQL syntax string before MapRDB 
could provide such syntax
+    String castTypeStr = getDrillTypeStr(type);
+    if(castTypeStr == null) {//no cast
+      throw new InvalidIndexDefinitionException("cast function type not 
recognized: " + type + "for field " + field);
+    }
+    try {
+      String castFunc = String.format("cast( %s as %s)", field, castTypeStr);
+      final ExprLexer lexer = new ExprLexer(new ANTLRStringStream(castFunc));
+      final CommonTokenStream tokens = new CommonTokenStream(lexer);
+      final ExprParser parser = new ExprParser(tokens);
+      final ExprParser.parse_return ret = parser.parse();
+      logger.trace("{}, {}", tokens, ret);
+      return ret.e;
+    }catch(Exception ex) {
+      logger.error("parse failed{}", ex);
+    }
+    return null;
+  }
+
+  private LogicalExpression getIndexExpression(IndexFieldDesc desc) throws 
InvalidIndexDefinitionException {
+    final String fieldName = desc.getFieldPath().asPathString();
+    final String functionDef = desc.getFunctionName();
+    if ((functionDef != null)) {//this is a function
+      String[] tokens = functionDef.split("\\s+");
+      if (tokens[0].equalsIgnoreCase("cast")) {
 
 Review comment:
   Use static final string instead?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Add capability to do index based planning and execution
> -------------------------------------------------------
>
>                 Key: DRILL-6381
>                 URL: https://issues.apache.org/jira/browse/DRILL-6381
>             Project: Apache Drill
>          Issue Type: New Feature
>          Components: Execution - Relational Operators, Query Planning &amp; 
> Optimization
>            Reporter: Aman Sinha
>            Assignee: Aman Sinha
>            Priority: Major
>             Fix For: 1.15.0
>
>
> If the underlying data source supports indexes (primary and secondary 
> indexes), Drill should leverage those during planning and execution in order 
> to improve query performance.  
> On the planning side, Drill planner should be enhanced to provide an 
> abstraction layer which express the index metadata and statistics.  Further, 
> a cost-based index selection is needed to decide which index(es) are 
> suitable.  
> On the execution side, appropriate operator enhancements would be needed to 
> handle different categories of indexes such as covering, non-covering 
> indexes, taking into consideration the index data may not be co-located with 
> the primary table, i.e a global index.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

[jira] [Commented] (DRILL-6381) Add capability to do index based planning and execution

Reply via email to