This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 4c033b7ed16 HIVE-27558: HBase table query does not push BETWEEN predicate to storage layer (Dayakar M, reviewed by Krisztian Kasa)
4c033b7ed16 is described below

commit 4c033b7ed16bc544cf1e6cb262208f05c380730e
Author: Dayakar M <59791497+mdaya...@users.noreply.github.com>
AuthorDate: Fri Sep 15 19:37:59 2023 +0530

    HIVE-27558: HBase table query does not push BETWEEN predicate to storage layer (Dayakar M, reviewed by Krisztian Kasa)
    
    Co-authored-by: mdayakar <mdaya...@cloudera.com>
---
 .../hadoop/hive/hbase/HBaseStorageHandler.java     |  44 +++++
 .../test/queries/positive/hbase_between_pushdown.q |  28 +++
 .../results/positive/hbase_between_pushdown.q.out  | 217 +++++++++++++++++++++
 3 files changed, 289 insertions(+)
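
Background for this change: the HBase storage handler decomposes filters with an
IndexPredicateAnalyzer that recognizes plain key comparisons (such as =, <=, >=)
but not GenericUDFBetween, so a range written as 'key >= lo and key <= hi' that
Hive's optimizer folds into BETWEEN was previously left entirely as a Hive-side
residual filter. The patch rewrites a non-inverted BETWEEN (first child, the NOT
flag, equal to "false") into the equivalent '<= AND >=' pair before analysis.
A minimal, self-contained sketch of the transformation on a toy expression tree
(a hypothetical model for illustration, not Hive's ExprNodeDesc API):

    import java.util.List;

    public class BetweenRewriteSketch {
      // Toy expression node: an operator or leaf name plus child expressions.
      record Expr(String op, List<Expr> children) {
        static Expr leaf(String name) { return new Expr(name, List.of()); }
        static Expr node(String op, Expr... cs) { return new Expr(op, List.of(cs)); }
        @Override public String toString() {
          return children.isEmpty() ? op
              : op + "(" + String.join(", ", children.stream().map(Expr::toString).toList()) + ")";
        }
      }

      // Rewrite only the non-inverted form; NOT BETWEEN (first child "true")
      // stays untouched, mirroring the patch's "false".equalsIgnoreCase guard.
      static Expr rewrite(Expr e) {
        if ("between".equals(e.op()) && "false".equals(e.children().get(0).op())) {
          Expr key = e.children().get(1), lo = e.children().get(2), hi = e.children().get(3);
          return Expr.node("and", Expr.node("<=", key, hi), Expr.node(">=", key, lo));
        }
        return e;
      }

      public static void main(String[] args) {
        Expr between = Expr.node("between",
            Expr.leaf("false"), Expr.leaf("key"), Expr.leaf("90"), Expr.leaf("100"));
        System.out.println(rewrite(between)); // and(<=(key, 100), >=(key, 90))
      }
    }

The rewritten pair is a shape the analyzer already knows how to push, which is
why the q.out below shows 'filterExpr: ((key <= 100) and (key >= 90))' directly
on the TableScan, even though the unpushed plan at the end still prints the
predicate in its internal BETWEEN form.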

diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
index 24b2ec93933..4e1a5da725c 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
@@ -54,6 +55,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.security.authorization.HiveCustomStorageHandlerUtils;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
@@ -427,6 +429,10 @@ public class HBaseStorageHandler extends DefaultStorageHandler
       ExprNodeDesc predicate) {
     ColumnMapping keyMapping = hBaseSerDe.getHBaseSerdeParam().getKeyColumnMapping();
     ColumnMapping tsMapping = hBaseSerDe.getHBaseSerdeParam().getTimestampColumnMapping();
+
+    // Converts a 'BETWEEN' predicate to a '<= AND >=' predicate.
+    predicate = convertBetweenToLTOrEqualAndGTOrEqual(predicate);
+
     IndexPredicateAnalyzer analyzer = HiveHBaseTableInputFormat.newIndexPredicateAnalyzer(
         keyMapping.columnName, keyMapping.isComparable(),
         tsMapping == null ? null : tsMapping.columnName);
@@ -503,6 +509,19 @@ public class HBaseStorageHandler extends DefaultStorageHandler
             continue;
           }
         }
+
+        // In HBase, keys are sorted lexicographically, so the byte representation of negative values comes after the positive values.
+        // So don't push down the predicate if any constant value is a negative number.
+        // If the first condition's constant value is less than 0, don't push down the predicate.
+        if (((Number) searchConditions.get(0).getConstantDesc().getValue()).longValue() < 0 && scSize == 2) {
+          residualPredicate = extractResidualCondition(analyzer, searchConditions, residualPredicate);
+          continue;
+        }
+        // If the second condition's constant value is less than 0, don't push down the predicate.
+        if (scSize == 2 && ((Number) searchConditions.get(1).getConstantDesc().getValue()).longValue() < 0) {
+          residualPredicate = extractResidualCondition(analyzer, searchConditions, residualPredicate);
+          continue;
+        }
       }
 
       // This one can be pushed
@@ -516,6 +535,31 @@ public class HBaseStorageHandler extends DefaultStorageHandler
     return decomposedPredicate;
   }
 
+  private static ExprNodeDesc convertBetweenToLTOrEqualAndGTOrEqual(ExprNodeDesc predicate) {
+    if (predicate instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) predicate).getGenericUDF() instanceof GenericUDFBetween && "false".equalsIgnoreCase(
+        predicate.getChildren().get(0).getExprString())) {
+      try {
+        List<ExprNodeDesc> betweenInputs = predicate.getChildren();
+        List<ExprNodeDesc> gtOrEqualInputs = Arrays.asList(betweenInputs.get(1), betweenInputs.get(2));
+        List<ExprNodeDesc> ltOrEqualInputs = Arrays.asList(betweenInputs.get(1), betweenInputs.get(3));
+
+        ExprNodeGenericFuncDesc gtOrEqual =
+            ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo(">=").getGenericUDF(), ">=",
+                gtOrEqualInputs);
+        ExprNodeGenericFuncDesc ltOrEqual =
+            ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("<=").getGenericUDF(), "<=",
+                ltOrEqualInputs);
+        List<ExprNodeDesc> andInputs = Arrays.asList(ltOrEqual, gtOrEqual);
+
+        predicate = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and").getGenericUDF(), "and",
+            andInputs);
+      } catch (Exception se) {
+        LOG.warn("Unable to convert 'BETWEEN' predicate to '>= AND <=' predicate.", se);
+      }
+    }
+    return predicate;
+  }
+
   private static ExprNodeGenericFuncDesc extractStorageHandlerCondition(IndexPredicateAnalyzer analyzer,
           List<IndexSearchCondition> searchConditions, ExprNodeGenericFuncDesc inputExpr) {
     if (inputExpr == null) {
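
A note on the negative-key guard above (a plain-Java sketch for illustration,
not part of the patch): with the '#binary' key mapping, an int key is stored as
its big-endian two's-complement bytes, and HBase scans row keys in unsigned
lexicographic order. A negative value has its sign bit set and therefore sorts
after every non-negative value, so a range scan from -5 to 5 would return
nothing:

    import java.nio.ByteBuffer;

    public class KeyOrderDemo {
      static byte[] toBytes(int v) {
        return ByteBuffer.allocate(4).putInt(v).array();
      }

      // Unsigned byte-wise comparison, the order HBase scans row keys in.
      static int compare(byte[] a, byte[] b) {
        for (int i = 0; i < Math.min(a.length, b.length); i++) {
          int d = (a[i] & 0xFF) - (b[i] & 0xFF);
          if (d != 0) return d;
        }
        return a.length - b.length;
      }

      public static void main(String[] args) {
        // -5 serializes as FF FF FF FB, 5 as 00 00 00 05; byte-wise, -5 > 5,
        // so the scan range [-5, 5] would contain no rows.
        System.out.println(compare(toBytes(-5), toBytes(5)) > 0); // true
      }
    }

This is why a range predicate with a negative bound is kept as a residual
Hive-side filter in the q file below rather than pushed to HBase.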
diff --git a/hbase-handler/src/test/queries/positive/hbase_between_pushdown.q b/hbase-handler/src/test/queries/positive/hbase_between_pushdown.q
new file mode 100644
index 00000000000..a2a349be190
--- /dev/null
+++ b/hbase-handler/src/test/queries/positive/hbase_between_pushdown.q
@@ -0,0 +1,28 @@
+--! qt:dataset:src
+
+CREATE EXTERNAL TABLE hbase_pushdown(key int, value string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#binary,cf:string")
+TBLPROPERTIES ("external.table.purge" = "true");
+
+INSERT OVERWRITE TABLE hbase_pushdown SELECT * FROM src;
+INSERT INTO hbase_pushdown VALUES(-10, 'NegativeValue10'),(-9, 'NegativeValue9'),(-5, 'NegativeValue5'),(-2, 'NegativeValue2');
+
+explain select * from hbase_pushdown where key >= 90 and key <= 100;
+select * from hbase_pushdown where key >= 90 and key <= 100;
+
+-- In HBase, keys are sorted lexicographically, so the byte representation of negative values comes after the positive values.
+-- So don't push down the predicate if any constant value is a negative number.
+explain select * from hbase_pushdown where key >= -5 and key <= 5;
+select * from hbase_pushdown where key >= -5 and key <= 5;
+
+-- Here the predicate can be pushed down even though the value is negative, as it is not a range predicate.
+explain select * from hbase_pushdown where key = -5;
+select * from hbase_pushdown where key = -5;
+
+
+set hive.optimize.ppd.storage=false;
+
+-- Now the 'BETWEEN' predicate is neither converted to '<= AND >=' nor pushed down.
+explain select * from hbase_pushdown where key >= 90 and key <= 100;
+select * from hbase_pushdown where key >= 90 and key <= 100;
\ No newline at end of file
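
Reading the q.out that follows: when a range predicate is pushed, it appears as
'filterExpr' directly on the TableScan of a single Fetch stage; when it is not
pushed (a negative bound, or hive.optimize.ppd.storage=false), the plan instead
contains a Map Reduce stage with a Hive-side Filter Operator, and in the last
case the predicate is still printed in its internal BETWEEN form because the
storage-handler conversion never runs.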
diff --git a/hbase-handler/src/test/results/positive/hbase_between_pushdown.q.out b/hbase-handler/src/test/results/positive/hbase_between_pushdown.q.out
new file mode 100644
index 00000000000..6fe60f9c599
--- /dev/null
+++ b/hbase-handler/src/test/results/positive/hbase_between_pushdown.q.out
@@ -0,0 +1,217 @@
+PREHOOK: query: CREATE EXTERNAL TABLE hbase_pushdown(key int, value string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#binary,cf:string")
+TBLPROPERTIES ("external.table.purge" = "true")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hbase_pushdown
+POSTHOOK: query: CREATE EXTERNAL TABLE hbase_pushdown(key int, value string)
+STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#binary,cf:string")
+TBLPROPERTIES ("external.table.purge" = "true")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hbase_pushdown
+PREHOOK: query: INSERT OVERWRITE TABLE hbase_pushdown SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@hbase_pushdown
+POSTHOOK: query: INSERT OVERWRITE TABLE hbase_pushdown SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@hbase_pushdown
+PREHOOK: query: INSERT INTO hbase_pushdown VALUES(-10, 'NegativeValue10'),(-9, 'NegativeValue9'),(-5, 'NegativeValue5'),(-2, 'NegativeValue2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@hbase_pushdown
+POSTHOOK: query: INSERT INTO hbase_pushdown VALUES(-10, 'NegativeValue10'),(-9, 'NegativeValue9'),(-5, 'NegativeValue5'),(-2, 'NegativeValue2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@hbase_pushdown
+PREHOOK: query: explain select * from hbase_pushdown where key >= 90 and key <= 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from hbase_pushdown where key >= 90 and key <= 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: hbase_pushdown
+          filterExpr: ((key <= 100) and (key >= 90)) (type: boolean)
+          Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: key (type: int), value (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: select * from hbase_pushdown where key >= 90 and key <= 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key >= 90 and key <= 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+90     val_90
+92     val_92
+95     val_95
+96     val_96
+97     val_97
+98     val_98
+100    val_100
+PREHOOK: query: explain select * from hbase_pushdown where key >= -5 and key <= 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from hbase_pushdown where key >= -5 and key <= 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hbase_pushdown
+            Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((key <= 5) and (key >= -5)) (type: boolean)
+              Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: int), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from hbase_pushdown where key >= -5 and key <= 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key >= -5 and key <= 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+0      val_0
+2      val_2
+4      val_4
+5      val_5
+-5     NegativeValue5
+-2     NegativeValue2
+PREHOOK: query: explain select * from hbase_pushdown where key = -5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from hbase_pushdown where key = -5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: hbase_pushdown
+          filterExpr: (key = -5) (type: boolean)
+          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: -5 (type: int), value (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+            ListSink
+
+PREHOOK: query: select * from hbase_pushdown where key = -5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key = -5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+-5     NegativeValue5
+PREHOOK: query: explain select * from hbase_pushdown where key >= 90 and key <= 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from hbase_pushdown where key >= 90 and key <= 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: hbase_pushdown
+            Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key BETWEEN 90 AND 100 (type: boolean)
+              Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: int), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from hbase_pushdown where key >= 90 and key <= 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_pushdown where key >= 90 and key <= 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_pushdown
+#### A masked pattern was here ####
+90     val_90
+92     val_92
+95     val_95
+96     val_96
+97     val_97
+98     val_98
+100    val_100
