[09/20] phoenix git commit: PHOENIX-3439 Query using an RVC based on the base table PK is incorrectly using an index and doing a full scan instead of a point query

samarth Mon, 07 Nov 2016 13:23:01 -0800

PHOENIX-3439 Query using an RVC based on the base table PK is incorrectly using 
an index and doing a full scan instead of a point query



Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/1dcac346
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/1dcac346
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/1dcac346

Branch: refs/heads/encodecolumns2
Commit: 1dcac3463408059c5f2136e202dd3ae4ccd02803
Parents: 291624f
Author: James Taylor <[email protected]>
Authored: Sat Nov 5 21:16:41 2016 -0700
Committer: James Taylor <[email protected]>
Committed: Sat Nov 5 21:21:20 2016 -0700

----------------------------------------------------------------------
 .../org/apache/phoenix/compile/ScanRanges.java  | 18 +++++--
 .../apache/phoenix/optimize/QueryOptimizer.java | 15 ++++--
 .../phoenix/compile/QueryOptimizerTest.java     | 50 ++++++++++++++++++++
 3 files changed, 75 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/phoenix/blob/1dcac346/phoenix-core/src/main/java/org/apache/phoenix/compile/ScanRanges.java
----------------------------------------------------------------------
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/compile/ScanRanges.java 
b/phoenix-core/src/main/java/org/apache/phoenix/compile/ScanRanges.java
index 19a4692..5a1fcb7 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/compile/ScanRanges.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/compile/ScanRanges.java
@@ -570,12 +570,24 @@ public class ScanRanges {
         return this.useSkipScanFilter ? ScanUtil.getRowKeyPosition(slotSpan, 
ranges.size()) : Math.max(getBoundPkSpan(ranges, slotSpan), 
getBoundMinMaxSlotCount());
     }
 
-    public int getBoundMinMaxSlotCount() {
+    private int getBoundMinMaxSlotCount() {
         if (minMaxRange == KeyRange.EMPTY_RANGE || minMaxRange == 
KeyRange.EVERYTHING_RANGE) {
             return 0;
         }
-        // The minMaxRange is always a single key
-        return 1 + slotSpan[0];
+        ImmutableBytesWritable ptr = new ImmutableBytesWritable();
+        // We don't track how many slots are bound for the minMaxRange, so we 
need
+        // to traverse the upper and lower range key and count the slots.
+        int lowerCount = 0;
+        int maxOffset = schema.iterator(minMaxRange.getLowerRange(), ptr);
+        for (int pos = 0; Boolean.TRUE.equals(schema.next(ptr, pos, 
maxOffset)); pos++) {
+            lowerCount++;
+        }
+        int upperCount = 0;
+        maxOffset = schema.iterator(minMaxRange.getUpperRange(), ptr);
+        for (int pos = 0; Boolean.TRUE.equals(schema.next(ptr, pos, 
maxOffset)); pos++) {
+            upperCount++;
+        }
+        return Math.max(lowerCount, upperCount);
     }
     
     public int getBoundSlotCount() {

http://git-wip-us.apache.org/repos/asf/phoenix/blob/1dcac346/phoenix-core/src/main/java/org/apache/phoenix/optimize/QueryOptimizer.java
----------------------------------------------------------------------
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/optimize/QueryOptimizer.java 
b/phoenix-core/src/main/java/org/apache/phoenix/optimize/QueryOptimizer.java
index bd9c811..d77b14b 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/optimize/QueryOptimizer.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/optimize/QueryOptimizer.java
@@ -309,13 +309,16 @@ public class QueryOptimizer {
     /**
      * Order the plans among all the possible ones from best to worst.
      * Since we don't keep stats yet, we use the following simple algorithm:
-     * 1) If the query is a point lookup (i.e. we have a set of exact row 
keys), choose among those.
+     * 1) If the query is a point lookup (i.e. we have a set of exact row 
keys), choose that one immediately.
      * 2) If the query has an ORDER BY and a LIMIT, choose the plan that has 
all the ORDER BY expression
      * in the same order as the row key columns.
      * 3) If there are more than one plan that meets (1&2), choose the plan 
with:
-     *    a) the most row key columns that may be used to form the start/stop 
scan key.
+     *    a) the most row key columns that may be used to form the start/stop 
scan key (i.e. bound slots).
      *    b) the plan that preserves ordering for a group by.
-     *    c) the data table plan
+     *    c) the non local index table plan
+     * TODO: We should make more of a cost based choice: The largest number of 
bound slots does not necessarily
+     * correspond to the least bytes scanned. We could consider the slots 
bound for upper and lower ranges 
+     * separately, or we could calculate the bytes scanned between the start 
and stop row of each table.
      * @param plans the list of candidate plans
      * @return list of plans ordered from best to worst.
      */
@@ -380,11 +383,13 @@ public class QueryOptimizer {
             public int compare(QueryPlan plan1, QueryPlan plan2) {
                 PTable table1 = plan1.getTableRef().getTable();
                 PTable table2 = plan2.getTableRef().getTable();
+                int boundCount1 = 
plan1.getContext().getScanRanges().getBoundPkColumnCount();
+                int boundCount2 = 
plan2.getContext().getScanRanges().getBoundPkColumnCount();
                 // For shared indexes (i.e. indexes on views and local 
indexes),
                 // a) add back any view constants as these won't be in the 
index, and
                 // b) ignore the viewIndexId which will be part of the row key 
columns.
-                int c = 
(plan2.getContext().getScanRanges().getBoundPkColumnCount() + 
(table2.getViewIndexId() == null ? 0 : (boundRanges - 1))) -
-                        
(plan1.getContext().getScanRanges().getBoundPkColumnCount() + 
(table1.getViewIndexId() == null ? 0 : (boundRanges - 1)));
+                int c = (boundCount2 + (table2.getViewIndexId() == null ? 0 : 
(boundRanges - 1))) -
+                        (boundCount1 + (table1.getViewIndexId() == null ? 0 : 
(boundRanges - 1)));
                 if (c != 0) return c;
                 if (plan1.getGroupBy() != null && plan2.getGroupBy() != null) {
                     if (plan1.getGroupBy().isOrderPreserving() != 
plan2.getGroupBy().isOrderPreserving()) {

http://git-wip-us.apache.org/repos/asf/phoenix/blob/1dcac346/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryOptimizerTest.java
----------------------------------------------------------------------
diff --git 
a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryOptimizerTest.java 
b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryOptimizerTest.java
index e81d68a..25280fa 100644
--- 
a/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryOptimizerTest.java
+++ 
b/phoenix-core/src/test/java/org/apache/phoenix/compile/QueryOptimizerTest.java
@@ -41,8 +41,10 @@ import org.apache.phoenix.query.QueryConstants;
 import org.apache.phoenix.schema.PColumn;
 import org.apache.phoenix.schema.PTableType;
 import org.apache.phoenix.util.PhoenixRuntime;
+import org.apache.phoenix.util.PropertiesUtil;
 import org.apache.phoenix.util.QueryUtil;
 import org.apache.phoenix.util.SchemaUtil;
+import org.apache.phoenix.util.TestUtil;
 import org.junit.Test;
 
 import com.google.common.base.Joiner;
@@ -647,6 +649,54 @@ public class QueryOptimizerTest extends 
BaseConnectionlessQueryTest {
         assertEquals("T", 
plan.getTableRef().getTable().getTableName().getString());
     }
 
+    @Test
+    public void testViewUsedWithQueryMoreSalted() throws Exception {
+        testViewUsedWithQueryMore(3);
+    }
+    
+    @Test
+    public void testViewUsedWithQueryMoreUnsalted() throws Exception {
+        testViewUsedWithQueryMore(null);
+    }
+    
+    private void testViewUsedWithQueryMore(Integer saltBuckets) throws 
Exception {
+        Connection conn = DriverManager.getConnection(getUrl());
+        int offset = saltBuckets == null ? 0 : 1;
+        conn.createStatement().execute("CREATE TABLE MY_TABLES.MY_TABLE "
+                + "(ORGANIZATION_ID CHAR(15) NOT NULL, "
+                + "PKCOL1 CHAR(15) NOT NULL,"
+                + "PKCOL2 CHAR(15) NOT NULL,"
+                + "PKCOL3 CHAR(15) NOT NULL,"
+                + "PKCOL4 CHAR(15) NOT NULL,COL1 "
+                + "CHAR(15),"
+                + "COL2 CHAR(15)"
+                + "CONSTRAINT PK PRIMARY KEY 
(ORGANIZATION_ID,PKCOL1,PKCOL2,PKCOL3,PKCOL4)) MULTI_TENANT=true" + 
(saltBuckets == null ? "" : (",SALT_BUCKETS=" + saltBuckets)));
+        conn.createStatement().execute("CREATE INDEX MY_TABLE_INDEX \n" + 
+                "ON MY_TABLES.MY_TABLE (PKCOL1, PKCOL3, PKCOL2, PKCOL4)\n" + 
+                "INCLUDE (COL1, COL2)");
+        Properties props = PropertiesUtil.deepCopy(TestUtil.TEST_PROPERTIES);
+        props.setProperty(PhoenixRuntime.TENANT_ID_ATTRIB, "000000000000000");
+        Connection tsconn = DriverManager.getConnection(getUrl(), props);
+        tsconn.createStatement().execute("CREATE VIEW MY_TABLE_MT_VIEW AS 
SELECT * FROM MY_TABLES.MY_TABLE");
+        PhoenixStatement stmt = 
tsconn.createStatement().unwrap(PhoenixStatement.class);
+        QueryPlan plan = stmt.optimizeQuery("select * from my_table_mt_view 
where (pkcol1, pkcol2, pkcol3, pkcol4) > ('0', '0', '0', '0')");
+        assertEquals("MY_TABLE_MT_VIEW", 
plan.getTableRef().getTable().getTableName().getString());
+        
+        plan = stmt.compileQuery("select * from my_table_mt_view where 
(pkcol1, pkcol2) > ('0', '0') and pkcol3 = '000000000000000' and pkcol4 = 
'000000000000000'");
+        assertEquals(3 + offset, 
plan.getContext().getScanRanges().getBoundPkColumnCount());
+        plan = stmt.compileQuery("select * from my_table_mt_view where 
(pkcol3, pkcol4) > ('0', '0') and pkcol1 = '000000000000000'");
+        assertEquals(2 + offset, 
plan.getContext().getScanRanges().getBoundPkColumnCount());
+        plan = stmt.compileQuery("select * from my_table_mt_view where 
(pkcol1, pkcol2, pkcol3) < ('0', '0', '0')");
+        assertEquals(4 + offset, 
plan.getContext().getScanRanges().getBoundPkColumnCount());
+        plan = stmt.compileQuery("select * from my_table_mt_view where 
(pkcol1, pkcol2, pkcol3) < ('9', '9', '9') and (pkcol1, pkcol2) > ('0', '0')");
+        assertEquals(4 + offset, 
plan.getContext().getScanRanges().getBoundPkColumnCount());
+        plan = stmt.compileQuery("select * from my_table_mt_view where pkcol1 
= 'a' and pkcol2 = 'b' and pkcol3 = 'c' and (pkcol1, pkcol2) < ('z', 'z')");
+        assertEquals(4 + offset, 
plan.getContext().getScanRanges().getBoundPkColumnCount());
+        // TODO: in theory pkcol2 and pkcol3 could be bound, but we don't have 
the logic for that yet
+        plan = stmt.compileQuery("select * from my_table_mt_view where 
(pkcol2, pkcol3) > ('0', '0') and pkcol1 = '000000000000000'");
+        assertEquals(2 + offset, 
plan.getContext().getScanRanges().getBoundPkColumnCount());
+    }
+
     private void assertPlanDetails(PreparedStatement stmt, String 
expectedPkCols, String expectedPkColsDataTypes, boolean expectedHasOrderBy, int 
expectedLimit) throws SQLException {
         Connection conn = stmt.getConnection();
         QueryPlan plan = PhoenixRuntime.getOptimizedQueryPlan(stmt);

[09/20] phoenix git commit: PHOENIX-3439 Query using an RVC based on the base table PK is incorrectly using an index and doing a full scan instead of a point query

Reply via email to