Repository: phoenix Updated Branches: refs/heads/4.12-HBase-1.2 e90daafda -> 4765a8bf8
PHOENIX-4289 UPDATE STATISTICS command does not collect stats for local indexes Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/4765a8bf Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/4765a8bf Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/4765a8bf Branch: refs/heads/4.12-HBase-1.2 Commit: 4765a8bf8915787b15de6c03f6f16f7732efa9ce Parents: e90daaf Author: Samarth Jain <[email protected]> Authored: Sun Oct 29 23:08:39 2017 -0700 Committer: Samarth Jain <[email protected]> Committed: Sun Oct 29 23:08:39 2017 -0700 ---------------------------------------------------------------------- .../end2end/ExplainPlanWithStatsEnabledIT.java | 12 ++ .../phoenix/end2end/index/BaseLocalIndexIT.java | 1 + .../phoenix/end2end/index/LocalIndexIT.java | 46 ++++++- .../parse/UpdateStatisticsStatement.java | 4 + .../apache/phoenix/schema/MetaDataClient.java | 125 +++++++++++-------- 5 files changed, 138 insertions(+), 50 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/phoenix/blob/4765a8bf/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java index cd4555c..b4f9871 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java @@ -306,6 +306,18 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT { final Long estimatedRows; final Long estimateInfoTs; + public Long getEstimatedBytes() { + return estimatedBytes; + } + + public Long getEstimatedRows() { + return estimatedRows; + } + + public Long getEstimateInfoTs() { + return estimateInfoTs; + } + Estimate(Long rows, Long bytes, Long ts) { this.estimatedBytes = bytes; this.estimatedRows = rows; http://git-wip-us.apache.org/repos/asf/phoenix/blob/4765a8bf/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java index 30baec4..4f58ead 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java @@ -59,6 +59,7 @@ public abstract class BaseLocalIndexIT extends BaseUniqueNamesOwnClusterIT { serverProps.put(QueryServices.IS_NAMESPACE_MAPPING_ENABLED, "true"); Map<String, String> clientProps = Maps.newHashMapWithExpectedSize(1); clientProps.put(QueryServices.IS_NAMESPACE_MAPPING_ENABLED, "true"); + clientProps.put(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB, "120000"); setUpTestDriver(new ReadOnlyProps(serverProps.entrySet().iterator()), new ReadOnlyProps(clientProps.entrySet().iterator())); } http://git-wip-us.apache.org/repos/asf/phoenix/blob/4765a8bf/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java index 48221ab..0dcf1d5 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java @@ -17,6 +17,7 @@ */ package org.apache.phoenix.end2end.index; +import static org.apache.phoenix.end2end.ExplainPlanWithStatsEnabledIT.getByteRowEstimates; import static org.apache.phoenix.util.MetaDataUtil.getViewIndexSequenceName; import static org.apache.phoenix.util.MetaDataUtil.getViewIndexSequenceSchemaName; import static org.junit.Assert.assertArrayEquals; @@ -55,8 +56,10 @@ import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.Pair; import org.apache.phoenix.compile.QueryPlan; import org.apache.phoenix.coprocessor.BaseScannerRegionObserver; +import org.apache.phoenix.end2end.ExplainPlanWithStatsEnabledIT.Estimate; import org.apache.phoenix.hbase.index.IndexRegionSplitPolicy; import org.apache.phoenix.jdbc.PhoenixConnection; +import org.apache.phoenix.jdbc.PhoenixResultSet; import org.apache.phoenix.jdbc.PhoenixStatement; import org.apache.phoenix.query.QueryConstants; import org.apache.phoenix.schema.PNameFactory; @@ -67,9 +70,10 @@ import org.apache.phoenix.schema.TableNotFoundException; import org.apache.phoenix.util.QueryUtil; import org.apache.phoenix.util.SchemaUtil; import org.apache.phoenix.util.TestUtil; -import org.junit.Ignore; import org.junit.Test; +import com.google.common.collect.Lists; + public class LocalIndexIT extends BaseLocalIndexIT { public LocalIndexIT(boolean isNamespaceMapped) { super(isNamespaceMapped); @@ -714,4 +718,44 @@ public class LocalIndexIT extends BaseLocalIndexIT { } } + @Test // See https://issues.apache.org/jira/browse/PHOENIX-4289 + public void testEstimatesWithLocalIndexes() throws Exception { + String tableName = generateUniqueName(); + String indexName = "IDX_" + generateUniqueName(); + try (Connection conn = DriverManager.getConnection(getUrl())) { + int guidePostWidth = 20; + conn.createStatement() + .execute("CREATE TABLE " + tableName + + " (k INTEGER PRIMARY KEY, a bigint, b bigint)" + + " GUIDE_POSTS_WIDTH=" + guidePostWidth); + conn.createStatement().execute("upsert into " + tableName + " values (100,1,3)"); + conn.createStatement().execute("upsert into " + tableName + " values (101,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (102,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (103,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (104,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (105,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (106,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (107,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (108,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (109,2,4)"); + conn.commit(); + conn.createStatement().execute( + "CREATE LOCAL INDEX " + indexName + " ON " + tableName + " (a) INCLUDE (b) "); + String ddl = "ALTER TABLE " + tableName + " SET USE_STATS_FOR_PARALLELIZATION = false"; + conn.createStatement().execute(ddl); + conn.createStatement().execute("UPDATE STATISTICS " + tableName + ""); + } + List<Object> binds = Lists.newArrayList(); + try (Connection conn = DriverManager.getConnection(getUrl())) { + String sql = + "SELECT COUNT(*) " + " FROM " + tableName; + ResultSet rs = conn.createStatement().executeQuery(sql); + assertTrue("Index " + indexName + " should have been used", + rs.unwrap(PhoenixResultSet.class).getStatement().getQueryPlan().getTableRef() + .getTable().getName().getString().equals(indexName)); + Estimate info = getByteRowEstimates(conn, sql, binds); + assertEquals((Long) 10l, info.getEstimatedRows()); + assertTrue(info.getEstimateInfoTs() > 0); + } + } } http://git-wip-us.apache.org/repos/asf/phoenix/blob/4765a8bf/phoenix-core/src/main/java/org/apache/phoenix/parse/UpdateStatisticsStatement.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/parse/UpdateStatisticsStatement.java b/phoenix-core/src/main/java/org/apache/phoenix/parse/UpdateStatisticsStatement.java index 6f7b736..0331295 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/parse/UpdateStatisticsStatement.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/parse/UpdateStatisticsStatement.java @@ -46,6 +46,10 @@ public class UpdateStatisticsStatement extends SingleTableStatement { return scope == INDEX || scope == ALL; } + public boolean updateIndexOnly() { + return scope == INDEX; + } + public boolean updateAll() { return scope == ALL; } http://git-wip-us.apache.org/repos/asf/phoenix/blob/4765a8bf/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java index 0f6bab2..67fc2ed 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java @@ -1076,6 +1076,23 @@ public class MetaDataClient { } } + private boolean hasItBeenLongEnoughSinceLastUpdateStats(PName physicalName) throws SQLException { + ReadOnlyProps props = connection.getQueryServices().getProps(); + final long msMinBetweenUpdates = props + .getLong(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB, + props.getLong(QueryServices.STATS_UPDATE_FREQ_MS_ATTRIB, + QueryServicesOptions.DEFAULT_STATS_UPDATE_FREQ_MS) / 2); + String query = "SELECT CURRENT_DATE()," + LAST_STATS_UPDATE_TIME + " FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME + + " WHERE " + PHYSICAL_NAME + "='" + physicalName.getString() + "' AND " + COLUMN_FAMILY + + " IS NULL AND " + LAST_STATS_UPDATE_TIME + " IS NOT NULL"; + ResultSet rs = connection.createStatement().executeQuery(query); + long msSinceLastUpdate = Long.MAX_VALUE; + if (rs.next()) { + msSinceLastUpdate = rs.getLong(1) - rs.getLong(2); + } + return msSinceLastUpdate >= msMinBetweenUpdates; + } + public MutationState updateStatistics(UpdateStatisticsStatement updateStatisticsStmt) throws SQLException { // Don't mistakenly commit pending rows @@ -1085,7 +1102,9 @@ public class MetaDataClient { PTable table = resolver.getTables().get(0).getTable(); long rowCount = 0; if (updateStatisticsStmt.updateColumns()) { - rowCount += updateStatisticsInternal(table.getPhysicalName(), table, updateStatisticsStmt.getProps()); + if (hasItBeenLongEnoughSinceLastUpdateStats(table.getPhysicalName())) { + rowCount += updateStatisticsInternal(table.getPhysicalName(), table, updateStatisticsStmt.getProps()); + } } if (updateStatisticsStmt.updateIndex()) { // TODO: If our table is a VIEW with multiple indexes or a TABLE with local indexes, @@ -1093,7 +1112,10 @@ public class MetaDataClient { // across all indexes in that case so that we don't re-calculate the same stats // multiple times. for (PTable index : table.getIndexes()) { - rowCount += updateStatisticsInternal(index.getPhysicalName(), index, updateStatisticsStmt.getProps()); + // Local indexes are handled below + if (index.getIndexType() != IndexType.LOCAL && hasItBeenLongEnoughSinceLastUpdateStats(index.getPhysicalName())) { + rowCount += updateStatisticsInternal(index.getPhysicalName(), index, updateStatisticsStmt.getProps()); + } } // If analyzing the indexes of a multi-tenant table or a table with view indexes // then analyze all of those indexes too. @@ -1106,12 +1128,32 @@ public class MetaDataClient { return physicalName; } }; - rowCount += updateStatisticsInternal(physicalName, indexLogicalTable, updateStatisticsStmt.getProps()); + if (hasItBeenLongEnoughSinceLastUpdateStats(physicalName)) { + rowCount += updateStatisticsInternal(physicalName, indexLogicalTable, updateStatisticsStmt.getProps()); + } } PName physicalName = table.getPhysicalName(); List<byte[]> localCFs = MetaDataUtil.getLocalIndexColumnFamilies(connection, physicalName.getBytes()); if (!localCFs.isEmpty()) { - rowCount += updateStatisticsInternal(physicalName, table, updateStatisticsStmt.getProps(), localCFs); + /* + * Because local indexes share the same physical table as the data table, when + * stats collection has been requested for both the data table and the local + * index on it, checking when update stats was last run on the physical table + * can prevent update stats from being run for the local index. Consequently and + * unfortunately, we cannot tell when was last stats updated for a local index. + * As a result, if a user requests that he/she wants to collect stats for a + * local index only, we don't have any state available to prevent it from + * running if it is being requested to run too soon. For now, in this special + * case, we are just going to have to rely on the last_stats_update_time for the + * physical table. + */ + boolean collectStatsForLocalIndexes = true; + if (updateStatisticsStmt.updateIndexOnly()) { + collectStatsForLocalIndexes = hasItBeenLongEnoughSinceLastUpdateStats(physicalName); + } + if (collectStatsForLocalIndexes) { + rowCount += updateStatisticsInternal(physicalName, table, updateStatisticsStmt.getProps(), localCFs); + } } } } @@ -1130,57 +1172,42 @@ public class MetaDataClient { private long updateStatisticsInternal(PName physicalName, PTable logicalTable, Map<String, Object> statsProps, List<byte[]> cfs) throws SQLException { ReadOnlyProps props = connection.getQueryServices().getProps(); - final long msMinBetweenUpdates = props - .getLong(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB, - props.getLong(QueryServices.STATS_UPDATE_FREQ_MS_ATTRIB, - QueryServicesOptions.DEFAULT_STATS_UPDATE_FREQ_MS) / 2); - byte[] tenantIdBytes = ByteUtil.EMPTY_BYTE_ARRAY; Long scn = connection.getSCN(); // Always invalidate the cache long clientTimeStamp = connection.getSCN() == null ? HConstants.LATEST_TIMESTAMP : scn; - String query = "SELECT CURRENT_DATE()," + LAST_STATS_UPDATE_TIME + " FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME - + " WHERE " + PHYSICAL_NAME + "='" + physicalName.getString() + "' AND " + COLUMN_FAMILY - + " IS NULL AND " + LAST_STATS_UPDATE_TIME + " IS NOT NULL"; - ResultSet rs = connection.createStatement().executeQuery(query); - long msSinceLastUpdate = Long.MAX_VALUE; - if (rs.next()) { - msSinceLastUpdate = rs.getLong(1) - rs.getLong(2); - } long rowCount = 0; - if (msSinceLastUpdate >= msMinBetweenUpdates) { - /* - * Execute a COUNT(*) through PostDDLCompiler as we need to use the logicalTable passed through, - * since it may not represent a "real" table in the case of the view indexes of a base table. - */ - PostDDLCompiler compiler = new PostDDLCompiler(connection); - //even if table is transactional, while calculating stats we scan the table non-transactionally to - //view all the data belonging to the table - PTable nonTxnLogicalTable = new DelegateTable(logicalTable) { - @Override - public boolean isTransactional() { - return false; - } - }; - TableRef tableRef = new TableRef(null, nonTxnLogicalTable, clientTimeStamp, false); - MutationPlan plan = compiler.compile(Collections.singletonList(tableRef), null, cfs, null, clientTimeStamp); - Scan scan = plan.getContext().getScan(); - scan.setCacheBlocks(false); - scan.setAttribute(ANALYZE_TABLE, TRUE_BYTES); - boolean runUpdateStatsAsync = props.getBoolean(QueryServices.RUN_UPDATE_STATS_ASYNC, DEFAULT_RUN_UPDATE_STATS_ASYNC); - scan.setAttribute(RUN_UPDATE_STATS_ASYNC_ATTRIB, runUpdateStatsAsync ? TRUE_BYTES : FALSE_BYTES); - if (statsProps != null) { - Object gp_width = statsProps.get(QueryServices.STATS_GUIDEPOST_WIDTH_BYTES_ATTRIB); - if (gp_width != null) { - scan.setAttribute(BaseScannerRegionObserver.GUIDEPOST_WIDTH_BYTES, PLong.INSTANCE.toBytes(gp_width)); - } - Object gp_per_region = statsProps.get(QueryServices.STATS_GUIDEPOST_PER_REGION_ATTRIB); - if (gp_per_region != null) { - scan.setAttribute(BaseScannerRegionObserver.GUIDEPOST_PER_REGION, PInteger.INSTANCE.toBytes(gp_per_region)); - } + /* + * Execute a COUNT(*) through PostDDLCompiler as we need to use the logicalTable passed through, + * since it may not represent a "real" table in the case of the view indexes of a base table. + */ + PostDDLCompiler compiler = new PostDDLCompiler(connection); + //even if table is transactional, while calculating stats we scan the table non-transactionally to + //view all the data belonging to the table + PTable nonTxnLogicalTable = new DelegateTable(logicalTable) { + @Override + public boolean isTransactional() { + return false; + } + }; + TableRef tableRef = new TableRef(null, nonTxnLogicalTable, clientTimeStamp, false); + MutationPlan plan = compiler.compile(Collections.singletonList(tableRef), null, cfs, null, clientTimeStamp); + Scan scan = plan.getContext().getScan(); + scan.setCacheBlocks(false); + scan.setAttribute(ANALYZE_TABLE, TRUE_BYTES); + boolean runUpdateStatsAsync = props.getBoolean(QueryServices.RUN_UPDATE_STATS_ASYNC, DEFAULT_RUN_UPDATE_STATS_ASYNC); + scan.setAttribute(RUN_UPDATE_STATS_ASYNC_ATTRIB, runUpdateStatsAsync ? TRUE_BYTES : FALSE_BYTES); + if (statsProps != null) { + Object gp_width = statsProps.get(QueryServices.STATS_GUIDEPOST_WIDTH_BYTES_ATTRIB); + if (gp_width != null) { + scan.setAttribute(BaseScannerRegionObserver.GUIDEPOST_WIDTH_BYTES, PLong.INSTANCE.toBytes(gp_width)); + } + Object gp_per_region = statsProps.get(QueryServices.STATS_GUIDEPOST_PER_REGION_ATTRIB); + if (gp_per_region != null) { + scan.setAttribute(BaseScannerRegionObserver.GUIDEPOST_PER_REGION, PInteger.INSTANCE.toBytes(gp_per_region)); } - MutationState mutationState = plan.execute(); - rowCount = mutationState.getUpdateCount(); } + MutationState mutationState = plan.execute(); + rowCount = mutationState.getUpdateCount(); /* * Update the stats table so that client will pull the new one with the updated stats.
