sankarh commented on a change in pull request #1109: URL: https://github.com/apache/hive/pull/1109#discussion_r452771399
########## File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java ########## @@ -514,6 +629,131 @@ public boolean containsPartition(List<String> partVals) { return containsPart; } + public void removeConstraint(String name) { + try { + tableLock.writeLock().lock(); + Object constraint = null; + MemberName mn = null; + Class constraintClass = null; + name = name.toLowerCase(); + if (this.primaryKeyCache.containsKey(name)) { + constraint = this.primaryKeyCache.remove(name); + mn = MemberName.PRIMARY_KEY_CACHE; + constraintClass = SQLPrimaryKey.class; + } else if (this.foreignKeyCache.containsKey(name)) { + constraint = this.foreignKeyCache.remove(name); + mn = MemberName.FOREIGN_KEY_CACHE; + constraintClass = SQLForeignKey.class; + } else if (this.notNullConstraintCache.containsKey(name)) { + constraint = this.notNullConstraintCache.remove(name); + mn = MemberName.NOTNULL_CONSTRAINT_CACHE; + constraintClass = SQLNotNullConstraint.class; + } else if (this.uniqueConstraintCache.containsKey(name)) { + constraint = this.uniqueConstraintCache.remove(name); + mn = MemberName.UNIQUE_CONSTRAINT_CACHE; + constraintClass = SQLUniqueConstraint.class; + } + + if(constraint == null) { + LOG.debug("Constraint: " + name + " does not exist in cache."); + return; + } + setMemberCacheUpdated(mn, true); + int size = getObjectSize(constraintClass, constraint); + updateMemberSize(mn, -1 * size, SizeMode.Delta); + } finally { + tableLock.writeLock().unlock(); + } + } + + public void refreshPrimaryKeys(List<SQLPrimaryKey> keys) { + Map<String, SQLPrimaryKey> newKeys = new ConcurrentHashMap<>(); + try { + tableLock.writeLock().lock(); + int size = 0; + for (SQLPrimaryKey key : keys) { + if (compareAndSetMemberCacheUpdated(MemberName.PRIMARY_KEY_CACHE, true, false)) { + LOG.debug("Skipping primary key cache update for table: " + getTable().getTableName() + + "; the primary keys we have is dirty."); + return; + } + 
newKeys.put(key.getPk_name().toLowerCase(), key); + size += getObjectSize(SQLPrimaryKey.class, key); + } + primaryKeyCache = newKeys; + updateMemberSize(MemberName.PRIMARY_KEY_CACHE, size, SizeMode.Snapshot); + LOG.debug("Primary keys refresh in cache was successful."); Review comment: We should add the catalog, db, and table names in the log message; otherwise it is of no use. The same applies to the other methods too. ########## File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java ########## @@ -2490,26 +2616,99 @@ long getPartsFound() { @Override public List<SQLPrimaryKey> getPrimaryKeys(String catName, String dbName, String tblName) throws MetaException { - // TODO constraintCache - return rawStore.getPrimaryKeys(catName, dbName, tblName); + catName = normalizeIdentifier(catName); + dbName = StringUtils.normalizeIdentifier(dbName); + tblName = StringUtils.normalizeIdentifier(tblName); + if (!shouldCacheTable(catName, dbName, tblName) || (canUseEvents && rawStore.isActiveTransaction())) { + return rawStore.getPrimaryKeys(catName, dbName, tblName); + } + + Table tbl = sharedCache.getTableFromCache(catName, dbName, tblName); + if (tbl == null) { + // The table containing the primary keys is not yet loaded in cache + return rawStore.getPrimaryKeys(catName, dbName, tblName); + } + List<SQLPrimaryKey> keys = sharedCache.listCachedPrimaryKeys(catName, dbName, tblName); + if (keys == null || keys.isEmpty()) { Review comment: Can we have a flag in TableWrapper in the cache to tell whether it was set or not? This can be a follow-up JIRA. 
########## File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java ########## @@ -470,6 +484,107 @@ boolean cachePartitions(Iterable<Partition> parts, SharedCache sharedCache, bool } } + boolean cachePrimaryKeys(List<SQLPrimaryKey> primaryKeys, boolean fromPrewarm) { + return cacheConstraints(primaryKeys, fromPrewarm, MemberName.PRIMARY_KEY_CACHE); + } + + boolean cacheForeignKeys(List<SQLForeignKey> foreignKeys, boolean fromPrewarm) { + return cacheConstraints(foreignKeys, fromPrewarm, MemberName.FOREIGN_KEY_CACHE); + } + + boolean cacheUniqueConstraints(List<SQLUniqueConstraint> uniqueConstraints, boolean fromPrewarm) { + return cacheConstraints(uniqueConstraints, fromPrewarm, MemberName.UNIQUE_CONSTRAINT_CACHE); + } + + boolean cacheNotNullConstraints(List<SQLNotNullConstraint> notNullConstraints, boolean fromPrewarm) { + return cacheConstraints(notNullConstraints, fromPrewarm, MemberName.NOTNULL_CONSTRAINT_CACHE); + } + + // Common method to cache constraints + private boolean cacheConstraints(List constraintsList, + boolean fromPrewarm, + MemberName mn) { + if (constraintsList == null || constraintsList.isEmpty()) { + return true; + } + try { + tableLock.writeLock().lock(); + final int[] size = {0}; Review comment: Why do we use an int array if we just want to store one value? And why is it final? 
########## File path: standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStore.java ########## @@ -1556,6 +1543,289 @@ public Object call() { cachedStore.shutdown(); } + @Test + public void testPrimaryKeys() { + Configuration conf = MetastoreConf.newMetastoreConf(); + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST, true); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY, "-1Kb"); + MetaStoreTestUtils.setConfForStandloneMode(conf); + CachedStore cachedStore = new CachedStore(); + CachedStore.clearSharedCache(); + cachedStore.setConfForTest(conf); + SharedCache sharedCache = CachedStore.getSharedCache(); + + Database db = createDatabaseObject("db", "testUser"); + Table tbl = createUnpartitionedTableObject(db); + + sharedCache.addTableToCache(DEFAULT_CATALOG_NAME, "db", tbl.getTableName(), tbl); + + Assert.assertEquals(sharedCache.getCachedTableCount(), 1); + + List<SQLPrimaryKey> origKeys = createPrimaryKeys(tbl); + sharedCache.addPrimaryKeysToCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), origKeys); + + // List operation + List<SQLPrimaryKey> cachedKeys = sharedCache.listCachedPrimaryKeys( + DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + + Assert.assertEquals(cachedKeys.size(), 1); + Assert.assertEquals(cachedKeys.get(0).getPk_name(), "pk1"); + Assert.assertEquals(cachedKeys.get(0).getTable_db(), "db"); + Assert.assertEquals(cachedKeys.get(0).getTable_name(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.get(0).getColumn_name(), "col1"); + Assert.assertEquals(cachedKeys.get(0).getCatName(), DEFAULT_CATALOG_NAME); + + SQLPrimaryKey modifiedKey = origKeys.get(0).deepCopy(); + modifiedKey.setColumn_name("col2"); + modifiedKey.setPk_name("pk_modified"); + + sharedCache.addPrimaryKeysToCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), + Arrays.asList(modifiedKey)); + cachedKeys = 
sharedCache.listCachedPrimaryKeys(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + + Assert.assertEquals(cachedKeys.size(), 2); + sharedCache.removeConstraintFromCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), "pk1"); + cachedKeys = sharedCache.listCachedPrimaryKeys(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + + sharedCache.refreshPrimaryKeysInCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), + Arrays.asList(modifiedKey)); + Assert.assertEquals(cachedKeys.size(), 1); + Assert.assertEquals(cachedKeys.get(0).getPk_name(), "pk_modified"); + Assert.assertEquals(cachedKeys.get(0).getTable_db(), "db"); + Assert.assertEquals(cachedKeys.get(0).getTable_name(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.get(0).getColumn_name(), "col2"); + Assert.assertEquals(cachedKeys.get(0).getCatName(), DEFAULT_CATALOG_NAME); + + // remove constraints + sharedCache.removeConstraintFromCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), "pk_modified"); + + cachedKeys = sharedCache.listCachedPrimaryKeys(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.size(), 0); + + cachedStore.shutdown(); + } + + @Test + public void testNotNullConstraint() { + Configuration conf = MetastoreConf.newMetastoreConf(); + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST, true); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY, "-1Kb"); + MetaStoreTestUtils.setConfForStandloneMode(conf); + CachedStore cachedStore = new CachedStore(); + CachedStore.clearSharedCache(); + cachedStore.setConfForTest(conf); + SharedCache sharedCache = CachedStore.getSharedCache(); + + Database db = createDatabaseObject("db", "testUser"); + Table tbl = createUnpartitionedTableObject(db); + + sharedCache.addTableToCache(DEFAULT_CATALOG_NAME, "db", tbl.getTableName(), tbl); + + Assert.assertEquals(sharedCache.getCachedTableCount(), 1); + + 
List<SQLNotNullConstraint> origKeys = createNotNullConstraint(tbl); + sharedCache.addNotNullConstraintsToCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), origKeys); + + // List operation + List<SQLNotNullConstraint> cachedKeys = sharedCache.listCachedNotNullConstraints( + DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + + Assert.assertEquals(cachedKeys.size(), 1); + Assert.assertEquals(cachedKeys.get(0).getNn_name(), "nn1"); + Assert.assertEquals(cachedKeys.get(0).getTable_db(), "db"); + Assert.assertEquals(cachedKeys.get(0).getTable_name(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.get(0).getColumn_name(), "col1"); + Assert.assertEquals(cachedKeys.get(0).getCatName(), DEFAULT_CATALOG_NAME); + + SQLNotNullConstraint modifiedKey = origKeys.get(0).deepCopy(); + modifiedKey.setColumn_name("col2"); + modifiedKey.setNn_name("nn_modified"); + + sharedCache.addNotNullConstraintsToCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), + Arrays.asList(modifiedKey)); + cachedKeys = sharedCache.listCachedNotNullConstraints(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.size(), 2); + + sharedCache.removeConstraintFromCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), "nn1"); + cachedKeys = sharedCache.listCachedNotNullConstraints(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.size(), 1); + Assert.assertEquals(cachedKeys.get(0).getNn_name(), "nn_modified"); + Assert.assertEquals(cachedKeys.get(0).getTable_db(), "db"); + Assert.assertEquals(cachedKeys.get(0).getTable_name(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.get(0).getColumn_name(), "col2"); + Assert.assertEquals(cachedKeys.get(0).getCatName(), DEFAULT_CATALOG_NAME); + + sharedCache.removeConstraintFromCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), "nn_modified"); + + cachedKeys = 
sharedCache.listCachedNotNullConstraints(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.size(), 0); + + cachedStore.shutdown(); + } + + @Test + public void testUniqueConstraint() { + Configuration conf = MetastoreConf.newMetastoreConf(); + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST, true); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY, "-1Kb"); + MetaStoreTestUtils.setConfForStandloneMode(conf); + CachedStore cachedStore = new CachedStore(); + CachedStore.clearSharedCache(); + cachedStore.setConfForTest(conf); + SharedCache sharedCache = CachedStore.getSharedCache(); + + Database db = createDatabaseObject("db", "testUser"); + Table tbl = createUnpartitionedTableObject(db); + + sharedCache.addTableToCache(DEFAULT_CATALOG_NAME, "db", tbl.getTableName(), tbl); + + Assert.assertEquals(sharedCache.getCachedTableCount(), 1); + + List<SQLUniqueConstraint> origKeys = createUniqueConstraint(tbl); + sharedCache.addUniqueConstraintsToCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), origKeys); + + // List operation + List<SQLUniqueConstraint> cachedKeys = sharedCache.listCachedUniqueConstraint( + DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + + Assert.assertEquals(cachedKeys.size(), 1); + Assert.assertEquals(cachedKeys.get(0).getUk_name(), "uk1"); + Assert.assertEquals(cachedKeys.get(0).getTable_db(), "db"); + Assert.assertEquals(cachedKeys.get(0).getTable_name(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.get(0).getColumn_name(), "col1"); + Assert.assertEquals(cachedKeys.get(0).getCatName(), DEFAULT_CATALOG_NAME); + + SQLUniqueConstraint modifiedKey = origKeys.get(0).deepCopy(); + modifiedKey.setColumn_name("col2"); + modifiedKey.setUk_name("uk_modified"); + + sharedCache.addUniqueConstraintsToCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), + Arrays.asList(modifiedKey)); + cachedKeys = 
sharedCache.listCachedUniqueConstraint(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + + Assert.assertEquals(cachedKeys.size(), 2); + + sharedCache.removeConstraintFromCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), "uk1"); + cachedKeys = sharedCache.listCachedUniqueConstraint(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.size(), 1); + Assert.assertEquals(cachedKeys.get(0).getUk_name(), "uk_modified"); + Assert.assertEquals(cachedKeys.get(0).getTable_db(), "db"); + Assert.assertEquals(cachedKeys.get(0).getTable_name(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.get(0).getColumn_name(), "col2"); + Assert.assertEquals(cachedKeys.get(0).getCatName(), DEFAULT_CATALOG_NAME); + + sharedCache.removeConstraintFromCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), "uk_modified"); + + cachedKeys = sharedCache.listCachedUniqueConstraint(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName()); + Assert.assertEquals(cachedKeys.size(), 0); + + cachedStore.shutdown(); + } + + @Test + public void testForeignKeys() { + Configuration conf = MetastoreConf.newMetastoreConf(); + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST, true); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY, "-1Kb"); + MetaStoreTestUtils.setConfForStandloneMode(conf); + CachedStore cachedStore = new CachedStore(); + CachedStore.clearSharedCache(); + cachedStore.setConfForTest(conf); + SharedCache sharedCache = CachedStore.getSharedCache(); + + Database db = createDatabaseObject("db", "testUser"); + Table tbl = createUnpartitionedTableObject(db); + + sharedCache.addTableToCache(DEFAULT_CATALOG_NAME, "db", tbl.getTableName(), tbl); + + Assert.assertEquals(sharedCache.getCachedTableCount(), 1); + + List<SQLForeignKey> origKeys = createForeignKeys(tbl, tbl); + sharedCache.addForeignKeysToCache(DEFAULT_CATALOG_NAME, tbl.getDbName(), tbl.getTableName(), 
origKeys); + + // List operation + List<SQLForeignKey> cachedKeys = sharedCache.listCachedForeignKeys( Review comment: Can we add foreign keys for multiple parent db/tbl and get it from cache to verify if return correct fk? ########## File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java ########## @@ -514,6 +629,131 @@ public boolean containsPartition(List<String> partVals) { return containsPart; } + public void removeConstraint(String name) { + try { + tableLock.writeLock().lock(); + Object constraint = null; + MemberName mn = null; + Class constraintClass = null; + name = name.toLowerCase(); + if (this.primaryKeyCache.containsKey(name)) { + constraint = this.primaryKeyCache.remove(name); + mn = MemberName.PRIMARY_KEY_CACHE; + constraintClass = SQLPrimaryKey.class; + } else if (this.foreignKeyCache.containsKey(name)) { + constraint = this.foreignKeyCache.remove(name); + mn = MemberName.FOREIGN_KEY_CACHE; + constraintClass = SQLForeignKey.class; + } else if (this.notNullConstraintCache.containsKey(name)) { + constraint = this.notNullConstraintCache.remove(name); + mn = MemberName.NOTNULL_CONSTRAINT_CACHE; + constraintClass = SQLNotNullConstraint.class; + } else if (this.uniqueConstraintCache.containsKey(name)) { + constraint = this.uniqueConstraintCache.remove(name); + mn = MemberName.UNIQUE_CONSTRAINT_CACHE; + constraintClass = SQLUniqueConstraint.class; + } + + if(constraint == null) { + LOG.debug("Constraint: " + name + " does not exist in cache."); + return; + } + setMemberCacheUpdated(mn, true); + int size = getObjectSize(constraintClass, constraint); + updateMemberSize(mn, -1 * size, SizeMode.Delta); + } finally { + tableLock.writeLock().unlock(); + } + } + + public void refreshPrimaryKeys(List<SQLPrimaryKey> keys) { + Map<String, SQLPrimaryKey> newKeys = new ConcurrentHashMap<>(); + try { + tableLock.writeLock().lock(); + int size = 0; + for (SQLPrimaryKey key : keys) { + if 
(compareAndSetMemberCacheUpdated(MemberName.PRIMARY_KEY_CACHE, true, false)) { + LOG.debug("Skipping primary key cache update for table: " + getTable().getTableName() Review comment: The debug log message is confusing. It says the primary keys are dirty and that this is why the update is being skipped. I think it should be "Skipping the primary keys update for table: <tbl> as it was already refreshed." The same applies to the other methods too. ########## File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java ########## @@ -261,44 +283,57 @@ public int getObjectSize(Class<?> clazz, Object obj) { private Map<String, String> parameters; private byte[] sdHash; private int otherSize; - private int tableColStatsCacheSize; - private int partitionCacheSize; - private int partitionColStatsCacheSize; - private int aggrColStatsCacheSize; + + // Arrays to hold the size/updated bit of cached objects. + // These arrays are to be referenced using MemberName enum only. + private int[] memberObjectsSize = new int[MemberName.values().length]; + private AtomicBoolean[] memberCacheUpdated = new AtomicBoolean[MemberName.values().length]; private ReentrantReadWriteLock tableLock = new ReentrantReadWriteLock(true); // For caching column stats for an unpartitioned table // Key is column name and the value is the col stat object private Map<String, ColumnStatisticsObj> tableColStatsCache = new ConcurrentHashMap<String, ColumnStatisticsObj>(); - private AtomicBoolean isTableColStatsCacheDirty = new AtomicBoolean(false); // For caching partition objects // Ket is partition values and the value is a wrapper around the partition object private Map<String, PartitionWrapper> partitionCache = new ConcurrentHashMap<String, PartitionWrapper>(); - private AtomicBoolean isPartitionCacheDirty = new AtomicBoolean(false); // For caching column stats for a partitioned table // Key is aggregate of partition values, column name and the value is the col stat object private Map<String, 
ColumnStatisticsObj> partitionColStatsCache = new ConcurrentHashMap<String, ColumnStatisticsObj>(); - private AtomicBoolean isPartitionColStatsCacheDirty = new AtomicBoolean(false); // For caching aggregate column stats for all and all minus default partition // Key is column name and the value is a list of 2 col stat objects // (all partitions and all but default) private Map<String, List<ColumnStatisticsObj>> aggrColStatsCache = new ConcurrentHashMap<String, List<ColumnStatisticsObj>>(); - private AtomicBoolean isAggrPartitionColStatsCacheDirty = new AtomicBoolean(false); + + private Map<String, SQLPrimaryKey> primaryKeyCache = new ConcurrentHashMap<>(); + + private Map<String, SQLForeignKey> foreignKeyCache = new ConcurrentHashMap<>(); + + private Map<String, SQLNotNullConstraint> notNullConstraintCache = new ConcurrentHashMap<>(); + + private Map<String, SQLUniqueConstraint> uniqueConstraintCache = new ConcurrentHashMap<>(); TableWrapper(Table t, byte[] sdHash, String location, Map<String, String> parameters) { this.t = t; this.sdHash = sdHash; this.location = location; this.parameters = parameters; - this.tableColStatsCacheSize = 0; - this.partitionCacheSize = 0; - this.partitionColStatsCacheSize = 0; - this.aggrColStatsCacheSize = 0; + for(MemberName mn : MemberName.values()) { + this.memberObjectsSize[mn.getValue()] = 0; Review comment: Can't we just use mn instead of mn.getValue()? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org