Repository: hive Updated Branches: refs/heads/master 1bcf40329 -> bcbd2d529
HIVE-19718: Adding partitions in bulk also fetches table for each partition (Peter Vary, reviewed by Vihang Karajgaonkar and Alexander Kolbasov) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bcbd2d52 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bcbd2d52 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bcbd2d52 Branch: refs/heads/master Commit: bcbd2d5299ddc937f1488a97faec23145b22cded Parents: 1bcf403 Author: Peter Vary <pv...@cloudera.com> Authored: Thu Jun 14 10:09:05 2018 +0200 Committer: Peter Vary <pv...@cloudera.com> Committed: Thu Jun 14 10:09:05 2018 +0200 ---------------------------------------------------------------------- .../listener/DummyRawStoreFailEvent.java | 5 +- .../hadoop/hive/metastore/HiveMetaStore.java | 11 ++- .../hadoop/hive/metastore/ObjectStore.java | 97 +++++++++++++++----- .../apache/hadoop/hive/metastore/RawStore.java | 4 +- .../hive/metastore/cache/CachedStore.java | 7 +- .../DummyRawStoreControlledCommit.java | 5 +- .../DummyRawStoreForJdoConnection.java | 3 +- 7 files changed, 96 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java ---------------------------------------------------------------------- diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java index aa66c84..8f9a03f 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java @@ -779,8 +779,9 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable { @Override public boolean doesPartitionExist(String catName, String dbName, String tableName, - List<String> partVals) throws MetaException, NoSuchObjectException { - return objectStore.doesPartitionExist(catName, dbName, tableName, partVals); + List<FieldSchema> partKeys, List<String> partVals) + throws MetaException, NoSuchObjectException { + return objectStore.doesPartitionExist(catName, dbName, tableName, partKeys, partVals); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 278c58c..9241e29 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -3304,7 +3304,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { throw new MetaException("Partition value cannot be null."); } - boolean shouldAdd = startAddPartition(ms, part, ifNotExists); + boolean shouldAdd = startAddPartition(ms, part, tbl.getPartitionKeys(), ifNotExists); if (!shouldAdd) { existingParts.add(part); LOG.info("Not adding partition {} as it already exists", part); @@ -3570,7 +3570,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { throw new MetaException("The partition values cannot be null or empty."); } - boolean shouldAdd = startAddPartition(ms, part, ifNotExists); + boolean shouldAdd = startAddPartition(ms, part, tbl.getPartitionKeys(), ifNotExists); if (!shouldAdd) { LOG.info("Not adding partition {} as it already exists", part); continue; @@ -3678,11 +3678,12 @@ public class HiveMetaStore extends ThriftHiveMetastore { } private boolean startAddPartition( - RawStore ms, Partition part, boolean ifNotExists) throws TException { + RawStore ms, Partition part, List<FieldSchema> partitionKeys, boolean ifNotExists) + throws TException { MetaStoreUtils.validatePartitionNameCharacters(part.getValues(), partitionValidationPattern); boolean doesExist = ms.doesPartitionExist(part.getCatName(), - part.getDbName(), part.getTableName(), part.getValues()); + part.getDbName(), part.getTableName(), partitionKeys, part.getValues()); if (doesExist && !ifNotExists) { throw new AlreadyExistsException("Partition already exists: " + part); } @@ -3805,7 +3806,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { if (part.getValues() == null || part.getValues().isEmpty()) { throw new MetaException("The partition values cannot be null or empty."); } - boolean shouldAdd = startAddPartition(ms, part, false); + boolean shouldAdd = startAddPartition(ms, part, tbl.getPartitionKeys(), false); assert shouldAdd; // start would throw if it already existed here boolean madeDir = createLocationForAddedPartition(tbl, part); try { http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 191c535..660b119 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -1934,7 +1934,6 @@ public class ObjectStore implements RawStore, Configurable { lowered_tbl_names.add(normalizeIdentifier(t)); } query = pm.newQuery(MTable.class); -//<<<<<<< HEAD query.setFilter("database.name == db && database.catalogName == cat && tbl_names.contains(tableName)"); query.declareParameters("java.lang.String db, java.lang.String cat, java.util.Collection tbl_names"); Collection mtables = (Collection) query.execute(db, catName, lowered_tbl_names); @@ -2331,7 +2330,7 @@ public class ObjectStore implements RawStore, Configurable { throw new MetaException("Partition does not belong to target table " + dbName + "." + tblName + ": " + part); } - MPartition mpart = convertToMPart(part, true); + MPartition mpart = convertToMPart(part, table, true); toPersist.add(mpart); int now = (int)(System.currentTimeMillis()/1000); if (tabGrants != null) { @@ -2367,11 +2366,11 @@ public class ObjectStore implements RawStore, Configurable { } private boolean isValidPartition( - Partition part, boolean ifNotExists) throws MetaException { + Partition part, List<FieldSchema> partitionKeys, boolean ifNotExists) throws MetaException { MetaStoreUtils.validatePartitionNameCharacters(part.getValues(), partitionValidationPattern); boolean doesExist = doesPartitionExist(part.getCatName(), - part.getDbName(), part.getTableName(), part.getValues()); + part.getDbName(), part.getTableName(), partitionKeys, part.getValues()); if (doesExist && !ifNotExists) { throw new MetaException("Partition already exists: " + part); } @@ -2402,11 +2401,12 @@ public class ObjectStore implements RawStore, Configurable { int now = (int)(System.currentTimeMillis()/1000); + List<FieldSchema> partitionKeys = convertToFieldSchemas(table.getPartitionKeys()); while (iterator.hasNext()) { Partition part = iterator.next(); - if (isValidPartition(part, ifNotExists)) { - MPartition mpart = convertToMPart(part, true); + if (isValidPartition(part, partitionKeys, ifNotExists)) { + MPartition mpart = convertToMPart(part, table, true); pm.makePersistent(mpart); if (tabGrants != null) { for (MTablePrivilege tab : tabGrants) { @@ -2506,26 +2506,62 @@ public class ObjectStore implements RawStore, Configurable { return part; } + /** + * Getting MPartition object. Use this method only if the partition name is not available, + * since then the table will be queried to get the partition keys. + * @param catName The catalogue + * @param dbName The database + * @param tableName The table + * @param part_vals The values defining the partition + * @return The MPartition object in the backend database + * @throws MetaException + */ private MPartition getMPartition(String catName, String dbName, String tableName, List<String> part_vals) throws MetaException { - List<MPartition> mparts = null; - MPartition ret = null; - boolean commited = false; - Query query = null; + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + tableName = normalizeIdentifier(tableName); + boolean committed = false; + MPartition result = null; try { openTransaction(); - catName = normalizeIdentifier(catName); - dbName = normalizeIdentifier(dbName); - tableName = normalizeIdentifier(tableName); MTable mtbl = getMTable(catName, dbName, tableName); if (mtbl == null) { - commited = commitTransaction(); return null; } // Change the query to use part_vals instead of the name which is // redundant TODO: callers of this often get part_vals out of name for no reason... String name = Warehouse.makePartName(convertToFieldSchemas(mtbl.getPartitionKeys()), part_vals); + result = getMPartition(catName, dbName, tableName, name); + committed = commitTransaction(); + } finally { + rollbackAndCleanup(committed, (Query)null); + } + return result; + } + + /** + * Getting MPartition object. Use this method if the partition name is available, so we do not + * query the table object again. + * @param catName The catalogue + * @param dbName The database + * @param tableName The table + * @param name The partition name + * @return The MPartition object in the backend database + * @throws MetaException + */ + private MPartition getMPartition(String catName, String dbName, String tableName, + String name) throws MetaException { + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + tableName = normalizeIdentifier(tableName); + List<MPartition> mparts = null; + MPartition ret = null; + boolean commited = false; + Query query = null; + try { + openTransaction(); query = pm.newQuery(MPartition.class, "table.tableName == t1 && table.database.name == t2 && partitionName == t3 " + @@ -2566,7 +2602,7 @@ public class ObjectStore implements RawStore, Configurable { * to the same one as the table's storage descriptor. * @param part the partition to convert * @param useTableCD whether to try to use the parent table's column descriptor. - * @return the model partition object + * @return the model partition object, and null if the input partition is null. * @throws InvalidObjectException * @throws MetaException */ @@ -2576,6 +2612,26 @@ public class ObjectStore implements RawStore, Configurable { return null; } MTable mt = getMTable(part.getCatName(), part.getDbName(), part.getTableName()); + return convertToMPart(part, mt, useTableCD); + } + + /** + * Convert a Partition object into an MPartition, which is an object backed by the db + * If the Partition's set of columns is the same as the parent table's AND useTableCD + * is true, then this partition's storage descriptor's column descriptor will point + * to the same one as the table's storage descriptor. + * @param part the partition to convert + * @param mt the parent table object + * @param useTableCD whether to try to use the parent table's column descriptor. + * @return the model partition object, and null if the input partition is null. + * @throws InvalidObjectException + * @throws MetaException + */ + private MPartition convertToMPart(Partition part, MTable mt, boolean useTableCD) + throws InvalidObjectException, MetaException { + if (part == null) { + return null; + } if (mt == null) { throw new InvalidObjectException( "Partition doesn't have a valid table or database name"); @@ -9212,14 +9268,11 @@ public class ObjectStore implements RawStore, Configurable { } @Override - public boolean doesPartitionExist(String catName, String dbName, String tableName, List<String> - partVals) + public boolean doesPartitionExist(String catName, String dbName, String tableName, + List<FieldSchema> partKeys, List<String> partVals) throws MetaException { - try { - return this.getPartition(catName, dbName, tableName, partVals) != null; - } catch (NoSuchObjectException e) { - return false; - } + String name = Warehouse.makePartName(partKeys, partVals); + return this.getMPartition(catName, dbName, tableName, name) != null; } private void debugLog(String message) { http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java index b2d4283..bbbdf21 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -323,13 +323,15 @@ public interface RawStore extends Configurable { * @param catName catalog name. * @param dbName database name. * @param tableName table name. + * @param partKeys list of partition keys used to generate the partition name. * @param part_vals list of partition values. * @return true if the partition exists, false otherwise. * @throws MetaException failure reading RDBMS * @throws NoSuchObjectException this is never thrown. */ boolean doesPartitionExist(String catName, String dbName, String tableName, - List<String> part_vals) throws MetaException, NoSuchObjectException; + List<FieldSchema> partKeys, List<String> part_vals) + throws MetaException, NoSuchObjectException; /** * Drop a partition. http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 2f31c68..7c3588d 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -971,17 +971,18 @@ public class CachedStore implements RawStore, Configurable { @Override public boolean doesPartitionExist(String catName, String dbName, String tblName, - List<String> part_vals) throws MetaException, NoSuchObjectException { + List<FieldSchema> partKeys, List<String> part_vals) + throws MetaException, NoSuchObjectException { catName = normalizeIdentifier(catName); dbName = StringUtils.normalizeIdentifier(dbName); tblName = StringUtils.normalizeIdentifier(tblName); if (!shouldCacheTable(catName, dbName, tblName)) { - return rawStore.doesPartitionExist(catName, dbName, tblName, part_vals); + return rawStore.doesPartitionExist(catName, dbName, tblName, partKeys, part_vals); } Table tbl = sharedCache.getTableFromCache(catName, dbName, tblName); if (tbl == null) { // The table containing the partition is not yet loaded in cache - return rawStore.doesPartitionExist(catName, dbName, tblName, part_vals); + return rawStore.doesPartitionExist(catName, dbName, tblName, partKeys, part_vals); } return sharedCache.existPartitionFromCache(catName, dbName, tblName, part_vals); } http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java index 58af0df..7c7429d 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java @@ -740,8 +740,9 @@ public class DummyRawStoreControlledCommit implements RawStore, Configurable { @Override public boolean doesPartitionExist(String catName, String dbName, String tableName, - List<String> partVals) throws MetaException, NoSuchObjectException { - return objectStore.doesPartitionExist(catName, dbName, tableName, partVals); + List<FieldSchema> partKeys, List<String> partVals) + throws MetaException, NoSuchObjectException { + return objectStore.doesPartitionExist(catName, dbName, tableName, partKeys, partVals); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java index 1823430..e4f2a17 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java @@ -750,7 +750,8 @@ public class DummyRawStoreForJdoConnection implements RawStore { @Override public boolean doesPartitionExist(String catName, String dbName, String tableName, - List<String> partVals) throws MetaException, NoSuchObjectException { + List<FieldSchema> partKeys, List<String> partVals) + throws MetaException, NoSuchObjectException { return false; }