HIVE-20307 : Add support for filterspec to the getPartitions with projection API (Vihang Karajgaonkar, reviewed by Andrew Sherman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e39a1980 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e39a1980 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e39a1980 Branch: refs/heads/master Commit: e39a19801abf7b2a711883945e8c7a9e3551a09d Parents: dc8d8e1 Author: Vihang Karajgaonkar <vihan...@apache.org> Authored: Wed Aug 22 20:29:23 2018 -0700 Committer: Vihang Karajgaonkar <vihan...@apache.org> Committed: Tue Oct 16 14:15:45 2018 -0700 ---------------------------------------------------------------------- .../listener/DummyRawStoreFailEvent.java | 12 +- .../ql/metadata/SessionHiveMetaStoreClient.java | 2 +- .../hive/metastore/utils/MetaStoreUtils.java | 6 +- .../hadoop/hive/metastore/HiveMetaStore.java | 20 +- .../hive/metastore/MetaStoreDirectSql.java | 53 +- .../hadoop/hive/metastore/ObjectStore.java | 173 +++- .../apache/hadoop/hive/metastore/RawStore.java | 51 +- .../hive/metastore/cache/CachedStore.java | 11 +- .../DummyRawStoreControlledCommit.java | 22 +- .../DummyRawStoreForJdoConnection.java | 6 +- .../TestGetPartitionsUsingProjection.java | 700 -------------- ...PartitionsUsingProjectionAndFilterSpecs.java | 904 +++++++++++++++++++ 12 files changed, 1146 insertions(+), 814 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java ---------------------------------------------------------------------- diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java index d59d5d8..c3e1e8e 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java @@ -19,6 +19,8 @@ package org.apache.hive.hcatalog.listener; import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.metastore.api.GetPartitionsFilterSpec; +import org.apache.hadoop.hive.metastore.api.GetPartitionsProjectionSpec; import org.apache.hadoop.hive.metastore.api.ISchemaName; import org.apache.hadoop.hive.metastore.api.SchemaVersionDescriptor; import org.apache.hadoop.hive.metastore.api.Catalog; @@ -413,12 +415,10 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable { } @Override - public List<Partition> getPartitionSpecsByFilterAndProjection(String catalog, - String dbName, String tblName, - List<String> fieldList, String includeParamKeyPattern, - String excludeParamKeyPattern) throws MetaException, NoSuchObjectException { - return objectStore.getPartitionSpecsByFilterAndProjection(catalog, dbName, tblName, fieldList, - includeParamKeyPattern, excludeParamKeyPattern); + public List<Partition> getPartitionSpecsByFilterAndProjection(Table table, + GetPartitionsProjectionSpec projectionSpec, GetPartitionsFilterSpec filterSpec) + throws MetaException, NoSuchObjectException { + return objectStore.getPartitionSpecsByFilterAndProjection(table, projectionSpec, filterSpec); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index a2b57fb..dd23d7d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -948,7 +948,7 @@ public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements I * */ private List<Partition> getPartitions(List<String> partialPartVals) throws MetaException { - String partNameMatcher = MetaStoreUtils.makePartNameMatcher(tTable, partialPartVals); + String partNameMatcher = MetaStoreUtils.makePartNameMatcher(tTable, partialPartVals, ".*"); List<Partition> matchedPartitions = new ArrayList<>(); for(String key : parts.keySet()) { if(key.matches(partNameMatcher)) { http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index 16f4a50..8fb1fa7 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -287,7 +287,7 @@ public class MetaStoreUtils { } return pvals; } - public static String makePartNameMatcher(Table table, List<String> partVals) throws MetaException { + public static String makePartNameMatcher(Table table, List<String> partVals, String defaultStr) throws MetaException { List<FieldSchema> partCols = table.getPartitionKeys(); int numPartKeys = partCols.size(); if (partVals.size() > numPartKeys) { @@ -300,10 +300,10 @@ public class MetaStoreUtils { // or a regex of the form ".*" // This works because the "=" and "/" separating key names and partition key/values // are not escaped. - String partNameMatcher = Warehouse.makePartName(partCols, partVals, ".*"); + String partNameMatcher = Warehouse.makePartName(partCols, partVals, defaultStr); // add ".*" to the regex to match anything else afterwards the partial spec. if (partVals.size() < numPartKeys) { - partNameMatcher += ".*"; + partNameMatcher += defaultStr; } return partNameMatcher; } http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 0861474..8cd46e3 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -4698,24 +4698,10 @@ public class HiveMetaStore extends ThriftHiveMetastore { GetPartitionsResponse response = null; Exception ex = null; try { - List<String> fieldList = null; - String paramkeyPattern = null; - String excludeParamKeyPattern = null; - if (request.isSetProjectionSpec()) { - GetPartitionsProjectionSpec partitionsProjectSpec = request.getProjectionSpec(); - fieldList = partitionsProjectSpec.getFieldList(); - if (partitionsProjectSpec.isSetIncludeParamKeyPattern()) { - paramkeyPattern = partitionsProjectSpec.getIncludeParamKeyPattern(); - } - if (partitionsProjectSpec.isSetExcludeParamKeyPattern()) { - excludeParamKeyPattern = partitionsProjectSpec.getExcludeParamKeyPattern(); - } - } - String dbName = parsedDbName[DB_NAME]; - Table table = get_table_core(catName, dbName, tableName); + Table table = get_table_core(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName); List<Partition> partitions = getMS() - .getPartitionSpecsByFilterAndProjection(catName, dbName, tableName, fieldList, paramkeyPattern, - excludeParamKeyPattern); + .getPartitionSpecsByFilterAndProjection(table, request.getProjectionSpec(), + request.getFilterSpec()); List<PartitionSpec> partitionSpecs = MetaStoreServerUtils.getPartitionspecsGroupedByStorageDescriptor(table, partitions); response = new GetPartitionsResponse(); http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index af75793..58dc6ee 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.GetPartitionsFilterSpec; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.Partition; @@ -563,19 +564,59 @@ class MetaStoreDirectSql { * @param excludeParamKeyPattern The SQL regex paterrn which is used to exclude the parameter keys. Can include _ or % * When this pattern is set, all the partition parameters where key is NOT LIKE the pattern * are returned. This is applied in conjunction with the includeParamKeyPattern if it is set. + * @param filterSpec The filterSpec from <code>GetPartitionsRequest</code> which includes the filter mode (BY_EXPR, BY_VALUES or BY_NAMES) + * and the list of filter strings to be used to filter the results + * @param filter SqlFilterForPushDown which is set in the <code>canUseDirectSql</code> method before this method is called. + * The filter is used only when the mode is BY_EXPR * @return * @throws MetaException */ - public List<Partition> getPartitionSpecsUsingProjection(Table tbl, - final List<String> partitionFields, final String includeParamKeyPattern, final String excludeParamKeyPattern) + public List<Partition> getPartitionsUsingProjectionAndFilterSpec(Table tbl, + final List<String> partitionFields, final String includeParamKeyPattern, + final String excludeParamKeyPattern, GetPartitionsFilterSpec filterSpec, SqlFilterForPushdown filter) throws MetaException { final String tblName = tbl.getTableName(); final String dbName = tbl.getDbName(); final String catName = tbl.getCatName(); - //TODO add support for filter - List<Long> partitionIds = - getPartitionIdsViaSqlFilter(catName, dbName, tblName, null, Collections.<String>emptyList(), - Collections.<String>emptyList(), null); + List<Long> partitionIds = null; + if (filterSpec.isSetFilterMode()) { + List<String> filters = filterSpec.getFilters(); + if (filters == null || filters.isEmpty()) { + throw new MetaException("Invalid filter expressions in the filter spec"); + } + switch(filterSpec.getFilterMode()) { + case BY_EXPR: + partitionIds = + getPartitionIdsViaSqlFilter(catName, dbName, tblName, filter.filter, filter.params, + filter.joins, null); + break; + case BY_NAMES: + String partNamesFilter = + "" + PARTITIONS + ".\"PART_NAME\" in (" + makeParams(filterSpec.getFilters().size()) + + ")"; + partitionIds = getPartitionIdsViaSqlFilter(catName, dbName, tblName, partNamesFilter, + filterSpec.getFilters(), Collections.EMPTY_LIST, null); + break; + case BY_VALUES: + // we are going to use the SQL regex pattern in the LIKE clause below. So the default string + // is _% and not .* + String partNameMatcher = MetaStoreUtils.makePartNameMatcher(tbl, filters, "_%"); + String partNamesLikeFilter = + "" + PARTITIONS + ".\"PART_NAME\" LIKE (?)"; + partitionIds = + getPartitionIdsViaSqlFilter(catName, dbName, tblName, partNamesLikeFilter, Arrays.asList(partNameMatcher), + Collections.EMPTY_LIST, null); + break; + default: + throw new MetaException("Unsupported filter mode " + filterSpec.getFilterMode()); + } + } else { + // there is no filter mode. Fetch all the partition ids + partitionIds = + getPartitionIdsViaSqlFilter(catName, dbName, tblName, null, Collections.EMPTY_LIST, + Collections.EMPTY_LIST, null); + } + if (partitionIds.isEmpty()) { return Collections.emptyList(); } http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 66977d7..b98b4b4 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -3255,15 +3255,12 @@ public class ObjectStore implements RawStore, Configurable { throw new NoSuchObjectException(TableName.getQualified(catName, dbName, tableName) + " table not found"); } - String partNameMatcher = MetaStoreUtils.makePartNameMatcher(table, part_vals); + // size is known since it contains dbName, catName, tblName and partialRegex pattern + Map<String, String> params = new HashMap<>(4); + String filter = getJDOFilterStrForPartitionVals(table, part_vals, params); Query query = queryWrapper.query = pm.newQuery(MPartition.class); - StringBuilder queryFilter = new StringBuilder("table.database.name == dbName"); - queryFilter.append(" && table.database.catalogName == catName"); - queryFilter.append(" && table.tableName == tableName"); - queryFilter.append(" && partitionName.matches(partialRegex)"); - query.setFilter(queryFilter.toString()); - query.declareParameters("java.lang.String dbName, java.lang.String catName, " - + "java.lang.String tableName, java.lang.String partialRegex"); + query.setFilter(filter); + query.declareParameters(makeParameterDeclarationString(params)); if (max_parts >= 0) { // User specified a row limit, set it on the Query query.setRange(0, max_parts); @@ -3272,7 +3269,7 @@ public class ObjectStore implements RawStore, Configurable { query.setResult(resultsCol); } - return (Collection) query.executeWithArray(dbName, catName, tableName, partNameMatcher); + return (Collection) query.executeWithMap(params); } @Override @@ -3360,32 +3357,27 @@ public class ObjectStore implements RawStore, Configurable { } // This code is only executed in JDO code path, not from direct SQL code path. - private List<MPartition> listMPartitionsWithProjection(String catName, String dbName, String tblName, int max, - QueryWrapper queryWrapper, List<String> fieldNames) throws MetaException { + private List<MPartition> listMPartitionsWithProjection(QueryWrapper queryWrapper, + List<String> fieldNames, String jdoFilter, Map<String, Object> params) throws MetaException { boolean success = false; List<MPartition> mparts = null; try { openTransaction(); LOG.debug("Executing listMPartitionsWithProjection"); - dbName = normalizeIdentifier(dbName); - tblName = normalizeIdentifier(tblName); - Query query = queryWrapper.query = pm.newQuery(MPartition.class, - "table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"); - query.declareParameters("java.lang.String t1, java.lang.String t2, java.lang.String t3"); + Query query = queryWrapper.query = pm.newQuery(MPartition.class, jdoFilter); + String parameterDeclaration = makeParameterDeclarationStringObj(params); + query.declareParameters(parameterDeclaration); query.setOrdering("partitionName ascending"); - if (max >= 0) { - query.setRange(0, max); - } if (fieldNames == null || fieldNames.isEmpty()) { // full fetch of partitions - mparts = (List<MPartition>) query.execute(tblName, dbName, catName); + mparts = (List<MPartition>) query.executeWithMap(params); pm.retrieveAll(mparts); } else { // fetch partially filled partitions using result clause query.setResult(Joiner.on(',').join(fieldNames)); // if more than one fields are in the result class the return type is List<Object[]> if (fieldNames.size() > 1) { - List<Object[]> results = (List<Object[]>) query.execute(tblName, dbName, catName); + List<Object[]> results = (List<Object[]>) query.executeWithMap(params); mparts = new ArrayList<>(results.size()); for (Object[] row : results) { MPartition mpart = new MPartition(); @@ -3398,7 +3390,7 @@ public class ObjectStore implements RawStore, Configurable { } } else { // only one field is requested, return type is List<Object> - List<Object> results = (List<Object>) query.execute(tblName, dbName, catName); + List<Object> results = (List<Object>) query.executeWithMap(params); mparts = new ArrayList<>(results.size()); for (Object row : results) { MPartition mpart = new MPartition(); @@ -3581,7 +3573,6 @@ public class ObjectStore implements RawStore, Configurable { return result.intValue(); } - /** * Gets partition names from the table via ORM (JDOQL) name filter. * @param dbName Database name. @@ -3648,12 +3639,14 @@ public class ObjectStore implements RawStore, Configurable { return candidateCds; } - private ObjectPair<Query, Map<String, String>> getPartQueryWithParams( - String catName, String dbName, String tblName, List<String> partNames) { + private String getJDOFilterStrForPartitionNames(String catName, String dbName, String tblName, + List<String> partNames, Map params) { StringBuilder sb = new StringBuilder("table.tableName == t1 && table.database.name == t2 &&" + " table.database.catalogName == t3 && ("); + params.put("t1", normalizeIdentifier(tblName)); + params.put("t2", normalizeIdentifier(dbName)); + params.put("t3", normalizeIdentifier(catName)); int n = 0; - Map<String, String> params = new HashMap<>(); for (Iterator<String> itr = partNames.iterator(); itr.hasNext();) { String pn = "p" + n; n++; @@ -3664,12 +3657,30 @@ public class ObjectStore implements RawStore, Configurable { } sb.setLength(sb.length() - 4); // remove the last " || " sb.append(')'); + return sb.toString(); + } + + private String getJDOFilterStrForPartitionVals(Table table, List<String> vals, + Map params) throws MetaException { + String partNameMatcher = MetaStoreUtils.makePartNameMatcher(table, vals, ".*"); + StringBuilder queryFilter = new StringBuilder("table.database.name == dbName"); + queryFilter.append(" && table.database.catalogName == catName"); + queryFilter.append(" && table.tableName == tableName"); + queryFilter.append(" && partitionName.matches(partialRegex)"); + params.put("dbName", table.getDbName()); + params.put("catName", table.getCatName()); + params.put("tableName", table.getTableName()); + params.put("partialRegex", partNameMatcher); + return queryFilter.toString(); + } + + private ObjectPair<Query, Map<String, String>> getPartQueryWithParams( + String catName, String dbName, String tblName, List<String> partNames) { Query query = pm.newQuery(); - query.setFilter(sb.toString()); - LOG.debug(" JDOQL filter is {}", sb); - params.put("t1", normalizeIdentifier(tblName)); - params.put("t2", normalizeIdentifier(dbName)); - params.put("t3", normalizeIdentifier(catName)); + Map<String, String> params = new HashMap<>(); + String filterStr = getJDOFilterStrForPartitionNames(catName, dbName, tblName, partNames, params); + query.setFilter(filterStr); + LOG.debug(" JDOQL filter is {}", filterStr); query.declareParameters(makeParameterDeclarationString(params)); return new ObjectPair<>(query, params); } @@ -4040,24 +4051,69 @@ public class ObjectStore implements RawStore, Configurable { } @Override - public List<Partition> getPartitionSpecsByFilterAndProjection(String catName, String dbName, - String tblName, List<String> fieldList, - String includeParamKeyPattern, - String excludeParamKeyPattern) - throws MetaException, NoSuchObjectException { + public List<Partition> getPartitionSpecsByFilterAndProjection(final Table table, + GetPartitionsProjectionSpec partitionsProjectSpec, + final GetPartitionsFilterSpec filterSpec) throws MetaException, NoSuchObjectException { + List<String> fieldList = null; + String inputIncludePattern = null; + String inputExcludePattern = null; + if (partitionsProjectSpec != null) { + fieldList = partitionsProjectSpec.getFieldList(); + if (partitionsProjectSpec.isSetIncludeParamKeyPattern()) { + inputIncludePattern = partitionsProjectSpec.getIncludeParamKeyPattern(); + } + if (partitionsProjectSpec.isSetExcludeParamKeyPattern()) { + inputExcludePattern = partitionsProjectSpec.getExcludeParamKeyPattern(); + } + } if (fieldList == null || fieldList.isEmpty()) { // no fields are requested. Fallback to regular getPartitions implementation to return all the fields - return getPartitionsInternal(catName, dbName, tblName, -1, true, true); + return getPartitionsInternal(table.getCatName(), table.getDbName(), table.getTableName(), -1, + true, true); } - return new GetListHelper<Partition>(catName, dbName, tblName, + // anonymous class below requires final String objects + final String includeParamKeyPattern = inputIncludePattern; + final String excludeParamKeyPattern = inputExcludePattern; + + return new GetListHelper<Partition>(table.getCatName(), table.getDbName(), table.getTableName(), fieldList, true, true) { + private final SqlFilterForPushdown filter = new SqlFilterForPushdown(); + private ExpressionTree tree; + + @Override + protected boolean canUseDirectSql(GetHelper<List<Partition>> ctx) throws MetaException { + if (filterSpec.isSetFilterMode() && filterSpec.getFilterMode().equals(PartitionFilterMode.BY_EXPR)) { + // if the filter mode is BY_EXPR initialize the filter and generate the expression tree + // if there are more than one filter string we AND them together + initExpressionTree(); + return directSql.generateSqlFilterForPushdown(ctx.getTable(), tree, filter); + } + // BY_VALUES and BY_NAMES are always supported + return true; + } + + private void initExpressionTree() throws MetaException { + StringBuilder filterBuilder = new StringBuilder(); + int len = filterSpec.getFilters().size(); + List<String> filters = filterSpec.getFilters(); + for (int i = 0; i < len; i++) { + filterBuilder.append('('); + filterBuilder.append(filters.get(i)); + filterBuilder.append(')'); + if (i + 1 < len) { + filterBuilder.append(" AND "); + } + } + String filterStr = filterBuilder.toString(); + tree = PartFilterExprUtil.getFilterParser(filterStr).tree; + } @Override protected List<Partition> getSqlResult(GetHelper<List<Partition>> ctx) throws MetaException { return directSql - .getPartitionSpecsUsingProjection(ctx.getTable(), ctx.partitionFields, includeParamKeyPattern, - excludeParamKeyPattern); + .getPartitionsUsingProjectionAndFilterSpec(ctx.getTable(), ctx.partitionFields, + includeParamKeyPattern, excludeParamKeyPattern, filterSpec, filter); } @Override @@ -4066,11 +4122,44 @@ public class ObjectStore implements RawStore, Configurable { // For single-valued fields we can use setResult() to implement projection of fields but // JDO doesn't support multi-valued fields in setResult() so currently JDO implementation // fallbacks to full-partition fetch if the requested fields contain multi-valued fields - // TODO: Add param filtering logic List<String> fieldNames = PartitionProjectionEvaluator.getMPartitionFieldNames(ctx.partitionFields); + Map<String, Object> params = new HashMap<>(); + String jdoFilter = null; + if (filterSpec.isSetFilterMode()) { + // generate the JDO filter string + switch(filterSpec.getFilterMode()) { + case BY_EXPR: + if (tree == null) { + // tree could be null when directSQL is disabled + initExpressionTree(); + } + jdoFilter = + makeQueryFilterString(table.getCatName(), table.getDbName(), table, tree, params, + true); + if (jdoFilter == null) { + throw new MetaException("Could not generate JDO filter from given expression"); + } + break; + case BY_NAMES: + jdoFilter = getJDOFilterStrForPartitionNames(table.getCatName(), table.getDbName(), + table.getTableName(), filterSpec.getFilters(), params); + break; + case BY_VALUES: + jdoFilter = getJDOFilterStrForPartitionVals(table, filterSpec.getFilters(), params); + break; + default: + throw new MetaException("Unsupported filter mode " + filterSpec.getFilterMode()); + } + } else { + // filter mode is not set create simple JDOFilterStr and params + jdoFilter = "table.tableName == t1 && table.database.name == t2 && table.database.catalogName == t3"; + params.put("t1", normalizeIdentifier(tblName)); + params.put("t2", normalizeIdentifier(dbName)); + params.put("t3", normalizeIdentifier(catName)); + } try (QueryWrapper queryWrapper = new QueryWrapper()) { return convertToParts( - listMPartitionsWithProjection(catName, dbName, tblName, -1, queryWrapper, fieldNames)); + listMPartitionsWithProjection(queryWrapper, fieldNames, jdoFilter, params)); } } }.run(true); http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java index a6d9583..c3914b6 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -554,35 +554,34 @@ public interface RawStore extends Configurable { /** * Generic Partition request API, providing different kinds of filtering and controlling output. * - * @param catName catalog name - * @param dbName database name - * @param tblName table name - * @param fieldList a dot separated strings which represent the fields which must be returned. - * Any other field which is not in the fieldList may be unset in the returned - * partitions (it is up to the implementation to decide whether it chooses to - * include or exclude such fields). E.g. setting the field list to <em>sd.location</em>, - * <em>serdeInfo.name</em>, <em>sd.cols.name</em>, <em>sd.cols.type</em> will - * return partitions which will have location field set in the storage descriptor. - * Also the serdeInf in the returned storage descriptor will only have name field - * set. This applies to multi-valued fields as well like sd.cols, so in the - * example above only name and type fields will be set for <em>sd.cols</em>. - * If the <em>fieldList</em> is empty or not present, all the fields will be set. - * @param includeParamKeyPattern SQL-92 compliant regex pattern for param keys to be included - * _ or % wildcards are supported. '_' represent one character and - * '%' represents 0 or more characters. - * @param excludeParamKeyPattern SQL-92 compliant regex pattern for param keys to be excluded - * _ or % wildcards are supported. '_' represent one character and - * '%' represents 0 or more characters + * @param table table for which whose partitions are requested + * * @param table table for which partitions are requested + * @param projectionSpec the projection spec from the <code>GetPartitionsRequest</code> + * This projection spec includes a fieldList which represents the fields which must be returned. + * Any other field which is not in the fieldList may be unset in the returned + * partitions (it is up to the implementation to decide whether it chooses to + * include or exclude such fields). E.g. setting the field list to <em>sd.location</em>, + * <em>serdeInfo.name</em>, <em>sd.cols.name</em>, <em>sd.cols.type</em> will + * return partitions which will have location field set in the storage descriptor. + * Also the serdeInf in the returned storage descriptor will only have name field + * set. This applies to multi-valued fields as well like sd.cols, so in the + * example above only name and type fields will be set for <em>sd.cols</em>. + * If the <em>fieldList</em> is empty or not present, all the fields will be set. + * Additionally, it also includes a includeParamKeyPattern and excludeParamKeyPattern + * which is a SQL-92 compliant regex pattern to include or exclude parameters. The paramKeyPattern + * supports _ or % wildcards which represent one character and 0 or more characters respectively + * @param filterSpec The filter spec from <code>GetPartitionsRequest</code> which includes the filter mode + * and the list of filter strings. The filter mode could be BY_NAMES, BY_VALUES or BY_EXPR + * to filter by partition names, partition values or expression. The filter strings are provided + * in the list of filters within the filterSpec. When more than one filters are provided in the list + * they are logically AND together * @return List of matching partitions which which may be partially filled according to fieldList. - * @throws MetaException in case of errors - * @throws NoSuchObjectException when catalog or database or table isn't found + * @throws MetaException in case of errors + * @throws NoSuchObjectException when table isn't found */ - List<Partition> getPartitionSpecsByFilterAndProjection(String catName, String dbName, String tblName, - final List<String> fieldList, - final String includeParamKeyPattern, - final String excludeParamKeyPattern) + List<Partition> getPartitionSpecsByFilterAndProjection(Table table, + GetPartitionsProjectionSpec projectionSpec, GetPartitionsFilterSpec filterSpec) throws MetaException, NoSuchObjectException; - /** * Get partitions using an already parsed expression. * @param catName catalog name. http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 70490f0..47ac68c 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -1274,13 +1274,10 @@ public class CachedStore implements RawStore, Configurable { /** * getPartitionSpecsByFilterAndProjection interface is currently non-cacheable. */ - public List<Partition> getPartitionSpecsByFilterAndProjection(String catName, String dbName, - String tblName, - List<String> fieldList, - String includeParamKeyPattern, - String excludeParamKeysPattern) throws MetaException, NoSuchObjectException { - return rawStore.getPartitionSpecsByFilterAndProjection(catName, dbName, tblName, fieldList, - includeParamKeyPattern, excludeParamKeysPattern); + public List<Partition> getPartitionSpecsByFilterAndProjection(Table table, + GetPartitionsProjectionSpec projectionSpec, GetPartitionsFilterSpec filterSpec) + throws MetaException, NoSuchObjectException { + return rawStore.getPartitionSpecsByFilterAndProjection(table, projectionSpec, filterSpec); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java index 4dd4edc..9669798 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java @@ -18,6 +18,21 @@ package org.apache.hadoop.hive.metastore; +import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.metastore.api.CreationMetadata; +import org.apache.hadoop.hive.metastore.api.GetPartitionsFilterSpec; +import org.apache.hadoop.hive.metastore.api.GetPartitionsProjectionSpec; +import org.apache.hadoop.hive.metastore.api.ISchemaName; +import org.apache.hadoop.hive.metastore.api.SchemaVersionDescriptor; +import org.apache.hadoop.hive.metastore.api.Catalog; +import org.apache.hadoop.hive.metastore.api.WMFullResourcePlan; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.TableName; @@ -376,11 +391,10 @@ public class DummyRawStoreControlledCommit implements RawStore, Configurable { } @Override - public List<Partition> getPartitionSpecsByFilterAndProjection(String catName, String dbName, String tblName, - List<String> fieldList, String paramKeys, String excludeFlag) + public List<Partition> getPartitionSpecsByFilterAndProjection(Table table, + GetPartitionsProjectionSpec projectionSpec, GetPartitionsFilterSpec filterSpec) throws MetaException, NoSuchObjectException { - return objectStore.getPartitionSpecsByFilterAndProjection(catName, - dbName, tblName, fieldList, paramKeys, excludeFlag); + return objectStore.getPartitionSpecsByFilterAndProjection(table, projectionSpec, filterSpec); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java index 06f4cbc..593d562 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java @@ -20,6 +20,8 @@ package org.apache.hadoop.hive.metastore; import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.metastore.api.CreationMetadata; +import org.apache.hadoop.hive.metastore.api.GetPartitionsFilterSpec; +import org.apache.hadoop.hive.metastore.api.GetPartitionsProjectionSpec; import org.apache.hadoop.hive.metastore.api.ISchemaName; import org.apache.hadoop.hive.metastore.api.SchemaVersionDescriptor; import org.apache.hadoop.hive.metastore.api.Catalog; @@ -380,8 +382,8 @@ public class DummyRawStoreForJdoConnection implements RawStore { } @Override - public List<Partition> getPartitionSpecsByFilterAndProjection(String catName, String dbName, String tblName, - List<String> fieldList, String paramKeys, String excludeFlag) + public List<Partition> getPartitionSpecsByFilterAndProjection(Table table, + GetPartitionsProjectionSpec projectSpec, GetPartitionsFilterSpec filterSpec) throws MetaException, NoSuchObjectException { return Collections.emptyList(); } http://git-wip-us.apache.org/repos/asf/hive/blob/e39a1980/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestGetPartitionsUsingProjection.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestGetPartitionsUsingProjection.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestGetPartitionsUsingProjection.java deleted file mode 100644 index dcff606..0000000 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestGetPartitionsUsingProjection.java +++ /dev/null @@ -1,700 +0,0 @@ -/* - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.metastore; - -import org.apache.commons.beanutils.PropertyUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.GetPartitionsFilterSpec; -import org.apache.hadoop.hive.metastore.api.GetPartitionsProjectionSpec; -import org.apache.hadoop.hive.metastore.api.GetPartitionsRequest; -import org.apache.hadoop.hive.metastore.api.GetPartitionsResponse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.PartitionListComposingSpec; -import org.apache.hadoop.hive.metastore.api.PartitionSpec; -import org.apache.hadoop.hive.metastore.api.PartitionSpecWithSharedSD; -import org.apache.hadoop.hive.metastore.api.PartitionWithoutSD; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder; -import org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder; -import org.apache.hadoop.hive.metastore.client.builder.TableBuilder; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; -import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; -import org.apache.thrift.TException; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.lang.reflect.InvocationTargetException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import static org.apache.hadoop.hive.metastore.ColumnType.SERIALIZATION_FORMAT; - -/** - * Tests for getPartitionsWithSpecs metastore API. This test create some partitions and makes sure - * that getPartitionsWithSpecs returns results which are comparable with the get_partitions API when - * various combinations of projection spec are set. Also checks the JDO code path in addition to - * directSQL code path - */ -@Category(MetastoreCheckinTest.class) -public class TestGetPartitionsUsingProjection { - private static final Logger LOG = LoggerFactory.getLogger(TestGetPartitionsUsingProjection.class); - protected static Configuration conf = MetastoreConf.newMetastoreConf(); - private static int port; - private static final String dbName = "test_projection_db"; - private static final String tblName = "test_projection_table"; - private List<Partition> origPartitions; - private Table tbl; - private static final String EXCLUDE_KEY_PREFIX = "exclude"; - private HiveMetaStoreClient client; - - @BeforeClass - public static void startMetaStoreServer() throws Exception { - conf.set("hive.in.test", "true"); - MetaStoreTestUtils.setConfForStandloneMode(conf); - MetastoreConf.setLongVar(conf, ConfVars.BATCH_RETRIEVE_MAX, 2); - MetastoreConf.setLongVar(conf, ConfVars.LIMIT_PARTITION_REQUEST, 100); - port = MetaStoreTestUtils.startMetaStoreWithRetry(HadoopThriftAuthBridge.getBridge(), conf); - LOG.info("Starting MetaStore Server on port " + port); - - try (HiveMetaStoreClient client = createClient()) { - new DatabaseBuilder().setName(dbName).create(client, conf); - } - } - - @AfterClass - public static void tearDown() throws Exception { - try (HiveMetaStoreClient client = createClient()) { - client.dropDatabase(dbName, true, true, true); - } - } - - @Before - public void setup() throws TException { - // This is default case with setugi off for both client and server - client = createClient(); - createTestTables(); - origPartitions = client.listPartitions(dbName, tblName, (short) -1); - tbl = client.getTable(dbName, tblName); - // set directSQL to true explicitly - client.setMetaConf(ConfVars.TRY_DIRECT_SQL.getVarname(), "true"); - client.setMetaConf(ConfVars.TRY_DIRECT_SQL_DDL.getVarname(), "true"); - } - - @After - public void cleanup() { - dropTestTables(); - client.close(); - client = null; - } - - private void dropTestTables() { - try { - client.dropTable(dbName, tblName); - } catch (TException e) { - // ignored - } - } - - private void createTestTables() throws TException { - if (client.tableExists(dbName, tblName)) { - LOG.info("Table is already existing. Dropping it and then recreating"); - client.dropTable(dbName, tblName); - } - new TableBuilder().setTableName(tblName).setDbName(dbName).setCols(Arrays - .asList(new FieldSchema("col1", "string", "c1 comment"), - new FieldSchema("col2", "int", "c2 comment"))).setPartCols(Arrays - .asList(new FieldSchema("state", "string", "state comment"), - new FieldSchema("city", "string", "city comment"))) - .setTableParams(new HashMap<String, String>(2) {{ - put("tableparam1", "tableval1"); - put("tableparam2", "tableval2"); - }}) - .setBucketCols(Collections.singletonList("col1")) - .addSortCol("col2", 1) - .addSerdeParam(SERIALIZATION_FORMAT, "1").setSerdeName(tblName) - .setSerdeLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - .setInputFormat("org.apache.hadoop.hive.ql.io.HiveInputFormat") - .setOutputFormat("org.apache.hadoop.hive.ql.io.HiveOutputFormat") - .create(client, conf); - - Table table = client.getTable(dbName, tblName); - Assert.assertTrue("Table " + dbName + "." + tblName + " does not exist", - client.tableExists(dbName, tblName)); - - List<Partition> partitions = new ArrayList<>(); - partitions.add(createPartition(Arrays.asList("CA", "SanFrancisco"), table)); - partitions.add(createPartition(Arrays.asList("CA", "PaloAlto"), table)); - partitions.add(createPartition(Arrays.asList("WA", "Seattle"), table)); - partitions.add(createPartition(Arrays.asList("AZ", "Phoenix"), table)); - - client.add_partitions(partitions); - } - - private Partition createPartition(List<String> vals, Table table) throws MetaException { - return new PartitionBuilder() - .inTable(table) - .setValues(vals) - .addPartParam("key1", "S1") - .addPartParam("key2", "S2") - .addPartParam(EXCLUDE_KEY_PREFIX + "key1", "e1") - .addPartParam(EXCLUDE_KEY_PREFIX + "key2", "e2") - .setBucketCols(table.getSd().getBucketCols()) - .setSortCols(table.getSd().getSortCols()) - .setSerdeName(table.getSd().getSerdeInfo().getName()) - .setSerdeLib(table.getSd().getSerdeInfo().getSerializationLib()) - .setSerdeParams(table.getSd().getSerdeInfo().getParameters()) - .build(conf); - } - - private static HiveMetaStoreClient createClient() throws MetaException { - MetastoreConf.setVar(conf, ConfVars.THRIFT_URIS, "thrift://localhost:" + port); - MetastoreConf.setBoolVar(conf, ConfVars.EXECUTE_SET_UGI, false); - return new HiveMetaStoreClient(conf); - } - - @Test - public void testGetPartitions() throws TException { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsResponse response = client.getPartitionsWithSpecs(request); - validateBasic(response); - } - - @Test - public void testPartitionProjectionEmptySpec() throws Throwable { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - - projectSpec.setFieldList(new ArrayList<>(0)); - projectSpec.setExcludeParamKeyPattern("exclude%"); - - GetPartitionsResponse response; - response = client.getPartitionsWithSpecs(request); - Assert.assertEquals(1, response.getPartitionSpec().size()); - PartitionSpec partitionSpec = response.getPartitionSpec().get(0); - PartitionSpecWithSharedSD partitionSpecWithSharedSD = partitionSpec.getSharedSDPartitionSpec(); - - StorageDescriptor sharedSD = partitionSpecWithSharedSD.getSd(); - Assert.assertNotNull(sharedSD); - // everything except location in sharedSD should be same - StorageDescriptor origSd = origPartitions.get(0).getSd().deepCopy(); - origSd.unsetLocation(); - StorageDescriptor sharedSDCopy = sharedSD.deepCopy(); - sharedSDCopy.unsetLocation(); - Assert.assertEquals(origSd, sharedSDCopy); - - List<PartitionWithoutSD> partitionWithoutSDS = partitionSpecWithSharedSD.getPartitions(); - Assert.assertNotNull(partitionWithoutSDS); - Assert.assertEquals("Unexpected number of partitions returned", - origPartitions.size(), partitionWithoutSDS.size()); - for (int i = 0; i < origPartitions.size(); i++) { - Partition origPartition = origPartitions.get(i); - PartitionWithoutSD retPartition = partitionWithoutSDS.get(i); - Assert.assertEquals(origPartition.getCreateTime(), retPartition.getCreateTime()); - Assert.assertEquals(origPartition.getLastAccessTime(), retPartition.getLastAccessTime()); - Assert.assertEquals(origPartition.getSd().getLocation(), - sharedSD.getLocation() + retPartition.getRelativePath()); - validateMap(origPartition.getParameters(), retPartition.getParameters()); - validateList(origPartition.getValues(), retPartition.getValues()); - } - } - - @Test - public void testPartitionProjectionAllSingleValuedFields() throws Throwable { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - - List<String> projectedFields = Arrays - .asList("dbName", "tableName", "createTime", "lastAccessTime", "sd.location", - "sd.inputFormat", "sd.outputFormat", "sd.compressed", "sd.numBuckets", - "sd.serdeInfo.name", "sd.serdeInfo.serializationLib"/*, "sd.serdeInfo.serdeType"*/); - //TODO directSQL does not support serdeType, serializerClass and deserializerClass in serdeInfo - projectSpec.setFieldList(projectedFields); - - GetPartitionsResponse response = client.getPartitionsWithSpecs(request); - Assert.assertEquals(1, response.getPartitionSpec().size()); - PartitionSpec partitionSpec = response.getPartitionSpec().get(0); - Assert.assertTrue("DbName is not set", partitionSpec.isSetDbName()); - Assert.assertTrue("tableName is not set", partitionSpec.isSetTableName()); - PartitionSpecWithSharedSD partitionSpecWithSharedSD = partitionSpec.getSharedSDPartitionSpec(); - - StorageDescriptor sharedSD = partitionSpecWithSharedSD.getSd(); - Assert.assertNotNull(sharedSD); - List<PartitionWithoutSD> partitionWithoutSDS = partitionSpecWithSharedSD.getPartitions(); - Assert.assertNotNull(partitionWithoutSDS); - Assert.assertEquals(partitionWithoutSDS.size(), origPartitions.size()); - comparePartitionForSingleValuedFields(projectedFields, sharedSD, partitionWithoutSDS, 0); - } - - @Test - public void testProjectionUsingJDO() throws Throwable { - // disable direct SQL to make sure - client.setMetaConf(ConfVars.TRY_DIRECT_SQL.getVarname(), "false"); - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - List<String> projectedFields = Collections.singletonList("sd.location"); - projectSpec.setFieldList(projectedFields); - - GetPartitionsResponse response = client.getPartitionsWithSpecs(request); - Assert.assertEquals(1, response.getPartitionSpec().size()); - PartitionSpec partitionSpec = response.getPartitionSpec().get(0); - Assert.assertTrue("DbName is not set", partitionSpec.isSetDbName()); - Assert.assertTrue("tableName is not set", partitionSpec.isSetTableName()); - PartitionSpecWithSharedSD partitionSpecWithSharedSD = partitionSpec.getSharedSDPartitionSpec(); - - StorageDescriptor sharedSD = partitionSpecWithSharedSD.getSd(); - Assert.assertNotNull(sharedSD); - List<PartitionWithoutSD> partitionWithoutSDS = partitionSpecWithSharedSD.getPartitions(); - Assert.assertNotNull(partitionWithoutSDS); - Assert.assertEquals(partitionWithoutSDS.size(), origPartitions.size()); - comparePartitionForSingleValuedFields(projectedFields, sharedSD, partitionWithoutSDS, 0); - - // set all the single-valued fields and try using JDO - request = getGetPartitionsRequest(); - projectSpec = request.getProjectionSpec(); - projectedFields = Arrays - .asList("dbName", "tableName", "createTime", "lastAccessTime", "sd.location", - "sd.inputFormat", "sd.outputFormat", "sd.compressed", "sd.numBuckets", - "sd.serdeInfo.name", "sd.serdeInfo.serializationLib", "sd.serdeInfo.serdeType", - "sd.serdeInfo.serializerClass", "sd.serdeInfo.deserializerClass"); - projectSpec.setFieldList(projectedFields); - - response = client.getPartitionsWithSpecs(request); - Assert.assertEquals(1, response.getPartitionSpec().size()); - partitionSpec = response.getPartitionSpec().get(0); - Assert.assertTrue("DbName is not set", partitionSpec.isSetDbName()); - Assert.assertTrue("tableName is not set", partitionSpec.isSetTableName()); - partitionSpecWithSharedSD = partitionSpec.getSharedSDPartitionSpec(); - - sharedSD = partitionSpecWithSharedSD.getSd(); - Assert.assertNotNull(sharedSD); - partitionWithoutSDS = partitionSpecWithSharedSD.getPartitions(); - Assert.assertNotNull(partitionWithoutSDS); - Assert.assertEquals(partitionWithoutSDS.size(), origPartitions.size()); - comparePartitionForSingleValuedFields(projectedFields, sharedSD, partitionWithoutSDS, 0); - } - - /** - * Confirms if the partitionWithoutSD object at partitionWithoutSDSIndex index has all the - * projected fields set to values which are same as the ones set in origPartitions - * @param projectedFields - * @param sharedSD - * @param partitionWithoutSDS - * @param partitionWithoutSDSIndex - * @throws IllegalAccessException - * @throws InvocationTargetException - * @throws NoSuchMethodException - */ - private void comparePartitionForSingleValuedFields(List<String> projectedFields, - StorageDescriptor sharedSD, List<PartitionWithoutSD> partitionWithoutSDS, int partitionWithoutSDSIndex) - throws IllegalAccessException, InvocationTargetException, NoSuchMethodException { - for (Partition origPart : origPartitions) { - for (String projectField : projectedFields) { - // dbname, tableName and catName is not stored in partition - if (projectField.equals("dbName") || projectField.equals("tableName") || projectField - .equals("catName")) - continue; - if (projectField.startsWith("sd")) { - String sdPropertyName = projectField.substring(projectField.indexOf("sd.") + 3); - if (sdPropertyName.equals("location")) { - // in case of location sharedSD has the base location and partition has relative location - Assert.assertEquals("Location does not match", origPart.getSd().getLocation(), - sharedSD.getLocation() + partitionWithoutSDS.get(partitionWithoutSDSIndex).getRelativePath()); - } else { - Assert.assertEquals(PropertyUtils.getNestedProperty(origPart, projectField), - PropertyUtils.getNestedProperty(sharedSD, sdPropertyName)); - } - } else { - Assert.assertEquals(PropertyUtils.getNestedProperty(origPart, projectField), - PropertyUtils.getNestedProperty(partitionWithoutSDS.get(partitionWithoutSDSIndex), projectField)); - } - } - partitionWithoutSDSIndex++; - } - } - - @Test - public void testPartitionProjectionAllMultiValuedFields() throws Throwable { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - List<String> projectedFields = Arrays - .asList("values", "parameters", "sd.cols", "sd.bucketCols", "sd.sortCols", "sd.parameters", - "sd.skewedInfo", "sd.serdeInfo.parameters"); - projectSpec.setFieldList(projectedFields); - - GetPartitionsResponse response = client.getPartitionsWithSpecs(request); - - Assert.assertEquals(1, response.getPartitionSpec().size()); - PartitionSpec partitionSpec = response.getPartitionSpec().get(0); - PartitionSpecWithSharedSD partitionSpecWithSharedSD = partitionSpec.getSharedSDPartitionSpec(); - Assert.assertEquals(origPartitions.size(), partitionSpecWithSharedSD.getPartitions().size()); - StorageDescriptor sharedSD = partitionSpecWithSharedSD.getSd(); - for (int i = 0; i < origPartitions.size(); i++) { - Partition origPartition = origPartitions.get(i); - PartitionWithoutSD retPartition = partitionSpecWithSharedSD.getPartitions().get(i); - for (String projectedField : projectedFields) { - switch (projectedField) { - case "values": - validateList(origPartition.getValues(), retPartition.getValues()); - break; - case "parameters": - validateMap(origPartition.getParameters(), retPartition.getParameters()); - break; - case "sd.cols": - validateList(origPartition.getSd().getCols(), sharedSD.getCols()); - break; - case "sd.bucketCols": - validateList(origPartition.getSd().getBucketCols(), sharedSD.getBucketCols()); - break; - case "sd.sortCols": - validateList(origPartition.getSd().getSortCols(), sharedSD.getSortCols()); - break; - case "sd.parameters": - validateMap(origPartition.getSd().getParameters(), sharedSD.getParameters()); - break; - case "sd.skewedInfo": - if (!origPartition.getSd().getSkewedInfo().getSkewedColNames().isEmpty()) { - validateList(origPartition.getSd().getSkewedInfo().getSkewedColNames(), - sharedSD.getSkewedInfo().getSkewedColNames()); - } - if (!origPartition.getSd().getSkewedInfo().getSkewedColValues().isEmpty()) { - for (int i1 = 0; - i1 < origPartition.getSd().getSkewedInfo().getSkewedColValuesSize(); i1++) { - validateList(origPartition.getSd().getSkewedInfo().getSkewedColValues().get(i1), - sharedSD.getSkewedInfo().getSkewedColValues().get(i1)); - } - } - if (!origPartition.getSd().getSkewedInfo().getSkewedColValueLocationMaps().isEmpty()) { - validateMap(origPartition.getSd().getSkewedInfo().getSkewedColValueLocationMaps(), - sharedSD.getSkewedInfo().getSkewedColValueLocationMaps()); - } - break; - case "sd.serdeInfo.parameters": - validateMap(origPartition.getSd().getSerdeInfo().getParameters(), - sharedSD.getSerdeInfo().getParameters()); - break; - default: - throw new IllegalArgumentException("Invalid field " + projectedField); - } - } - } - } - - @Test - public void testPartitionProjectionIncludeParameters() throws Throwable { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - projectSpec - .setFieldList(Arrays.asList("dbName", "tableName", "catName", "parameters", "values")); - projectSpec.setIncludeParamKeyPattern(EXCLUDE_KEY_PREFIX + "%"); - - GetPartitionsResponse response = client.getPartitionsWithSpecs(request); - - PartitionSpecWithSharedSD partitionSpecWithSharedSD = - response.getPartitionSpec().get(0).getSharedSDPartitionSpec(); - Assert.assertNotNull("All the partitions should be returned in sharedSD spec", - partitionSpecWithSharedSD); - PartitionListComposingSpec partitionListComposingSpec = - response.getPartitionSpec().get(0).getPartitionList(); - Assert.assertNull("Partition list composing spec should be null since all the " - + "partitions are expected to be in sharedSD spec", partitionListComposingSpec); - for (PartitionWithoutSD retPartion : partitionSpecWithSharedSD.getPartitions()) { - Assert.assertTrue("included parameter key is not found in the response", - retPartion.getParameters().containsKey(EXCLUDE_KEY_PREFIX + "key1")); - Assert.assertTrue("included parameter key is not found in the response", - retPartion.getParameters().containsKey(EXCLUDE_KEY_PREFIX + "key2")); - Assert.assertEquals("Additional parameters returned other than inclusion keys", - 2, retPartion.getParameters().size()); - } - } - - @Test - public void testPartitionProjectionIncludeExcludeParameters() throws Throwable { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - projectSpec - .setFieldList(Arrays.asList("dbName", "tableName", "catName", "parameters", "values")); - // test parameter key inclusion using setIncludeParamKeyPattern - projectSpec.setIncludeParamKeyPattern(EXCLUDE_KEY_PREFIX + "%"); - projectSpec.setExcludeParamKeyPattern("%key1%"); - - GetPartitionsResponse response = client.getPartitionsWithSpecs(request); - - PartitionSpecWithSharedSD partitionSpecWithSharedSD = - response.getPartitionSpec().get(0).getSharedSDPartitionSpec(); - Assert.assertNotNull("All the partitions should be returned in sharedSD spec", - partitionSpecWithSharedSD); - PartitionListComposingSpec partitionListComposingSpec = - response.getPartitionSpec().get(0).getPartitionList(); - Assert.assertNull("Partition list composing spec should be null since all the " - + "partitions are expected to be in sharedSD spec", partitionListComposingSpec); - for (PartitionWithoutSD retPartion : partitionSpecWithSharedSD.getPartitions()) { - Assert.assertFalse("excluded parameter key is found in the response", - retPartion.getParameters().containsKey(EXCLUDE_KEY_PREFIX + "key1")); - Assert.assertTrue("included parameter key is not found in the response", - retPartion.getParameters().containsKey(EXCLUDE_KEY_PREFIX + "key2")); - Assert.assertEquals("Additional parameters returned other than inclusion keys", - 1, retPartion.getParameters().size()); - } - } - - @Test - public void testPartitionProjectionExcludeParameters() throws Throwable { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - projectSpec - .setFieldList(Arrays.asList("dbName", "tableName", "catName", "parameters", "values")); - projectSpec.setExcludeParamKeyPattern(EXCLUDE_KEY_PREFIX + "%"); - - GetPartitionsResponse response = client.getPartitionsWithSpecs(request); - - PartitionSpecWithSharedSD partitionSpecWithSharedSD = - response.getPartitionSpec().get(0).getSharedSDPartitionSpec(); - Assert.assertNotNull("All the partitions should be returned in sharedSD spec", - partitionSpecWithSharedSD); - PartitionListComposingSpec partitionListComposingSpec = - response.getPartitionSpec().get(0).getPartitionList(); - Assert.assertNull("Partition list composing spec should be null", partitionListComposingSpec); - for (PartitionWithoutSD retPartion : partitionSpecWithSharedSD.getPartitions()) { - Assert.assertFalse("excluded parameter key is found in the response", - retPartion.getParameters().containsKey(EXCLUDE_KEY_PREFIX + "key1")); - Assert.assertFalse("excluded parameter key is found in the response", - retPartion.getParameters().containsKey(EXCLUDE_KEY_PREFIX + "key2")); - } - } - - @Test - public void testNestedMultiValuedFieldProjection() throws TException { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - projectSpec.setFieldList(Arrays.asList("sd.cols.name", "sd.cols.type")); - - GetPartitionsResponse response = client.getPartitionsWithSpecs(request); - - PartitionSpecWithSharedSD partitionSpecWithSharedSD = - response.getPartitionSpec().get(0).getSharedSDPartitionSpec(); - StorageDescriptor sharedSD = partitionSpecWithSharedSD.getSd(); - Assert.assertNotNull("sd.cols were requested but was not returned", sharedSD.getCols()); - for (FieldSchema col : sharedSD.getCols()) { - Assert.assertTrue("sd.cols.name was requested but was not returned", col.isSetName()); - Assert.assertTrue("sd.cols.type was requested but was not returned", col.isSetType()); - Assert.assertFalse("sd.cols.comment was not requested but was returned", col.isSetComment()); - } - } - - @Test - public void testParameterExpansion() throws TException { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - projectSpec.setFieldList(Arrays.asList("sd.cols", "sd.serdeInfo")); - - GetPartitionsResponse response = client.getPartitionsWithSpecs(request); - - PartitionSpecWithSharedSD partitionSpecWithSharedSD = - response.getPartitionSpec().get(0).getSharedSDPartitionSpec(); - StorageDescriptor sharedSD = partitionSpecWithSharedSD.getSd(); - Assert.assertNotNull("sd.cols were requested but was not returned", sharedSD.getCols()); - Assert.assertEquals("Returned serdeInfo does not match with original serdeInfo", - origPartitions.get(0).getSd().getCols(), sharedSD.getCols()); - - Assert - .assertNotNull("sd.serdeInfo were requested but was not returned", sharedSD.getSerdeInfo()); - Assert.assertEquals("Returned serdeInfo does not match with original serdeInfo", - origPartitions.get(0).getSd().getSerdeInfo(), sharedSD.getSerdeInfo()); - } - - @Test - public void testNonStandardPartitions() throws TException { - String testTblName = "test_non_standard"; - new TableBuilder() - .setTableName(testTblName) - .setDbName(dbName) - .addCol("ns_c1", "string", "comment 1") - .addCol("ns_c2", "int", "comment 2") - .addPartCol("part", "string") - .addPartCol("city", "string") - .addBucketCol("ns_c1") - .addSortCol("ns_c2", 1) - .addTableParam("tblparamKey", "Partitions of this table are not located within table directory") - .create(client, conf); - - Table table = client.getTable(dbName, testTblName); - Assert.assertNotNull("Unable to create a test table ", table); - - List<Partition> partitions = new ArrayList<>(); - partitions.add(createPartition(Arrays.asList("p1", "SanFrancisco"), table)); - partitions.add(createPartition(Arrays.asList("p1", "PaloAlto"), table)); - partitions.add(createPartition(Arrays.asList("p2", "Seattle"), table)); - partitions.add(createPartition(Arrays.asList("p2", "Phoenix"), table)); - - client.add_partitions(partitions); - // change locations of two of the partitions outside table directory - List<Partition> testPartitions = client.listPartitions(dbName, testTblName, (short) -1); - Assert.assertEquals(4, testPartitions.size()); - Partition p1 = testPartitions.get(2); - p1.getSd().setLocation("/tmp/some_other_location/part=p2/city=Seattle"); - Partition p2 = testPartitions.get(3); - p2.getSd().setLocation("/tmp/some_other_location/part=p2/city=Phoenix"); - client.alter_partitions(dbName, testTblName, Arrays.asList(p1, p2)); - - GetPartitionsRequest request = getGetPartitionsRequest(); - request.getProjectionSpec().setFieldList(Arrays.asList("values", "sd")); - request.setDbName(dbName); - request.setTblName(testTblName); - - GetPartitionsResponse response = client.getPartitionsWithSpecs(request); - Assert.assertNotNull("Response should have returned partition specs", - response.getPartitionSpec()); - Assert - .assertEquals("We should have two partition specs", 2, response.getPartitionSpec().size()); - Assert.assertNotNull("One SharedSD spec is expected", - response.getPartitionSpec().get(0).getSharedSDPartitionSpec()); - Assert.assertNotNull("One composing spec is expected", - response.getPartitionSpec().get(1).getPartitionList()); - - PartitionSpecWithSharedSD partitionSpecWithSharedSD = - response.getPartitionSpec().get(0).getSharedSDPartitionSpec(); - Assert.assertNotNull("sd was requested but not returned", partitionSpecWithSharedSD.getSd()); - Assert.assertEquals("shared SD should have table location", table.getSd().getLocation(), - partitionSpecWithSharedSD.getSd().getLocation()); - List<List<String>> expectedVals = new ArrayList<>(2); - expectedVals.add(Arrays.asList("p1", "PaloAlto")); - expectedVals.add(Arrays.asList("p1", "SanFrancisco")); - - for (int i=0; i<partitionSpecWithSharedSD.getPartitions().size(); i++) { - PartitionWithoutSD retPartition = partitionSpecWithSharedSD.getPartitions().get(i); - Assert.assertEquals(2, retPartition.getValuesSize()); - validateList(expectedVals.get(i), retPartition.getValues()); - Assert.assertNull("parameters were not requested so should have been null", - retPartition.getParameters()); - } - - PartitionListComposingSpec composingSpec = - response.getPartitionSpec().get(1).getPartitionList(); - Assert.assertNotNull("composing spec should have returned 2 partitions", - composingSpec.getPartitions()); - Assert.assertEquals("composing spec should have returned 2 partitions", 2, - composingSpec.getPartitionsSize()); - - expectedVals.clear(); - expectedVals.add(Arrays.asList("p2", "Phoenix")); - expectedVals.add(Arrays.asList("p2", "Seattle")); - for (int i=0; i<composingSpec.getPartitions().size(); i++) { - Partition partition = composingSpec.getPartitions().get(i); - Assert.assertEquals(2, partition.getValuesSize()); - validateList(expectedVals.get(i), partition.getValues()); - Assert.assertNull("parameters were not requested so should have been null", - partition.getParameters()); - } - } - - @Test(expected = TException.class) - public void testInvalidProjectFieldNames() throws TException { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - projectSpec.setFieldList(Arrays.asList("values", "invalid.field.name")); - client.getPartitionsWithSpecs(request); - } - - @Test(expected = TException.class) - public void testInvalidProjectFieldNames2() throws TException { - GetPartitionsRequest request = getGetPartitionsRequest(); - GetPartitionsProjectionSpec projectSpec = request.getProjectionSpec(); - projectSpec.setFieldList(Arrays.asList("")); - client.getPartitionsWithSpecs(request); - } - - private void validateBasic(GetPartitionsResponse response) throws TException { - Assert.assertNotNull("Response is null", response); - Assert.assertNotNull("Returned partition spec is null", response.getPartitionSpec()); - Assert.assertEquals(1, response.getPartitionSpecSize()); - PartitionSpecWithSharedSD partitionSpecWithSharedSD = - response.getPartitionSpec().get(0).getSharedSDPartitionSpec(); - Assert.assertNotNull(partitionSpecWithSharedSD.getSd()); - StorageDescriptor sharedSD = partitionSpecWithSharedSD.getSd(); - Assert.assertEquals("Root location should be set to table location", tbl.getSd().getLocation(), - sharedSD.getLocation()); - - List<PartitionWithoutSD> partitionWithoutSDS = partitionSpecWithSharedSD.getPartitions(); - Assert.assertEquals(origPartitions.size(), partitionWithoutSDS.size()); - for (int i = 0; i < origPartitions.size(); i++) { - Partition origPartition = origPartitions.get(i); - PartitionWithoutSD returnedPartitionWithoutSD = partitionWithoutSDS.get(i); - Assert.assertEquals(String.format("Location returned for Partition %d is not correct", i), - origPartition.getSd().getLocation(), - sharedSD.getLocation() + returnedPartitionWithoutSD.getRelativePath()); - } - } - - private GetPartitionsRequest getGetPartitionsRequest() { - GetPartitionsRequest request = new GetPartitionsRequest(); - request.setProjectionSpec(new GetPartitionsProjectionSpec()); - request.setFilterSpec(new GetPartitionsFilterSpec()); - request.setTblName(tblName); - request.setDbName(dbName); - return request; - } - - private <K, V> void validateMap(Map<K, V> aMap, Map<K, V> bMap) { - if ((aMap == null || aMap.isEmpty()) && (bMap == null || bMap.isEmpty())) { - return; - } - // Equality is verified here because metastore updates stats automatically - // and adds them in the returned partition. So the returned partition will - // have parameters + some more parameters for the basic stats - Assert.assertTrue(bMap.size() >= aMap.size()); - for (Entry<K, V> entries : aMap.entrySet()) { - Assert.assertTrue("Expected " + entries.getKey() + " is missing from the map", - bMap.containsKey(entries.getKey())); - Assert.assertEquals("Expected value to be " + aMap.get(entries.getKey()) + " found" + bMap - .get(entries.getKey()), aMap.get(entries.getKey()), bMap.get(entries.getKey())); - } - } - - private <T> void validateList(List<T> aList, List<T> bList) { - if ((aList == null || aList.isEmpty()) && (bList == null || bList.isEmpty())) { - return; - } - Assert.assertEquals(aList.size(), bList.size()); - Iterator<T> origValuesIt = aList.iterator(); - Iterator<T> retValuesIt = bList.iterator(); - while (origValuesIt.hasNext()) { - Assert.assertTrue(retValuesIt.hasNext()); - Assert.assertEquals(origValuesIt.next(), retValuesIt.next()); - } - } -}