wecharyu commented on code in PR #5539:
URL: https://github.com/apache/hive/pull/5539#discussion_r1890470526
##########
ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/show/status/formatter/ShowTableStatusFormatter.java:
##########
@@ -22,9 +22,15 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.GetPartitionsFilterSpec;
Review Comment:
nit: remove unused imports.
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java:
##########
@@ -4466,6 +4526,81 @@ public List<Partition> getPartitionsByFilter(Table tbl,
String filter)
return convertFromMetastore(tbl, tParts);
}
+ public List<Partition> getPartitionsWithSpecs(Table tbl,
GetPartitionsRequest request)
+ throws HiveException, TException {
+
+ if (!tbl.isPartitioned()) {
+ throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED,
tbl.getTableName());
+ }
+ int batchSize= MetastoreConf.getIntVar(Hive.get().getConf(),
MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
+ if(batchSize > 0){
+ return new ArrayList<>(getAllPartitionsWithSpecsInBatches(tbl,
batchSize, DEFAULT_BATCH_DECAYING_FACTOR, MetastoreConf.getIntVar(
+ Hive.get().getConf(),
MetastoreConf.ConfVars.GETPARTITIONS_BATCH_MAX_RETRIES), request));
+ }else{
+ return getPartitionsWithSpecsInternal(tbl, request);
+ }
+ }
+
+ public List<Partition> getPartitionsWithSpecsInternal(Table tbl,
GetPartitionsRequest request)
+ throws HiveException, TException {
+
+ if (!tbl.isPartitioned()) {
+ throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED,
tbl.getTableName());
+ }
+ GetPartitionsResponse response = getMSC().getPartitionsWithSpecs(request);
+ List<org.apache.hadoop.hive.metastore.api.PartitionSpec> partitionSpecs =
response.getPartitionSpec();
+ List<Partition> partitions = new ArrayList<>();
+ partitions.addAll(convertFromPartSpec(partitionSpecs.iterator(), tbl));
+
+ return partitions;
+ }
+
+ List<Partition> getPartitionsWithSpecsByNames(Table tbl, List<String>
partNames, GetPartitionsRequest request)
+ throws HiveException, TException {
+
+ if (!tbl.isPartitioned()) {
+ throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED,
tbl.getTableName());
+ }
+ List<Partition> partitions = new ArrayList<Partition>(partNames.size());
+
+ int batchSize = HiveConf.getIntVar(conf,
HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
+ int nParts = partNames.size();
+ int nBatches = nParts / batchSize;
+ // I do not want to modify the original request when implementing
batching, hence we will know what actual request was being made
+ GetPartitionsRequest req = request;
+ if (!req.isSetFilterSpec()) {
+ req.setFilterSpec(new GetPartitionsFilterSpec());
+ }
+
+ try {
+ for (int i = 0; i < nBatches; ++i) {
+ req.getFilterSpec().setFilters(partNames.subList(i*batchSize,
(i+1)*batchSize));
+ req.getFilterSpec().setFilterMode(PartitionFilterMode.BY_NAMES);
+ List<Partition> tParts = getPartitionsWithSpecsInternal(tbl, request);
+
+ if (tParts != null) {
+ for (Partition tpart: tParts) {
+ partitions.add(new Partition(tbl, tpart.getTPartition()));
Review Comment:
can we use `partitions.addAll(tParts)` directly here?
##########
ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplExternalTables.java:
##########
@@ -97,15 +100,22 @@ void dataLocationDump(Table table, FileList fileList,
HashMap<String, Boolean> s
}
if (table.isPartitioned()) {
List<Partition> partitions;
+ GetPartitionsRequest request = new
GetPartitionsRequest(table.getDbName(), table.getTableName(),
+ null, null);
+ request.setCatName(table.getCatName());
+ request.setProjectionSpec(
+ new
org.apache.hadoop.hive.metastore.client.builder.GetPartitionProjectionsSpecBuilder()
+ .addProjectField("catName").addProjectField("tableName")
Review Comment:
use `addProjectFieldList()` here.
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java:
##########
@@ -4086,6 +4091,37 @@ public List<String> getPartitionNames(String dbName,
String tblName,
return names;
}
+ // get partition names from provided partition values
+ public List<String> getPartitionNames(String dbName, String tblName,
+ short max, List<String> pVals) throws HiveException {
+ List<String> names = null;
+ Table t = getTable(dbName, tblName);
+
+ try {
+ GetPartitionNamesPsRequest req = new GetPartitionNamesPsRequest();
+ req.setTblName(tblName);
+ req.setDbName(dbName);
+ req.setPartValues(pVals);
+ req.setMaxParts(max);
+ if (AcidUtils.isTransactionalTable(t)) {
+ ValidWriteIdList validWriteIdList = getValidWriteIdList(dbName,
tblName);
+ req.setValidWriteIdList(validWriteIdList != null ?
validWriteIdList.toString() : null);
+ req.setId(t.getTTable().getId());
+ }
+ GetPartitionNamesPsResponse res =
getMSC().listPartitionNamesRequest(req);
+ names = res.getNames();
+ } catch (NoSuchObjectException nsoe) {
+ // this means no partition exists for the given partition spec
+ // key value pairs - thrift cannot handle null return values, hence
+ // listPartitionNames() throws NoSuchObjectException to indicate null
partitions
+ return Lists.newArrayList();
Review Comment:
The `NoSuchObjectException` is thrown when the table does not exist; it does
not mean the partition does not exist.
##########
ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java:
##########
@@ -77,9 +85,24 @@ protected void analyzeCommand(TableName tableName,
Map<String, String> partition
if (AcidUtils.isTransactionalTable(sourceTable) ||
AcidUtils.isTransactionalTable(destTable)) {
throw new
SemanticException(ErrorMsg.EXCHANGE_PARTITION_NOT_ALLOWED_WITH_TRANSACTIONAL_TABLES.getMsg());
}
+ List<String> sourceProjectFilters =
MetaStoreUtils.getPvals(sourceTable.getPartCols(), partitionSpecs);
// check if source partition exists
- PartitionUtils.getPartitions(db, sourceTable, partitionSpecs, true);
+ GetPartitionsFilterSpec getSourcePartitionsFilterSpec = new
GetPartitionsFilterSpec();
+ getSourcePartitionsFilterSpec.setFilters(sourceProjectFilters);
+ getSourcePartitionsFilterSpec.setFilterMode(PartitionFilterMode.BY_VALUES);
+
+ GetProjectionsSpec getProjectionsSpec = new
GetPartitionProjectionsSpecBuilder()
+ .addProjectFieldList(Arrays.asList("values","lastAccessTime")).build();
+
+ GetPartitionsRequest request = new
GetPartitionsRequest(sourceTable.getDbName(), sourceTable.getTableName(),
+ getProjectionsSpec, getSourcePartitionsFilterSpec);
+ request.setCatName(sourceTable.getCatName());
+ try {
+ PartitionUtils.getPartitionsWithSpecs(db, sourceTable, request, true);
+ } catch (SemanticException ex) {
Review Comment:
The `try ... catch()` clause is unnecessary here.
##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/client/builder/GetPartitionProjectionsSpecBuilder.java:
##########
@@ -41,6 +42,12 @@ public GetPartitionProjectionsSpecBuilder
addProjectField(String field) {
return this;
}
+ public GetPartitionProjectionsSpecBuilder addProjectFieldList(List<String>
fields) {
+ fieldList.addAll(Arrays.asList("catName","dbName","tableName"));
Review Comment:
why are these fields needed here?
##########
ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/exchange/AlterTableExchangePartitionAnalyzer.java:
##########
@@ -77,9 +85,24 @@ protected void analyzeCommand(TableName tableName,
Map<String, String> partition
if (AcidUtils.isTransactionalTable(sourceTable) ||
AcidUtils.isTransactionalTable(destTable)) {
throw new
SemanticException(ErrorMsg.EXCHANGE_PARTITION_NOT_ALLOWED_WITH_TRANSACTIONAL_TABLES.getMsg());
}
+ List<String> sourceProjectFilters =
MetaStoreUtils.getPvals(sourceTable.getPartCols(), partitionSpecs);
// check if source partition exists
- PartitionUtils.getPartitions(db, sourceTable, partitionSpecs, true);
+ GetPartitionsFilterSpec getSourcePartitionsFilterSpec = new
GetPartitionsFilterSpec();
+ getSourcePartitionsFilterSpec.setFilters(sourceProjectFilters);
+ getSourcePartitionsFilterSpec.setFilterMode(PartitionFilterMode.BY_VALUES);
+
+ GetProjectionsSpec getProjectionsSpec = new
GetPartitionProjectionsSpecBuilder()
+ .addProjectFieldList(Arrays.asList("values","lastAccessTime")).build();
Review Comment:
the project field list seems unnecessary since we are just checking whether
the partition exists?
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java:
##########
@@ -4466,6 +4526,81 @@ public List<Partition> getPartitionsByFilter(Table tbl,
String filter)
return convertFromMetastore(tbl, tParts);
}
+ public List<Partition> getPartitionsWithSpecs(Table tbl,
GetPartitionsRequest request)
+ throws HiveException, TException {
+
+ if (!tbl.isPartitioned()) {
+ throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED,
tbl.getTableName());
+ }
+ int batchSize= MetastoreConf.getIntVar(Hive.get().getConf(),
MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
+ if(batchSize > 0){
+ return new ArrayList<>(getAllPartitionsWithSpecsInBatches(tbl,
batchSize, DEFAULT_BATCH_DECAYING_FACTOR, MetastoreConf.getIntVar(
+ Hive.get().getConf(),
MetastoreConf.ConfVars.GETPARTITIONS_BATCH_MAX_RETRIES), request));
+ }else{
+ return getPartitionsWithSpecsInternal(tbl, request);
+ }
+ }
+
+ public List<Partition> getPartitionsWithSpecsInternal(Table tbl,
GetPartitionsRequest request)
+ throws HiveException, TException {
+
+ if (!tbl.isPartitioned()) {
+ throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED,
tbl.getTableName());
+ }
+ GetPartitionsResponse response = getMSC().getPartitionsWithSpecs(request);
+ List<org.apache.hadoop.hive.metastore.api.PartitionSpec> partitionSpecs =
response.getPartitionSpec();
+ List<Partition> partitions = new ArrayList<>();
+ partitions.addAll(convertFromPartSpec(partitionSpecs.iterator(), tbl));
+
+ return partitions;
+ }
+
+ List<Partition> getPartitionsWithSpecsByNames(Table tbl, List<String>
partNames, GetPartitionsRequest request)
+ throws HiveException, TException {
+
+ if (!tbl.isPartitioned()) {
+ throw new HiveException(ErrorMsg.TABLE_NOT_PARTITIONED,
tbl.getTableName());
+ }
+ List<Partition> partitions = new ArrayList<Partition>(partNames.size());
+
+ int batchSize = HiveConf.getIntVar(conf,
HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX);
+ int nParts = partNames.size();
+ int nBatches = nParts / batchSize;
+ // I do not want to modify the original request when implementing
batching, hence we will know what actual request was being made
+ GetPartitionsRequest req = request;
+ if (!req.isSetFilterSpec()) {
+ req.setFilterSpec(new GetPartitionsFilterSpec());
+ }
+
+ try {
+ for (int i = 0; i < nBatches; ++i) {
Review Comment:
use `org.apache.hadoop.hive.metastore.Batchable` to run batches.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]