deniskuzZ commented on code in PR #4748:
URL: https://github.com/apache/hive/pull/4748#discussion_r1350133561
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -1831,4 +1833,47 @@ public ColumnInfo
getColumnInfo(org.apache.hadoop.hive.ql.metadata.Table hmsTabl
throw new SemanticException(String.format("Unable to find a column with
the name: %s", colName));
}
}
+
+ @Override
+ public boolean supportsMetadataDelete() {
+ return true;
+ }
+
+ @Override
+ public boolean
canPerformMetadataDelete(org.apache.hadoop.hive.ql.metadata.Table hmsTable,
SearchArgument sarg) {
+ if (!supportsMetadataDelete()) {
+ return false;
+ }
+
+ Expression exp;
+ try {
+ exp = HiveIcebergFilterFactory.generateFilterExpression(sarg);
+ } catch (UnsupportedOperationException e) {
+ LOG.warn("Unable to create Iceberg filter," +
+ " continuing without metadata delete: ", e);
+ return false;
+ }
+ Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+ FindFiles.Builder builder = new
FindFiles.Builder(table).withRecordsMatching(exp).includeColumnStats();
+ Set<DataFile> dataFiles = Sets.newHashSet(builder.collect());
+ boolean result = true;
+ for (DataFile dataFile : dataFiles) {
+ PartitionData partitionData = (PartitionData) dataFile.partition();
+ Expression residual = ResidualEvaluator.of(table.spec(), exp, false)
+ .residualFor(partitionData);
+ StrictMetricsEvaluator strictMetricsEvaluator = new
StrictMetricsEvaluator(table.schema(), residual);
+ if (!strictMetricsEvaluator.eval(dataFile)) {
+ result = false;
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public void performMetadataDelete(org.apache.hadoop.hive.ql.metadata.Table
hmsTable, SearchArgument sarg) {
+ Expression exp = HiveIcebergFilterFactory.generateFilterExpression(sarg);
+ Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+ DeleteFiles deleteFiles = table.newDelete().deleteFromRowFilter(exp);
+ deleteFiles.commit();
Review Comment:
OK — it looks like this path performs either a metadata-only delete or a regular delete. Should we
still validate the snapshot and use the proper branch here as well?
````
if (StringUtils.isNotEmpty(branchName)) {
write.toBranch(HiveUtils.getTableSnapshotRef(branchName));
}
Long snapshotId = getSnapshotId(table, branchName);
if (snapshotId != null) {
write.validateFromSnapshot(snapshotId);
}
if (!results.dataFiles().isEmpty()) {
write.validateDeletedFiles();
write.validateNoConflictingDeleteFiles();
}
write.validateNoConflictingDataFiles();
write.commit();
````
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]