github-actions[bot] commented on code in PR #64269:
URL: https://github.com/apache/doris/pull/64269#discussion_r3378088834
##########
fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java:
##########
@@ -427,7 +437,65 @@ private void performDropTable(String remoteDbName, String
remoteTblName, boolean
ErrorReport.reportDdlException(ErrorCode.ERR_UNKNOWN_TABLE,
remoteTblName, remoteDbName);
}
}
- catalog.dropTable(getTableIdentifier(remoteDbName, remoteTblName),
true);
+ TableIdentifier tableIdentifier = getTableIdentifier(remoteDbName,
remoteTblName);
+ Optional<String> tableLocation = shouldCleanupManagedLocation()
+ ? loadTableLocation(tableIdentifier) : Optional.empty();
+ catalog.dropTable(tableIdentifier, true);
+ tableLocation.ifPresent(location ->
+ cleanupEmptyLocation(location, "table", remoteDbName + "." +
remoteTblName));
+ }
+
+ private Optional<String> loadNamespaceLocation(Namespace namespace) {
+ Map<String, String> namespaceMetadata =
nsCatalog.loadNamespaceMetadata(namespace);
+ String location = namespaceMetadata.get(NAMESPACE_LOCATION_PROP);
+ return StringUtils.isBlank(location) ? Optional.empty() :
Optional.of(location);
+ }
+
+ private Optional<String> loadTableLocation(TableIdentifier
tableIdentifier) {
+ String location = catalog.loadTable(tableIdentifier).location();
+ return StringUtils.isBlank(location) ? Optional.empty() :
Optional.of(location);
+ }
+
+ private void cleanupEmptyLocation(String location, String objectType,
String objectName) {
+ if (!shouldCleanupManagedLocation() || StringUtils.isBlank(location)) {
+ return;
+ }
+ try (FileSystem fs = createCleanupFileSystem()) {
+ boolean deleted = deleteEmptyDirectory(fs, Location.of(location));
+ if (deleted) {
+ LOG.info("Cleaned empty Iceberg {} location {} for {}",
objectType, location, objectName);
+ } else {
+ LOG.info("Skip cleaning Iceberg {} location {} for {}, because
it still contains files",
+ objectType, location, objectName);
+ }
+ } catch (Exception e) {
+ LOG.warn("Failed to clean Iceberg {} location {} for {} after
drop",
+ objectType, location, objectName, e);
+ }
+ }
+
+ private boolean shouldCleanupManagedLocation() {
+ // Only cleanup HMS-Iceberg location
+ return dorisCatalog instanceof IcebergExternalCatalog
+ && IcebergExternalCatalog.ICEBERG_HMS.equals(
+ ((IcebergExternalCatalog)
dorisCatalog).getIcebergCatalogType());
+ }
+
+ @VisibleForTesting
+ protected FileSystem createCleanupFileSystem() {
+ return new
SpiSwitchingFileSystem(dorisCatalog.getCatalogProperty().getStoragePropertiesMap());
+ }
+
+ @VisibleForTesting
+ static boolean deleteEmptyDirectory(FileSystem fs, Location location)
throws IOException {
+ if (!fs.exists(location)) {
+ return true;
+ }
+ if (!fs.listFilesRecursive(location).isEmpty()) {
+ return false;
+ }
+ fs.delete(location, true);
+ return true;
Review Comment:
Please avoid the check-then-recursive-delete pattern here.
`listFilesRecursive()` only proves that no files were present at listing time;
`fs.delete(location, true)` then deletes the entire directory/prefix. If
another HMS/Iceberg client recreates this table/database location or writes a
file under the same warehouse path after the listing but before this delete,
Doris will remove that newly-created file even though it was never part of the
dropped object. This applies to HDFS as a non-atomic empty-directory delete
race and to S3 because recursive delete removes every object under the prefix.
Please make the final cleanup fail-safe, for example by using a non-recursive
empty-directory/marker delete that fails if a child appears, or by deleting
only the specific marker keys observed during the empty-marker cleanup rather
than recursively deleting the whole location after a separate emptiness check.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]