rdblue commented on a change in pull request #416: Adding docs on how to use custom catalog with iceberg URL: https://github.com/apache/incubator-iceberg/pull/416#discussion_r318252717
########## File path: site/docs/custom-catalog.md ########## @@ -0,0 +1,149 @@ +# Custom Catalog Implementation + +It's possible to read an iceberg table either from an hdfs path or from a hive table. It's also possible to use a custom metastore in place of hive. The steps to do that are as follows. + +- [Custom TableOperations](#custom-table-operations-implementation) +- [Custom Catalog](#custom-table-implementation) +- [Custom IcebergSource](#custom-icebergsource) + +### Custom table operations implementation +Extend `BaseMetastoreTableOperations` to provide implementation on how to read and write metadata + +Example: +```java +class CustomTableOperations extends BaseMetastoreTableOperations { + private String dbName; + private String tableName; + private Configuration conf; + private FileIO fileIO; + + protected CustomTableOperations(Configuration conf, String dbName, String tableName) { + this.conf = conf; + this.dbName = dbName; + this.tableName = tableName; + } + + // The doRefresh method should provide implementation on how to get the metadata location + @Override + public void doRefresh() { + + // Example custom service which returns the metadata location given a dbName and tableName + String metadataLocation = CustomService.getMetadataForTable(conf, dbName, tableName); + + // Use existing method to refresh metadata + refreshFromMetadataLocation(metadataLocation); + + } + + // The doCommit method should provide implementation on how to update with metadata location atomically + @Override + public void doCommit(TableMetadata base, TableMetadata metadata) { + // if the metadata is already out of date, reject it + if (base != current()) { + throw new CommitFailedException("Cannot commit: stale table metadata for %s.%s", dbName, tableName); + } + + // if the metadata is not changed, return early + if (base == metadata) { + return; + } + + String oldMetadataLocation = base.location(); + + // Write new metadata + String newMetadataLocation = 
writeNewMetadata(metadata, currentVersion() + 1); + + // Example custom service which updates the metadata location for the given db and table atomically + CustomService.updateMetadataLocation(dbName, tableName, oldMetadataLocation, newMetadataLocation); + + } + + // The io method provides a FileIO which is used to read and write the table metadata files + @Override + public FileIO io() { + if (fileIO == null) { + fileIO = new HadoopFileIO(conf); + } + return fileIO; + } + + // Optional: this can be overridden to provide custom location provider implementation + @Override + public LocationProvider locationProvider() { + // TODO + } +} +``` + +### Custom table implementation +Extend `BaseMetastoreCatalog` to provide default warehouse locations and instantiate `CustomTableOperations` + +Example: +```java +public class CustomCatalog extends BaseMetastoreCatalog { + + private Configuration configuration; + + public CustomCatalog(Configuration configuration) { + this.configuration = configuration; + } + + @Override + protected TableOperations newTableOps(TableIdentifier tableIdentifier) { + String dbName = tableIdentifier.namespace().level(0); + String tableName = tableIdentifier.name(); + // instantiate the CustomTableOperations + return new CustomTableOperations(configuration, dbName, tableName); + } + + @Override + protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { + + // Can choose to use any other configuration name + String tableLocation = configuration.get("custom.iceberg.warehouse.location"); + + // Can be an s3 or hdfs path + if (tableLocation == null) { + throw new RuntimeException("custom.iceberg.warehouse.location configuration not set!"); + } + + return String.format( + "%s/%s.db/%s", tableLocation, + tableIdentifier.namespace().levels()[0], + tableIdentifier.name()); + } + + @Override + public boolean dropTable(TableIdentifier identifier, boolean purge) { + // TODO implement behavior + throw new RuntimeException("Not yet 
implemented"); Review comment: Maybe `CustomService.deleteTable(identifier.namespace().level(0), identifier.name())`? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org