talatuyarer commented on code in PR #35787: URL: https://github.com/apache/beam/pull/35787#discussion_r2320350283
########## sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/provider/iceberg/IcebergMetastore.java: ########## @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.sql.meta.provider.iceberg; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; + +import java.util.HashMap; +import java.util.Map; +import org.apache.beam.sdk.extensions.sql.TableUtils; +import org.apache.beam.sdk.extensions.sql.impl.TableName; +import org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable; +import org.apache.beam.sdk.extensions.sql.meta.Table; +import org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider; +import org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore; +import org.apache.beam.sdk.io.iceberg.IcebergCatalogConfig; +import org.apache.beam.sdk.io.iceberg.IcebergCatalogConfig.IcebergTableInfo; +import org.apache.beam.sdk.io.iceberg.TableAlreadyExistsException; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; +import 
org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class IcebergMetastore extends InMemoryMetaStore { + private static final Logger LOG = LoggerFactory.getLogger(IcebergMetastore.class); + @VisibleForTesting final IcebergCatalogConfig catalogConfig; + private final Map<String, Table> cachedTables = new HashMap<>(); + private final String database; + + public IcebergMetastore(String db, IcebergCatalogConfig catalogConfig) { + this.database = db; + this.catalogConfig = catalogConfig; + } + + @Override + public String getTableType() { + return "iceberg"; + } + + @Override + public void createTable(Table table) { + if (!table.getType().equals("iceberg")) { + getProvider(table.getType()).createTable(table); + } else { + String identifier = getIdentifier(table); + try { + catalogConfig.createTable(identifier, table.getSchema(), table.getPartitionFields()); + } catch (TableAlreadyExistsException e) { + LOG.info( + "Iceberg table '{}' already exists at location '{}'.", table.getName(), identifier); + } + } + cachedTables.put(table.getName(), table); + } + + @Override + public void dropTable(String tableName) { + String identifier = getIdentifier(tableName); + if (catalogConfig.dropTable(identifier)) { + LOG.info("Dropped table '{}' (path: '{}').", tableName, identifier); + } else { + LOG.info( + "Ignoring DROP TABLE call for '{}' (path: '{}') because it does not exist.", + tableName, + identifier); + } + cachedTables.remove(tableName); + } + + @Override + public Map<String, Table> getTables() { + for (String id : catalogConfig.listTables(database)) { Review Comment: @ahmedabu98 How frequently do we call this method? I feel this is very inefficient. 
########## sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/CatalogManagerSchema.java: ########## @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.sql.impl; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; +import static org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.util.Static.RESOURCE; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.beam.sdk.extensions.sql.impl.parser.SqlDdlNodes; +import org.apache.beam.sdk.extensions.sql.meta.catalog.Catalog; +import org.apache.beam.sdk.extensions.sql.meta.catalog.CatalogManager; +import org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider; +import org.apache.beam.sdk.extensions.sql.meta.store.MetaStore; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.linq4j.tree.Expression; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.Function; +import 
org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.Schema; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.SchemaPlus; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.SchemaVersion; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.Schemas; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.Table; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.sql.SqlIdentifier; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.sql.SqlUtil; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A Calcite {@link Schema} that corresponds to a {@link CatalogManager}. This is typically the root + * node of a pipeline. Child schemas are of type {@link CatalogSchema}. 
+ */ +public class CatalogManagerSchema implements Schema { + private static final Logger LOG = LoggerFactory.getLogger(CatalogManagerSchema.class); + private final JdbcConnection connection; + private final CatalogManager catalogManager; + private final Map<String, CatalogSchema> catalogSubSchemas = new HashMap<>(); + + CatalogManagerSchema(JdbcConnection jdbcConnection, CatalogManager catalogManager) { + this.connection = jdbcConnection; + this.catalogManager = catalogManager; + } + + @VisibleForTesting + public JdbcConnection connection() { + return connection; + } + + public void createCatalog( + SqlIdentifier catalogIdentifier, + String type, + Map<String, String> properties, + boolean replace, + boolean ifNotExists) { + String name = SqlDdlNodes.name(catalogIdentifier); + if (catalogManager.getCatalog(name) != null) { + if (replace) { + LOG.info("Replacing existing catalog '{}'", name); + catalogManager.dropCatalog(name); + } else if (!ifNotExists) { + throw SqlUtil.newContextException( + catalogIdentifier.getParserPosition(), + RESOURCE.internal(String.format("Catalog '%s' already exists.", name))); + } else { + LOG.info("Catalog '{}' already exists", name); + return; + } + } + + // create the catalog Review Comment: nit: redundant comment ########## sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/parser/SqlUseDatabase.java: ########## @@ -66,38 +63,32 @@ public List<SqlNode> getOperandList() { public void execute(CalcitePrepare.Context context) { final Pair<CalciteSchema, String> pair = SqlDdlNodes.schema(context, true, databaseName); Schema schema = pair.left.schema; - String name = checkStateNotNull(pair.right); + String path = databaseName.toString(); + List<String> components = Lists.newArrayList(Splitter.on(".").split(path)); + TableName pathOverride = TableName.create(components, ""); - if (!(schema instanceof BeamCalciteSchema)) { - throw SqlUtil.newContextException( - databaseName.getParserPosition(), - 
RESOURCE.internal("Schema is not of instance BeamCalciteSchema")); - } - - BeamCalciteSchema beamCalciteSchema = (BeamCalciteSchema) schema; - @Nullable CatalogManager catalogManager = beamCalciteSchema.getCatalogManager(); - if (catalogManager == null) { + if (!(schema instanceof CatalogManagerSchema)) { throw SqlUtil.newContextException( databaseName.getParserPosition(), RESOURCE.internal( - String.format( - "Unexpected 'USE DATABASE' call using Schema '%s' that is not a Catalog.", - name))); - } - - Catalog catalog = catalogManager.currentCatalog(); - if (!catalog.listDatabases().contains(name)) { - throw SqlUtil.newContextException( - databaseName.getParserPosition(), - RESOURCE.internal(String.format("Cannot use database: '%s' not found.", name))); + "Attempting to create database '" + + path + + "' with unexpected Calcite Schema of type " + + schema.getClass())); } - if (name.equals(catalog.currentDatabase())) { - LOG.info("Database '{}' is already in use.", name); - return; + CatalogManagerSchema catalogManagerSchema = (CatalogManagerSchema) schema; + CatalogSchema catalogSchema = catalogManagerSchema.getCatalogSchema(pathOverride); + // if database exists in a different catalog, we need to also switch to that catalog + if (pathOverride.catalog() != null Review Comment: Can you add a specific test case that ensures if the database switch fails for some reason, the session context doesn't get stuck in an inconsistent state ########## sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/meta/store/InMemoryMetaStore.java: ########## @@ -112,22 +119,35 @@ private void initTablesFromProvider(TableProvider provider) { this.tables.putAll(tables); } - Map<String, TableProvider> getProviders() { + @Override + public Map<String, TableProvider> tableProviders() { return providers; } @Override public boolean supportsPartitioning(Table table) { - TableProvider provider = providers.get(table.getType()); - if (provider == null) { - throw new 
IllegalArgumentException( - "No TableProvider registered for table type: " + table.getType()); - } - return provider.supportsPartitioning(table); + return getProvider(table.getType()).supportsPartitioning(table); } + /** + * Fetches a {@link TableProvider} for this type. This provider can exist in the current {@link + * InMemoryMetaStore} or a nested {@link InMemoryMetaStore}. + * + * @param type + * @return + */ public TableProvider getProvider(String type) { - return checkArgumentNotNull( - providers.get(type), "No TableProvider registered for table type: " + type); + @Nullable TableProvider provider = providers.get(type); Review Comment: While testing, I hit a bug: if I use an UPPER CASE type, it does not match with the TableProvider. Could you always lower-case the type? ``` 0: BeamSQL> CREATE EXTERNAL TABLE iceberg_table_lower (id INTEGER, data VARCHAR) TYPE iceberg; Sep 03, 2025 1:43:01 PM org.apache.beam.sdk.io.iceberg.IcebergCatalogConfig createTable INFO: Attempting to create table 'beam_ns.iceberg_table_lower', with schema: table { 1: id: optional int 2: data: optional string }, partition spec: []. Sep 03, 2025 1:43:03 PM org.apache.beam.sdk.io.iceberg.IcebergCatalogConfig createTable INFO: Successfully created table 'beam_ns.iceberg_table_lower'. No rows affected (2.568 seconds) 0: BeamSQL> CREATE EXTERNAL TABLE iceberg_table_upper (id INTEGER, data VARCHAR) TYPE ICEBERG; Error: Error while executing SQL "CREATE EXTERNAL TABLE iceberg_table_upper (id INTEGER, data VARCHAR) TYPE ICEBERG": No TableProvider registered for table type: ICEBERG (state=,code=0) 0: BeamSQL> ``` ########## sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/CatalogManagerSchema.java: ########## @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.extensions.sql.impl; + +import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull; +import static org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.util.Static.RESOURCE; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.beam.sdk.extensions.sql.impl.parser.SqlDdlNodes; +import org.apache.beam.sdk.extensions.sql.meta.catalog.Catalog; +import org.apache.beam.sdk.extensions.sql.meta.catalog.CatalogManager; +import org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider; +import org.apache.beam.sdk.extensions.sql.meta.store.MetaStore; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.linq4j.tree.Expression; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.Function; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.Schema; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.SchemaPlus; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.SchemaVersion; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.Schemas; +import 
org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.schema.Table; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.sql.SqlIdentifier; +import org.apache.beam.vendor.calcite.v1_40_0.org.apache.calcite.sql.SqlUtil; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A Calcite {@link Schema} that corresponds to a {@link CatalogManager}. This is typically the root + * node of a pipeline. Child schemas are of type {@link CatalogSchema}. + */ +public class CatalogManagerSchema implements Schema { + private static final Logger LOG = LoggerFactory.getLogger(CatalogManagerSchema.class); + private final JdbcConnection connection; + private final CatalogManager catalogManager; + private final Map<String, CatalogSchema> catalogSubSchemas = new HashMap<>(); + + CatalogManagerSchema(JdbcConnection jdbcConnection, CatalogManager catalogManager) { + this.connection = jdbcConnection; + this.catalogManager = catalogManager; + } + + @VisibleForTesting + public JdbcConnection connection() { + return connection; + } + + public void createCatalog( + SqlIdentifier catalogIdentifier, + String type, + Map<String, String> properties, + boolean replace, + boolean ifNotExists) { + String name = SqlDdlNodes.name(catalogIdentifier); + if (catalogManager.getCatalog(name) != null) { + if (replace) { + LOG.info("Replacing existing catalog '{}'", name); + catalogManager.dropCatalog(name); + } else if (!ifNotExists) { + throw SqlUtil.newContextException( + catalogIdentifier.getParserPosition(), + RESOURCE.internal(String.format("Catalog '%s' already exists.", name))); + } else { + LOG.info("Catalog '{}' already exists", name); + 
return; + } + } + + // create the catalog + catalogManager.createCatalog(name, type, properties); + CatalogSchema catalogSchema = + new CatalogSchema(connection, checkStateNotNull(catalogManager.getCatalog(name))); + catalogSubSchemas.put(name, catalogSchema); + } + + public void useCatalog(SqlIdentifier catalogIdentifier) { + String name = catalogIdentifier.toString(); + if (catalogManager.getCatalog(catalogIdentifier.toString()) == null) { + throw SqlUtil.newContextException( + catalogIdentifier.getParserPosition(), + RESOURCE.internal(String.format("Cannot use catalog: '%s' not found.", name))); + } + + if (catalogManager.currentCatalog().name().equals(name)) { + LOG.info("Catalog '{}' is already in use.", name); + return; + } + + catalogManager.useCatalog(name); + LOG.info("Switched to catalog '{}' (type: {})", name, catalogManager.currentCatalog().type()); + } + + public void dropCatalog(SqlIdentifier identifier, boolean ifExists) { + String name = SqlDdlNodes.name(identifier); + if (catalogManager.getCatalog(name) == null) { + if (!ifExists) { + throw SqlUtil.newContextException( + identifier.getParserPosition(), + RESOURCE.internal(String.format("Cannot drop catalog: '%s' not found.", name))); + } + LOG.info("Ignoring 'DROP CATALOG` call for non-existent catalog: {}", name); + return; + } + + if (catalogManager.currentCatalog().name().equals(name)) { + throw SqlUtil.newContextException( + identifier.getParserPosition(), + RESOURCE.internal( + String.format( + "Unable to drop active catalog '%s'. Please switch to another catalog first.", + name))); + } + + catalogManager.dropCatalog(name); + LOG.info("Successfully dropped catalog '{}'", name); + catalogSubSchemas.remove(name); + } + + // A BeamCalciteSchema may be used to interact with multiple TableProviders. + // If such a TableProvider is not registered in the BeamCalciteSchema, this method + // will attempt to do so. 
+ public void maybeRegisterProvider(TableName path, String type) { + CatalogSchema catalogSchema = getCatalogSchema(path); + BeamCalciteSchema beamCalciteSchema = catalogSchema.getDatabaseSchema(path); + + if (beamCalciteSchema.getTableProvider() instanceof MetaStore) { + MetaStore metaStore = (MetaStore) beamCalciteSchema.getTableProvider(); + if (metaStore.tableProviders().containsKey(type)) { + return; + } + + // Start with the narrowest scope. + // Attempt to fetch provider from Catalog first, then CatalogManager. + @Nullable TableProvider provider = catalogSchema.getCatalog().tableProviders().get(type); + if (provider == null) { + provider = catalogManager.tableProviders().get(type); + } + // register provider + if (provider != null) { + metaStore.registerProvider(provider); + } + } + } + + @Override + public @Nullable Table getTable(String table) { + @Nullable + CatalogSchema catalogSchema = catalogSubSchemas.get(catalogManager.currentCatalog().name()); + return catalogSchema != null ? catalogSchema.getTable(table) : null; + } + + @Override + public Set<String> getTableNames() { + ImmutableSet.Builder<String> names = ImmutableSet.builder(); + // TODO: this might be a heavy operation + for (CatalogSchema catalogSchema : catalogSubSchemas.values()) { Review Comment: @ahmedabu98 I believe this is a very heavy method; can we only list tables within the currently active catalog and database? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
