singhpk234 commented on code in PR #3268: URL: https://github.com/apache/polaris/pull/3268#discussion_r2617850221
########## persistence/nosql/persistence/metastore-maintenance/src/main/java/org/apache/polaris/persistence/nosql/metastore/maintenance/CatalogRetainedIdentifier.java: ########## @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.metastore.maintenance; + +import static java.lang.String.format; +import static org.apache.polaris.persistence.nosql.api.obj.ObjRef.OBJ_REF_SERIALIZER; +import static org.apache.polaris.persistence.nosql.coretypes.catalog.CatalogRolesObj.CATALOG_ROLES_REF_NAME_PATTERN; +import static org.apache.polaris.persistence.nosql.coretypes.catalog.CatalogStateObj.CATALOG_STATE_REF_NAME_PATTERN; +import static org.apache.polaris.persistence.nosql.coretypes.catalog.CatalogsObj.CATALOGS_REF_NAME; +import static org.apache.polaris.persistence.nosql.coretypes.principals.PrincipalRolesObj.PRINCIPAL_ROLES_REF_NAME; +import static org.apache.polaris.persistence.nosql.coretypes.principals.PrincipalsObj.PRINCIPALS_REF_NAME; +import static org.apache.polaris.persistence.nosql.coretypes.realm.ImmediateTasksObj.IMMEDIATE_TASKS_REF_NAME; +import static org.apache.polaris.persistence.nosql.coretypes.realm.PolicyMapping.POLICY_MAPPING_SERIALIZER; +import static org.apache.polaris.persistence.nosql.coretypes.realm.PolicyMappingsObj.POLICY_MAPPINGS_REF_NAME; +import static org.apache.polaris.persistence.nosql.coretypes.realm.RealmGrantsObj.REALM_GRANTS_REF_NAME; +import static org.apache.polaris.persistence.nosql.coretypes.realm.RootObj.ROOT_REF_NAME; +import static org.apache.polaris.persistence.nosql.metastore.maintenance.CatalogsMaintenanceConfig.DEFAULT_CATALOGS_HISTORY_RETAIN; +import static org.apache.polaris.persistence.nosql.metastore.maintenance.CatalogsMaintenanceConfig.DEFAULT_CATALOG_POLICIES_RETAIN; +import static org.apache.polaris.persistence.nosql.metastore.maintenance.CatalogsMaintenanceConfig.DEFAULT_CATALOG_ROLES_RETAIN; +import static org.apache.polaris.persistence.nosql.metastore.maintenance.CatalogsMaintenanceConfig.DEFAULT_CATALOG_STATE_RETAIN; +import static org.apache.polaris.persistence.nosql.metastore.maintenance.CatalogsMaintenanceConfig.DEFAULT_GRANTS_RETAIN; +import static org.apache.polaris.persistence.nosql.metastore.maintenance.CatalogsMaintenanceConfig.DEFAULT_IMMEDIATE_TASKS_RETAIN; +import static org.apache.polaris.persistence.nosql.metastore.maintenance.CatalogsMaintenanceConfig.DEFAULT_PRINCIPALS_RETAIN; +import static org.apache.polaris.persistence.nosql.metastore.maintenance.CatalogsMaintenanceConfig.DEFAULT_PRINCIPAL_ROLES_RETAIN; + +import jakarta.annotation.Nonnull; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.inject.Inject; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Consumer; +import java.util.function.Function; +import org.apache.polaris.ids.api.MonotonicClock; +import org.apache.polaris.maintenance.cel.CelReferenceContinuePredicate; +import org.apache.polaris.persistence.nosql.api.exceptions.ReferenceNotFoundException; +import org.apache.polaris.persistence.nosql.api.index.IndexContainer; +import org.apache.polaris.persistence.nosql.api.index.IndexKey; +import org.apache.polaris.persistence.nosql.api.obj.BaseCommitObj; +import org.apache.polaris.persistence.nosql.api.obj.ObjRef; +import org.apache.polaris.persistence.nosql.coretypes.ContainerObj; +import org.apache.polaris.persistence.nosql.coretypes.catalog.CatalogObj; +import org.apache.polaris.persistence.nosql.coretypes.catalog.CatalogRolesObj; +import org.apache.polaris.persistence.nosql.coretypes.catalog.CatalogStateObj; +import org.apache.polaris.persistence.nosql.coretypes.catalog.CatalogsObj; +import org.apache.polaris.persistence.nosql.coretypes.principals.PrincipalRolesObj; +import org.apache.polaris.persistence.nosql.coretypes.principals.PrincipalsObj; +import org.apache.polaris.persistence.nosql.coretypes.realm.ImmediateTasksObj; +import org.apache.polaris.persistence.nosql.coretypes.realm.PolicyMappingsObj; +import org.apache.polaris.persistence.nosql.coretypes.realm.RealmGrantsObj; +import org.apache.polaris.persistence.nosql.coretypes.realm.RootObj; +import org.apache.polaris.persistence.nosql.maintenance.spi.PerRealmRetainedIdentifier; +import org.apache.polaris.persistence.nosql.maintenance.spi.RetainedCollector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@ApplicationScoped +class CatalogRetainedIdentifier implements PerRealmRetainedIdentifier { + + private static final Logger LOGGER = LoggerFactory.getLogger(CatalogRetainedIdentifier.class); + + private final CatalogsMaintenanceConfig catalogsMaintenanceConfig; + private final MonotonicClock monotonicClock; + + @SuppressWarnings("CdiInjectionPointsInspection") + @Inject + CatalogRetainedIdentifier( + CatalogsMaintenanceConfig catalogsMaintenanceConfig, MonotonicClock monotonicClock) { + this.catalogsMaintenanceConfig = catalogsMaintenanceConfig; + this.monotonicClock = monotonicClock; + } + + @Override + public String name() { + return "Catalog data"; + } + + @Override + public boolean identifyRetained(@Nonnull RetainedCollector collector) { + + // Note: References & objects retrieved via the `Persistence` instance returned by the + // `RetainedCollector` are automatically retained (no need to call collector.retain*() + // explicitly). + var persistence = collector.realmPersistence(); + + // per realm + + // The root object is "special" (there's only one) + LOGGER.info("Identifying root object..."); + ignoreReferenceNotFound(() -> persistence.fetchReferenceHead(ROOT_REF_NAME, RootObj.class)); + + perRealmContainer( + "principals", + PRINCIPALS_REF_NAME, + catalogsMaintenanceConfig.principalsRetain().orElse(DEFAULT_PRINCIPALS_RETAIN), + PrincipalsObj.class, + collector); + + perRealmContainer( + "principal roles", + PRINCIPAL_ROLES_REF_NAME, + catalogsMaintenanceConfig.principalRolesRetain().orElse(DEFAULT_PRINCIPAL_ROLES_RETAIN), + PrincipalRolesObj.class, + collector); + + perRealm( + "grants", + REALM_GRANTS_REF_NAME, + catalogsMaintenanceConfig.grantsRetain().orElse(DEFAULT_GRANTS_RETAIN), + RealmGrantsObj.class, + RealmGrantsObj::acls, + collector); + + perRealmContainer( + "immediate tasks", + IMMEDIATE_TASKS_REF_NAME, + catalogsMaintenanceConfig.immediateTasksRetain().orElse(DEFAULT_IMMEDIATE_TASKS_RETAIN), + ImmediateTasksObj.class, + collector); + + LOGGER.info("Identifying policy mappings..."); + ignoreReferenceNotFound( + () -> { + var policyMappingsContinue = + new CelReferenceContinuePredicate<PolicyMappingsObj>( + POLICY_MAPPINGS_REF_NAME, + persistence, + catalogsMaintenanceConfig + .catalogPoliciesRetain() + .orElse(DEFAULT_CATALOG_POLICIES_RETAIN)); + // PolicyMappings are stored _INLINE_ + collector.refRetain( + POLICY_MAPPINGS_REF_NAME, + PolicyMappingsObj.class, + policyMappingsContinue, + policyMappingsObj -> + policyMappingsObj + .policyMappings() + .indexForRead(collector.realmPersistence(), POLICY_MAPPING_SERIALIZER) + .forEach( + e -> { + var policyMapping = e.getValue(); + policyMapping.externalMapping().ifPresent(collector::retainObject); + })); + }); + + // per catalog + + LOGGER.info("Identifying catalogs..."); + ignoreReferenceNotFound( + () -> { + var catalogsHistoryContinue = + new CelReferenceContinuePredicate<CatalogsObj>( + CATALOGS_REF_NAME, + persistence, + catalogsMaintenanceConfig + .catalogsHistoryRetain() + .orElse(DEFAULT_CATALOGS_HISTORY_RETAIN)); + var currentCatalogs = new ConcurrentHashMap<IndexKey, ObjRef>(); + collector.refRetain( + CATALOGS_REF_NAME, + CatalogsObj.class, + catalogsHistoryContinue, + catalogs -> { + var allCatalogsIndex = + catalogs.nameToObjRef().indexForRead(persistence, OBJ_REF_SERIALIZER); + for (var entry : allCatalogsIndex) { + var catalogKey = entry.getKey(); + var catalogObjRef = entry.getValue(); + currentCatalogs.putIfAbsent(catalogKey, catalogObjRef); + } + collector.indexRetain(catalogs.stableIdToName()); + }); + + var catalogObjs = + persistence.fetchMany( + CatalogObj.class, currentCatalogs.values().toArray(ObjRef[]::new)); + for (var catalogObj : catalogObjs) { + if (catalogObj == null) { + // just in case... + continue; + } + + perCatalog( + "catalog roles", + CATALOG_ROLES_REF_NAME_PATTERN, + catalogObj, + catalogsMaintenanceConfig.catalogRolesRetain().orElse(DEFAULT_CATALOG_ROLES_RETAIN), + CatalogRolesObj.class, + CatalogRolesObj::nameToObjRef, + collector, + catalogRolesObj -> collector.indexRetain(catalogRolesObj.stableIdToName())); + + LOGGER.info( + "Identifying catalog state for catalog '{}' ({})...", + catalogObj.name(), + catalogObj.stableId()); + ignoreReferenceNotFound( + () -> { + var catalogStateRefName = + format(CATALOG_STATE_REF_NAME_PATTERN, catalogObj.stableId()); + var catalogStateContinue = + new CelReferenceContinuePredicate<CatalogStateObj>( + catalogStateRefName, + persistence, + catalogsMaintenanceConfig + .catalogStateRetain() + .orElse(DEFAULT_CATALOG_STATE_RETAIN)); + collector.refRetainIndexToSingleObj( + catalogStateRefName, + CatalogStateObj.class, + catalogStateContinue, + CatalogStateObj::nameToObjRef, + new RetainedCollector.ProgressListener<>() { + public static final long PROGRESS_LOG_INTERVAL_MS = 2_000L; + private long commit; + private long nextLog = + monotonicClock.currentTimeMillis() + PROGRESS_LOG_INTERVAL_MS; + + @Override + public void onCommit(CatalogStateObj catalogStateObj, long commit) { + collector.indexRetain(catalogStateObj.stableIdToName()); + catalogStateObj.locations().ifPresent(collector::indexRetain); + catalogStateObj.changes().ifPresent(collector::indexRetain); + this.commit = commit; + } + + @Override + public void onIndexEntry(long inCommit, long total) { + var now = monotonicClock.currentTimeMillis(); + if (now >= nextLog) { + LOGGER.info( + "... {} total index entries processed so far, at commit {}", + total, + commit); + nextLog = now + PROGRESS_LOG_INTERVAL_MS; + } + } + }, + x -> {}); Review Comment: can we have meaningful name what does `x` denotes ? ########## persistence/nosql/persistence/metastore-maintenance/src/main/java/org/apache/polaris/persistence/nosql/metastore/maintenance/CatalogsMaintenanceConfig.java: ########## @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.metastore.maintenance; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import io.smallrye.config.ConfigMapping; +import io.smallrye.config.WithDefault; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Polaris stores a history of changes per kind of object (principals, principal roles, grants, + * immediate tasks, catalog roles and catalog state). Review Comment: This is not true for all persistence in Apache Polaris. ```suggestion * No SQL persistence implementation of Polaris stores a history of changes per kind of object (principals, principal roles, grants, * immediate tasks, catalog roles and catalog state). ``` ########## persistence/nosql/persistence/metastore-maintenance/src/main/java/org/apache/polaris/persistence/nosql/metastore/maintenance/CatalogsMaintenanceConfig.java: ########## @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.metastore.maintenance; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import io.smallrye.config.ConfigMapping; +import io.smallrye.config.WithDefault; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Polaris stores a history of changes per kind of object (principals, principal roles, grants, + * immediate tasks, catalog roles and catalog state). + * + * <p>The rules are defined using a <a href="https://github.com/projectnessie/cel-java/">CEL Review Comment: why are we using this library to define the rules, have we considered alternatives ? what kind of rules is required ? ########## persistence/nosql/persistence/metastore-maintenance/src/main/java/org/apache/polaris/persistence/nosql/metastore/maintenance/CatalogsMaintenanceConfig.java: ########## @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.persistence.nosql.metastore.maintenance; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import io.smallrye.config.ConfigMapping; +import io.smallrye.config.WithDefault; +import java.util.Optional; +import org.apache.polaris.immutables.PolarisImmutable; + +/** + * Polaris stores a history of changes per kind of object (principals, principal roles, grants, + * immediate tasks, catalog roles and catalog state). + * + * <p>The rules are defined using a <a href="https://github.com/projectnessie/cel-java/">CEL + * script</a>. The default rules for all kinds of objects are to retain the history for 3 days, for + * the catalog state for 30 days. + * + * <p>The scripts have access to the following declared values: + * + * <ul> + * <li>{@code ref} (string) name of the reference + * <li>{@code commits} (64-bit int) number of the currently processed commit, starting at {@code + * 1} + * <li>{@code ageDays} (64-bit int) age of currently processed commit in days + * <li>{@code ageHours} (64-bit int) age of currently processed commit in hours + * <li>{@code ageMinutes} (64-bit int) age of currently processed commit in minutes + * </ul> + * + * <p>Scripts <em>must</em> return a {@code boolean} yielding whether the commit shall be retained. + * Note that maintenance-service implementations can keep the first not-to-be-retained commit. + * + * <p>Example scripts + * + * <ul> + * <li>{@code ageDays < 30 || commits <= 10} retains the reference history with at least 10 + * commits and commits that are younger than 30 days + * <li>{@code true} retains the whole reference history + * <li>{@code false} retains the most recent commit + * </ul> + */ +@ConfigMapping(prefix = "polaris.persistence.maintenance.catalog") Review Comment: ```suggestion @ConfigMapping(prefix = "polaris.persistence.nosql.maintenance.catalog") ``` this is not persistence implementation agnostic prev discussion : https://lists.apache.org/thread/vmdpb45j1nmmlhswz7fm87gxgoldmr28 similar callout: https://github.com/apache/polaris/pull/3135#discussion_r2558515915 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
