This is an automated email from the ASF dual-hosted git repository. smiklosovic pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
commit 43a022c4fc30784580c2704486a96858d2fce9bc Author: Stefan Miklosovic <[email protected]> AuthorDate: Fri Feb 27 11:04:42 2026 +0100 Implement a guardrail ensuring that minimum training frequency parameter is provided in ZstdDictionaryCompressor patch by Stefan Miklosovic; reviewed by Yifan Cai for CASSANDRA-21192 --- CHANGES.txt | 1 + conf/cassandra.yaml | 8 ++ conf/cassandra_latest.yaml | 8 ++ src/java/org/apache/cassandra/config/Config.java | 2 + .../apache/cassandra/config/GuardrailsOptions.java | 30 +++++++ .../statements/schema/AlterSchemaStatement.java | 14 ++++ .../statements/schema/AlterTableStatement.java | 4 +- .../cql3/statements/schema/CopyTableStatement.java | 4 +- .../statements/schema/CreateTableStatement.java | 4 +- .../apache/cassandra/db/guardrails/Guardrails.java | 39 +++++++++ .../cassandra/db/guardrails/GuardrailsConfig.java | 30 +++++++ .../cassandra/db/guardrails/GuardrailsMBean.java | 30 +++++++ .../tools/nodetool/GuardrailsConfigCommand.java | 4 +- .../GuardrailUnsetTrainingMinFrequencyTest.java | 95 ++++++++++++++++++++++ .../nodetool/GuardrailsConfigCommandsTest.java | 2 + 15 files changed, 271 insertions(+), 4 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index ac6f6df701..f35e9a343d 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Implement a guardrail ensuring that minimum training frequency parameter is provided in ZstdDictionaryCompressor (CASSANDRA-21192) * Replace manual referencing with ColumnFamilyStore.selectAndReference when training a dictionary (CASSANDRA-21188) * Forbid nodes upgrading to a version which cannot read existing log entries (CASSANDRA-21174) * Introduce a check for minimum time to pass to train or import a compression dictionary from the last one (CASSANDRA-21179) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 6cb3c08db0..375135bcc1 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2674,6 +2674,14 @@ max_security_label_length: 48 # specify an index implementation via USING. #default_secondary_index_enabled: true +# Whether a user will be warned when a table is compressed by dictionary compressor +# and training_min_frequency is set to 0m (the default when unset). +#unset_training_min_frequency_warned: true + +# Whether a query is allowed when a user wants to create or alter a table which is +# compressed by dictionary compressor and training_min_frequency is set to 0m (the default when unset). +#unset_training_min_frequency_enabled: true + # Startup Checks are executed as part of Cassandra startup process, not all of them # are configurable (so you can disable them) but these which are enumerated bellow. # Uncomment the startup checks and configure them appropriately to cover your needs. diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 1d08bf1722..13b54aa53f 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -2438,6 +2438,14 @@ default_secondary_index: sai # specify an index implementation via USING. default_secondary_index_enabled: true +# Whether a user will be warned when a table is compressed by dictionary compressor +# and training_min_frequency is set to 0m (the default when unset). +#unset_training_min_frequency_warned: true + +# Whether a query is allowed when a user wants to create or alter a table which is +# compressed by dictionary compressor and training_min_frequency is set to 0m (the default when unset). +#unset_training_min_frequency_enabled: true + # Startup Checks are executed as part of Cassandra startup process, not all of them # are configurable (so you can disable them) but these which are enumerated bellow. # Uncomment the startup checks and configure them appropriately to cover your needs. diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index a6d7d48262..c6c0351f08 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -998,6 +998,8 @@ public class Config public volatile boolean non_partition_restricted_index_query_enabled = true; public volatile boolean intersect_filtering_query_warned = true; public volatile boolean intersect_filtering_query_enabled = true; + public volatile boolean unset_training_min_frequency_warned = true; + public volatile boolean unset_training_min_frequency_enabled = true; public volatile int sai_sstable_indexes_per_query_warn_threshold = 32; public volatile int sai_sstable_indexes_per_query_fail_threshold = -1; diff --git a/src/java/org/apache/cassandra/config/GuardrailsOptions.java b/src/java/org/apache/cassandra/config/GuardrailsOptions.java index 7443565d89..b7bea893d2 100644 --- a/src/java/org/apache/cassandra/config/GuardrailsOptions.java +++ b/src/java/org/apache/cassandra/config/GuardrailsOptions.java @@ -1338,6 +1338,36 @@ public class GuardrailsOptions implements GuardrailsConfig x -> config.vector_type_enabled = x); } + @Override + public void setUnsetTrainingMinFrequencyWarned(boolean enabled) + { + updatePropertyWithLogging("unset_training_min_frequency_warned", + enabled, + () -> config.unset_training_min_frequency_warned, + x -> config.unset_training_min_frequency_warned = x); + } + + @Override + public boolean getUnsetTrainingMinFrequencyWarned() + { + return config.unset_training_min_frequency_warned; + } + + @Override + public void setUnsetTrainingMinFrequencyEnabled(boolean enabled) + { + updatePropertyWithLogging("unset_training_min_frequency_enabled", + enabled, + () -> config.unset_training_min_frequency_enabled, + x -> config.unset_training_min_frequency_enabled = x); + } + + @Override + public boolean getUnsetTrainingMinFrequencyEnabled() + { + return config.unset_training_min_frequency_enabled; + } + private static <T> void updatePropertyWithLogging(String propertyName, T newValue, Supplier<T> getter, Consumer<T> setter) { T oldValue = getter.get(); diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java index 59f4e5255d..062dad7541 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/AlterSchemaStatement.java @@ -51,6 +51,8 @@ import org.apache.cassandra.transport.Dispatcher; import org.apache.cassandra.transport.Event.SchemaChange; import org.apache.cassandra.transport.messages.ResultMessage; +import static org.apache.cassandra.io.compress.IDictionaryCompressor.DEFAULT_TRAINING_MIN_FREQUENCY; +import static org.apache.cassandra.io.compress.IDictionaryCompressor.TRAINING_MIN_FREQUENCY_PARAMETER_NAME; import static org.apache.cassandra.schema.KeyspaceMetadata.validateKeyspaceName; abstract public class AlterSchemaStatement implements CQLStatement.SingleKeyspaceCqlStatement, SchemaTransformation @@ -235,6 +237,18 @@ abstract public class AlterSchemaStatement implements CQLStatement.SingleKeyspac Guardrails.zeroTTLOnTWCSEnabled.ensureEnabled(state); } + protected void validateMinimumTrainingFrequencyForDictionaryCompressor(TableParams params) + { + if (!SchemaConstants.isSystemKeyspace(keyspaceName) && + params.compression.isDictionaryCompressionEnabled() && + DEFAULT_TRAINING_MIN_FREQUENCY.equals(params.compression.getOtherOptions() + .getOrDefault(TRAINING_MIN_FREQUENCY_PARAMETER_NAME, + DEFAULT_TRAINING_MIN_FREQUENCY))) + { + Guardrails.unsetTrainingMinFrequency.ensureEnabled(state); + } + } + private void grantPermissionsOnResource(IResource resource, AuthenticatedUser user) { try diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java index 685d0c84cd..aee1496cb5 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/AlterTableStatement.java @@ -631,7 +631,9 @@ public abstract class AlterTableStatement extends AlterSchemaStatement MemtableParams.get(attrs.getString(TableParams.Option.MEMTABLE.toString())); Guardrails.tableProperties.guard(attrs.updatedProperties(), attrs::removeProperty, state); - validateDefaultTimeToLive(attrs.asNewTableParams(keyspaceName)); + TableParams paramsForValidation = attrs.asNewTableParams(keyspaceName); + validateDefaultTimeToLive(paramsForValidation); + validateMinimumTrainingFrequencyForDictionaryCompressor(paramsForValidation); } private TableParams validateAndUpdateTransactionalMigration(boolean isCounter, TableParams prev, TableParams next) diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/CopyTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/CopyTableStatement.java index 4bc552ec4a..1250cb79ec 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/CopyTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/CopyTableStatement.java @@ -254,7 +254,9 @@ public final class CopyTableStatement extends AlterSchemaStatement .sum(); Guardrails.tables.guard(totalUserTables + 1, targetTableName, false, state); } - validateDefaultTimeToLive(attrs.asNewTableParams(keyspaceName)); + TableParams paramsForValidation = attrs.asNewTableParams(keyspaceName); + validateDefaultTimeToLive(paramsForValidation); + validateMinimumTrainingFrequencyForDictionaryCompressor(paramsForValidation); } public final static class Raw extends CQLStatement.Raw diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java index ef53578272..26dfbb8dec 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/CreateTableStatement.java @@ -227,7 +227,9 @@ public final class CreateTableStatement extends AlterSchemaStatement if (useCompactStorage) Guardrails.compactTablesEnabled.ensureEnabled(state); - validateDefaultTimeToLive(attrs.asNewTableParams(keyspaceName)); + TableParams paramsForValidation = attrs.asNewTableParams(keyspaceName); + validateDefaultTimeToLive(paramsForValidation); + validateMinimumTrainingFrequencyForDictionaryCompressor(paramsForValidation); rawColumns.forEach((name, raw) -> raw.validate(state, name)); } diff --git a/src/java/org/apache/cassandra/db/guardrails/Guardrails.java b/src/java/org/apache/cassandra/db/guardrails/Guardrails.java index 6738d9a7d9..9f0e63bbcd 100644 --- a/src/java/org/apache/cassandra/db/guardrails/Guardrails.java +++ b/src/java/org/apache/cassandra/db/guardrails/Guardrails.java @@ -38,7 +38,9 @@ import org.apache.cassandra.config.DurationSpec; import org.apache.cassandra.config.GuardrailsOptions; import org.apache.cassandra.db.ConsistencyLevel; import org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy; +import org.apache.cassandra.io.compress.IDictionaryCompressor; import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.schema.SystemDistributedKeyspace; import org.apache.cassandra.service.ClientState; import org.apache.cassandra.service.disk.usage.DiskUsageBroadcaster; import org.apache.cassandra.utils.JsonUtils; @@ -682,6 +684,19 @@ public final class Guardrails implements GuardrailsMBean state -> CONFIG_PROVIDER.getOrCreate(state).getNonPartitionRestrictedQueryEnabled(), "Non-partition key restricted query"); + public static EnableFlag unsetTrainingMinFrequency = + new EnableFlag("unset_training_min_frequency_enabled", + format("Table uses ZstdDictionaryCompressor without %s set. Unlimited training frequency may degrade " + + "compression quality and accumulate dictionaries in %s.%s. " + + "Consider to set %s to a non-zero value.", + IDictionaryCompressor.TRAINING_MIN_FREQUENCY_PARAMETER_NAME, + SystemDistributedKeyspace.NAME, + SystemDistributedKeyspace.COMPRESSION_DICTIONARIES, + IDictionaryCompressor.TRAINING_MIN_FREQUENCY_PARAMETER_NAME), + state -> CONFIG_PROVIDER.getOrCreate(state).getUnsetTrainingMinFrequencyWarned(), + state -> CONFIG_PROVIDER.getOrCreate(state).getUnsetTrainingMinFrequencyEnabled(), + "unset minimum frequency of training for dictionary compressor"); + private Guardrails() { MBeanWrapper.instance.registerMBean(this, MBEAN_NAME); @@ -1814,6 +1829,30 @@ public final class Guardrails implements GuardrailsMBean DEFAULT_CONFIG.setIntersectFilteringQueryEnabled(value); } + @Override + public void setUnsetTrainingMinFrequencyWarned(boolean value) + { + DEFAULT_CONFIG.setUnsetTrainingMinFrequencyWarned(value); + } + + @Override + public boolean getUnsetTrainingMinFrequencyWarned() + { + return DEFAULT_CONFIG.getUnsetTrainingMinFrequencyWarned(); + } + + @Override + public void setUnsetTrainingMinFrequencyEnabled(boolean value) + { + DEFAULT_CONFIG.setUnsetTrainingMinFrequencyEnabled(value); + } + + @Override + public boolean getUnsetTrainingMinFrequencyEnabled() + { + return DEFAULT_CONFIG.getUnsetTrainingMinFrequencyEnabled(); + } + private static String toCSV(Set<String> values) { return values == null || values.isEmpty() ? "" : String.join(",", values); diff --git a/src/java/org/apache/cassandra/db/guardrails/GuardrailsConfig.java b/src/java/org/apache/cassandra/db/guardrails/GuardrailsConfig.java index 8e0d020259..70e1b7f7d3 100644 --- a/src/java/org/apache/cassandra/db/guardrails/GuardrailsConfig.java +++ b/src/java/org/apache/cassandra/db/guardrails/GuardrailsConfig.java @@ -652,4 +652,34 @@ public interface GuardrailsConfig * @return configuration for role name policy guardrail. */ CustomGuardrailConfig getRoleNamePolicyConfig(); + + /** + * Sets whether a user will be warned when creating a table with a dictionary-based compressor which + * does not have any limit how often dictionaries can be trained. + * + * @param value value to set + */ + void setUnsetTrainingMinFrequencyWarned(boolean value); + + /** + * + * @return true if a user will be warned when training compression dictionaries for tables backed by + * dictionary compressor as frequently as needed, without any limits, false otherwise. + */ + boolean getUnsetTrainingMinFrequencyWarned(); + + /** + * Sets whether it is allowed to create a table with a dictionary-based compressor which + * does not have any limit how often dictionaries can be trained. + * + * @param value value to set + */ + void setUnsetTrainingMinFrequencyEnabled(boolean value); + + /** + * + * @return true if it is possible to train compression dictionaries for tables backed by + * dictionary compressor as frequently as needed, without any limits, false otherwise. + */ + boolean getUnsetTrainingMinFrequencyEnabled(); } diff --git a/src/java/org/apache/cassandra/db/guardrails/GuardrailsMBean.java b/src/java/org/apache/cassandra/db/guardrails/GuardrailsMBean.java index e2e9cd74dd..fe139b3bbb 100644 --- a/src/java/org/apache/cassandra/db/guardrails/GuardrailsMBean.java +++ b/src/java/org/apache/cassandra/db/guardrails/GuardrailsMBean.java @@ -1160,4 +1160,34 @@ public interface GuardrailsMBean * @param value configuration of new role name validator. */ void setRoleNamePolicy(String value); + + /** + * Sets whether a user will be warned when creating a table with a dictionary-based compressor which + * does not have any limit how often dictionaries can be trained. + * + * @param value value to set + */ + void setUnsetTrainingMinFrequencyWarned(boolean value); + + /** + * + * @return true if a user will be warned when training compression dictionaries for tables backed by + * dictionary compressor as frequently as needed, without any limits, false otherwise. + */ + boolean getUnsetTrainingMinFrequencyWarned(); + + /** + * Sets whether it is allowed to create a table with a dictionary-based compressor which + * does not have any limit how often dictionaries can be trained. + * + * @param value value to set + */ + void setUnsetTrainingMinFrequencyEnabled(boolean value); + + /** + * + * @return true if it is possible to train compression dictionaries for tables backed by + * dictionary compressor as frequently as needed, without any limits, false otherwise. + */ + boolean getUnsetTrainingMinFrequencyEnabled(); } diff --git a/src/java/org/apache/cassandra/tools/nodetool/GuardrailsConfigCommand.java b/src/java/org/apache/cassandra/tools/nodetool/GuardrailsConfigCommand.java index b6c7a4fe4f..095b4b85f1 100644 --- a/src/java/org/apache/cassandra/tools/nodetool/GuardrailsConfigCommand.java +++ b/src/java/org/apache/cassandra/tools/nodetool/GuardrailsConfigCommand.java @@ -370,7 +370,9 @@ public abstract class GuardrailsConfigCommand extends AbstractCommand /** * Set of guardrails which are flags, even though their suffix would suggest they are part of "values" which have warned, ignored, and disallowed sub-categories */ - private static final Set<String> specialFlags = Set.of("intersect_filtering_query_warned", "zero_ttl_on_twcs_warned"); + private static final Set<String> specialFlags = Set.of("intersect_filtering_query_warned", + "unset_training_min_frequency_warned", + "zero_ttl_on_twcs_warned"); @VisibleForTesting public enum GuardrailCategory diff --git a/test/unit/org/apache/cassandra/db/guardrails/GuardrailUnsetTrainingMinFrequencyTest.java b/test/unit/org/apache/cassandra/db/guardrails/GuardrailUnsetTrainingMinFrequencyTest.java new file mode 100644 index 0000000000..be4aa66eb5 --- /dev/null +++ b/test/unit/org/apache/cassandra/db/guardrails/GuardrailUnsetTrainingMinFrequencyTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.guardrails; + +import org.junit.Test; + +public class GuardrailUnsetTrainingMinFrequencyTest extends GuardrailTester +{ + private static final String UNSET_FREQUENCY = "CREATE TABLE IF NOT EXISTS tb1 (k int PRIMARY KEY, a int, b int) " + + "WITH compression = {'class': 'ZstdDictionaryCompressor', " + + "'training_min_frequency': '0m' }"; + + private static final String SET_FREQUENCY = "CREATE TABLE IF NOT EXISTS tb1 (k int PRIMARY KEY, a int, b int) " + + "WITH compression = {'class': 'ZstdDictionaryCompressor', " + + "'training_min_frequency': '1d' }"; + + private static final String NOT_DICT_COMPRESSOR = "CREATE TABLE IF NOT EXISTS tb1 (k int PRIMARY KEY, a int, b int) " + + "WITH compression = {'class': 'ZstdCompressor' }"; + + public GuardrailUnsetTrainingMinFrequencyTest() + { + super(Guardrails.unsetTrainingMinFrequency); + } + + @Test + public void testGuardrailDisabled() throws Throwable + { + prepareTest(false, true); + assertFails(UNSET_FREQUENCY, "unset minimum frequency of training for dictionary compressor is not allowed"); + } + + @Test + public void testGuardrailEnabledWarnEnabled() throws Throwable + { + prepareTest(true, true); + assertWarns(UNSET_FREQUENCY, "unset minimum frequency of training for dictionary compressor is not recommended"); + } + + @Test + public void testGuardrailEnabledWarnDisabled() throws Throwable + { + prepareTest(true, false); + assertValid(SET_FREQUENCY); + assertValid(UNSET_FREQUENCY); + } + + @Test + public void testGuardrailNotTriggered() throws Throwable + { + prepareTest(true, true); + assertValid(SET_FREQUENCY); + assertValid(NOT_DICT_COMPRESSOR); + + prepareTest(false, true); + assertValid(SET_FREQUENCY); + assertValid(NOT_DICT_COMPRESSOR); + } + + @Test + public void testExcludedUsers() throws Throwable + { + for (boolean enabled : new boolean[]{ false, true }) + { + for (boolean warned : new boolean[]{ false, true }) + { + prepareTest(enabled, warned); + testExcludedUsers(() -> UNSET_FREQUENCY, + () -> SET_FREQUENCY, + () -> NOT_DICT_COMPRESSOR); + } + } + } + + private void prepareTest(boolean enabled, boolean warned) + { + guardrails().setUnsetTrainingMinFrequencyEnabled(enabled); + guardrails().setUnsetTrainingMinFrequencyWarned(warned); + } +} diff --git a/test/unit/org/apache/cassandra/tools/nodetool/GuardrailsConfigCommandsTest.java b/test/unit/org/apache/cassandra/tools/nodetool/GuardrailsConfigCommandsTest.java index 37958178b7..2512d30da9 100644 --- a/test/unit/org/apache/cassandra/tools/nodetool/GuardrailsConfigCommandsTest.java +++ b/test/unit/org/apache/cassandra/tools/nodetool/GuardrailsConfigCommandsTest.java @@ -213,6 +213,8 @@ public class GuardrailsConfigCommandsTest extends CQLTester "secondary_indexes_enabled true\n" + "simplestrategy_enabled true\n" + "uncompressed_tables_enabled true\n" + + "unset_training_min_frequency_enabled true\n" + + "unset_training_min_frequency_warned true\n" + "user_timestamps_enabled true\n" + "vector_type_enabled true\n" + "zero_ttl_on_twcs_enabled true\n" + --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
