Validate supported column type with SASI analyzer patch by Zhao Yang; reviewed by Andres de la Peña for CASSANDRA-13669
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/ea62d886 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/ea62d886 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/ea62d886 Branch: refs/heads/trunk Commit: ea62d8862c311e3d9b64d622bea0a68d3825aa7d Parents: 08a515d Author: Zhao Yang <zhaoyangsingap...@gmail.com> Authored: Tue May 15 09:43:39 2018 +0800 Committer: AndreÌs de la PenÌa <a.penya.gar...@gmail.com> Committed: Mon Jun 25 11:51:15 2018 +0100 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../apache/cassandra/index/sasi/SASIIndex.java | 5 +- .../index/sasi/analyzer/AbstractAnalyzer.java | 8 ++ .../index/sasi/analyzer/DelimiterAnalyzer.java | 12 ++- .../index/sasi/analyzer/NoOpAnalyzer.java | 6 ++ .../sasi/analyzer/NonTokenizingAnalyzer.java | 6 ++ .../index/sasi/analyzer/StandardAnalyzer.java | 18 +++++ .../cassandra/index/sasi/conf/IndexMode.java | 20 ++++- .../cassandra/index/sasi/SASIIndexTest.java | 77 ++++++++++++++++++++ 9 files changed, 147 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index d9c62ea..6f6e009 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.11.3 + * Validate supported column type with SASI analyzer (CASSANDRA-13669) * Remove BTree.Builder Recycler to reduce memory usage (CASSANDRA-13929) * Reduce nodetool GC thread count (CASSANDRA-14475) * Fix New SASI view creation during Index Redistribution (CASSANDRA-14055) http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/SASIIndex.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/SASIIndex.java b/src/java/org/apache/cassandra/index/sasi/SASIIndex.java index f127748..2c1d088 100644 --- a/src/java/org/apache/cassandra/index/sasi/SASIIndex.java +++ b/src/java/org/apache/cassandra/index/sasi/SASIIndex.java @@ -121,6 +121,9 @@ public class SASIIndex implements Index, INotificationConsumer CompactionManager.instance.submitIndexBuild(new SASIIndexBuilder(baseCfs, toRebuild)); } + /** + * Called via reflection at {@link IndexMetadata#validateCustomIndexOptions} + */ public static Map<String, String> validateOptions(Map<String, String> options, CFMetaData cfm) { if (!(cfm.partitioner instanceof Murmur3Partitioner)) @@ -140,7 +143,7 @@ public class SASIIndex implements Index, INotificationConsumer if (target.left.isPartitionKey()) throw new ConfigurationException("partition key columns are not yet supported by SASI"); - IndexMode.validateAnalyzer(options); + IndexMode.validateAnalyzer(options, target.left); IndexMode mode = IndexMode.getMode(target.left, options); if (mode.mode == Mode.SPARSE) http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java index 31c66cc..e3bb7a2 100644 --- a/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java +++ b/src/java/org/apache/cassandra/index/sasi/analyzer/AbstractAnalyzer.java @@ -43,6 +43,14 @@ public abstract class AbstractAnalyzer implements Iterator<ByteBuffer> public abstract void reset(ByteBuffer input); /** + * Test whether the given validator is compatible with the underlying analyzer. + * + * @param validator + * @return + */ + public abstract boolean isCompatibleWith(AbstractType<?> validator); + + /** * @return true if current analyzer provides text tokenization, false otherwise. */ public boolean isTokenizing() http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java index 24acef4..fea4b4f 100644 --- a/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java +++ b/src/java/org/apache/cassandra/index/sasi/analyzer/DelimiterAnalyzer.java @@ -37,10 +37,10 @@ import org.apache.cassandra.utils.AbstractIterator; public class DelimiterAnalyzer extends AbstractAnalyzer { - private static final Map<AbstractType<?>,Charset> VALID_ANALYZABLE_TYPES = new HashMap<AbstractType<?>,Charset>() + private static final Map<AbstractType<?>, Charset> VALID_ANALYZABLE_TYPES = new HashMap<AbstractType<?>, Charset>() {{ - put(UTF8Type.instance, StandardCharsets.UTF_8); - put(AsciiType.instance, StandardCharsets.US_ASCII); + put(UTF8Type.instance, StandardCharsets.UTF_8); + put(AsciiType.instance, StandardCharsets.US_ASCII); }}; private char delimiter; @@ -105,4 +105,10 @@ public class DelimiterAnalyzer extends AbstractAnalyzer { return true; } + + @Override + public boolean isCompatibleWith(AbstractType<?> validator) + { + return VALID_ANALYZABLE_TYPES.containsKey(validator); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java index 9939a13..5c9b748 100644 --- a/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java +++ b/src/java/org/apache/cassandra/index/sasi/analyzer/NoOpAnalyzer.java @@ -51,4 +51,10 @@ public class NoOpAnalyzer extends AbstractAnalyzer this.input = input; this.hasNext = true; } + + @Override + public boolean isCompatibleWith(AbstractType<?> validator) + { + return true; + } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java index 676b304..82084bc 100644 --- a/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java +++ b/src/java/org/apache/cassandra/index/sasi/analyzer/NonTokenizingAnalyzer.java @@ -123,4 +123,10 @@ public class NonTokenizingAnalyzer extends AbstractAnalyzer builder = builder.add("to_lower", new BasicResultFilters.LowerCase()); return builder.build(); } + + @Override + public boolean isCompatibleWith(AbstractType<?> validator) + { + return VALID_ANALYZABLE_TYPES.contains(validator); + } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java index 3b58bf9..e1a4a44 100644 --- a/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java +++ b/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java @@ -23,10 +23,13 @@ import java.io.InputStreamReader; import java.io.Reader; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import org.apache.cassandra.index.sasi.analyzer.filter.*; import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.AsciiType; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.io.util.DataInputBuffer; import org.apache.cassandra.utils.ByteBufferUtil; @@ -38,6 +41,15 @@ import com.carrotsearch.hppc.IntObjectOpenHashMap; public class StandardAnalyzer extends AbstractAnalyzer { + + private static final Set<AbstractType<?>> VALID_ANALYZABLE_TYPES = new HashSet<AbstractType<?>>() + { + { + add(UTF8Type.instance); + add(AsciiType.instance); + } + }; + public enum TokenType { EOF(-1), @@ -198,4 +210,10 @@ public class StandardAnalyzer extends AbstractAnalyzer { return true; } + + @Override + public boolean isCompatibleWith(AbstractType<?> validator) + { + return VALID_ANALYZABLE_TYPES.contains(validator); + } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java b/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java index c66dd02..5709a0f 100644 --- a/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java +++ b/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java @@ -93,20 +93,36 @@ public class IndexMode return analyzer; } - public static void validateAnalyzer(Map<String, String> indexOptions) throws ConfigurationException + public static void validateAnalyzer(Map<String, String> indexOptions, ColumnDefinition cd) throws ConfigurationException { // validate that a valid analyzer class was provided if specified if (indexOptions.containsKey(INDEX_ANALYZER_CLASS_OPTION)) { + Class<?> analyzerClass; try { - Class.forName(indexOptions.get(INDEX_ANALYZER_CLASS_OPTION)); + analyzerClass = Class.forName(indexOptions.get(INDEX_ANALYZER_CLASS_OPTION)); } catch (ClassNotFoundException e) { throw new ConfigurationException(String.format("Invalid analyzer class option specified [%s]", indexOptions.get(INDEX_ANALYZER_CLASS_OPTION))); } + + AbstractAnalyzer analyzer; + try + { + analyzer = (AbstractAnalyzer) analyzerClass.newInstance(); + if (!analyzer.isCompatibleWith(cd.type)) + throw new ConfigurationException(String.format("%s does not support type %s", + analyzerClass.getSimpleName(), + cd.type.asCQL3Type())); + } + catch (InstantiationException | IllegalAccessException e) + { + throw new ConfigurationException(String.format("Unable to initialize analyzer class option specified [%s]", + analyzerClass.getSimpleName())); + } } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/ea62d886/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java index a57af61..1579857 100644 --- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java +++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java @@ -31,10 +31,12 @@ import java.util.concurrent.Executors; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; import org.apache.cassandra.SchemaLoader; import org.apache.cassandra.config.CFMetaData; import org.apache.cassandra.config.ColumnDefinition; +import org.apache.cassandra.config.Schema; import org.apache.cassandra.index.Index; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.*; @@ -55,6 +57,11 @@ import org.apache.cassandra.dht.Murmur3Partitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.InvalidRequestException; +import org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer; +import org.apache.cassandra.index.sasi.analyzer.DelimiterAnalyzer; +import org.apache.cassandra.index.sasi.analyzer.NoOpAnalyzer; +import org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer; +import org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer; import org.apache.cassandra.index.sasi.conf.ColumnIndex; import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder; import org.apache.cassandra.index.sasi.exceptions.TimeQuotaExceededException; @@ -2411,6 +2418,76 @@ public class SASIIndexTest Assert.assertEquals(index.searchMemtable(expression).getCount(), 0); } + @Test + public void testAnalyzerValidation() + { + final String TABLE_NAME = "analyzer_validation"; + QueryProcessor.executeOnceInternal(String.format("CREATE TABLE %s.%s (" + + " pk text PRIMARY KEY, " + + " ascii_v ascii, " + + " bigint_v bigint, " + + " blob_v blob, " + + " boolean_v boolean, " + + " date_v date, " + + " decimal_v decimal, " + + " double_v double, " + + " float_v float, " + + " inet_v inet, " + + " int_v int, " + + " smallint_v smallint, " + + " text_v text, " + + " time_v time, " + + " timestamp_v timestamp, " + + " timeuuid_v timeuuid, " + + " tinyint_v tinyint, " + + " uuid_v uuid, " + + " varchar_v varchar, " + + " varint_v varint" + + ");", + KS_NAME, + TABLE_NAME)); + + Columns regulars = Schema.instance.getCFMetaData(KS_NAME, TABLE_NAME).partitionColumns().regulars; + List<String> allColumns = regulars.stream().map(ColumnDefinition::toString).collect(Collectors.toList()); + List<String> textColumns = Arrays.asList("text_v", "ascii_v", "varchar_v"); + + new HashMap<Class<? extends AbstractAnalyzer>, List<String>>() + {{ + put(StandardAnalyzer.class, textColumns); + put(NonTokenizingAnalyzer.class, textColumns); + put(DelimiterAnalyzer.class, textColumns); + put(NoOpAnalyzer.class, allColumns); + }} + .forEach((analyzer, supportedColumns) -> { + for (String column : allColumns) + { + String query = String.format("CREATE CUSTOM INDEX ON %s.%s(%s) " + + "USING 'org.apache.cassandra.index.sasi.SASIIndex' " + + "WITH OPTIONS = {'analyzer_class': '%s', 'mode':'PREFIX'};", + KS_NAME, TABLE_NAME, column, analyzer.getName()); + + if (supportedColumns.contains(column)) + { + QueryProcessor.executeOnceInternal(query); + } + else + { + try + { + QueryProcessor.executeOnceInternal(query); + Assert.fail("Expected ConfigurationException"); + } + catch (ConfigurationException e) + { + // expected + Assert.assertTrue("Unexpected error message " + e.getMessage(), + e.getMessage().contains("does not support type")); + } + } + } + }); + } + private static ColumnFamilyStore loadData(Map<String, Pair<String, Integer>> data, boolean forceFlush) { return loadData(data, System.currentTimeMillis(), forceFlush); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org