Re: [PR] To improve accord interoperability test coverage, need to extend the harry model domain to handle more possible CQL states [cassandra]

via GitHub Thu, 23 Jan 2025 14:29:54 -0800


dcapwell commented on code in PR #3785:
URL: https://github.com/apache/cassandra/pull/3785#discussion_r1927769337



##########
test/harry/main/org/apache/cassandra/harry/model/ASTSingleTableModel.java:
##########
@@ -0,0 +1,1289 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.harry.model;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableSet;
+import java.util.Objects;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.function.IntFunction;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import javax.annotation.Nullable;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+import accord.utils.Invariants;
+import org.apache.cassandra.cql3.ast.Conditional;
+import org.apache.cassandra.cql3.ast.Element;
+import org.apache.cassandra.cql3.ast.Expression;
+import org.apache.cassandra.cql3.ast.ExpressionEvaluator;
+import org.apache.cassandra.cql3.ast.FunctionCall;
+import org.apache.cassandra.cql3.ast.Mutation;
+import org.apache.cassandra.cql3.ast.Select;
+import org.apache.cassandra.cql3.ast.Symbol;
+import org.apache.cassandra.db.BufferClustering;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.harry.util.StringUtils;
+import org.apache.cassandra.schema.TableMetadata;
+import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.ImmutableUniqueList;
+import org.apache.cassandra.utils.Pair;
+import org.apache.cassandra.utils.TableUtil;
+
+import static org.apache.cassandra.harry.model.BytesPartitionState.asCQL;
+
+public class ASTSingleTableModel
+{
+    public final BytesPartitionState.Factory factory;
+    private final TreeMap<BytesPartitionState.Ref, BytesPartitionState> 
partitions = new TreeMap<>();
+
+    public ASTSingleTableModel(TableMetadata metadata)
+    {
+        this.factory = new BytesPartitionState.Factory(metadata);
+    }
+
+    public NavigableSet<BytesPartitionState.Ref> partitionKeys()
+    {
+        return partitions.navigableKeySet();
+    }
+
+    public int size()
+    {
+        return partitions.size();
+    }
+
+    public boolean isEmpty()
+    {
+        return partitions.isEmpty();
+    }
+
+    public TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> 
index(Symbol symbol)
+    {
+        if (factory.pkPositions.contains(symbol))
+            return indexPartitionColumn(symbol);
+        if (factory.staticPositions.contains(symbol))
+            return indexStaticColumn(symbol);
+        return indexRowColumn(symbol);
+    }
+
+    private TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> 
indexPartitionColumn(Symbol symbol)
+    {
+        int offset = factory.pkPositions.indexOf(symbol);
+        TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> index = new 
TreeMap<>(symbol.type()::compare);
+        for (BytesPartitionState partition : partitions.values())
+        {
+            if (partition.isEmpty()) continue;
+            ByteBuffer bb = partition.key.bufferAt(offset);
+            List<BytesPartitionState.PrimaryKey> list = 
index.computeIfAbsent(bb, i -> new ArrayList<>());
+            for (BytesPartitionState.Row row : partition.rows())
+                list.add(row.ref());
+        }
+        return index;
+    }
+
+    private TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> 
indexStaticColumn(Symbol symbol)
+    {
+        TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> index = new 
TreeMap<>(symbol.type()::compare);
+        for (BytesPartitionState partition : partitions.values())
+        {
+            if (partition.isEmpty()) continue;
+            ByteBuffer bb = partition.staticRow().get(symbol);
+            if (bb == null)
+                continue;
+            List<BytesPartitionState.PrimaryKey> list = 
index.computeIfAbsent(bb, i -> new ArrayList<>());
+            for (BytesPartitionState.Row row : partition.rows())
+                list.add(row.ref());
+        }
+        return index;
+    }
+
+    private TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> 
indexRowColumn(Symbol symbol)
+    {
+        boolean clustering = factory.ckPositions.contains(symbol);
+        int offset = clustering ? factory.ckPositions.indexOf(symbol) : 
factory.regularPositions.indexOf(symbol);
+        TreeMap<ByteBuffer, List<BytesPartitionState.PrimaryKey>> index = new 
TreeMap<>(symbol.type()::compare);
+        for (BytesPartitionState partition : partitions.values())
+        {
+            if (partition.isEmpty()) continue;
+            for (BytesPartitionState.Row row : partition.rows())
+            {
+                ByteBuffer bb = clustering ? row.clustering.bufferAt(offset) : 
row.get(offset);
+                if (bb == null)
+                    continue;
+                index.computeIfAbsent(bb, i -> new 
ArrayList<>()).add(row.ref());
+            }
+        }
+        return index;
+    }
+
+    public void update(Mutation mutation)
+    {
+        switch (mutation.kind)
+        {
+            case INSERT:
+                update((Mutation.Insert) mutation);
+                break;
+            case UPDATE:
+                update((Mutation.Update) mutation);
+                break;
+            case DELETE:
+                update((Mutation.Delete) mutation);
+                break;
+            default:
+                throw new UnsupportedOperationException(mutation.kind.name());
+        }
+    }
+
+    public void update(Mutation.Insert insert)
+    {
+        Clustering<ByteBuffer> pd = pd(insert);
+        BytesPartitionState partition = partitions.get(factory.createRef(pd));
+        if (partition == null)
+        {
+            partition = factory.create(pd);
+            partitions.put(partition.ref(), partition);
+        }
+        Map<Symbol, Expression> values = insert.values;
+        if (!factory.staticPositions.isEmpty() && 
!Sets.intersection(factory.staticPositions.asSet(), values.keySet()).isEmpty())
+        {
+            // static columns to add in.  If we are doing something like += to 
a row that doesn't exist, we still update statics...
+            Map<Symbol, ByteBuffer> write = new HashMap<>();
+            for (Symbol col : 
Sets.intersection(factory.staticPositions.asSet(), values.keySet()))
+                write.put(col, eval(values.get(col)));
+            partition.setStaticColumns(write);
+        }
+        Map<Symbol, ByteBuffer> write = new HashMap<>();
+        for (Symbol col : Sets.intersection(factory.regularPositions.asSet(), 
values.keySet()))
+            write.put(col, eval(values.get(col)));
+        partition.setColumns(key(insert.values, factory.ckPositions),
+                             write,
+                             true);
+    }
+
+    public void update(Mutation.Update update)
+    {
+        var split = splitOnPartition(update.where.simplify());
+        List<Clustering<ByteBuffer>> pks = split.left;
+        List<Conditional> remaining = split.right;
+        for (Clustering<ByteBuffer> pd : pks)
+        {
+            BytesPartitionState partition = 
partitions.get(factory.createRef(pd));
+            if (partition == null)
+            {
+                partition = factory.create(pd);
+                partitions.put(partition.ref(), partition);
+            }
+            Map<Symbol, Expression> set = update.set;
+            if (!factory.staticPositions.isEmpty() && 
!Sets.intersection(factory.staticPositions.asSet(), set.keySet()).isEmpty())
+            {
+                // static columns to add in.  If we are doing something like 
+= to a row that doesn't exist, we still update statics...
+                Map<Symbol, ByteBuffer> write = new HashMap<>();
+                for (Symbol col : 
Sets.intersection(factory.staticPositions.asSet(), set.keySet()))
+                    write.put(col, eval(set.get(col)));
+                partition.setStaticColumns(write);
+            }
+            for (Clustering<ByteBuffer> cd : clustering(remaining))
+            {
+                Map<Symbol, ByteBuffer> write = new HashMap<>();
+                for (Symbol col : 
Sets.intersection(factory.regularPositions.asSet(), set.keySet()))
+                    write.put(col, eval(set.get(col)));
+
+                partition.setColumns(cd, write, false);
+            }
+        }
+    }
+
+    private enum DeleteKind
+    {PARTITION, ROW, COLUMN}
+
+    public void update(Mutation.Delete delete)
+    {
+        //TODO (coverage): range deletes
+        var split = splitOnPartition(delete.where.simplify());
+        List<Clustering<ByteBuffer>> pks = split.left;
+        List<Clustering<ByteBuffer>> clusterings = split.right.isEmpty() ? 
Collections.emptyList() : clustering(split.right);
+        HashSet<Symbol> columns = delete.columns.isEmpty() ? null : new 
HashSet<>(delete.columns);
+        for (Clustering<ByteBuffer> pd : pks)
+        {
+            BytesPartitionState partition = 
partitions.get(factory.createRef(pd));
+            if (partition == null) return; // can't delete a partition that 
doesn't exist...
+
+            DeleteKind kind = DeleteKind.PARTITION;
+            if (!delete.columns.isEmpty())
+                kind = DeleteKind.COLUMN;
+            else if (!clusterings.isEmpty())
+                kind = DeleteKind.ROW;
+
+            switch (kind)
+            {
+                case PARTITION:
+                    partitions.remove(partition.ref());
+                    break;
+                case ROW:
+                    for (Clustering<ByteBuffer> cd : clusterings)
+                    {
+                        partition.deleteRow(cd);
+                        if (partition.shouldDelete())
+                            partitions.remove(partition.ref());
+                    }
+                    break;
+                case COLUMN:
+                    if (clusterings.isEmpty())
+                    {
+                        partition.deleteStaticColumns(columns);
+                    }
+                    else
+                    {
+                        for (Clustering<ByteBuffer> cd : clusterings)
+                        {
+                            partition.deleteColumns(cd, columns);
+                            if (partition.shouldDelete())
+                                partitions.remove(partition.ref());
+                        }
+                    }
+                    break;
+//                case SLICE:
+//                case RANGE:
+                default:
+                    throw new UnsupportedOperationException();
+            }
+        }
+    }
+
+    private List<Clustering<ByteBuffer>> clustering(List<Conditional> 
conditionals)
+    {
+        if (conditionals.isEmpty())
+        {
+            if (factory.ckPositions.isEmpty()) return 
Collections.singletonList(Clustering.EMPTY);
+            throw new IllegalArgumentException("No clustering columns defined 
in the WHERE clause, but clustering columns exist; expected " + 
factory.ckPositions);
+        }
+        var split = splitOnClustering(conditionals);
+        var clusterings = split.left;
+        var remaining = split.right;
+        if (!remaining.isEmpty())
+            throw new IllegalArgumentException("Non Partition/Clustering 
columns found in WHERE clause; " + 
remaining.stream().map(Element::toCQL).collect(Collectors.joining(", ")));
+        return clusterings;
+    }
+
+    private Pair<List<Clustering<ByteBuffer>>, List<Conditional>> 
splitOnPartition(List<Conditional> conditionals)
+    {
+        return splitOn(factory.pkPositions.asSet(), conditionals);
+    }
+
+    private Pair<List<Clustering<ByteBuffer>>, List<Conditional>> 
splitOnClustering(List<Conditional> conditionals)
+    {
+        return splitOn(factory.ckPositions.asSet(), conditionals);
+    }
+
+    private Pair<List<Clustering<ByteBuffer>>, List<Conditional>> 
splitOn(ImmutableUniqueList<Symbol>.AsSet columns, List<Conditional> 
conditionals)
+    {
+        // pk requires equality
+        Map<Symbol, Set<ByteBuffer>> pks = new HashMap<>();
+        List<Conditional> other = new ArrayList<>();
+        for (Conditional c : conditionals)
+        {
+            if (c instanceof Conditional.Where)
+            {
+                Conditional.Where w = (Conditional.Where) c;
+                if (w.kind == Conditional.Where.Inequality.EQUAL && 
columns.contains(w.lhs))
+                {
+                    Symbol col = (Symbol) w.lhs;
+                    ByteBuffer bb = eval(w.rhs);
+                    if (pks.containsKey(col))
+                        throw new IllegalArgumentException("Partition column " 
+ col + " was defined multiple times in the WHERE clause");
+                    pks.put(col, Collections.singleton(bb));
+                }
+                else
+                {
+                    other.add(c);
+                }
+            }
+            else if (c instanceof Conditional.In)
+            {
+                Conditional.In i = (Conditional.In) c;
+                if (columns.contains(i.ref))
+                {
+                    Symbol col = (Symbol) i.ref;
+                    if (pks.containsKey(col))
+                        throw new IllegalArgumentException("Partition column " 
+ col + " was defined multiple times in the WHERE clause");
+                    var set = 
i.expressions.stream().map(ASTSingleTableModel::eval).collect(Collectors.toSet());
+                    pks.put(col, set);
+                }
+                else
+                {
+                    other.add(c);
+                }
+            }
+            else
+            {
+                other.add(c);
+            }
+        }
+        if (!columns.equals(pks.keySet()))
+        {
+            var missing = Sets.difference(columns, pks.keySet());
+            throw new AssertionError("Unable to find expected columns " + 
missing);
+        }
+
+        List<Clustering<ByteBuffer>> partitionKeys = keys(columns, pks);
+        return Pair.create(partitionKeys, other);
+    }
+
+    private List<Clustering<ByteBuffer>> keys(Collection<Symbol> columns, 
Map<Symbol, Set<ByteBuffer>> pks)
+    {
+        //TODO (coverage): handle IN
+        ByteBuffer[] bbs = new ByteBuffer[columns.size()];
+        int idx = 0;
+        for (Symbol s : columns)
+        {
+            Set<ByteBuffer> values = pks.get(s);
+            if (values.size() > 1)
+                throw new UnsupportedOperationException("IN clause is 
currently unsupported... its on the backlog!");
+            bbs[idx++] = Iterables.getFirst(values, null);
+        }
+        return Collections.singletonList(BufferClustering.make(bbs));
+    }
+
+    private Clustering<ByteBuffer> pd(Mutation.Insert mutation)
+    {
+        return key(mutation.values, factory.pkPositions);
+    }
+
+    public BytesPartitionState get(BytesPartitionState.Ref ref)
+    {
+        return partitions.get(ref);
+    }
+
+    public List<BytesPartitionState> getByToken(long token)
+    {
+        return getByToken(new Murmur3Partitioner.LongToken(token));
+    }
+
+    public List<BytesPartitionState> getByToken(Token token)
+    {
+        NavigableSet<BytesPartitionState.Ref> keys = 
partitions.navigableKeySet();
+        // To support the case where 2+ keys share the same token, need to 
create a token ref before and after the token, to make sure
+        // the head/tail sets find the matches correctly
+        NavigableSet<BytesPartitionState.Ref> matches = 
keys.headSet(factory.createRef(token, true), true)
+                                                            
.tailSet(factory.createRef(token, false), true);
+        if (matches.isEmpty()) return Collections.emptyList();
+        return 
matches.stream().map(partitions::get).collect(Collectors.toList());
+    }
+
+    public void validate(ByteBuffer[][] actual, Select select)
+    {
+        SelectResult results = getRowsAsByteBuffer(select);
+        if (results.unordered)
+        {
+            validateAnyOrder(factory.selectOrder, toRow(factory.selectOrder, 
actual), toRow(factory.selectOrder, results.rows));
+        }
+        else
+        {
+            validate(actual, results.rows);
+        }
+    }
+
+    public void validate(ByteBuffer[][] actual, ByteBuffer[][] expected)
+    {
+        validate(factory.selectOrder, actual, expected);
+    }
+
+    private static void validate(ImmutableUniqueList<Symbol> columns, 
ByteBuffer[][] actual, ByteBuffer[][] expected)
+    {
+        // check any order
+        validateAnyOrder(columns, toRow(columns, actual), toRow(columns, 
expected));
+        validateOrder(columns, actual, expected);
+    }
+
+    private static void validateAnyOrder(ImmutableUniqueList<Symbol> columns, 
Set<Row> actual, Set<Row> expected)
+    {
+        var unexpected = Sets.difference(actual, expected);
+        var missing = Sets.difference(expected, actual);
+        StringBuilder sb = null;
+        if (!unexpected.isEmpty())
+        {
+            sb = new StringBuilder();
+            sb.append("Unexpected rows found:\n").append(table(columns, 
unexpected));
+        }
+
+        if (!missing.isEmpty())
+        {
+            if (sb == null)
+            {
+                sb = new StringBuilder();
+            }
+            else
+            {
+                sb.append('\n');
+            }
+            if (actual.isEmpty()) sb.append("No rows returned");
+            else sb.append("Missing rows:\n").append(table(columns, missing));
+        }
+        if (sb != null)
+        {
+            sb.append("\nExpected:\n").append(table(columns, expected));
+            throw new AssertionError(sb.toString());
+        }
+    }
+
+    private static String table(ImmutableUniqueList<Symbol> columns, 
Collection<Row> rows)
+    {
+        return 
TableUtil.table(columns.stream().map(Symbol::toCQL).collect(Collectors.toList()),
+                               // intellij or junit can be tripped up by utf 
control or invisible chars, so this logic tries to normalize to make things 
more safe
+                               () -> rows.stream()
+                                         .map(r -> 
r.asCQL().stream().map(StringUtils::escapeControlChars).collect(Collectors.toList()))
+                                         .iterator());
+    }
+
+    private static String table(ImmutableUniqueList<Symbol> columns, 
ByteBuffer[][] rows)
+    {
+        return 
TableUtil.table(columns.stream().map(Symbol::toCQL).collect(Collectors.toList()),
+                               () -> Stream.of(rows).map(row -> asCQL(columns, 
row)).iterator());
+    }
+
+    private static void validateOrder(ImmutableUniqueList<Symbol> columns, 
ByteBuffer[][] actual, ByteBuffer[][] expected)
+    {
+        StringBuilder sb = null;
+        for (int i = 0, size = Math.min(actual.length, expected.length); i < 
size; i++)
+        {
+            ByteBuffer[] as = actual[i];
+            ByteBuffer[] es = expected[i];
+            if (as.length != es.length)
+            {
+                if (sb == null)
+                    sb = new StringBuilder();
+                sb.append("\nExpected number of columns does not match");
+            }
+            for (int c = 0, cs = Math.min(as.length, es.length); c < cs; c++)
+            {
+                ByteBuffer a = as[c];
+                ByteBuffer e = es[c];
+                if (!Objects.equals(a, e))
+                {
+                    Symbol symbol = columns.get(c);
+                    if (sb == null)
+                        sb = new StringBuilder();
+                    sb.append(String.format("\nIncorrect value for row %d 
column %s: expected %s but was %s", i, symbol,
+                                            e == null ? "null" : 
symbol.type().asCQL3Type().toCQLLiteral(e),
+                                            a == null ? "null" : 
symbol.type().asCQL3Type().toCQLLiteral(a)));
+                }
+            }
+        }
+
+        if (sb != null)
+        {
+            sb.append("\nExpected:\n").append(table(columns, expected));
+            throw new AssertionError(sb.toString());
+        }
+    }
+
+    private static Set<Row> toRow(ImmutableUniqueList<Symbol> columns, 
ByteBuffer[][] rows)
+    {
+        Set<Row> set = new HashSet<>();
+        for (ByteBuffer[] row : rows)
+            set.add(new Row(columns, row));
+        return set;
+    }
+
+    private static class SelectResult
+    {
+        private final ByteBuffer[][] rows;
+        private final boolean unordered;
+
+        private SelectResult(ByteBuffer[][] rows, boolean unordered)
+        {
+            this.rows = rows;
+            this.unordered = unordered;
+        }
+    }
+
+    private SelectResult getRowsAsByteBuffer(Select select)
+    {
+        LookupContext ctx = context(select);
+        List<BytesPartitionState.PrimaryKey> primaryKeys;
+        if (ctx.unmatchable)
+        {
+            primaryKeys = Collections.emptyList();
+        }
+        else if (ctx.eq.keySet().containsAll(factory.pkPositions))
+        {
+            primaryKeys = findByPartitionEq(ctx);
+        }
+        else if (ctx.token != null)
+        {
+            primaryKeys = findKeysByToken(ctx);
+        }
+        else if (ctx.tokenLowerBound != null || ctx.tokenUpperBound != null)
+        {
+            primaryKeys = findKeysByTokenSearch(ctx);
+        }
+        else
+        {
+            primaryKeys = search(ctx);
+        }
+        //TODO (correctness): now that we have the rows we need to handle the 
selections/aggregation/limit/group-by/etc.
+        return new SelectResult(getRowsAsByteBuffer(primaryKeys), 
ctx.unordered);
+    }
+
+    public ByteBuffer[][] 
getRowsAsByteBuffer(List<BytesPartitionState.PrimaryKey> primaryKeys)
+    {
+        ByteBuffer[][] rows = new ByteBuffer[primaryKeys.size()][];
+        int idx = 0;
+        for (BytesPartitionState.PrimaryKey pk : primaryKeys)
+        {
+            BytesPartitionState partition = partitions.get(pk.partition);
+            BytesPartitionState.Row row = partition.get(pk.clustering);
+            rows[idx++] = getRowAsByteBuffer(partition, row);
+        }
+        return rows;
+    }
+
+    public ByteBuffer[][] 
getRowsAsByteBufferFromPartitions(List<BytesPartitionState> ps)
+    {
+        List<ByteBuffer[]> rows = new ArrayList<>();
+        for (BytesPartitionState p : ps)
+        {
+            for (BytesPartitionState.Row row : p.rows())
+                rows.add(getRowAsByteBuffer(p, row));
+        }
+        return rows.toArray(ByteBuffer[][]::new);
+    }
+
+    private ByteBuffer[] getRowAsByteBuffer(BytesPartitionState partition, 
@Nullable BytesPartitionState.Row row)
+    {
+        Clustering<ByteBuffer> pd = partition.key;
+        BytesPartitionState.Row staticRow = partition.staticRow();
+        ByteBuffer[] bbs = new ByteBuffer[factory.selectOrder.size()];
+        for (Symbol col : factory.pkPositions)
+            bbs[factory.selectOrder.indexOf(col)] = 
pd.bufferAt(factory.pkPositions.indexOf(col));
+        for (Symbol col : factory.staticPositions)
+            bbs[factory.selectOrder.indexOf(col)] = staticRow.get(col);
+        if (row != null)
+        {
+            for (Symbol col : factory.ckPositions)
+                bbs[factory.selectOrder.indexOf(col)] = 
row.clustering.bufferAt(factory.ckPositions.indexOf(col));
+            for (Symbol col : factory.regularPositions)
+                bbs[factory.selectOrder.indexOf(col)] = row.get(col);
+        }
+        return bbs;
+    }
+
+    private LookupContext context(Select select)
+    {
+        if (select.where.isEmpty())
+            throw new IllegalArgumentException("Select without a where clause 
is currently unsupported");
+        LookupContext ctx = new LookupContext(select);
+        context(ctx, select.where.get());
+        maybeNormalizeTokenBounds(ctx);
+        return ctx;
+    }
+
+    private void maybeNormalizeTokenBounds(LookupContext ctx)
+    {
+        if (ctx.tokenLowerBound != null && ctx.tokenUpperBound != null)
+        {
+            int rc = 
ctx.tokenLowerBound.token.compareTo(ctx.tokenUpperBound.token);
+            if (rc > 0)
+            {
+                // where token > 10 and < 0.... nothing matches that!
+                ctx.unmatchable = true;
+                ctx.tokenLowerBound = null;
+                ctx.tokenUpperBound = null;
+            }
+            else if (rc == 0)
+            {
+                // tokens match... but is _EQ allowed for both cases?
+                if (!(ctx.tokenLowerBound.inequality == 
Conditional.Where.Inequality.GREATER_THAN_EQ
+                      && ctx.tokenUpperBound.inequality == 
Conditional.Where.Inequality.LESS_THAN_EQ))
+                {
+                    // token < 42 and >= 42... nothing matches that!
+                    ctx.unmatchable = true;
+                    ctx.tokenLowerBound = null;
+                    ctx.tokenUpperBound = null;
+                }
+            }
+        }
+    }
+
+    private void context(LookupContext ctx, Conditional conditional)
+    {
+        if (conditional instanceof Conditional.Where)
+        {
+            Conditional.Where w = (Conditional.Where) conditional;
+            if (w.kind == Conditional.Where.Inequality.NOT_EQUAL)
+                throw new UnsupportedOperationException("!= is currently not 
supported");
+            if (w.lhs instanceof Symbol)
+            {
+                Symbol col = (Symbol) w.lhs;
+                switch (w.kind)
+                {
+                    case EQUAL:
+                        var override = ctx.eq.put(col, 
Collections.singletonList(w.rhs));
+                        if (override != null)
+                            throw new IllegalStateException("Column " + 
col.detailedName() + " had 2 '=' statements...");
+                        break;
+                    case LESS_THAN:
+                    case LESS_THAN_EQ:
+                    case GREATER_THAN:
+                    case GREATER_THAN_EQ:
+                        ctx.ltOrGt.computeIfAbsent(col, i -> new 
ArrayList<>()).add(new ColumnCondition(w.kind, eval(w.rhs)));
+                        break;
+                    //TODO (coverage): NOT_EQUAL
+                    default:
+                        throw new UnsupportedOperationException(w.kind.name());
+                }
+            }
+            else if (w.lhs instanceof FunctionCall)
+            {
+                FunctionCall fn = (FunctionCall) w.lhs;
+                switch (fn.name())
+                {
+                    case "token":
+                        FunctionCall rhs = (FunctionCall) w.rhs;
+                        List<ByteBuffer> pkValues = 
rhs.arguments.stream().map(ASTSingleTableModel::eval).collect(Collectors.toList());
+                        BytesPartitionState.Ref ref = factory.createRef(new 
BufferClustering(pkValues.toArray(ByteBuffer[]::new)));
+                        switch (w.kind)
+                        {
+                            case EQUAL:
+                                ctx.token = ref.token;
+                                break;
+                            case LESS_THAN:
+                            case LESS_THAN_EQ:
+                                ctx.tokenUpperBound = new 
TokenCondition(w.kind, ref.token);
+                                break;
+                            case GREATER_THAN:
+                            case GREATER_THAN_EQ:
+                                ctx.tokenLowerBound = new 
TokenCondition(w.kind, ref.token);
+                                break;
+                            default:
+                                throw new 
UnsupportedOperationException(w.kind.name());
+                        }
+                        break;
+                    default:
+                        throw new UnsupportedOperationException(fn.toCQL());
+                }
+            }
+            else
+            {
+                throw new 
UnsupportedOperationException(w.lhs.getClass().getCanonicalName());
+            }
+        }
+        else if (conditional instanceof Conditional.In)
+        {
+            Conditional.In in = (Conditional.In) conditional;
+            if (in.ref instanceof Symbol)
+            {
+                Symbol col = (Symbol) in.ref;
+                var override = ctx.eq.put(col, in.expressions);
+                if (override != null)
+                    throw new IllegalStateException("Column " + 
col.detailedName() + " had 2 '=' statements...");
+                //TODO (correctness): can't find any documentation saying 
clustering is ordered by the data... it "could" but is it garanateed?
+                if (factory.pkPositions.contains(col) || 
factory.ckPositions.contains(col))
+                    ctx.unordered = true;
+            }
+            else
+            {
+                throw new 
UnsupportedOperationException(in.ref.getClass().getCanonicalName());
+            }
+        }
+        else if (conditional instanceof Conditional.Between)
+        {
+            Conditional.Between between = (Conditional.Between) conditional;
+            if (between.ref instanceof Symbol)
+            {
+                Symbol col = (Symbol) between.ref;
+                List<ColumnCondition> list = ctx.ltOrGt.computeIfAbsent(col, i 
-> new ArrayList<>());
+                list.add(new 
ColumnCondition(Conditional.Where.Inequality.GREATER_THAN_EQ, 
eval(between.start)));
+                list.add(new 
ColumnCondition(Conditional.Where.Inequality.LESS_THAN_EQ, eval(between.end)));
+            }
+            else if (between.ref instanceof FunctionCall)
+            {
+                FunctionCall fn = (FunctionCall) between.ref;
+                switch (fn.name())
+                {
+                    case "token":
+                        // if the ref is a token, the only valid start/end are 
also token
+                        List<ByteBuffer> start = ((FunctionCall) 
between.start).arguments.stream().map(ASTSingleTableModel::eval).collect(Collectors.toList());
+                        Token startToken = factory.createRef(new 
BufferClustering(start.toArray(ByteBuffer[]::new))).token;
+
+                        List<ByteBuffer> end = ((FunctionCall) 
between.end).arguments.stream().map(ASTSingleTableModel::eval).collect(Collectors.toList());
+                        Token endToken = factory.createRef(new 
BufferClustering(end.toArray(ByteBuffer[]::new))).token;
+
+                        if (startToken.equals(endToken))
+                        {
+                            ctx.token = startToken;
+                        }
+                        else if (startToken.compareTo(endToken) > 0)
+                        {
+                            // start is larger than end... no matches
+                            ctx.unmatchable = true;
+                        }
+                        else
+                        {
+                            ctx.tokenLowerBound = new 
TokenCondition(Conditional.Where.Inequality.GREATER_THAN_EQ, startToken);
+                            ctx.tokenUpperBound = new 
TokenCondition(Conditional.Where.Inequality.LESS_THAN_EQ, endToken);
+                        }
+                        break;
+                    default:
+                        throw new UnsupportedOperationException(fn.toCQL());
+                }
+            }
+            else
+            {
+                throw new 
UnsupportedOperationException(between.ref.getClass().getCanonicalName());
+            }
+        }
+        else if (conditional instanceof Conditional.And)
+        {
+            Conditional.And and = (Conditional.And) conditional;
+            context(ctx, and.left);
+            context(ctx, and.right);
+        }
+        else
+        {
+            //TODO (coverage): IS
+            throw new 
UnsupportedOperationException(conditional.getClass().getCanonicalName());
+        }
+    }
+
+    private List<BytesPartitionState.PrimaryKey> search(LookupContext ctx)
+    {
+        // find by eq first
+        Set<BytesPartitionState.PrimaryKey> eqMatches = searchEq(ctx);
+        Set<BytesPartitionState.PrimaryKey> rangeMatches = searchRange(ctx);
+        return new ArrayList<>(new TreeSet<>(intersectionEmptySafe(eqMatches, 
rangeMatches)));
+    }
+
+    private static <T> Set<T> intersectionEmptySafe(Set<T> a, Set<T> b)
+    {
+        if (a.isEmpty()) return b;
+        if (b.isEmpty()) return a;
+        return new HashSet<>(Sets.intersection(a, b));
+    }
+
+    private Set<BytesPartitionState.PrimaryKey> searchRange(LookupContext ctx)
+    {
+        Set<BytesPartitionState.PrimaryKey> matches = null;
+        for (Map.Entry<Symbol, List<ColumnCondition>> e : 
ctx.ltOrGt.entrySet())
+        {
+            if (matches == null)
+                matches = new HashSet<>(searchRange(e.getKey(), e.getValue()));
+            else
+            {
+                boolean hadMatches = !matches.isEmpty();
+                matches = new HashSet<>(intersectionEmptySafe(matches, new 
HashSet<>(searchRange(e.getKey(), e.getValue()))));
+                if (hadMatches && matches.isEmpty())
+                    return Collections.emptySet();
+            }
+        }
+        return matches == null ? Collections.emptySet() : matches;
+    }
+
+    private List<BytesPartitionState.PrimaryKey> searchRange(Symbol symbol, 
List<ColumnCondition> conditions)
+    {
+        List<BytesPartitionState.PrimaryKey> matches = new ArrayList<>();
+        if (factory.pkPositions.contains(symbol) || 
factory.staticPositions.contains(symbol))
+        {
+            int pkOffset = factory.pkPositions.indexOf(symbol);
+            int sOffset = factory.staticPositions.indexOf(symbol);
+            for (BytesPartitionState p : partitions.values())
+            {
+                if (pkOffset != -1)
+                {
+                    ByteBuffer value = p.key.bufferAt(pkOffset);
+                    if (matches(symbol.type(), value, conditions))
+                    {
+                        if (p.isEmpty())
+                        {
+                            matches.add(p.partitionRowRef());
+                        }
+                        else
+                        {
+                            p.rows().forEach(r -> matches.add(r.ref()));
+                        }
+                    }
+                }
+                else
+                {
+                    // mutable columns, so may be null
+                    ByteBuffer value = p.staticRow().get(sOffset);
+                    if (value == null) continue;
+                    if (matches(symbol.type(), value, conditions))
+                    {
+                        if (p.isEmpty())
+                        {
+                            matches.add(p.partitionRowRef());
+                        }
+                        else
+                        {
+                            p.rows().forEach(r -> matches.add(r.ref()));
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            int ckOffset = factory.ckPositions.indexOf(symbol);
+            int rOffset = factory.regularPositions.indexOf(symbol);
+            for (BytesPartitionState p : partitions.values())
+            {
+                for (BytesPartitionState.Row row : p.rows())
+                {
+                    if (ckOffset != -1)
+                    {
+                        ByteBuffer value = row.clustering.bufferAt(ckOffset);
+                        if (matches(symbol.type(), value, conditions))
+                            matches.add(row.ref());
+                    }
+                    else
+                    {
+                        // mutable columns, so may be null
+                        var value = row.get(rOffset);
+                        if (value == null) continue;
+                        if (matches(symbol.type(), value, conditions))
+                            matches.add(row.ref());
+                    }
+                }
+            }
+        }
+        return matches;
+    }
+
+    private static boolean matches(AbstractType<?> type, ByteBuffer value, 
List<ColumnCondition> conditions)
+    {
+        for (ColumnCondition c : conditions)
+        {
+            int rc = type.compare(value, c.value);
+            switch (c.inequality)
+            {
+                case LESS_THAN:
+                    if (rc >= 0) return false;
+                    break;
+                case LESS_THAN_EQ:
+                    if (rc > 0) return false;
+                    break;
+                case GREATER_THAN:
+                    if (rc <= 0) return false;
+                    break;
+                case GREATER_THAN_EQ:
+                    if (rc < 0) return false;
+                    break;
+                default:
+                    throw new 
UnsupportedOperationException(c.inequality.name());
+            }
+        }
+        return true;
+    }
+
+    /**
+     * The common case there can only be 1 value, but in the case of {@link 
Conditional.In} this can be multiple.  When
+     * multiple values are found then the semantic is OR rather than AND like 
the other matches function {@link #matches(AbstractType, ByteBuffer, List)}
+     */
+    private static boolean matches(ByteBuffer value, List<? extends 
Expression> conditions)
+    {
+        for (Expression e : conditions)
+        {
+            ByteBuffer expected = eval(e);
+            if (expected.equals(value))
+                return true;
+        }
+        return false;
+    }
+
+    private Set<BytesPartitionState.PrimaryKey> searchEq(LookupContext ctx)
+    {
+        Set<BytesPartitionState.PrimaryKey> matches = null;
+        for (Map.Entry<Symbol, List<? extends Expression>> e : 
ctx.eq.entrySet())
+        {
+            for (Expression e2 : e.getValue())
+            {
+                ByteBuffer bb = eval(e2);
+                if (matches == null)
+                    matches = new HashSet<>(searchEq(e.getKey(), bb));
+                else
+                {
+                    boolean hadMatches = !matches.isEmpty();
+                    matches = new HashSet<>(intersectionEmptySafe(matches, new 
HashSet<>(searchEq(e.getKey(), bb))));
+                    if (hadMatches && matches.isEmpty())
+                        return Collections.emptySet();
+                }
+            }
+        }
+        return matches == null ? Collections.emptySet() : matches;
+    }
+
+    private List<BytesPartitionState.PrimaryKey> searchEq(Symbol symbol, 
ByteBuffer bb)
+    {
+        List<BytesPartitionState.PrimaryKey> matches = new ArrayList<>();
+        if (factory.pkPositions.contains(symbol) || 
factory.staticPositions.contains(symbol))
+        {
+            int pkOffset = factory.pkPositions.indexOf(symbol);
+            int sOffset = factory.staticPositions.indexOf(symbol);
+            for (BytesPartitionState p : partitions.values())
+            {
+                if (pkOffset != -1)
+                {
+                    if (p.key.bufferAt(pkOffset).equals(bb))
+                        p.rows().forEach(r -> matches.add(r.ref()));
+                }
+                else
+                {
+                    // mutable columns, so may be null
+                    ByteBuffer value = p.staticRow().get(sOffset);
+                    if (value == null) continue;
+                    if (value.equals(bb))
+                        p.rows().forEach(r -> matches.add(r.ref()));
+                }
+            }
+        }
+        else
+        {
+            int ckOffset = factory.ckPositions.indexOf(symbol);
+            int rOffset = factory.regularPositions.indexOf(symbol);
+            for (BytesPartitionState p : partitions.values())
+            {
+                for (BytesPartitionState.Row row : p.rows())
+                {
+                    if (ckOffset != -1)
+                    {
+                        if (row.clustering.bufferAt(ckOffset).equals(bb))
+                            matches.add(row.ref());
+                    }
+                    else
+                    {
+                        // mutable columns, so may be null
+                        var value = row.get(rOffset);
+                        if (value == null) continue;
+                        if (value.equals(bb))
+                            matches.add(row.ref());
+                    }
+                }
+            }
+        }
+        return matches;
+    }
+
+    private List<BytesPartitionState.PrimaryKey> findKeysByToken(LookupContext 
ctx)
+    {
+        return filter(ctx, getByToken(ctx.token));
+    }
+
+    private List<BytesPartitionState.PrimaryKey> 
findKeysByTokenSearch(LookupContext ctx)
+    {
+        return filter(ctx, getByTokenSearch(ctx.tokenLowerBound, 
ctx.tokenUpperBound));
+    }
+
+    private List<BytesPartitionState> getByTokenSearch(@Nullable 
TokenCondition tokenLowerBound,
+                                                       @Nullable 
TokenCondition tokenUpperBound)
+    {
+        if (tokenLowerBound == null && tokenUpperBound == null)
+            throw new IllegalArgumentException("At least one bound must be 
defined...");
+        NavigableSet<BytesPartitionState.Ref> keys = 
partitions.navigableKeySet();
+        // To support the case where 2+ keys share the same token, need to 
create a token ref before and after the token, to make sure
+        // the head/tail sets find the matches correctly
+        if (tokenLowerBound != null)
+        {
+            boolean inclusive;
+            switch (tokenLowerBound.inequality)
+            {
+                case GREATER_THAN:
+                    inclusive = false;
+                    break;
+                case GREATER_THAN_EQ:
+                    inclusive = true;
+                    break;
+                default:
+                    throw new 
UnsupportedOperationException(tokenLowerBound.inequality.name());
+            }
+            // when inclusive=true the ref should be before the token, that 
way the tokens match
+            // when inclusive=false the ref should be after the token, that 
way they are excluded
+            keys = keys.tailSet(factory.createRef(tokenLowerBound.token, 
!inclusive), inclusive);
+        }
+        if (tokenUpperBound != null)
+        {
+            boolean inclusive;
+            switch (tokenUpperBound.inequality)
+            {
+                case LESS_THAN:
+                    inclusive = false;
+                    break;
+                case LESS_THAN_EQ:
+                    inclusive = true;
+                    break;
+                default:
+                    throw new 
UnsupportedOperationException(tokenUpperBound.inequality.name());
+            }
+            // when inclusive=true the ref should be after the token
+            // when inclusive=false the ref should be before the token
+            keys = keys.headSet(factory.createRef(tokenUpperBound.token, 
inclusive), false);
+        }
+        if (keys.isEmpty()) return Collections.emptyList();
+        return keys.stream().map(partitions::get).collect(Collectors.toList());
+    }
+
+    private List<BytesPartitionState.PrimaryKey> filter(LookupContext ctx, 
List<BytesPartitionState> partitions)
+    {
+        if (partitions.isEmpty()) return Collections.emptyList();
+        List<BytesPartitionState.PrimaryKey> matches = new ArrayList<>();
+        for (BytesPartitionState p : partitions)
+            matches.addAll(filter(ctx, p));
+        return matches;
+    }
+
+    private List<BytesPartitionState.PrimaryKey> filter(LookupContext ctx, 
BytesPartitionState partition)
+    {
+        Map<Symbol, List<? extends Expression>> values = ctx.eq;
+        List<BytesPartitionState.PrimaryKey> rows = new 
ArrayList<>(partition.size());
+        if (!factory.ckPositions.isEmpty() && 
values.keySet().containsAll(factory.ckPositions))
+        {
+            // single row
+            for (Clustering<ByteBuffer> cd : keys(values, factory.ckPositions))
+            {
+                BytesPartitionState.Row row = partition.get(cd);
+                if (row != null && ctx.include(row))
+                    rows.add(row.ref());
+            }
+        }
+        else
+        {
+            // full partition
+            if (partition.isEmpty())
+            {
+                //TODO (now, correctness): if you query a non-partition column 
and this is empty, this condition isn't true...
+                rows.add(partition.partitionRowRef());
+            }
+            else
+            {
+                for (BytesPartitionState.Row row : partition.rows())
+                {
+                    if (ctx.include(row))
+                        rows.add(row.ref());
+                }
+            }
+        }
+        return rows;
+    }
+
+    private List<BytesPartitionState.PrimaryKey> 
findByPartitionEq(LookupContext ctx)
+    {
+        List<BytesPartitionState.PrimaryKey> matches = new ArrayList<>();
+        for (Clustering<ByteBuffer> pd : keys(ctx.eq, factory.pkPositions))
+        {
+            BytesPartitionState partition = 
partitions.get(factory.createRef(pd));
+            if (partition == null || !ctx.include(partition)) continue;
+            matches.addAll(filter(ctx, partition));
+        }
+        return matches;
+    }
+
+    private Clustering<ByteBuffer> key(Map<Symbol, Expression> values, 
ImmutableUniqueList<Symbol> columns)
+    {
+        // same as keys, but only one possible value can happen
+        List<Clustering<ByteBuffer>> keys = keys(Maps.transformValues(values, 
e -> Collections.singletonList(e)), columns);
+        Invariants.checkState(keys.size() == 1, "Expected 1 key, but found 
%d", keys.size());
+        return keys.get(0);
+    }
+
+    private List<Clustering<ByteBuffer>> keys(Map<Symbol, List<? extends 
Expression>> values, ImmutableUniqueList<Symbol> columns)
+    {
+        if (columns.isEmpty()) return 
Collections.singletonList(Clustering.EMPTY);
+        List<ByteBuffer[]> current = new ArrayList<>();
+        current.add(new ByteBuffer[columns.size()]);
+        for (Symbol symbol : columns)
+        {
+            int position = columns.indexOf(symbol);
+            List<? extends Expression> expressions = values.get(symbol);
+            ByteBuffer firstBB = eval(expressions.get(0));
+            current.forEach(bbs -> bbs[position] = firstBB);
+            if (expressions.size() > 1)
+            {
+                // this has a multiplying effect... if there is 1 row and 
there are 2 expressions, then we have 2 rows
+                // if there are 2 rows and 2 expressions, we have 4 rows... 
and so on...
+                List<ByteBuffer[]> copy = new ArrayList<>(current);
+                for (int i = 1; i < expressions.size(); i++)
+                {
+                    ByteBuffer bb = eval(expressions.get(i));
+                    for (ByteBuffer[] bbs : copy)
+                    {
+                        bbs = bbs.clone();
+                        bbs[position] = bb;
+                        current.add(bbs);
+                    }
+                }
+            }
+        }
+        return 
current.stream().map(BufferClustering::new).collect(Collectors.toList());
+    }
+
+    private static ByteBuffer eval(Expression e)
+    {
+        return ExpressionEvaluator.tryEvalEncoded(e).get();
+    }
+
+    private static class Row
+    {
+        private final ImmutableUniqueList<Symbol> columns;
+        private final ByteBuffer[] values;
+
+        private Row(ImmutableUniqueList<Symbol> columns, ByteBuffer[] values)
+        {
+            this.columns = columns;
+            this.values = values;
+        }
+
+        public String asCQL(Symbol symbol)
+        {
+            int offset = columns.indexOf(symbol);
+            assert offset >= 0;
+            ByteBuffer b = values[offset];
+            return (b == null || ByteBufferUtil.EMPTY_BYTE_BUFFER.equals(b)) ? 
"null" : symbol.type().asCQL3Type().toCQLLiteral(b);
+        }
+
+        public List<String> asCQL()
+        {
+            List<String> human = new ArrayList<>(values.length);
+            for (int i = 0; i < values.length; i++)
+                human.add(asCQL(columns.get(i)));
+            return human;
+        }
+
+        @Override
+        public boolean equals(Object o)
+        {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+            Row row = (Row) o;
+            return Arrays.equals(values, row.values);
+        }
+
+        @Override
+        public int hashCode()
+        {
+            return Arrays.hashCode(values);
+        }
+
+        @Override
+        public String toString()
+        {
+            return asCQL().toString();
+        }
+    }
+
+    private class LookupContext
+    {
+        private final Select select;

Review Comment:
   i had this for when i was working in a debugger... but in all cases i can 
just move higher up in the stack trace, so not needed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] To improve accord interoperability test coverage, need to extend the harry model domain to handle more possible CQL states [cassandra]

Reply via email to