This is an automated email from the ASF dual-hosted git repository. marcuse pushed a commit to branch cassandra-3.0 in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-3.0 by this push: new 767a68c Ensure legacy rows have primary key livenessinfo when they contain illegal cells 767a68c is described below commit 767a68cd00050298abf7bbfd8b322e5663439c23 Author: Marcus Eriksson <marc...@apache.org> AuthorDate: Thu Oct 17 10:24:57 2019 +0200 Ensure legacy rows have primary key livenessinfo when they contain illegal cells Patch by marcuse and Sam Tunnicliffe; reviewed by Benedict Elliott Smith for CASSANDRA-15365 --- CHANGES.txt | 1 + src/java/org/apache/cassandra/db/LegacyLayout.java | 56 ++++++++++++--- ...with_illegal_cell_names-ka-2-CompressionInfo.db | Bin 0 -> 43 bytes ...-legacy_ka_with_illegal_cell_names-ka-2-Data.db | Bin 0 -> 59 bytes ...acy_ka_with_illegal_cell_names-ka-2-Digest.sha1 | 1 + ...egacy_ka_with_illegal_cell_names-ka-2-Filter.db | Bin 0 -> 16 bytes ...legacy_ka_with_illegal_cell_names-ka-2-Index.db | Bin 0 -> 18 bytes ...y_ka_with_illegal_cell_names-ka-2-Statistics.db | Bin 0 -> 4452 bytes ...-legacy_ka_with_illegal_cell_names-ka-2-TOC.txt | 8 +++ ...th_illegal_cell_names_2-ka-1-CompressionInfo.db | Bin 0 -> 43 bytes ...egacy_ka_with_illegal_cell_names_2-ka-1-Data.db | Bin 0 -> 67 bytes ...y_ka_with_illegal_cell_names_2-ka-1-Digest.sha1 | 1 + ...acy_ka_with_illegal_cell_names_2-ka-1-Filter.db | Bin 0 -> 16 bytes ...gacy_ka_with_illegal_cell_names_2-ka-1-Index.db | Bin 0 -> 18 bytes ...ka_with_illegal_cell_names_2-ka-1-Statistics.db | Bin 0 -> 4452 bytes ...cy_ka_with_illegal_cell_names_2-ka-1-Summary.db | Bin 0 -> 92 bytes ...egacy_ka_with_illegal_cell_names_2-ka-1-TOC.txt | 8 +++ .../cassandra/io/sstable/LegacySSTableTest.java | 80 +++++++++++++++++++-- 18 files changed, 141 insertions(+), 14 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index bd12942..d58a199 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.0.20 + * Ensure legacy rows have primary key livenessinfo when they contain illegal cells (CASSANDRA-15365) Merged from 2.2 * In-JVM DTest: Set correct internode message version for upgrade test (CASSANDRA-15371) diff --git a/src/java/org/apache/cassandra/db/LegacyLayout.java b/src/java/org/apache/cassandra/db/LegacyLayout.java index 1a03c91..6e93d08 100644 --- a/src/java/org/apache/cassandra/db/LegacyLayout.java +++ b/src/java/org/apache/cassandra/db/LegacyLayout.java @@ -1291,6 +1291,22 @@ public abstract class LegacyLayout private LegacyRangeTombstone rowDeletion; private LegacyRangeTombstone collectionDeletion; + /** + * Used to track if we need to add pk liveness info (row marker) when removing invalid legacy cells. + * + * In 2.1 these invalid cells existed but were not queryable, in this case specifically because they + * represented values for clustering key columns that were written as data cells. + * + * However, the presence (or not) of such cells on an otherwise empty CQL row (or partition) would decide + * if an empty result row were returned for the CQL row (or partition). To maintain this behaviour we + * insert a row marker containing the liveness info of these invalid cells iff we have no other data + * on the row. + * + * See also CASSANDRA-15365 + */ + private boolean hasValidCells = false; + private LivenessInfo invalidLivenessInfo = null; + public CellGrouper(CFMetaData metadata, SerializationHelper helper) { this(metadata, helper, false); @@ -1317,6 +1333,8 @@ public abstract class LegacyLayout this.clustering = null; this.rowDeletion = null; this.collectionDeletion = null; + this.invalidLivenessInfo = null; + this.hasValidCells = false; } public boolean addAtom(LegacyAtom atom) @@ -1326,7 +1344,7 @@ public abstract class LegacyLayout : addRangeTombstone(atom.asRangeTombstone()); } - public boolean addCell(LegacyCell cell) + private boolean addCell(LegacyCell cell) { if (clustering == null) { @@ -1359,21 +1377,38 @@ public abstract class LegacyLayout builder.addRowDeletion(Row.Deletion.regular(new DeletionTime(cell.timestamp, cell.localDeletionTime))); else builder.addPrimaryKeyLivenessInfo(LivenessInfo.create(cell.timestamp, FAKE_TTL, cell.localDeletionTime)); + hasValidCells = true; + } + else if (column.isPrimaryKeyColumn() && metadata.isCQLTable()) + { + // SSTables generated offline and side-loaded may include invalid cells which have the column name + // of a primary key column. So that we don't fail when encountering these cells, we treat them the + // same way as 2.1 did, namely we include their clusterings in the new CQL row, but drop the invalid + // column part of the cell + noSpamLogger.warn("Illegal cell name for CQL3 table {}.{}. {} is defined as a primary key column", + metadata.ksName, metadata.cfName, column.name); + + if (invalidLivenessInfo != null) + { + // when we have several invalid cells we follow the logic in LivenessInfo#supersedes when picking the PKLI to keep: + LivenessInfo newInvalidLiveness = LivenessInfo.create(cell.timestamp, cell.isTombstone() ? FAKE_TTL : cell.ttl, cell.localDeletionTime); + if (newInvalidLiveness.supersedes(invalidLivenessInfo)) + invalidLivenessInfo = newInvalidLiveness; + } + else + { + invalidLivenessInfo = LivenessInfo.create(cell.timestamp, cell.isTombstone() ? FAKE_TTL : cell.ttl, cell.localDeletionTime); + } + return true; } else { if (collectionDeletion != null && collectionDeletion.start.collectionName.name.equals(column.name) && collectionDeletion.deletionTime.deletes(cell.timestamp)) return true; - if (column.isPrimaryKeyColumn() && metadata.isCQLTable()) - { - noSpamLogger.warn("Illegal cell name for CQL3 table {}.{}. {} is defined as a primary key column", - metadata.ksName, metadata.cfName, column.name); - return true; - } - if (helper.includes(column)) { + hasValidCells = true; CellPath path = null; if (column.isComplex()) { @@ -1422,6 +1457,7 @@ public abstract class LegacyLayout { builder.addRowDeletion(Row.Deletion.regular(tombstone.deletionTime)); rowDeletion = tombstone; + hasValidCells = true; } return true; } @@ -1434,6 +1470,7 @@ public abstract class LegacyLayout builder.newRow(clustering); builder.addRowDeletion(Row.Deletion.regular(tombstone.deletionTime)); rowDeletion = tombstone; + hasValidCells = true; return true; } @@ -1464,6 +1501,7 @@ public abstract class LegacyLayout builder.addComplexDeletion(tombstone.start.collectionName, tombstone.deletionTime); if (rowDeletion == null || tombstone.deletionTime.supersedes(rowDeletion.deletionTime)) collectionDeletion = tombstone; + hasValidCells = true; return true; } @@ -1488,6 +1526,8 @@ public abstract class LegacyLayout public Row getRow() { + if (!hasValidCells && invalidLivenessInfo != null) + builder.addPrimaryKeyLivenessInfo(invalidLivenessInfo); return builder.build(); } } diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-CompressionInfo.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-CompressionInfo.db new file mode 100644 index 0000000..26a0dbe Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-CompressionInfo.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Data.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Data.db new file mode 100644 index 0000000..c805f7d Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Data.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Digest.sha1 b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Digest.sha1 new file mode 100644 index 0000000..0c696fb --- /dev/null +++ b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Digest.sha1 @@ -0,0 +1 @@ +2529627719 \ No newline at end of file diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Filter.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Filter.db new file mode 100644 index 0000000..5543328 Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Filter.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Index.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Index.db new file mode 100644 index 0000000..fbdd950 Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Index.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Statistics.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Statistics.db new file mode 100644 index 0000000..0e471d4 Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-Statistics.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-TOC.txt b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-TOC.txt new file mode 100644 index 0000000..1222811 --- /dev/null +++ b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names/legacy_tables-legacy_ka_with_illegal_cell_names-ka-2-TOC.txt @@ -0,0 +1,8 @@ +Digest.sha1 +Data.db +Filter.db +Summary.db +Index.db +TOC.txt +CompressionInfo.db +Statistics.db diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-CompressionInfo.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-CompressionInfo.db new file mode 100644 index 0000000..908f3b1 Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-CompressionInfo.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Data.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Data.db new file mode 100644 index 0000000..33b88a0 Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Data.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Digest.sha1 b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Digest.sha1 new file mode 100644 index 0000000..20deb5b --- /dev/null +++ b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Digest.sha1 @@ -0,0 +1 @@ +3340111295 \ No newline at end of file diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Filter.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Filter.db new file mode 100644 index 0000000..5543328 Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Filter.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Index.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Index.db new file mode 100644 index 0000000..fbdd950 Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Index.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Statistics.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Statistics.db new file mode 100644 index 0000000..f83575c Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Statistics.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Summary.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Summary.db new file mode 100644 index 0000000..9b90005 Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-Summary.db differ diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-TOC.txt b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-TOC.txt new file mode 100644 index 0000000..8d621be --- /dev/null +++ b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_with_illegal_cell_names_2/legacy_tables-legacy_ka_with_illegal_cell_names_2-ka-1-TOC.txt @@ -0,0 +1,8 @@ +Data.db +TOC.txt +Filter.db +Summary.db +CompressionInfo.db +Statistics.db +Digest.sha1 +Index.db diff --git a/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java b/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java index ecb8125..42e8ff6 100644 --- a/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Random; +import com.google.common.collect.Iterables; import org.junit.After; import org.junit.Assert; import org.junit.BeforeClass; @@ -41,13 +42,17 @@ import org.apache.cassandra.cql3.QueryProcessor; import org.apache.cassandra.cql3.UntypedResultSet; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; +import org.apache.cassandra.db.LivenessInfo; import org.apache.cassandra.db.SinglePartitionSliceCommandTest; import org.apache.cassandra.db.compaction.Verifier; +import org.apache.cassandra.db.lifecycle.SSTableSet; import org.apache.cassandra.db.marshal.BytesType; import org.apache.cassandra.db.marshal.SetType; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.rows.RangeTombstoneMarker; +import org.apache.cassandra.db.rows.Row; import org.apache.cassandra.db.rows.Unfiltered; +import org.apache.cassandra.db.rows.UnfilteredRowIterator; import org.apache.cassandra.dht.IPartitioner; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; @@ -62,11 +67,11 @@ import org.apache.cassandra.streaming.StreamPlan; import org.apache.cassandra.streaming.StreamSession; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.cassandra.utils.FBUtilities; -import org.apache.cassandra.utils.Pair; import static org.apache.cassandra.cql3.CQLTester.assertRows; import static org.apache.cassandra.cql3.CQLTester.row; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; /** * Tests backwards compatibility for SSTables @@ -428,10 +433,15 @@ public class LegacySSTableTest * ["b:bb:pk","00000002",1555000750634000], * ["b:bb:v1","bbb",1555000750634000]]} * ] + * and an extra sstable with only the invalid cell name + * [ + * {"key": "3", + * "cells": [["a:aa:pk","68656c6c6f30",1570466358949]]} + * ] * */ - - QueryProcessor.executeInternal("CREATE TABLE legacy_tables.legacy_ka_with_illegal_cell_names (" + + String table = "legacy_ka_with_illegal_cell_names"; + QueryProcessor.executeInternal("CREATE TABLE legacy_tables." + table + " (" + " pk int," + " c1 text," + " c2 text," + @@ -439,10 +449,68 @@ public class LegacySSTableTest " PRIMARY KEY(pk, c1, c2))"); loadLegacyTable("legacy_%s_with_illegal_cell_names%s", "ka", ""); UntypedResultSet results = - QueryProcessor.executeOnceInternal( - String.format("SELECT * FROM legacy_tables.legacy_ka_with_illegal_cell_names")); + QueryProcessor.executeOnceInternal("SELECT * FROM legacy_tables."+table); - assertRows(results, row(1, "a", "aa", "aaa"), row(2, "b", "bb", "bbb")); + assertRows(results, row(1, "a", "aa", "aaa"), row(2, "b", "bb", "bbb"), row (3, "a", "aa", null)); + Keyspace.open("legacy_tables").getColumnFamilyStore(table).forceMajorCompaction(); + } + + @Test + public void testReadingLegacyTablesWithIllegalCellNamesPKLI() throws Exception { + /** + * + * Makes sure we grab the correct PKLI when we have illegal columns + * + * sstable looks like this: + * [ + * {"key": "3", + * "cells": [["a:aa:","",100], + * ["a:aa:pk","6d656570",200]]} + * ] + */ + /* + this generates the stable on 2.1: + CFMetaData metadata = CFMetaData.compile("create table legacy_tables.legacy_ka_with_illegal_cell_names_2 (pk int, c1 text, c2 text, v1 text, primary key (pk, c1, c2))", "legacy_tables"); + try (SSTableSimpleUnsortedWriter writer = new SSTableSimpleUnsortedWriter(new File("/tmp/sstable21"), + metadata, + new ByteOrderedPartitioner(), + 10)) + { + writer.newRow(bytes(3)); + writer.addColumn(new BufferCell(Util.cellname("a", "aa", ""), bytes(""), 100)); + writer.addColumn(new BufferCell(Util.cellname("a", "aa", "pk"), bytes("meep"), 200)); + } + */ + String table = "legacy_ka_with_illegal_cell_names_2"; + QueryProcessor.executeInternal("CREATE TABLE legacy_tables." + table + " (" + + " pk int," + + " c1 text," + + " c2 text," + + " v1 text," + + " PRIMARY KEY(pk, c1, c2))"); + loadLegacyTable("legacy_%s_with_illegal_cell_names_2%s", "ka", ""); + ColumnFamilyStore cfs = Keyspace.open("legacy_tables").getColumnFamilyStore(table); + assertEquals(1, Iterables.size(cfs.getSSTables(SSTableSet.CANONICAL))); + cfs.forceMajorCompaction(); + assertEquals(1, Iterables.size(cfs.getSSTables(SSTableSet.CANONICAL))); + SSTableReader sstable = Iterables.getFirst(cfs.getSSTables(SSTableSet.CANONICAL), null); + LivenessInfo livenessInfo = null; + try (ISSTableScanner scanner = sstable.getScanner()) + { + while (scanner.hasNext()) + { + try (UnfilteredRowIterator iter = scanner.next()) + { + while (iter.hasNext()) + { + Unfiltered uf = iter.next(); + livenessInfo = ((Row)uf).primaryKeyLivenessInfo(); + } + } + } + } + assertNotNull(livenessInfo); + assertEquals(100, livenessInfo.timestamp()); } @Test --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org