dcapwell commented on code in PR #3756:
URL: https://github.com/apache/cassandra/pull/3756#discussion_r1902249173


##########
test/unit/org/apache/cassandra/db/virtual/PrimaryIdTableTest.java:
##########
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.datastax.driver.core.ResultSet;
+import com.datastax.driver.core.Row;
+import com.datastax.driver.core.exceptions.InvalidQueryException;
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.cql3.CQLTester;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.io.sstable.format.bti.BtiFormat;
+import org.bouncycastle.util.encoders.Hex;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(Parameterized.class)
+public class PrimaryIdTableTest extends CQLTester
+{
+    private static final String KS_NAME = "vts";
+    private PrimaryIdTable primaryIdTable;
+    private String table;
+    private AtomicInteger scanned;
+
+    private final boolean useBtiFormat;
+    @Parameterized.Parameters(name = "Use BtiFormat = {0}")
+    public static Collection<Object[]> parameters() {
+        return Arrays.asList(new Object[][]{
+            {false}, {true}
+        });
+    }
+
+    public PrimaryIdTableTest(boolean useBtiFormat) {
+        this.useBtiFormat = useBtiFormat;
+    }
+
+    @Before
+    public void before()
+    {
+        if (useBtiFormat) {
+            DatabaseDescriptor.setSelectedSSTableFormat(new 
BtiFormat.BtiFormatFactory().getInstance(Collections.emptyMap()));
+        }
+        primaryIdTable = new PrimaryIdTable(KS_NAME);
+        scanned = new AtomicInteger();
+        VirtualKeyspaceRegistry.instance.register(new VirtualKeyspace(KS_NAME, 
ImmutableList.of(primaryIdTable)));
+
+        table = createTable("CREATE TABLE %s (key blob PRIMARY KEY, value 
blob)");
+
+        for (int i = -10; i < 1000; i++)
+        {
+            ByteBuffer key = Murmur3Partitioner.LongToken.keyForToken(i);
+            ByteBuffer value = ByteBuffer.wrap(new byte[1]);
+            execute("INSERT INTO %s (key, value) VALUES (?, ?)", key, value);
+        }
+        Util.flushTable(KEYSPACE, table);
+        primaryIdTable.readListener.add(unused -> scanned.incrementAndGet());
+    }
+
+    @Test
+    public void testPrimaryIdTable()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ?",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(1010, all.size());
+        for (int i = -10; i < 1000; i++)
+        {
+            Row row = all.get(i + 10);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));

Review Comment:
   Why `BigInteger` here? These are `long` values, are they not? It shouldn't 
impact correctness; I'm just wondering.



##########
test/unit/org/apache/cassandra/db/virtual/PrimaryIdTableTest.java:
##########
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.datastax.driver.core.ResultSet;
+import com.datastax.driver.core.Row;
+import com.datastax.driver.core.exceptions.InvalidQueryException;
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.cql3.CQLTester;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.io.sstable.format.bti.BtiFormat;
+import org.bouncycastle.util.encoders.Hex;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(Parameterized.class)
+public class PrimaryIdTableTest extends CQLTester
+{
+    private static final String KS_NAME = "vts";
+    private PrimaryIdTable primaryIdTable;
+    private String table;
+    private AtomicInteger scanned;
+
+    private final boolean useBtiFormat;
+    @Parameterized.Parameters(name = "Use BtiFormat = {0}")
+    public static Collection<Object[]> parameters() {
+        return Arrays.asList(new Object[][]{
+            {false}, {true}
+        });
+    }
+
+    public PrimaryIdTableTest(boolean useBtiFormat) {
+        this.useBtiFormat = useBtiFormat;
+    }
+
+    @Before
+    public void before()
+    {
+        if (useBtiFormat) {
+            DatabaseDescriptor.setSelectedSSTableFormat(new 
BtiFormat.BtiFormatFactory().getInstance(Collections.emptyMap()));
+        }
+        primaryIdTable = new PrimaryIdTable(KS_NAME);
+        scanned = new AtomicInteger();
+        VirtualKeyspaceRegistry.instance.register(new VirtualKeyspace(KS_NAME, 
ImmutableList.of(primaryIdTable)));
+
+        table = createTable("CREATE TABLE %s (key blob PRIMARY KEY, value 
blob)");

Review Comment:
   No need to change anything, but in my tests on the accord branch I use 
`vector<bigint, 2>`; this is also 16 bytes but is a bit more friendly when 
printed.



##########
test/unit/org/apache/cassandra/db/virtual/PrimaryIdTableTest.java:
##########
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.datastax.driver.core.ResultSet;
+import com.datastax.driver.core.Row;
+import com.datastax.driver.core.exceptions.InvalidQueryException;
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.cql3.CQLTester;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.io.sstable.format.bti.BtiFormat;
+import org.bouncycastle.util.encoders.Hex;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(Parameterized.class)
+public class PrimaryIdTableTest extends CQLTester
+{
+    private static final String KS_NAME = "vts";
+    private PrimaryIdTable primaryIdTable;
+    private String table;
+    private AtomicInteger scanned;
+
+    private final boolean useBtiFormat;
+    @Parameterized.Parameters(name = "Use BtiFormat = {0}")
+    public static Collection<Object[]> parameters() {
+        return Arrays.asList(new Object[][]{
+            {false}, {true}
+        });
+    }
+
+    public PrimaryIdTableTest(boolean useBtiFormat) {
+        this.useBtiFormat = useBtiFormat;
+    }
+
+    @Before
+    public void before()
+    {
+        if (useBtiFormat) {
+            DatabaseDescriptor.setSelectedSSTableFormat(new 
BtiFormat.BtiFormatFactory().getInstance(Collections.emptyMap()));
+        }
+        primaryIdTable = new PrimaryIdTable(KS_NAME);
+        scanned = new AtomicInteger();
+        VirtualKeyspaceRegistry.instance.register(new VirtualKeyspace(KS_NAME, 
ImmutableList.of(primaryIdTable)));
+
+        table = createTable("CREATE TABLE %s (key blob PRIMARY KEY, value 
blob)");
+
+        for (int i = -10; i < 1000; i++)
+        {
+            ByteBuffer key = Murmur3Partitioner.LongToken.keyForToken(i);
+            ByteBuffer value = ByteBuffer.wrap(new byte[1]);

Review Comment:
   Move `value` outside of the loop to avoid unneeded allocations? It is the same 
on every iteration.



##########
src/java/org/apache/cassandra/db/virtual/PrimaryIdTable.java:
##########
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.function.Consumer;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.cql3.Operator;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.db.DataRange;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.DeletionTime;
+import org.apache.cassandra.db.PartitionPosition;
+import org.apache.cassandra.db.Slice;
+import org.apache.cassandra.db.Slices;
+import org.apache.cassandra.db.context.CounterContext;
+import org.apache.cassandra.db.filter.ClusteringIndexFilter;
+import org.apache.cassandra.db.filter.ColumnFilter;
+import org.apache.cassandra.db.filter.RowFilter;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.CounterColumnType;
+import org.apache.cassandra.db.marshal.IntegerType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.db.partitions.PartitionUpdate;
+import org.apache.cassandra.db.partitions.SingletonUnfilteredPartitionIterator;
+import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator;
+import org.apache.cassandra.db.rows.AbstractUnfilteredRowIterator;
+import org.apache.cassandra.db.rows.BTreeRow;
+import org.apache.cassandra.db.rows.BufferCell;
+import org.apache.cassandra.db.rows.Cell;
+import org.apache.cassandra.db.rows.EncodingStats;
+import org.apache.cassandra.db.rows.Row;
+import org.apache.cassandra.db.rows.Rows;
+import org.apache.cassandra.db.rows.Unfiltered;
+import org.apache.cassandra.db.rows.UnfilteredRowIterator;
+import org.apache.cassandra.db.rows.UnfilteredRowIterators;
+import org.apache.cassandra.dht.AbstractBounds;
+import org.apache.cassandra.dht.Bounds;
+import org.apache.cassandra.dht.LocalPartitioner;
+import org.apache.cassandra.dht.Range;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.exceptions.InvalidRequestException;
+import org.apache.cassandra.io.sstable.KeyReader;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.schema.ColumnMetadata;
+import org.apache.cassandra.schema.KeyspaceMetadata;
+import org.apache.cassandra.schema.Schema;
+import org.apache.cassandra.schema.TableMetadata;
+
+import static 
org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest;
+
+/**
+ * A virtual table for querying primary IDs of SSTables in a specific keyspace.
+ *
+ * <p>This table is implemented as a virtual table in Cassandra, meaning it 
does not
+ * store data persistently on disk but instead derives its data from live 
metadata.
+ *
+ * <p>The CQL equivalent of this virtual table is:
+ * <pre>
+ * CREATE TABLE system_views.primary_ids (
+ *     keyspace_name TEXT,
+ *     table_name TEXT,
+ *     token_value INT,
+ *     key TEXT,
+ *     size_estimate COUNTER,
+ *     sstables COUNTER,
+ *     PRIMARY KEY ((keyspace_name, table_name), token_value, key)
+ * );
+ * </pre>
+ *
+ * <p>Note:
+ * <ul>
+ *     <li>The `size_estimate` and `sstables` columns represent aggregate 
information about SSTable sizes and counts.</li>
+ *     <li>Range queries across multiple tables and updates are not supported 
as this is a read-only table.</li>
+ * </ul>
+ */
+public class PrimaryIdTable implements VirtualTable
+{
+    private static final Logger logger = 
LoggerFactory.getLogger(PrimaryIdTable.class);
+    public static final String NAME = "primary_ids";
+
+    private static final String TABLE_READ_ONLY_ERROR = "The specified table 
is read-only.";
+    private static final String UNSUPPORTED_RANGE_QUERY_ERROR = "Range queries 
are not supported. Please provide both a keyspace and a table name.";
+    private static final String REVERSED_QUERY_ERROR = "Reversed queries are 
not supported.";
+    private static final String KEYSPACE_NOT_EXIST_ERROR = "The keyspace '%s' 
does not exist.";
+    private static final String TABLE_NOT_EXIST_ERROR = "The table '%s' does 
not exist in the keyspace '%s'.";
+    private static final String KEY_ONLY_EQUALS_ERROR = "The 'key' column can 
only be used in an equality query for this virtual table.";
+    private static final String KEY_NOT_WITHIN_BOUNDS_ERROR = "The specified 
'key' is not within the provided token value bounds.";
+
+    private static final String COLUMN_KEYSPACE_NAME = "keyspace_name";
+    private static final String COLUMN_TABLE_NAME = "table_name";
+    private static final String COLUMN_TOKEN_VALUE = "token_value";
+    private static final String COLUMN_KEY = "key";
+    private static final String COLUMN_SIZE_ESTIMATE = "size_estimate";
+    private static final String COLUMN_SSTABLES = "sstables";
+
+    private final TableMetadata metadata;
+    private final ColumnMetadata sizeEstimateColumn;
+    private final ColumnMetadata sstablesColumn;
+
+    @VisibleForTesting
+    final CopyOnWriteArrayList<Consumer<DecoratedKey>> readListener = new 
CopyOnWriteArrayList<>();
+
+    public PrimaryIdTable(String keyspace)
+    {
+        this.metadata = TableMetadata.builder(keyspace, NAME)
+                                     .kind(TableMetadata.Kind.VIRTUAL)
+                                     .partitioner(new 
LocalPartitioner(CompositeType.getInstance(UTF8Type.instance, 
UTF8Type.instance)))
+                                     
.addPartitionKeyColumn(COLUMN_KEYSPACE_NAME, UTF8Type.instance)
+                                     .addPartitionKeyColumn(COLUMN_TABLE_NAME, 
UTF8Type.instance)
+                                     .addClusteringColumn(COLUMN_TOKEN_VALUE, 
IntegerType.instance)
+                                     .addClusteringColumn(COLUMN_KEY, 
UTF8Type.instance)
+                                     .addRegularColumn(COLUMN_SIZE_ESTIMATE, 
CounterColumnType.instance)
+                                     .addRegularColumn(COLUMN_SSTABLES, 
CounterColumnType.instance)

Review Comment:
   Friday after 5pm... I was not expecting to see counters... I'll have to look 
closer on Monday...



##########
test/unit/org/apache/cassandra/db/virtual/PrimaryIdTableTest.java:
##########
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.datastax.driver.core.ResultSet;
+import com.datastax.driver.core.Row;
+import com.datastax.driver.core.exceptions.InvalidQueryException;
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.cql3.CQLTester;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.io.sstable.format.bti.BtiFormat;
+import org.bouncycastle.util.encoders.Hex;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(Parameterized.class)
+public class PrimaryIdTableTest extends CQLTester
+{
+    private static final String KS_NAME = "vts";
+    private PrimaryIdTable primaryIdTable;
+    private String table;
+    private AtomicInteger scanned;
+
+    private final boolean useBtiFormat;
+    @Parameterized.Parameters(name = "Use BtiFormat = {0}")
+    public static Collection<Object[]> parameters() {
+        return Arrays.asList(new Object[][]{
+            {false}, {true}
+        });
+    }
+
+    public PrimaryIdTableTest(boolean useBtiFormat) {
+        this.useBtiFormat = useBtiFormat;
+    }
+
+    @Before
+    public void before()
+    {
+        if (useBtiFormat) {
+            DatabaseDescriptor.setSelectedSSTableFormat(new 
BtiFormat.BtiFormatFactory().getInstance(Collections.emptyMap()));
+        }
+        primaryIdTable = new PrimaryIdTable(KS_NAME);
+        scanned = new AtomicInteger();
+        VirtualKeyspaceRegistry.instance.register(new VirtualKeyspace(KS_NAME, 
ImmutableList.of(primaryIdTable)));
+
+        table = createTable("CREATE TABLE %s (key blob PRIMARY KEY, value 
blob)");
+
+        for (int i = -10; i < 1000; i++)
+        {
+            ByteBuffer key = Murmur3Partitioner.LongToken.keyForToken(i);
+            ByteBuffer value = ByteBuffer.wrap(new byte[1]);
+            execute("INSERT INTO %s (key, value) VALUES (?, ?)", key, value);
+        }
+        Util.flushTable(KEYSPACE, table);
+        primaryIdTable.readListener.add(unused -> scanned.incrementAndGet());
+    }
+
+    @Test
+    public void testPrimaryIdTable()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ?",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(1010, all.size());
+        for (int i = -10; i < 1000; i++)
+        {
+            Row row = all.get(i + 10);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        // 1010 + 100 for the 1 per 10 page, +1 for the last
+        assertEquals(1111, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueGreaterThanZero()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value > 0",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(999, all.size());
+        for (int i = 1; i < 1000; i++)
+        {
+            Row row = all.get(i - 1);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        assertEquals(1099, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueGreaterThanNegativeFive()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value > -5",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(1004, all.size());
+        for (int i = -4; i < 1000; i++)
+        {
+            Row row = all.get(i + 4);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        // 1004 + 100 for the 1 per 10 page, +1 for the last
+        assertEquals(1105, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueLessThanOrEqualToFive()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value <= 5",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(16, all.size());
+        for (int i = -10; i <= 5; i++)
+        {
+            Row row = all.get(i + 10);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        assertEquals(18, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueEqualToZero()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value = 0",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(1, all.size());
+        Row row = all.get(0);
+        assertEquals(BigInteger.valueOf(0), row.get("token_value", 
BigInteger.class));
+        assertEquals(2, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueBounds()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value > 0 AND token_value 
< 15",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(14, all.size());
+
+        for (int i = 0; i < 14; i++)
+        {
+            Row row = all.get(i);
+            assertEquals(BigInteger.valueOf(i + 1), row.get("token_value", 
BigInteger.class));
+        }
+        // 0->10 = 11, 10->16 = 7
+        assertEquals(18, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueBoundsWithKey()

Review Comment:
   Can you also include tests that use `BETWEEN` on `token_value`?



##########
test/unit/org/apache/cassandra/db/virtual/PrimaryIdTableTest.java:
##########
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.datastax.driver.core.ResultSet;
+import com.datastax.driver.core.Row;
+import com.datastax.driver.core.exceptions.InvalidQueryException;
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.cql3.CQLTester;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.io.sstable.format.bti.BtiFormat;
+import org.bouncycastle.util.encoders.Hex;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(Parameterized.class)
+public class PrimaryIdTableTest extends CQLTester
+{
+    private static final String KS_NAME = "vts";
+    private PrimaryIdTable primaryIdTable;
+    private String table;
+    private AtomicInteger scanned;
+
+    private final boolean useBtiFormat;
+    @Parameterized.Parameters(name = "Use BtiFormat = {0}")
+    public static Collection<Object[]> parameters() {
+        return Arrays.asList(new Object[][]{
+            {false}, {true}
+        });
+    }
+
+    public PrimaryIdTableTest(boolean useBtiFormat) {
+        this.useBtiFormat = useBtiFormat;
+    }
+
+    @Before
+    public void before()
+    {
+        if (useBtiFormat) {
+            DatabaseDescriptor.setSelectedSSTableFormat(new 
BtiFormat.BtiFormatFactory().getInstance(Collections.emptyMap()));
+        }
+        primaryIdTable = new PrimaryIdTable(KS_NAME);
+        scanned = new AtomicInteger();
+        VirtualKeyspaceRegistry.instance.register(new VirtualKeyspace(KS_NAME, 
ImmutableList.of(primaryIdTable)));
+
+        table = createTable("CREATE TABLE %s (key blob PRIMARY KEY, value 
blob)");
+
+        for (int i = -10; i < 1000; i++)
+        {
+            ByteBuffer key = Murmur3Partitioner.LongToken.keyForToken(i);
+            ByteBuffer value = ByteBuffer.wrap(new byte[1]);
+            execute("INSERT INTO %s (key, value) VALUES (?, ?)", key, value);
+        }
+        Util.flushTable(KEYSPACE, table);
+        primaryIdTable.readListener.add(unused -> scanned.incrementAndGet());
+    }
+
+    @Test
+    public void testPrimaryIdTable()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ?",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(1010, all.size());
+        for (int i = -10; i < 1000; i++)
+        {
+            Row row = all.get(i + 10);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        // 1010 + 100 for the 1 per 10 page, +1 for the last
+        assertEquals(1111, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueGreaterThanZero()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value > 0",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(999, all.size());
+        for (int i = 1; i < 1000; i++)
+        {
+            Row row = all.get(i - 1);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        assertEquals(1099, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueGreaterThanNegativeFive()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value > -5",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(1004, all.size());
+        for (int i = -4; i < 1000; i++)
+        {
+            Row row = all.get(i + 4);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        // 1004 + 100 for the 1 per 10 page, +1 for the last
+        assertEquals(1105, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueLessThanOrEqualToFive()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value <= 5",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(16, all.size());
+        for (int i = -10; i <= 5; i++)
+        {
+            Row row = all.get(i + 10);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }

Review Comment:
   This is copy/paste logic; the only difference is the start/end... maybe 
refactor it into a function?
   
   ```
   // end is exclusive; row 0 of `all` is expected to hold token `start`
   private static void assertResults(List<Row> all, int start, int end) {
       for (int i = start; i < end; i++)
       {
           Row row = all.get(i - start);
           assertEquals(BigInteger.valueOf(i),
                        row.get("token_value", BigInteger.class));
       }
   }
   ```
   
   then here
   
   ```
   assertResults(all, -10, 6)
   ```



##########
src/java/org/apache/cassandra/db/virtual/PrimaryIdTable.java:
##########
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.function.Consumer;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.cql3.Operator;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.db.DataRange;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.DeletionTime;
+import org.apache.cassandra.db.PartitionPosition;
+import org.apache.cassandra.db.Slice;
+import org.apache.cassandra.db.Slices;
+import org.apache.cassandra.db.context.CounterContext;
+import org.apache.cassandra.db.filter.ClusteringIndexFilter;
+import org.apache.cassandra.db.filter.ColumnFilter;
+import org.apache.cassandra.db.filter.RowFilter;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.CounterColumnType;
+import org.apache.cassandra.db.marshal.IntegerType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.db.partitions.PartitionUpdate;
+import org.apache.cassandra.db.partitions.SingletonUnfilteredPartitionIterator;
+import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator;
+import org.apache.cassandra.db.rows.AbstractUnfilteredRowIterator;
+import org.apache.cassandra.db.rows.BTreeRow;
+import org.apache.cassandra.db.rows.BufferCell;
+import org.apache.cassandra.db.rows.Cell;
+import org.apache.cassandra.db.rows.EncodingStats;
+import org.apache.cassandra.db.rows.Row;
+import org.apache.cassandra.db.rows.Rows;
+import org.apache.cassandra.db.rows.Unfiltered;
+import org.apache.cassandra.db.rows.UnfilteredRowIterator;
+import org.apache.cassandra.db.rows.UnfilteredRowIterators;
+import org.apache.cassandra.dht.AbstractBounds;
+import org.apache.cassandra.dht.Bounds;
+import org.apache.cassandra.dht.LocalPartitioner;
+import org.apache.cassandra.dht.Range;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.exceptions.InvalidRequestException;
+import org.apache.cassandra.io.sstable.KeyReader;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.schema.ColumnMetadata;
+import org.apache.cassandra.schema.KeyspaceMetadata;
+import org.apache.cassandra.schema.Schema;
+import org.apache.cassandra.schema.TableMetadata;
+
+import static 
org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest;
+
+/**
+ * A virtual table for querying primary IDs of SSTables in a specific keyspace.
+ *
+ * <p>This table is implemented as a virtual table in Cassandra, meaning it 
does not
+ * store data persistently on disk but instead derives its data from live 
metadata.
+ *
+ * <p>The CQL equivalent of this virtual table is:
+ * <pre>
+ * CREATE TABLE system_views.primary_ids (
+ *     keyspace_name TEXT,
+ *     table_name TEXT,
+ *     token_value INT,
+ *     key TEXT,
+ *     size_estimate COUNTER,
+ *     sstables COUNTER,
+ *     PRIMARY KEY ((keyspace_name, table_name), token_value, key)
+ * );
+ * </pre>
+ *
+ * <p>Note:
+ * <ul>
+ *     <li>The `size_estimate` and `sstables` columns represent aggregate 
information about SSTable sizes and counts.</li>
+ *     <li>Range queries across multiple tables and updates are not supported 
as this is a read-only table.</li>
+ * </ul>
+ */
+public class PrimaryIdTable implements VirtualTable
+{
+    private static final Logger logger = 
LoggerFactory.getLogger(PrimaryIdTable.class);
+    public static final String NAME = "primary_ids";
+
+    private static final String TABLE_READ_ONLY_ERROR = "The specified table 
is read-only.";
+    private static final String UNSUPPORTED_RANGE_QUERY_ERROR = "Range queries 
are not supported. Please provide both a keyspace and a table name.";
+    private static final String REVERSED_QUERY_ERROR = "Reversed queries are 
not supported.";
+    private static final String KEYSPACE_NOT_EXIST_ERROR = "The keyspace '%s' 
does not exist.";
+    private static final String TABLE_NOT_EXIST_ERROR = "The table '%s' does 
not exist in the keyspace '%s'.";
+    private static final String KEY_ONLY_EQUALS_ERROR = "The 'key' column can 
only be used in an equality query for this virtual table.";
+    private static final String KEY_NOT_WITHIN_BOUNDS_ERROR = "The specified 
'key' is not within the provided token value bounds.";
+
+    private static final String COLUMN_KEYSPACE_NAME = "keyspace_name";
+    private static final String COLUMN_TABLE_NAME = "table_name";
+    private static final String COLUMN_TOKEN_VALUE = "token_value";
+    private static final String COLUMN_KEY = "key";
+    private static final String COLUMN_SIZE_ESTIMATE = "size_estimate";
+    private static final String COLUMN_SSTABLES = "sstables";
+
+    private final TableMetadata metadata;
+    private final ColumnMetadata sizeEstimateColumn;
+    private final ColumnMetadata sstablesColumn;
+
+    @VisibleForTesting
+    final CopyOnWriteArrayList<Consumer<DecoratedKey>> readListener = new 
CopyOnWriteArrayList<>();
+
+    public PrimaryIdTable(String keyspace)
+    {
+        this.metadata = TableMetadata.builder(keyspace, NAME)
+                                     .kind(TableMetadata.Kind.VIRTUAL)
+                                     .partitioner(new 
LocalPartitioner(CompositeType.getInstance(UTF8Type.instance, 
UTF8Type.instance)))
+                                     
.addPartitionKeyColumn(COLUMN_KEYSPACE_NAME, UTF8Type.instance)
+                                     .addPartitionKeyColumn(COLUMN_TABLE_NAME, 
UTF8Type.instance)
+                                     .addClusteringColumn(COLUMN_TOKEN_VALUE, 
IntegerType.instance)
+                                     .addClusteringColumn(COLUMN_KEY, 
UTF8Type.instance)
+                                     .addRegularColumn(COLUMN_SIZE_ESTIMATE, 
CounterColumnType.instance)
+                                     .addRegularColumn(COLUMN_SSTABLES, 
CounterColumnType.instance)
+                                     .build();
+        sizeEstimateColumn = metadata.regularColumns().getSimple(0);
+        sstablesColumn = metadata.regularColumns().getSimple(1);
+    }
+
+    @Override
+    public UnfilteredPartitionIterator select(DecoratedKey partitionKey, 
ClusteringIndexFilter clusteringIndexFilter, ColumnFilter columnFilter, 
RowFilter rowFilter)
+    {
+        if (clusteringIndexFilter.isReversed())
+            throw new InvalidRequestException(REVERSED_QUERY_ERROR);
+
+        ByteBuffer[] key = ((CompositeType) 
this.metadata.partitionKeyType).split(partitionKey.getKey());
+        String keyspace = UTF8Type.instance.getString(key[0]);
+        String table = UTF8Type.instance.getString(key[1]);
+
+        KeyspaceMetadata ksm = Schema.instance.getKeyspaceMetadata(keyspace);
+        if (ksm == null)
+            throw invalidRequest(KEYSPACE_NOT_EXIST_ERROR, keyspace);
+
+        TableMetadata metadata = ksm.getTableOrViewNullable(table);
+        if (metadata == null)
+            throw invalidRequest(TABLE_NOT_EXIST_ERROR, table, keyspace);
+
+        AbstractBounds<PartitionPosition> range = getBounds(metadata, 
clusteringIndexFilter, rowFilter);
+        return new SingletonUnfilteredPartitionIterator(select(partitionKey, 
metadata, clusteringIndexFilter, range));
+    }
+
+    private List<SSTableReader> getSStables(TableMetadata metadata, 
AbstractBounds<PartitionPosition> range)
+    {
+        return 
Lists.newArrayList(ColumnFamilyStore.getIfExists(metadata).getTracker().getView().liveSSTablesInBounds(range.left,
 range.right));

Review Comment:
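   If you query a table that doesn't exist, or one that is a virtual table, won't
   this blow up? `ColumnFamilyStore.getIfExists(metadata)` presumably returns null
   in that case, so the chained `.getTracker()` call would throw an NPE. Should we
   validate ahead of time?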



##########
test/unit/org/apache/cassandra/db/virtual/PrimaryIdTableTest.java:
##########
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.datastax.driver.core.ResultSet;
+import com.datastax.driver.core.Row;
+import com.datastax.driver.core.exceptions.InvalidQueryException;
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.cql3.CQLTester;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.io.sstable.format.bti.BtiFormat;
+import org.bouncycastle.util.encoders.Hex;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(Parameterized.class)
+public class PrimaryIdTableTest extends CQLTester
+{
+    private static final String KS_NAME = "vts";
+    private PrimaryIdTable primaryIdTable;
+    private String table;
+    private AtomicInteger scanned;
+
+    private final boolean useBtiFormat;
+    @Parameterized.Parameters(name = "Use BtiFormat = {0}")
+    public static Collection<Object[]> parameters() {
+        return Arrays.asList(new Object[][]{
+            {false}, {true}
+        });
+    }
+
+    public PrimaryIdTableTest(boolean useBtiFormat) {
+        this.useBtiFormat = useBtiFormat;
+    }
+
+    @Before
+    public void before()
+    {
+        if (useBtiFormat) {
+            DatabaseDescriptor.setSelectedSSTableFormat(new 
BtiFormat.BtiFormatFactory().getInstance(Collections.emptyMap()));
+        }
+        primaryIdTable = new PrimaryIdTable(KS_NAME);
+        scanned = new AtomicInteger();
+        VirtualKeyspaceRegistry.instance.register(new VirtualKeyspace(KS_NAME, 
ImmutableList.of(primaryIdTable)));
+
+        table = createTable("CREATE TABLE %s (key blob PRIMARY KEY, value 
blob)");
+
+        for (int i = -10; i < 1000; i++)
+        {
+            ByteBuffer key = Murmur3Partitioner.LongToken.keyForToken(i);
+            ByteBuffer value = ByteBuffer.wrap(new byte[1]);
+            execute("INSERT INTO %s (key, value) VALUES (?, ?)", key, value);
+        }
+        Util.flushTable(KEYSPACE, table);
+        primaryIdTable.readListener.add(unused -> scanned.incrementAndGet());
+    }
+
+    @Test
+    public void testPrimaryIdTable()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ?",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(1010, all.size());
+        for (int i = -10; i < 1000; i++)
+        {
+            Row row = all.get(i + 10);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        // 1010 + 100 for the 1 per 10 page, +1 for the last
+        assertEquals(1111, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueGreaterThanZero()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value > 0",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(999, all.size());
+        for (int i = 1; i < 1000; i++)
+        {
+            Row row = all.get(i - 1);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        assertEquals(1099, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueGreaterThanNegativeFive()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value > -5",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(1004, all.size());
+        for (int i = -4; i < 1000; i++)
+        {
+            Row row = all.get(i + 4);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        // 1004 + 100 for the 1 per 10 page, +1 for the last
+        assertEquals(1105, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueLessThanOrEqualToFive()
+    {
+        ResultSet rs = executeNetWithPaging("SELECT * FROM vts.primary_ids 
WHERE keyspace_name = ? AND table_name = ? AND token_value <= 5",
+                                            10, KEYSPACE, table);
+        List<Row> all = rs.all();
+        assertEquals(16, all.size());
+        for (int i = -10; i <= 5; i++)
+        {
+            Row row = all.get(i + 10);
+            assertEquals(BigInteger.valueOf(i), row.get("token_value", 
BigInteger.class));
+        }
+        assertEquals(18, scanned.get());
+    }
+
+    @Test
+    public void testTokenValueEqualToZero()

Review Comment:
   It would be nice to have a token-conflict test, i.e. several partition keys
   that map to the same token. This *should* return all of the colliding keys,
   but does it?



##########
src/java/org/apache/cassandra/db/virtual/PrimaryIdTable.java:
##########
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.function.Consumer;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.cql3.Operator;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.db.DataRange;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.DeletionTime;
+import org.apache.cassandra.db.PartitionPosition;
+import org.apache.cassandra.db.Slice;
+import org.apache.cassandra.db.Slices;
+import org.apache.cassandra.db.context.CounterContext;
+import org.apache.cassandra.db.filter.ClusteringIndexFilter;
+import org.apache.cassandra.db.filter.ColumnFilter;
+import org.apache.cassandra.db.filter.RowFilter;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.CounterColumnType;
+import org.apache.cassandra.db.marshal.IntegerType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.db.partitions.PartitionUpdate;
+import org.apache.cassandra.db.partitions.SingletonUnfilteredPartitionIterator;
+import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator;
+import org.apache.cassandra.db.rows.AbstractUnfilteredRowIterator;
+import org.apache.cassandra.db.rows.BTreeRow;
+import org.apache.cassandra.db.rows.BufferCell;
+import org.apache.cassandra.db.rows.Cell;
+import org.apache.cassandra.db.rows.EncodingStats;
+import org.apache.cassandra.db.rows.Row;
+import org.apache.cassandra.db.rows.Rows;
+import org.apache.cassandra.db.rows.Unfiltered;
+import org.apache.cassandra.db.rows.UnfilteredRowIterator;
+import org.apache.cassandra.db.rows.UnfilteredRowIterators;
+import org.apache.cassandra.dht.AbstractBounds;
+import org.apache.cassandra.dht.Bounds;
+import org.apache.cassandra.dht.LocalPartitioner;
+import org.apache.cassandra.dht.Range;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.exceptions.InvalidRequestException;
+import org.apache.cassandra.io.sstable.KeyReader;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.schema.ColumnMetadata;
+import org.apache.cassandra.schema.KeyspaceMetadata;
+import org.apache.cassandra.schema.Schema;
+import org.apache.cassandra.schema.TableMetadata;
+
+import static 
org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest;
+
+/**
+ * A virtual table for querying primary IDs of SSTables in a specific keyspace.
+ *
+ * <p>This table is implemented as a virtual table in Cassandra, meaning it 
does not
+ * store data persistently on disk but instead derives its data from live 
metadata.
+ *
+ * <p>The CQL equivalent of this virtual table is:
+ * <pre>
+ * CREATE TABLE system_views.primary_ids (
+ *     keyspace_name TEXT,
+ *     table_name TEXT,
+ *     token_value INT,
+ *     key TEXT,
+ *     size_estimate COUNTER,
+ *     sstables COUNTER,
+ *     PRIMARY KEY ((keyspace_name, table_name), token_value, key)
+ * );
+ * </pre>
+ *
+ * <p>Note:
+ * <ul>
+ *     <li>The `size_estimate` and `sstables` columns represent aggregate 
information about SSTable sizes and counts.</li>
+ *     <li>Range queries across multiple tables and updates are not supported 
as this is a read-only table.</li>
+ * </ul>
+ */
+public class PrimaryIdTable implements VirtualTable
+{
+    private static final Logger logger = 
LoggerFactory.getLogger(PrimaryIdTable.class);
+    public static final String NAME = "primary_ids";
+
+    private static final String TABLE_READ_ONLY_ERROR = "The specified table 
is read-only.";
+    private static final String UNSUPPORTED_RANGE_QUERY_ERROR = "Range queries 
are not supported. Please provide both a keyspace and a table name.";
+    private static final String REVERSED_QUERY_ERROR = "Reversed queries are 
not supported.";
+    private static final String KEYSPACE_NOT_EXIST_ERROR = "The keyspace '%s' 
does not exist.";
+    private static final String TABLE_NOT_EXIST_ERROR = "The table '%s' does 
not exist in the keyspace '%s'.";
+    private static final String KEY_ONLY_EQUALS_ERROR = "The 'key' column can 
only be used in an equality query for this virtual table.";
+    private static final String KEY_NOT_WITHIN_BOUNDS_ERROR = "The specified 
'key' is not within the provided token value bounds.";
+
+    private static final String COLUMN_KEYSPACE_NAME = "keyspace_name";
+    private static final String COLUMN_TABLE_NAME = "table_name";
+    private static final String COLUMN_TOKEN_VALUE = "token_value";
+    private static final String COLUMN_KEY = "key";
+    private static final String COLUMN_SIZE_ESTIMATE = "size_estimate";
+    private static final String COLUMN_SSTABLES = "sstables";
+
+    private final TableMetadata metadata;
+    private final ColumnMetadata sizeEstimateColumn;
+    private final ColumnMetadata sstablesColumn;
+
+    @VisibleForTesting
+    final CopyOnWriteArrayList<Consumer<DecoratedKey>> readListener = new 
CopyOnWriteArrayList<>();
+
+    public PrimaryIdTable(String keyspace)
+    {
+        this.metadata = TableMetadata.builder(keyspace, NAME)
+                                     .kind(TableMetadata.Kind.VIRTUAL)
+                                     .partitioner(new 
LocalPartitioner(CompositeType.getInstance(UTF8Type.instance, 
UTF8Type.instance)))
+                                     
.addPartitionKeyColumn(COLUMN_KEYSPACE_NAME, UTF8Type.instance)
+                                     .addPartitionKeyColumn(COLUMN_TABLE_NAME, 
UTF8Type.instance)
+                                     .addClusteringColumn(COLUMN_TOKEN_VALUE, 
IntegerType.instance)
+                                     .addClusteringColumn(COLUMN_KEY, 
UTF8Type.instance)
+                                     .addRegularColumn(COLUMN_SIZE_ESTIMATE, 
CounterColumnType.instance)
+                                     .addRegularColumn(COLUMN_SSTABLES, 
CounterColumnType.instance)
+                                     .build();
+        sizeEstimateColumn = metadata.regularColumns().getSimple(0);
+        sstablesColumn = metadata.regularColumns().getSimple(1);
+    }
+
+    @Override
+    public UnfilteredPartitionIterator select(DecoratedKey partitionKey, 
ClusteringIndexFilter clusteringIndexFilter, ColumnFilter columnFilter, 
RowFilter rowFilter)
+    {
+        if (clusteringIndexFilter.isReversed())
+            throw new InvalidRequestException(REVERSED_QUERY_ERROR);
+
+        ByteBuffer[] key = ((CompositeType) 
this.metadata.partitionKeyType).split(partitionKey.getKey());
+        String keyspace = UTF8Type.instance.getString(key[0]);
+        String table = UTF8Type.instance.getString(key[1]);
+
+        KeyspaceMetadata ksm = Schema.instance.getKeyspaceMetadata(keyspace);
+        if (ksm == null)
+            throw invalidRequest(KEYSPACE_NOT_EXIST_ERROR, keyspace);
+
+        TableMetadata metadata = ksm.getTableOrViewNullable(table);
+        if (metadata == null)
+            throw invalidRequest(TABLE_NOT_EXIST_ERROR, table, keyspace);
+
+        AbstractBounds<PartitionPosition> range = getBounds(metadata, 
clusteringIndexFilter, rowFilter);
+        return new SingletonUnfilteredPartitionIterator(select(partitionKey, 
metadata, clusteringIndexFilter, range));
+    }
+
+    private List<SSTableReader> getSStables(TableMetadata metadata, 
AbstractBounds<PartitionPosition> range)
+    {
+        return 
Lists.newArrayList(ColumnFamilyStore.getIfExists(metadata).getTracker().getView().liveSSTablesInBounds(range.left,
 range.right));
+    }
+
+    private UnfilteredRowIterator select(DecoratedKey partitionKey, 
TableMetadata metadata, ClusteringIndexFilter clusteringIndexFilter, 
AbstractBounds<PartitionPosition> range)
+    {
+        List<SSTableReader> sstables = getSStables(metadata, range);
+        if (sstables.isEmpty())
+            return UnfilteredRowIterators.noRowsIterator(metadata, 
partitionKey, Rows.EMPTY_STATIC_ROW, DeletionTime.LIVE, false);
+
+        List<UnfilteredRowIterator> sstableIterators = Lists.newArrayList();
+        for (SSTableReader sstable : sstables)
+            sstableIterators.add(getSStableRowIterator(metadata, partitionKey, 
sstable, clusteringIndexFilter, range));
+
+        return UnfilteredRowIterators.merge(sstableIterators);
+    }
+
+    private UnfilteredRowIterator getSStableRowIterator(TableMetadata target, 
DecoratedKey partitionKey, SSTableReader sstable, ClusteringIndexFilter filter, 
AbstractBounds<PartitionPosition> range)
+    {
+        final KeyReader reader;
+        try
+        {
+            // ignore warning on try-with-resources, the reader will be closed 
on endOfData or close
+            reader = sstable.keyReader(range.left);
+        }
+        catch (IOException e)
+        {
+            logger.error("Error generating keyReader for SSTable: {}", 
sstable, e);
+            throw new RuntimeException(e);
+        }
+
+        return new AbstractUnfilteredRowIterator(metadata, partitionKey, 
DeletionTime.LIVE,
+                                                 
metadata.regularAndStaticColumns(), Rows.EMPTY_STATIC_ROW,
+                                                 false, EncodingStats.NO_STATS)
+        {
+            public Unfiltered endOfData()
+            {
+                reader.close();
+                return super.endOfData();
+            }
+
+            public void close()
+            {
+                reader.close();
+            }
+
+            private Row buildRow(Clustering<?> clustering, long size)
+            {
+                Row.Builder row = BTreeRow.sortedBuilder();
+                row.newRow(clustering);
+                row.addCell(cell(sizeEstimateColumn, 
CounterContext.instance().createUpdate(size)));
+                row.addCell(cell(sstablesColumn, 
CounterContext.instance().createUpdate(1)));
+                return row.build();
+            }
+
+            @Override
+            protected Unfiltered computeNext()
+            {
+                while (!reader.isExhausted())
+                {
+                    DecoratedKey key = 
target.partitioner.decorateKey(reader.key());
+
+                    for (Consumer<DecoratedKey> listener : readListener)
+                        listener.accept(key);
+
+                    // Store the reader's current data position to calculate 
size later
+                    long lastPosition = reader.dataPosition();
+                    try
+                    {
+                        // Advance the reader to the next key for the next 
iteration. Also by moving to next key
+                        // we move the dataPosition to the start of the next 
key for calculating size
+                        reader.advance();
+                    }
+                    catch (IOException e)
+                    {
+                        logger.error("Error advancing reader for SSTable: {}", 
sstable, e);
+                        return endOfData();
+                    }
+
+                    // Calculate the size of the current key. If EOF use the 
length of the file
+                    long current = reader.dataPosition() == -1 ? 
sstable.uncompressedLength() : reader.dataPosition();
+                    long size = current - lastPosition;
+
+                    String keyString = 
target.partitionKeyType.getString(key.getKey());

Review Comment:
   ```suggestion
                       String keyString = 
target.partitionKeyType.asCQL3Type().toCQLLiteral(key.getKey());
   ```
   
   Using CQL literals is far better than type strings: the value can be pasted
   straight back into a CQL statement.



##########
src/java/org/apache/cassandra/db/virtual/PrimaryIdTable.java:
##########
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.function.Consumer;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.cql3.Operator;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.db.DataRange;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.DeletionTime;
+import org.apache.cassandra.db.PartitionPosition;
+import org.apache.cassandra.db.Slice;
+import org.apache.cassandra.db.Slices;
+import org.apache.cassandra.db.context.CounterContext;
+import org.apache.cassandra.db.filter.ClusteringIndexFilter;
+import org.apache.cassandra.db.filter.ColumnFilter;
+import org.apache.cassandra.db.filter.RowFilter;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.CounterColumnType;
+import org.apache.cassandra.db.marshal.IntegerType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.db.partitions.PartitionUpdate;
+import org.apache.cassandra.db.partitions.SingletonUnfilteredPartitionIterator;
+import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator;
+import org.apache.cassandra.db.rows.AbstractUnfilteredRowIterator;
+import org.apache.cassandra.db.rows.BTreeRow;
+import org.apache.cassandra.db.rows.BufferCell;
+import org.apache.cassandra.db.rows.Cell;
+import org.apache.cassandra.db.rows.EncodingStats;
+import org.apache.cassandra.db.rows.Row;
+import org.apache.cassandra.db.rows.Rows;
+import org.apache.cassandra.db.rows.Unfiltered;
+import org.apache.cassandra.db.rows.UnfilteredRowIterator;
+import org.apache.cassandra.db.rows.UnfilteredRowIterators;
+import org.apache.cassandra.dht.AbstractBounds;
+import org.apache.cassandra.dht.Bounds;
+import org.apache.cassandra.dht.LocalPartitioner;
+import org.apache.cassandra.dht.Range;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.exceptions.InvalidRequestException;
+import org.apache.cassandra.io.sstable.KeyReader;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.schema.ColumnMetadata;
+import org.apache.cassandra.schema.KeyspaceMetadata;
+import org.apache.cassandra.schema.Schema;
+import org.apache.cassandra.schema.TableMetadata;
+
+import static 
org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest;
+
+/**
+ * A virtual table for querying primary IDs of SSTables in a specific keyspace.
+ *
+ * <p>This table is implemented as a virtual table in Cassandra, meaning it 
does not
+ * store data persistently on disk but instead derives its data from live 
metadata.
+ *
+ * <p>The CQL equivalent of this virtual table is:
+ * <pre>
+ * CREATE TABLE system_views.primary_ids (
+ *     keyspace_name TEXT,
+ *     table_name TEXT,
+ *     token_value INT,
+ *     key TEXT,
+ *     size_estimate COUNTER,
+ *     sstables COUNTER,
+ *     PRIMARY KEY ((keyspace_name, table_name), token_value, key)
+ * );
+ * </pre>
+ *
+ * <p>Note:
+ * <ul>
+ *     <li>The `size_estimate` and `sstables` columns represent aggregate 
information about SSTable sizes and counts.</li>
+ *     <li>Range queries across multiple tables and updates are not supported 
as this is a read-only table.</li>
+ * </ul>
+ */
+public class PrimaryIdTable implements VirtualTable
+{
+    private static final Logger logger = 
LoggerFactory.getLogger(PrimaryIdTable.class);
+    public static final String NAME = "primary_ids";
+
+    private static final String TABLE_READ_ONLY_ERROR = "The specified table 
is read-only.";
+    private static final String UNSUPPORTED_RANGE_QUERY_ERROR = "Range queries 
are not supported. Please provide both a keyspace and a table name.";
+    private static final String REVERSED_QUERY_ERROR = "Reversed queries are 
not supported.";
+    private static final String KEYSPACE_NOT_EXIST_ERROR = "The keyspace '%s' 
does not exist.";
+    private static final String TABLE_NOT_EXIST_ERROR = "The table '%s' does 
not exist in the keyspace '%s'.";
+    private static final String KEY_ONLY_EQUALS_ERROR = "The 'key' column can 
only be used in an equality query for this virtual table.";
+    private static final String KEY_NOT_WITHIN_BOUNDS_ERROR = "The specified 
'key' is not within the provided token value bounds.";
+
+    private static final String COLUMN_KEYSPACE_NAME = "keyspace_name";
+    private static final String COLUMN_TABLE_NAME = "table_name";
+    private static final String COLUMN_TOKEN_VALUE = "token_value";
+    private static final String COLUMN_KEY = "key";
+    private static final String COLUMN_SIZE_ESTIMATE = "size_estimate";
+    private static final String COLUMN_SSTABLES = "sstables";
+
+    private final TableMetadata metadata;
+    private final ColumnMetadata sizeEstimateColumn;
+    private final ColumnMetadata sstablesColumn;
+
+    @VisibleForTesting
+    final CopyOnWriteArrayList<Consumer<DecoratedKey>> readListener = new 
CopyOnWriteArrayList<>();
+
+    public PrimaryIdTable(String keyspace)
+    {
+        this.metadata = TableMetadata.builder(keyspace, NAME)
+                                     .kind(TableMetadata.Kind.VIRTUAL)
+                                     .partitioner(new 
LocalPartitioner(CompositeType.getInstance(UTF8Type.instance, 
UTF8Type.instance)))
+                                     
.addPartitionKeyColumn(COLUMN_KEYSPACE_NAME, UTF8Type.instance)
+                                     .addPartitionKeyColumn(COLUMN_TABLE_NAME, 
UTF8Type.instance)
+                                     .addClusteringColumn(COLUMN_TOKEN_VALUE, 
IntegerType.instance)
+                                     .addClusteringColumn(COLUMN_KEY, 
UTF8Type.instance)
+                                     .addRegularColumn(COLUMN_SIZE_ESTIMATE, 
CounterColumnType.instance)
+                                     .addRegularColumn(COLUMN_SSTABLES, 
CounterColumnType.instance)
+                                     .build();
+        sizeEstimateColumn = metadata.regularColumns().getSimple(0);
+        sstablesColumn = metadata.regularColumns().getSimple(1);
+    }
+
+    @Override
+    public UnfilteredPartitionIterator select(DecoratedKey partitionKey, 
ClusteringIndexFilter clusteringIndexFilter, ColumnFilter columnFilter, 
RowFilter rowFilter)
+    {
+        if (clusteringIndexFilter.isReversed())
+            throw new InvalidRequestException(REVERSED_QUERY_ERROR);
+
+        ByteBuffer[] key = ((CompositeType) 
this.metadata.partitionKeyType).split(partitionKey.getKey());
+        String keyspace = UTF8Type.instance.getString(key[0]);
+        String table = UTF8Type.instance.getString(key[1]);
+
+        KeyspaceMetadata ksm = Schema.instance.getKeyspaceMetadata(keyspace);
+        if (ksm == null)
+            throw invalidRequest(KEYSPACE_NOT_EXIST_ERROR, keyspace);
+
+        TableMetadata metadata = ksm.getTableOrViewNullable(table);
+        if (metadata == null)
+            throw invalidRequest(TABLE_NOT_EXIST_ERROR, table, keyspace);
+
+        AbstractBounds<PartitionPosition> range = getBounds(metadata, 
clusteringIndexFilter, rowFilter);
+        return new SingletonUnfilteredPartitionIterator(select(partitionKey, 
metadata, clusteringIndexFilter, range));
+    }
+
+    private List<SSTableReader> getSStables(TableMetadata metadata, 
AbstractBounds<PartitionPosition> range)
+    {
+        return 
Lists.newArrayList(ColumnFamilyStore.getIfExists(metadata).getTracker().getView().liveSSTablesInBounds(range.left,
 range.right));
+    }
+
+    private UnfilteredRowIterator select(DecoratedKey partitionKey, 
TableMetadata metadata, ClusteringIndexFilter clusteringIndexFilter, 
AbstractBounds<PartitionPosition> range)
+    {
+        List<SSTableReader> sstables = getSStables(metadata, range);
+        if (sstables.isEmpty())
+            return UnfilteredRowIterators.noRowsIterator(metadata, 
partitionKey, Rows.EMPTY_STATIC_ROW, DeletionTime.LIVE, false);
+
+        List<UnfilteredRowIterator> sstableIterators = Lists.newArrayList();
+        for (SSTableReader sstable : sstables)
+            sstableIterators.add(getSStableRowIterator(metadata, partitionKey, 
sstable, clusteringIndexFilter, range));
+
+        return UnfilteredRowIterators.merge(sstableIterators);
+    }
+
+    private UnfilteredRowIterator getSStableRowIterator(TableMetadata target, 
DecoratedKey partitionKey, SSTableReader sstable, ClusteringIndexFilter filter, 
AbstractBounds<PartitionPosition> range)
+    {
+        final KeyReader reader;
+        try
+        {
+            // ignore warning on try-with-resources, the reader will be closed 
on endOfData or close
+            reader = sstable.keyReader(range.left);
+        }
+        catch (IOException e)
+        {
+            logger.error("Error generating keyReader for SSTable: {}", 
sstable, e);
+            throw new RuntimeException(e);
+        }
+
+        return new AbstractUnfilteredRowIterator(metadata, partitionKey, 
DeletionTime.LIVE,
+                                                 
metadata.regularAndStaticColumns(), Rows.EMPTY_STATIC_ROW,
+                                                 false, EncodingStats.NO_STATS)
+        {
+            public Unfiltered endOfData()
+            {
+                reader.close();
+                return super.endOfData();
+            }
+
+            public void close()
+            {
+                reader.close();
+            }
+
+            private Row buildRow(Clustering<?> clustering, long size)
+            {
+                Row.Builder row = BTreeRow.sortedBuilder();
+                row.newRow(clustering);
+                row.addCell(cell(sizeEstimateColumn, 
CounterContext.instance().createUpdate(size)));
+                row.addCell(cell(sstablesColumn, 
CounterContext.instance().createUpdate(1)));
+                return row.build();
+            }
+
+            @Override
+            protected Unfiltered computeNext()
+            {
+                while (!reader.isExhausted())
+                {
+                    DecoratedKey key = 
target.partitioner.decorateKey(reader.key());
+
+                    for (Consumer<DecoratedKey> listener : readListener)
+                        listener.accept(key);
+
+                    // Store the reader's current data position to calculate 
size later
+                    long lastPosition = reader.dataPosition();
+                    try
+                    {
+                        // Advance the reader to the next key for the next 
iteration. Also by moving to next key
+                        // we move the dataPosition to the start of the next 
key for calculating size
+                        reader.advance();
+                    }
+                    catch (IOException e)
+                    {
+                        logger.error("Error advancing reader for SSTable: {}", 
sstable, e);
+                        return endOfData();
+                    }
+
+                    // Calculate the size of the current key. If EOF use the 
length of the file
+                    long current = reader.dataPosition() == -1 ? 
sstable.uncompressedLength() : reader.dataPosition();
+                    long size = current - lastPosition;
+
+                    String keyString = 
target.partitionKeyType.getString(key.getKey());
+
+                    // Check if the current key is outside the queried range; 
if so, stop
+                    if (range.right.compareTo(key) < 0)
+                        return endOfData();
+
+                    // Convert the token to a string and create a clustering 
object
+                    String tokenString = key.getToken().toString();
+                    Clustering<?> clustering = Clustering.make(
+                        IntegerType.instance.decompose(new 
BigInteger(tokenString)),
+                        UTF8Type.instance.decompose(keyString)
+                    );
+
+                    // Check if the current clustering matches the filter; if 
so, return the row
+                    if (filter.selects(clustering))
+                        return buildRow(clustering, size);
+                }
+                return endOfData();
+            }
+        };
+    }
+
+    /**
+     * This converts the clustering token/key into the partition level 
token/key for the target table. Also provides an
+     * optimization from RowFilter when a `key` is specified with or without 
the clustering `token` being set.
+     */
+    private AbstractBounds<PartitionPosition> getBounds(TableMetadata target, 
ClusteringIndexFilter clusteringIndexFilter, RowFilter rowFilter)
+    {
+        Slices s = clusteringIndexFilter.getSlices(metadata);
+        Token startToken = target.partitioner.getMinimumToken();
+        Token endToken = target.partitioner.getMaximumToken();
+
+        // find min/max token values from the clustering key
+        for (int i = 0; i < s.size(); i++)
+        {
+            Slice slice = s.get(i);
+            if (!slice.start().isEmpty())
+            {
+                BigInteger token = 
IntegerType.instance.compose(slice.start().bufferAt(0));
+                startToken = 
target.partitioner.getTokenFactory().fromString(token.toString());
+            }
+            if (!slice.end().isEmpty())
+            {
+                BigInteger token = 
IntegerType.instance.compose(slice.end().bufferAt(0));
+                endToken = 
target.partitioner.getTokenFactory().fromString(token.toString());
+            }
+        }
+
+        // override min/max of token if the `key` is specified
+        for (RowFilter.Expression expression : rowFilter.getExpressions())
+        {
+            if (expression.column().name.toString().equals(COLUMN_KEY))
+            {
+                if (expression.operator() != Operator.EQ)
+                    throw new InvalidRequestException(KEY_ONLY_EQUALS_ERROR);
+
+                String keyString = 
UTF8Type.instance.compose(expression.getIndexValue());
+                ByteBuffer keyAsBB = 
target.partitionKeyType.fromString(keyString);

Review Comment:
   ```suggestion
                   String keyString = 
UTF8Type.instance.compose(expression.getIndexValue());
                   ByteBuffer keyAsBB = 
target.partitionKeyType.asCQL3Type().fromCQLLiteral(keyString);
   ```
   
   Can we use CQL literals rather than type strings? Type strings are not the 
same thing and are not the best for human readability.



##########
src/java/org/apache/cassandra/db/virtual/PrimaryIdTable.java:
##########
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.db.virtual;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.function.Consumer;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.cql3.Operator;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.db.DataRange;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.DeletionTime;
+import org.apache.cassandra.db.PartitionPosition;
+import org.apache.cassandra.db.Slice;
+import org.apache.cassandra.db.Slices;
+import org.apache.cassandra.db.context.CounterContext;
+import org.apache.cassandra.db.filter.ClusteringIndexFilter;
+import org.apache.cassandra.db.filter.ColumnFilter;
+import org.apache.cassandra.db.filter.RowFilter;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.CounterColumnType;
+import org.apache.cassandra.db.marshal.IntegerType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.db.partitions.PartitionUpdate;
+import org.apache.cassandra.db.partitions.SingletonUnfilteredPartitionIterator;
+import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator;
+import org.apache.cassandra.db.rows.AbstractUnfilteredRowIterator;
+import org.apache.cassandra.db.rows.BTreeRow;
+import org.apache.cassandra.db.rows.BufferCell;
+import org.apache.cassandra.db.rows.Cell;
+import org.apache.cassandra.db.rows.EncodingStats;
+import org.apache.cassandra.db.rows.Row;
+import org.apache.cassandra.db.rows.Rows;
+import org.apache.cassandra.db.rows.Unfiltered;
+import org.apache.cassandra.db.rows.UnfilteredRowIterator;
+import org.apache.cassandra.db.rows.UnfilteredRowIterators;
+import org.apache.cassandra.dht.AbstractBounds;
+import org.apache.cassandra.dht.Bounds;
+import org.apache.cassandra.dht.LocalPartitioner;
+import org.apache.cassandra.dht.Range;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.exceptions.InvalidRequestException;
+import org.apache.cassandra.io.sstable.KeyReader;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.schema.ColumnMetadata;
+import org.apache.cassandra.schema.KeyspaceMetadata;
+import org.apache.cassandra.schema.Schema;
+import org.apache.cassandra.schema.TableMetadata;
+
+import static 
org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest;
+
+/**
+ * A virtual table for querying primary IDs of SSTables in a specific keyspace.
+ *
+ * <p>This table is implemented as a virtual table in Cassandra, meaning it 
does not
+ * store data persistently on disk but instead derives its data from live 
metadata.
+ *
+ * <p>The CQL equivalent of this virtual table is:
+ * <pre>
+ * CREATE TABLE system_views.primary_ids (
+ *     keyspace_name TEXT,
+ *     table_name TEXT,
+ *     token_value INT,
+ *     key TEXT,
+ *     size_estimate COUNTER,
+ *     sstables COUNTER,
+ *     PRIMARY KEY ((keyspace_name, table_name), token_value, key)
+ * );
+ * </pre>
+ *
+ * <p>Note:
+ * <ul>
+ *     <li>The `size_estimate` and `sstables` columns represent aggregate 
information about SSTable sizes and counts.</li>
+ *     <li>Range queries across multiple tables and updates are not supported 
as this is a read-only table.</li>
+ * </ul>
+ */
+public class PrimaryIdTable implements VirtualTable
+{
+    private static final Logger logger = 
LoggerFactory.getLogger(PrimaryIdTable.class);
+    public static final String NAME = "primary_ids";
+
+    private static final String TABLE_READ_ONLY_ERROR = "The specified table 
is read-only.";
+    private static final String UNSUPPORTED_RANGE_QUERY_ERROR = "Range queries 
are not supported. Please provide both a keyspace and a table name.";
+    private static final String REVERSED_QUERY_ERROR = "Reversed queries are 
not supported.";
+    private static final String KEYSPACE_NOT_EXIST_ERROR = "The keyspace '%s' 
does not exist.";
+    private static final String TABLE_NOT_EXIST_ERROR = "The table '%s' does 
not exist in the keyspace '%s'.";
+    private static final String KEY_ONLY_EQUALS_ERROR = "The 'key' column can 
only be used in an equality query for this virtual table.";
+    private static final String KEY_NOT_WITHIN_BOUNDS_ERROR = "The specified 
'key' is not within the provided token value bounds.";
+
+    private static final String COLUMN_KEYSPACE_NAME = "keyspace_name";
+    private static final String COLUMN_TABLE_NAME = "table_name";
+    private static final String COLUMN_TOKEN_VALUE = "token_value";
+    private static final String COLUMN_KEY = "key";
+    private static final String COLUMN_SIZE_ESTIMATE = "size_estimate";
+    private static final String COLUMN_SSTABLES = "sstables";
+
+    private final TableMetadata metadata;
+    private final ColumnMetadata sizeEstimateColumn;
+    private final ColumnMetadata sstablesColumn;
+
+    @VisibleForTesting
+    final CopyOnWriteArrayList<Consumer<DecoratedKey>> readListener = new 
CopyOnWriteArrayList<>();
+
+    public PrimaryIdTable(String keyspace)
+    {
+        this.metadata = TableMetadata.builder(keyspace, NAME)
+                                     .kind(TableMetadata.Kind.VIRTUAL)
+                                     .partitioner(new 
LocalPartitioner(CompositeType.getInstance(UTF8Type.instance, 
UTF8Type.instance)))
+                                     
.addPartitionKeyColumn(COLUMN_KEYSPACE_NAME, UTF8Type.instance)
+                                     .addPartitionKeyColumn(COLUMN_TABLE_NAME, 
UTF8Type.instance)
+                                     .addClusteringColumn(COLUMN_TOKEN_VALUE, 
IntegerType.instance)
+                                     .addClusteringColumn(COLUMN_KEY, 
UTF8Type.instance)
+                                     .addRegularColumn(COLUMN_SIZE_ESTIMATE, 
CounterColumnType.instance)
+                                     .addRegularColumn(COLUMN_SSTABLES, 
CounterColumnType.instance)
+                                     .build();
+        sizeEstimateColumn = metadata.regularColumns().getSimple(0);
+        sstablesColumn = metadata.regularColumns().getSimple(1);
+    }
+
+    @Override
+    public UnfilteredPartitionIterator select(DecoratedKey partitionKey, 
ClusteringIndexFilter clusteringIndexFilter, ColumnFilter columnFilter, 
RowFilter rowFilter)
+    {
+        if (clusteringIndexFilter.isReversed())
+            throw new InvalidRequestException(REVERSED_QUERY_ERROR);
+
+        ByteBuffer[] key = ((CompositeType) 
this.metadata.partitionKeyType).split(partitionKey.getKey());
+        String keyspace = UTF8Type.instance.getString(key[0]);
+        String table = UTF8Type.instance.getString(key[1]);
+
+        KeyspaceMetadata ksm = Schema.instance.getKeyspaceMetadata(keyspace);
+        if (ksm == null)
+            throw invalidRequest(KEYSPACE_NOT_EXIST_ERROR, keyspace);
+
+        TableMetadata metadata = ksm.getTableOrViewNullable(table);
+        if (metadata == null)
+            throw invalidRequest(TABLE_NOT_EXIST_ERROR, table, keyspace);
+
+        AbstractBounds<PartitionPosition> range = getBounds(metadata, 
clusteringIndexFilter, rowFilter);
+        return new SingletonUnfilteredPartitionIterator(select(partitionKey, 
metadata, clusteringIndexFilter, range));
+    }
+
+    private List<SSTableReader> getSStables(TableMetadata metadata, 
AbstractBounds<PartitionPosition> range)
+    {
+        return 
Lists.newArrayList(ColumnFamilyStore.getIfExists(metadata).getTracker().getView().liveSSTablesInBounds(range.left,
 range.right));
+    }
+
+    private UnfilteredRowIterator select(DecoratedKey partitionKey, 
TableMetadata metadata, ClusteringIndexFilter clusteringIndexFilter, 
AbstractBounds<PartitionPosition> range)
+    {
+        List<SSTableReader> sstables = getSStables(metadata, range);
+        if (sstables.isEmpty())
+            return UnfilteredRowIterators.noRowsIterator(metadata, 
partitionKey, Rows.EMPTY_STATIC_ROW, DeletionTime.LIVE, false);
+
+        List<UnfilteredRowIterator> sstableIterators = Lists.newArrayList();
+        for (SSTableReader sstable : sstables)
+            sstableIterators.add(getSStableRowIterator(metadata, partitionKey, 
sstable, clusteringIndexFilter, range));
+
+        return UnfilteredRowIterators.merge(sstableIterators);
+    }
+
+    private UnfilteredRowIterator getSStableRowIterator(TableMetadata target, 
DecoratedKey partitionKey, SSTableReader sstable, ClusteringIndexFilter filter, 
AbstractBounds<PartitionPosition> range)
+    {
+        final KeyReader reader;
+        try
+        {
+            // ignore warning on try-with-resources, the reader will be closed 
on endOfData or close
+            reader = sstable.keyReader(range.left);

Review Comment:
   Looking at this usage, I wonder if we really need this new method when we 
could leverage 
   
   
`org.apache.cassandra.io.sstable.format.SSTableReader#keyIterator(org.apache.cassandra.dht.AbstractBounds<org.apache.cassandra.db.PartitionPosition>)`
   
   It's a `KeyIterator` rather than a `KeyReader`, but it should have everything we 
need.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to