Re: [PR] CASSANDRA-19871 [cassandra]

via GitHub Wed, 28 Aug 2024 02:30:47 -0700


krummas commented on code in PR #3501:
URL: https://github.com/apache/cassandra/pull/3501#discussion_r1732866464



##########
src/java/org/apache/cassandra/journal/Index.java:
##########
@@ -83,4 +84,47 @@ boolean mayContainIds(Iterable<K> ids)
     {
         return any(ids, this::mayContainId);
     }
+
+    interface IndexIterator<K>
+    {
+        boolean hasNext();
+        K currentKey();
+        int currentOffset();
+        int currentSize();
+        void next();
+    }
+
+    /**
+     * Helper methods
+     */
+
+    public static int readOffset(long record)
+    {
+        return (int) (0xffffffffL & (record >> 32));
+    }
+
+    public static long writeOffset(long record, int size)
+    {
+        record &= ~0xffffffff00000000L; //unset all higher bits
+        record |= ((long) size) << 32;
+        return record;
+    }
+
+    public static int readSize(long record)
+    {
+        return (int) (0xffffffffL & record);
+    }
+
+    public static long writeSize(long record, int offset)

Review Comment:
   parameter name should be size?



##########
src/java/org/apache/cassandra/journal/Index.java:
##########
@@ -83,4 +84,47 @@ boolean mayContainIds(Iterable<K> ids)
     {
         return any(ids, this::mayContainId);
     }
+
+    interface IndexIterator<K>
+    {
+        boolean hasNext();
+        K currentKey();
+        int currentOffset();
+        int currentSize();
+        void next();
+    }
+
+    /**
+     * Helper methods
+     */
+
+    public static int readOffset(long record)
+    {
+        return (int) (0xffffffffL & (record >> 32));
+    }
+
+    public static long writeOffset(long record, int size)
+    {
+        record &= ~0xffffffff00000000L; //unset all higher bits

Review Comment:
   easier to read without the complement



##########
src/java/org/apache/cassandra/journal/OnDiskIndex.java:
##########
@@ -207,27 +208,27 @@ public int[] lookUp(K id)
 
         for (int i = keyIndex - 1; i >= 0 && id.equals(keyAtIndex(i)); i--)
         {
-            int length = offsets.length;
-            offsets = Arrays.copyOf(offsets, length + 1);
-            offsets[length] = offsetAtIndex(i);
+            int length = records.length;
+            records = Arrays.copyOf(records, length + 1);
+            records[length] = recordAtIndex(i);
         }
 
         for (int i = keyIndex + 1; i < entryCount && id.equals(keyAtIndex(i)); 
i++)
         {
-            int length = offsets.length;
-            offsets = Arrays.copyOf(offsets, length + 1);
-            offsets[length] = offsetAtIndex(i);
+            int length = records.length;
+            records = Arrays.copyOf(records, length + 1);
+            records[length] = recordAtIndex(i);
         }
 
-        Arrays.sort(offsets);
-        return offsets;
+        Arrays.sort(records);

Review Comment:
   we compare the longs unsigned in `binarySearchUnsigned`, but not here, do we 
even need to compare unsigned? `offset` part of this long is always a normal int



##########
src/java/org/apache/cassandra/journal/Index.java:
##########
@@ -83,4 +84,47 @@ boolean mayContainIds(Iterable<K> ids)
     {
         return any(ids, this::mayContainId);
     }
+
+    interface IndexIterator<K>
+    {
+        boolean hasNext();
+        K currentKey();
+        int currentOffset();
+        int currentSize();
+        void next();
+    }
+
+    /**
+     * Helper methods
+     */
+
+    public static int readOffset(long record)
+    {
+        return (int) (0xffffffffL & (record >> 32));
+    }
+
+    public static long writeOffset(long record, int size)
+    {
+        record &= ~0xffffffff00000000L; //unset all higher bits
+        record |= ((long) size) << 32;
+        return record;
+    }
+
+    public static int readSize(long record)
+    {
+        return (int) (0xffffffffL & record);
+    }
+
+    public static long writeSize(long record, int offset)
+    {
+        record &= ~0xffffffffL; // unset all lower bits

Review Comment:
   probably a bit easier to read without the complement



##########
src/java/org/apache/cassandra/journal/OnDiskIndex.java:
##########
@@ -44,10 +45,10 @@
  */
 final class OnDiskIndex<K> extends Index<K>
 {
-    private static final int[] EMPTY = new int[0];
+    private static final long[] EMPTY = new long[0];
 
     private static final int FILE_PREFIX_SIZE = 4 + 4; // count of entries, CRC
-    private static final int VALUE_SIZE = 4;           // int offset
+    private static final int VALUE_SIZE = Long.BYTES;   // int offset + int 
length

Review Comment:
   nit: `"+ int size"` (its called size elsewhere)



##########
src/java/org/apache/cassandra/journal/InMemoryIndex.java:
##########
@@ -90,20 +116,20 @@ public K lastId()
     }
 
     @Override
-    public int[] lookUp(K id)
+    public long[] lookUp(K id)
     {
         return mayContainId(id) ? index.getOrDefault(id, EMPTY) : EMPTY;
     }
 
     @Override
-    public int lookUpFirst(K id)
+    public long lookUpFirst(K id)
     {
-        int[] offests = lookUp(id);
+        long[] offests = lookUp(id);

Review Comment:
   nit: `offsets`



##########
src/java/org/apache/cassandra/journal/SegmentWriter.java:
##########
@@ -70,10 +70,9 @@ int write(K key, ByteBuffer record, Set<Integer> hosts)
         int position = position();
         try
         {
-            index.update(key, position);
-            metadata.update(hosts);
-
             EntrySerializer.write(key, record, hosts, keySupport, trackedOut, 
descriptor.userVersion);
+            index.update(key, position, position());

Review Comment:
   this method is unused, but I think `size` here is `position() - position`



##########
src/java/org/apache/cassandra/journal/InMemoryIndex.java:
##########
@@ -48,33 +47,60 @@ static <K> InMemoryIndex<K> create(KeySupport<K> keySupport)
         return new InMemoryIndex<>(keySupport, new 
ConcurrentSkipListMap<>(keySupport));
     }
 
-    private InMemoryIndex(KeySupport<K> keySupport, NavigableMap<K, int[]> 
index)
+    private InMemoryIndex(KeySupport<K> keySupport, NavigableMap<K, long[]> 
index)
     {
         super(keySupport);
         this.index = index;
         this.lastId = new AtomicReference<>();
     }
 
-    public void update(K id, int offset)
+    public void update(K id, int offset, int size)

Review Comment:
   I guess we can't get `update` called with the same offset but different size?
   
   Anyway maybe it makes sense to make the binary search only search for the 
offset part of the longs



##########
src/java/org/apache/cassandra/journal/Index.java:
##########
@@ -83,4 +84,47 @@ boolean mayContainIds(Iterable<K> ids)
     {
         return any(ids, this::mayContainId);
     }
+
+    interface IndexIterator<K>
+    {
+        boolean hasNext();
+        K currentKey();
+        int currentOffset();
+        int currentSize();
+        void next();
+    }
+
+    /**
+     * Helper methods
+     */
+
+    public static int readOffset(long record)
+    {
+        return (int) (0xffffffffL & (record >> 32));
+    }
+
+    public static long writeOffset(long record, int size)

Review Comment:
   parameter name should be offset I think



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] CASSANDRA-19871 [cassandra]

Reply via email to