cadonna commented on code in PR #13756:
URL: https://github.com/apache/kafka/pull/13756#discussion_r1223073798


##########
streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedKeyValueBuffer.java:
##########
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.kstream.internals.Change;
+import org.apache.kafka.streams.kstream.internals.FullChangeSerde;
+import org.apache.kafka.streams.processor.ProcessorContext;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
+import org.apache.kafka.streams.processor.internals.SerdeGetter;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+
+import static java.util.Objects.requireNonNull;
+
+public class RocksDBTimeOrderedKeyValueBuffer<K, V> extends 
WrappedStateStore<RocksDBTimeOrderedKeyValueSegmentedBytesStore, Object, 
Object> implements TimeOrderedKeyValueBuffer<K, V> {
+
+    private final long gracePeriod;
+    private long bufferSize;
+    private long minTimestamp;
+    private int numRecords;
+    private Serde<K> keySerde;
+    private FullChangeSerde<V> valueSerde;
+    private final String topic;
+
+    public RocksDBTimeOrderedKeyValueBuffer(final 
RocksDBTimeOrderedKeyValueSegmentedBytesStore store,
+                                            final Duration gracePeriod,
+                                            final String topic) {
+        super(store);
+        this.gracePeriod = gracePeriod.toMillis();
+        minTimestamp = Long.MAX_VALUE;
+        numRecords = 0;
+        bufferSize = 0;
+        this.topic = topic;
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void setSerdesIfNull(final SerdeGetter getter) {
+        keySerde = keySerde == null ? (Serde<K>) getter.keySerde() : keySerde;
+        valueSerde = valueSerde == null ? FullChangeSerde.wrap((Serde<V>) 
getter.valueSerde()) : valueSerde;
+    }
+
+    @Deprecated
+    @Override
+    public void init(final ProcessorContext context, final StateStore root) {
+        wrapped().init(context, wrapped());
+    }
+
+    @Override
+    public void init(final StateStoreContext context, final StateStore root) {
+        wrapped().init(context, wrapped());
+    }
+
+    @Override
+    public void evictWhile(final Supplier<Boolean> predicate, final 
Consumer<Eviction<K, V>> callback) {
+        KeyValue<Bytes, byte[]> keyValue;
+
+        if (predicate.get()) {
+            try (final KeyValueIterator<Bytes, byte[]> iterator = wrapped()
+                .fetchAll(0, wrapped().observedStreamTime - gracePeriod)) {
+                if (iterator.hasNext()) {
+                    keyValue = iterator.next();
+                } else {
+                    if (numRecords() == 0) {

Review Comment:
   Why do you call `numRecords()` here instead of accessing the `numRecords` 
field directly?



##########
streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedKeyValueBuffer.java:
##########
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.kstream.internals.Change;
+import org.apache.kafka.streams.kstream.internals.FullChangeSerde;
+import org.apache.kafka.streams.processor.ProcessorContext;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
+import org.apache.kafka.streams.processor.internals.SerdeGetter;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+
+import static java.util.Objects.requireNonNull;
+
+public class RocksDBTimeOrderedKeyValueBuffer<K, V> extends 
WrappedStateStore<RocksDBTimeOrderedKeyValueSegmentedBytesStore, Object, 
Object> implements TimeOrderedKeyValueBuffer<K, V> {
+
+    private final long gracePeriod;
+    private long bufferSize;
+    private long minTimestamp;
+    private int numRecords;
+    private Serde<K> keySerde;
+    private FullChangeSerde<V> valueSerde;
+    private final String topic;
+
+    public RocksDBTimeOrderedKeyValueBuffer(final 
RocksDBTimeOrderedKeyValueSegmentedBytesStore store,
+                                            final Duration gracePeriod,
+                                            final String topic) {
+        super(store);
+        this.gracePeriod = gracePeriod.toMillis();
+        minTimestamp = Long.MAX_VALUE;
+        numRecords = 0;
+        bufferSize = 0;
+        this.topic = topic;
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void setSerdesIfNull(final SerdeGetter getter) {
+        keySerde = keySerde == null ? (Serde<K>) getter.keySerde() : keySerde;
+        valueSerde = valueSerde == null ? FullChangeSerde.wrap((Serde<V>) 
getter.valueSerde()) : valueSerde;
+    }
+
+    @Deprecated
+    @Override
+    public void init(final ProcessorContext context, final StateStore root) {
+        wrapped().init(context, wrapped());
+    }
+
+    @Override
+    public void init(final StateStoreContext context, final StateStore root) {
+        wrapped().init(context, wrapped());
+    }
+
+    @Override
+    public void evictWhile(final Supplier<Boolean> predicate, final 
Consumer<Eviction<K, V>> callback) {
+        KeyValue<Bytes, byte[]> keyValue;
+
+        if (predicate.get()) {
+            try (final KeyValueIterator<Bytes, byte[]> iterator = wrapped()
+                .fetchAll(0, wrapped().observedStreamTime - gracePeriod)) {
+                if (iterator.hasNext()) {
+                    keyValue = iterator.next();
+                } else {
+                    if (numRecords() == 0) {
+                        minTimestamp = Long.MAX_VALUE;
+                    }
+                    return;
+                }
+
+                BufferValue bufferValue = 
BufferValue.deserialize(ByteBuffer.wrap(keyValue.value));
+                K key = keySerde.deserializer().deserialize(topic,
+                    
PrefixedWindowKeySchemas.TimeFirstWindowKeySchema.extractStoreKeyBytes(keyValue.key.get()));
+
+                Change<V> value = valueSerde.deserializeParts(
+                    topic,
+                    new Change<>(bufferValue.newValue(), 
bufferValue.oldValue())
+                );
+                while (keyValue != null && predicate.get()) {
+                    if (bufferValue.context().timestamp() != minTimestamp) {
+                        throw new IllegalStateException(
+                            "minTimestamp [" + minTimestamp + "] did not match 
the actual min timestamp [" +
+                                bufferValue.context().timestamp() + "]"
+                        );
+                    }
+                    callback.accept(new Eviction<>(key, value, 
bufferValue.context()));
+                    wrapped().remove(keyValue.key);
+                    numRecords--;
+                    bufferSize = bufferSize - computeRecordSize(keyValue.key, 
bufferValue);
+                    if (iterator.hasNext()) {
+                        keyValue = iterator.next();
+                        bufferValue = 
BufferValue.deserialize(ByteBuffer.wrap(keyValue.value));
+                        key = keySerde.deserializer().deserialize(topic,
+                            
PrefixedWindowKeySchemas.TimeFirstWindowKeySchema.extractStoreKeyBytes(keyValue.key.get()));
+                        value = valueSerde.deserializeParts(
+                            topic,
+                            new Change<>(bufferValue.newValue(), 
bufferValue.oldValue())
+                        );
+                        minTimestamp = bufferValue.context().timestamp();
+
+                    } else {
+                        keyValue = null;
+                        minTimestamp = Long.MAX_VALUE;
+                    }
+                }
+            }

Review Comment:
   This code contains some duplication and is a bit hard to read. I think it 
can be simplified to something like this:
   ```java
   while (iterator.hasNext() && predicate.get()) {
       keyValue = iterator.next();
   
       BufferValue bufferValue = 
BufferValue.deserialize(ByteBuffer.wrap(keyValue.value));
       K key = keySerde.deserializer().deserialize(topic,
           
PrefixedWindowKeySchemas.TimeFirstWindowKeySchema.extractStoreKeyBytes(keyValue.key.get()));
       minTimestamp = bufferValue.context().timestamp();
   
       Change<V> value = valueSerde.deserializeParts(
           topic,
           new Change<>(bufferValue.newValue(), bufferValue.oldValue())
       );
   
       callback.accept(new Eviction<>(key, value, bufferValue.context()));
   
       wrapped().remove(keyValue.key);
       numRecords--;
       bufferSize = bufferSize - computeRecordSize(keyValue.key, bufferValue);
   }
   
   minTimestamp = Long.MAX_VALUE;
   ```
   
   This code is only meant to give you an idea. I did not verify whether it is 
correct.



##########
streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedKeyValueBuffer.java:
##########
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.kstream.internals.Change;
+import org.apache.kafka.streams.kstream.internals.FullChangeSerde;
+import org.apache.kafka.streams.processor.ProcessorContext;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
+import org.apache.kafka.streams.processor.internals.SerdeGetter;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+
+import static java.util.Objects.requireNonNull;
+
+public class RocksDBTimeOrderedKeyValueBuffer<K, V> extends 
WrappedStateStore<RocksDBTimeOrderedKeyValueSegmentedBytesStore, Object, 
Object> implements TimeOrderedKeyValueBuffer<K, V> {

Review Comment:
   Could you please explain why you need a segmented byte store? Why do you 
need segments?



##########
streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedKeyValueBuffer.java:
##########
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.kstream.internals.Change;
+import org.apache.kafka.streams.kstream.internals.FullChangeSerde;
+import org.apache.kafka.streams.processor.ProcessorContext;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
+import org.apache.kafka.streams.processor.internals.SerdeGetter;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+
+import static java.util.Objects.requireNonNull;
+
+public class RocksDBTimeOrderedKeyValueBuffer<K, V> extends 
WrappedStateStore<RocksDBTimeOrderedKeyValueSegmentedBytesStore, Object, 
Object> implements TimeOrderedKeyValueBuffer<K, V> {
+
+    private final long gracePeriod;
+    private long bufferSize;
+    private long minTimestamp;
+    private int numRecords;
+    private Serde<K> keySerde;
+    private FullChangeSerde<V> valueSerde;
+    private final String topic;
+
+    public RocksDBTimeOrderedKeyValueBuffer(final 
RocksDBTimeOrderedKeyValueSegmentedBytesStore store,
+                                            final Duration gracePeriod,
+                                            final String topic) {
+        super(store);
+        this.gracePeriod = gracePeriod.toMillis();
+        minTimestamp = Long.MAX_VALUE;
+        numRecords = 0;
+        bufferSize = 0;
+        this.topic = topic;
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void setSerdesIfNull(final SerdeGetter getter) {
+        keySerde = keySerde == null ? (Serde<K>) getter.keySerde() : keySerde;
+        valueSerde = valueSerde == null ? FullChangeSerde.wrap((Serde<V>) 
getter.valueSerde()) : valueSerde;
+    }
+
+    @Deprecated
+    @Override
+    public void init(final ProcessorContext context, final StateStore root) {
+        wrapped().init(context, wrapped());
+    }
+
+    @Override
+    public void init(final StateStoreContext context, final StateStore root) {
+        wrapped().init(context, wrapped());
+    }
+
+    @Override
+    public void evictWhile(final Supplier<Boolean> predicate, final 
Consumer<Eviction<K, V>> callback) {
+        KeyValue<Bytes, byte[]> keyValue;
+
+        if (predicate.get()) {
+            try (final KeyValueIterator<Bytes, byte[]> iterator = wrapped()
+                .fetchAll(0, wrapped().observedStreamTime - gracePeriod)) {
+                if (iterator.hasNext()) {
+                    keyValue = iterator.next();
+                } else {
+                    if (numRecords() == 0) {
+                        minTimestamp = Long.MAX_VALUE;
+                    }
+                    return;
+                }
+
+                BufferValue bufferValue = 
BufferValue.deserialize(ByteBuffer.wrap(keyValue.value));
+                K key = keySerde.deserializer().deserialize(topic,
+                    
PrefixedWindowKeySchemas.TimeFirstWindowKeySchema.extractStoreKeyBytes(keyValue.key.get()));
+
+                Change<V> value = valueSerde.deserializeParts(
+                    topic,
+                    new Change<>(bufferValue.newValue(), 
bufferValue.oldValue())
+                );
+                while (keyValue != null && predicate.get()) {
+                    if (bufferValue.context().timestamp() != minTimestamp) {
+                        throw new IllegalStateException(
+                            "minTimestamp [" + minTimestamp + "] did not match 
the actual min timestamp [" +
+                                bufferValue.context().timestamp() + "]"
+                        );
+                    }
+                    callback.accept(new Eviction<>(key, value, 
bufferValue.context()));
+                    wrapped().remove(keyValue.key);
+                    numRecords--;
+                    bufferSize = bufferSize - computeRecordSize(keyValue.key, 
bufferValue);
+                    if (iterator.hasNext()) {
+                        keyValue = iterator.next();
+                        bufferValue = 
BufferValue.deserialize(ByteBuffer.wrap(keyValue.value));
+                        key = keySerde.deserializer().deserialize(topic,
+                            
PrefixedWindowKeySchemas.TimeFirstWindowKeySchema.extractStoreKeyBytes(keyValue.key.get()));
+                        value = valueSerde.deserializeParts(
+                            topic,
+                            new Change<>(bufferValue.newValue(), 
bufferValue.oldValue())
+                        );
+                        minTimestamp = bufferValue.context().timestamp();
+
+                    } else {
+                        keyValue = null;
+                        minTimestamp = Long.MAX_VALUE;
+                    }
+                }
+            }
+        }
+    }
+
+
+    @Override
+    public Maybe<ValueAndTimestamp<V>> priorValueForBuffered(final K key) {
+        return Maybe.undefined();
+    }
+
+    @Override
+    public void put(final long time, final Record<K, Change<V>> record, final 
ProcessorRecordContext recordContext) {
+        requireNonNull(record.value(), "value cannot be null");
+        requireNonNull(recordContext, "recordContext cannot be null");
+        if (wrapped().observedStreamTime - gracePeriod > record.timestamp()) {
+            return;
+        }
+        final Bytes serializedKey = Bytes.wrap(
+            
PrefixedWindowKeySchemas.TimeFirstWindowKeySchema.toStoreKeyBinary(keySerde.serializer().serialize(topic,
 record.key()),
+                record.timestamp(),
+                Long.valueOf(recordContext.offset()).intValue()).get());

Review Comment:
   Isn't `Long.valueOf(recordContext.offset()).intValue()` the same as a plain 
cast to `int`?
   ```suggestion
                   (int) recordContext.offset()).get());
   ```



##########
streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedKeyValueBuffer.java:
##########
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.kstream.internals.Change;
+import org.apache.kafka.streams.kstream.internals.FullChangeSerde;
+import org.apache.kafka.streams.processor.ProcessorContext;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
+import org.apache.kafka.streams.processor.internals.SerdeGetter;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+
+import static java.util.Objects.requireNonNull;
+
+public class RocksDBTimeOrderedKeyValueBuffer<K, V> extends 
WrappedStateStore<RocksDBTimeOrderedKeyValueSegmentedBytesStore, Object, 
Object> implements TimeOrderedKeyValueBuffer<K, V> {
+
+    private final Duration gracePeriod;
+    private long bufferSize;
+    private long minTimestamp;
+    private int numRec;
+    private Serde<K> keySerde;
+    private FullChangeSerde<V> valueSerde;

Review Comment:
   I agree that we should try to get rid of the `Change<V>`. Maybe you need a 
separate PR before this one to prepare the data structures for this KIP 
implementation. Such situations come up all the time.



##########
streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDbTimeOrderedKeyValueBytesStoreSupplier.java:
##########
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+public class RocksDbTimeOrderedKeyValueBytesStoreSupplier {

Review Comment:
   ```suggestion
   public class RocksDBTimeOrderedKeyValueBytesStoreSupplier {
   ```



##########
streams/src/main/java/org/apache/kafka/streams/state/internals/RocksDBTimeOrderedKeyValueBuffer.java:
##########
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.streams.state.internals;
+
+import org.apache.kafka.common.serialization.Serde;
+import org.apache.kafka.common.utils.Bytes;
+import org.apache.kafka.streams.KeyValue;
+import org.apache.kafka.streams.kstream.internals.Change;
+import org.apache.kafka.streams.kstream.internals.FullChangeSerde;
+import org.apache.kafka.streams.processor.ProcessorContext;
+import org.apache.kafka.streams.processor.StateStore;
+import org.apache.kafka.streams.processor.StateStoreContext;
+import org.apache.kafka.streams.processor.api.Record;
+import org.apache.kafka.streams.processor.internals.ProcessorRecordContext;
+import org.apache.kafka.streams.processor.internals.SerdeGetter;
+import org.apache.kafka.streams.state.KeyValueIterator;
+import org.apache.kafka.streams.state.ValueAndTimestamp;
+
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+
+import static java.util.Objects.requireNonNull;
+
+public class RocksDBTimeOrderedKeyValueBuffer<K, V> extends 
WrappedStateStore<RocksDBTimeOrderedKeyValueSegmentedBytesStore, Object, 
Object> implements TimeOrderedKeyValueBuffer<K, V> {
+
+    private final long gracePeriod;
+    private long bufferSize;
+    private long minTimestamp;
+    private int numRecords;
+    private Serde<K> keySerde;
+    private FullChangeSerde<V> valueSerde;
+    private final String topic;
+
+    public RocksDBTimeOrderedKeyValueBuffer(final 
RocksDBTimeOrderedKeyValueSegmentedBytesStore store,
+                                            final Duration gracePeriod,
+                                            final String topic) {
+        super(store);
+        this.gracePeriod = gracePeriod.toMillis();
+        minTimestamp = Long.MAX_VALUE;
+        numRecords = 0;
+        bufferSize = 0;
+        this.topic = topic;
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void setSerdesIfNull(final SerdeGetter getter) {
+        keySerde = keySerde == null ? (Serde<K>) getter.keySerde() : keySerde;
+        valueSerde = valueSerde == null ? FullChangeSerde.wrap((Serde<V>) 
getter.valueSerde()) : valueSerde;
+    }
+
+    @Deprecated
+    @Override
+    public void init(final ProcessorContext context, final StateStore root) {
+        wrapped().init(context, wrapped());
+    }
+
+    @Override
+    public void init(final StateStoreContext context, final StateStore root) {
+        wrapped().init(context, wrapped());
+    }
+
+    @Override
+    public void evictWhile(final Supplier<Boolean> predicate, final 
Consumer<Eviction<K, V>> callback) {
+        KeyValue<Bytes, byte[]> keyValue;
+
+        if (predicate.get()) {
+            try (final KeyValueIterator<Bytes, byte[]> iterator = wrapped()
+                .fetchAll(0, wrapped().observedStreamTime - gracePeriod)) {
+                if (iterator.hasNext()) {
+                    keyValue = iterator.next();
+                } else {
+                    if (numRecords() == 0) {
+                        minTimestamp = Long.MAX_VALUE;
+                    }
+                    return;
+                }
+
+                BufferValue bufferValue = 
BufferValue.deserialize(ByteBuffer.wrap(keyValue.value));
+                K key = keySerde.deserializer().deserialize(topic,
+                    
PrefixedWindowKeySchemas.TimeFirstWindowKeySchema.extractStoreKeyBytes(keyValue.key.get()));
+
+                Change<V> value = valueSerde.deserializeParts(
+                    topic,
+                    new Change<>(bufferValue.newValue(), 
bufferValue.oldValue())
+                );
+                while (keyValue != null && predicate.get()) {
+                    if (bufferValue.context().timestamp() != minTimestamp) {
+                        throw new IllegalStateException(
+                            "minTimestamp [" + minTimestamp + "] did not match 
the actual min timestamp [" +
+                                bufferValue.context().timestamp() + "]"
+                        );
+                    }
+                    callback.accept(new Eviction<>(key, value, 
bufferValue.context()));
+                    wrapped().remove(keyValue.key);
+                    numRecords--;
+                    bufferSize = bufferSize - computeRecordSize(keyValue.key, 
bufferValue);
+                    if (iterator.hasNext()) {
+                        keyValue = iterator.next();
+                        bufferValue = 
BufferValue.deserialize(ByteBuffer.wrap(keyValue.value));
+                        key = keySerde.deserializer().deserialize(topic,
+                            
PrefixedWindowKeySchemas.TimeFirstWindowKeySchema.extractStoreKeyBytes(keyValue.key.get()));
+                        value = valueSerde.deserializeParts(
+                            topic,
+                            new Change<>(bufferValue.newValue(), 
bufferValue.oldValue())
+                        );
+                        minTimestamp = bufferValue.context().timestamp();
+
+                    } else {
+                        keyValue = null;
+                        minTimestamp = Long.MAX_VALUE;
+                    }
+                }
+            }
+        }
+    }
+
+
+    @Override
+    public Maybe<ValueAndTimestamp<V>> priorValueForBuffered(final K key) {
+        return Maybe.undefined();
+    }
+
+    @Override
+    public void put(final long time, final Record<K, Change<V>> record, final 
ProcessorRecordContext recordContext) {
+        requireNonNull(record.value(), "value cannot be null");
+        requireNonNull(recordContext, "recordContext cannot be null");
+        if (wrapped().observedStreamTime - gracePeriod > record.timestamp()) {
+            return;
+        }
+        final Bytes serializedKey = Bytes.wrap(
+            
PrefixedWindowKeySchemas.TimeFirstWindowKeySchema.toStoreKeyBinary(keySerde.serializer().serialize(topic,
 record.key()),
+                record.timestamp(),
+                Long.valueOf(recordContext.offset()).intValue()).get());

Review Comment:
   As far as I understand, the offset of a record might not be unique. First of 
all, the Javadoc of `RecordContext` states that `RecordContext#offset()` might 
return -1. Second, I am not sure what happens when the upstream processor is, 
for example, a flat map that duplicates each record. Do the two resulting 
records have the same offset?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to