rezarokni commented on a change in pull request #12864: URL: https://github.com/apache/beam/pull/12864#discussion_r492448931
########## File path: runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateInternals.java ########## @@ -470,81 +541,514 @@ protected WorkItemCommitRequest persistDirectly(WindmillStateCache.ForKey cache) } } - private static class WindmillOrderedList<T> extends SimpleWindmillState - implements OrderedListState<T> { + // Coder for closed-open ranges. + private static class RangeCoder<T extends Comparable> extends CustomCoder<Range<T>> { + private Coder<T> boundCoder; + + RangeCoder(Coder<T> boundCoder) { + this.boundCoder = NullableCoder.of(boundCoder); + } + + @Override + public void encode(Range<T> value, OutputStream outStream) throws CoderException, IOException { + Preconditions.checkState( + value.lowerBoundType().equals(BoundType.CLOSED), "unexpected range " + value); + Preconditions.checkState( + value.upperBoundType().equals(BoundType.OPEN), "unexpected range " + value); + boundCoder.encode(value.hasLowerBound() ? value.lowerEndpoint() : null, outStream); + boundCoder.encode(value.hasUpperBound() ? value.upperEndpoint() : null, outStream); + } + + @Override + public Range<T> decode(InputStream inStream) throws CoderException, IOException { + @Nullable T lower = boundCoder.decode(inStream); + @Nullable T upper = boundCoder.decode(inStream); + if (lower == null) { + return upper != null ? 
Range.lessThan(upper) : Range.all(); + } else if (upper == null) { + return Range.atLeast(lower); + } else { + return Range.closedOpen(lower, upper); + } + } + } + + private static class RangeSetCoder<T extends Comparable> extends CustomCoder<RangeSet<T>> { + private SetCoder<Range<T>> rangesCoder; + + RangeSetCoder(Coder<T> boundCoder) { + this.rangesCoder = SetCoder.of(new RangeCoder<>(boundCoder)); + } + + @Override + public void encode(RangeSet<T> value, OutputStream outStream) throws IOException { + rangesCoder.encode(value.asRanges(), outStream); + } + + @Override + public RangeSet<T> decode(InputStream inStream) throws CoderException, IOException { + return TreeRangeSet.create(rangesCoder.decode(inStream)); + } + } + + /** + * Tracker for the ids used in an ordered list. + * + * <p>Windmill accepts an int64 id for each timestamped-element in the list. Unique elements are + * identified by the pair of timestamp and id. This means that two unique elements e1, e2 must + * have different (ts1, id1), (ts2, id2) pairs. To accomplish this we bucket time into five-minute Review comment: Should call out that in the case where there is no unique id, there is loss of information. Maybe we should think about a configuration option that has to be explicitly overridden to allow inserts of two identical elements. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org