iht commented on code in PR #32060: URL: https://github.com/apache/beam/pull/32060#discussion_r1759761324
########## sdks/java/io/solace/src/main/java/org/apache/beam/sdk/io/solace/write/UnboundedBatchedSolaceWriter.java: ########## @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.io.solace.write; + +import com.solacesystems.jcsmp.DeliveryMode; +import com.solacesystems.jcsmp.Destination; +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.io.solace.SolaceIO.SubmissionMode; +import org.apache.beam.sdk.io.solace.broker.SessionServiceFactory; +import org.apache.beam.sdk.io.solace.data.Solace; +import org.apache.beam.sdk.metrics.Counter; +import org.apache.beam.sdk.metrics.Metrics; +import org.apache.beam.sdk.state.StateSpec; +import org.apache.beam.sdk.state.StateSpecs; +import org.apache.beam.sdk.state.TimeDomain; +import org.apache.beam.sdk.state.Timer; +import org.apache.beam.sdk.state.TimerSpec; +import org.apache.beam.sdk.state.TimerSpecs; +import org.apache.beam.sdk.state.ValueState; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.values.KV; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Internal +public final class UnboundedBatchedSolaceWriter { + /** + * This DoFn is the responsible for writing to Solace in batch mode (holding up any messages), and + * emit the corresponding output (success or fail; only for persistent messages), so the + * SolaceIO.Write connector can be composed with other subsequent transforms in the pipeline. + * + * <p>The DoFn will create several JCSMP sessions per VM, and the sessions and producers will be + * reused across different threads (if the number of threads is higher than the number of + * sessions, which is probably the most common case). + * + * <p>The producer uses the JCSMP send multiple mode to publish a batch of messages together with + * a single API call. The acks from this publication are also processed in batch, and returned as + * the output of the DoFn. + * + * <p>The batch size is 50, and this is currently the maximum value supported by Solace. + * + * <p>There are no acks if the delivery mode is set to DIRECT. + * + * <p>This writer DoFn offers higher throughput than {@link + * UnboundedStreamingSolaceWriter.WriterDoFn} but also higher latency. + */ + @Internal + public static class WriterDoFn extends UnboundedSolaceWriter.AbstractWriterDoFn { + + private static final Logger LOG = LoggerFactory.getLogger(WriterDoFn.class); + + private static final int ACKS_FLUSHING_INTERVAL_SECS = 10; + + private final Counter sentToBroker = + Metrics.counter(UnboundedBatchedSolaceWriter.class, "msgs_sent_to_broker"); + + private final Counter batchesRejectedByBroker = + Metrics.counter(UnboundedBatchedSolaceWriter.class, "batches_rejected"); + + // State variables are never explicitly "used" + @SuppressWarnings("UnusedVariable") + @StateId("processing_key") + private final StateSpec<ValueState<Integer>> processingKeySpec = StateSpecs.value(); + + @SuppressWarnings("UnusedVariable") + @TimerId("bundle_flusher") + private final TimerSpec bundleFlusherTimerSpec = TimerSpecs.timer(TimeDomain.PROCESSING_TIME); + + public WriterDoFn( + SerializableFunction<Solace.Record, Destination> destinationFn, + SessionServiceFactory sessionServiceFactory, + DeliveryMode deliveryMode, + SubmissionMode submissionMode, + int producersMapCardinality, + boolean publishLatencyMetrics) { + super( + destinationFn, + sessionServiceFactory, + deliveryMode, + submissionMode, + producersMapCardinality, + publishLatencyMetrics); + } + + // The state variable is here just to force a shuffling with a certain cardinality + @ProcessElement + public void processElement( + @Element KV<Integer, Solace.Record> element, + @StateId("processing_key") ValueState<Integer> ignoredProcessingKey, Review Comment: The idea is to force parallelization to a maximum number of instances, by controlling the cardinality of the key set in the input. The stateful `DoFn` will force a shuffling, and all elements with the same key will be procesed by the same instance (published by the same client). I guess that it should be the same effect in the stateful `DoFn` without necessarily using this state variable, since there is already a timer. Let me do some tests to make sure the shuffling still happens if I remove the state variable. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@beam.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org