AHeise commented on a change in pull request #16783: URL: https://github.com/apache/flink/pull/16783#discussion_r689720051
########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. 
+ * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() Review comment: Update example. Should start with `KafkaRecordSerializationSchema.builder("topic")`. Note we also want to get rid of `<String>`. ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. + * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. 
+ * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; + @Nullable private final Function<IN, String> topicSelector; Review comment: If we translate topic -> not `@Nullable`. ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. + * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. 
+ * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; + @Nullable private final Function<IN, String> topicSelector; + @Nullable private FlinkKafkaPartitioner<IN> partitioner; + @Nullable private SerializationSchema<IN> keySerializationSchema; + + private SerializationSchema<IN> valueSerializationSchema; Review comment: `@Nullable` ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchema.java ########## @@ -85,4 +86,28 @@ default void open( */ int[] getPartitionsForTopic(String topic); } + + /** + * Creates a default schema builder to provide common building blocks i.e. key serialization, + * value serialization, partitioning; based on a static target topic. + * + * @param topic target topic for all records + * @param <T> type of incoming elements + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + static <T> KafkaRecordSerializationSchemaBuilder<T> builder(String topic) { + return new KafkaRecordSerializationSchemaBuilder<T>(topic); + } + + /** + * Creates a default schema builder to provide common building blocks i.e. key serialization, + * value serialization, partitioning; based on a dynamic target topic. + * + * @param topicSelector to dynamically derive the target topic based on the incoming element + * @param <T> type of incoming elements + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + static <T> KafkaRecordSerializationSchemaBuilder<T> builder(Function<T, String> topicSelector) { + return new KafkaRecordSerializationSchemaBuilder<T>(topicSelector); + } Review comment: Maybe just start with `builder()`? With only one parameter, it looks like it's the only mandatory parameter. 
On second thought, it could actually be the only parameter, if the builder uses the Flink type system to serialize to `byte[]` by default. But that requires some small shenanigans to pass the type information through some internal functions in `DefaultKafkaSinkContext`... ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. + * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. 
+ * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; + @Nullable private final Function<IN, String> topicSelector; + @Nullable private FlinkKafkaPartitioner<IN> partitioner; + @Nullable private SerializationSchema<IN> keySerializationSchema; + + private SerializationSchema<IN> valueSerializationSchema; + + KafkaRecordSerializationSchemaBuilder(String topic) { + this.topic = checkNotNull(topic); + this.topicSelector = null; + } + Review comment: Could be replace by using `t -> topic` on call-site. ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchema.java ########## @@ -85,4 +86,28 @@ default void open( */ int[] getPartitionsForTopic(String topic); } + + /** + * Creates a default schema builder to provide common building blocks i.e. key serialization, + * value serialization, partitioning; based on a static target topic. + * + * @param topic target topic for all records + * @param <T> type of incoming elements + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + static <T> KafkaRecordSerializationSchemaBuilder<T> builder(String topic) { + return new KafkaRecordSerializationSchemaBuilder<T>(topic); Review comment: Translate to constant `Function`? Then `KafkaRecordSerializationSchemaBuilder` just use one ctor and one invariant. ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. 
+ * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. + * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; Review comment: Potentially remove ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. 
+ * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. + * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; + @Nullable private final Function<IN, String> topicSelector; + @Nullable private FlinkKafkaPartitioner<IN> partitioner; + @Nullable private SerializationSchema<IN> keySerializationSchema; + + private SerializationSchema<IN> valueSerializationSchema; + + KafkaRecordSerializationSchemaBuilder(String topic) { + this.topic = checkNotNull(topic); + this.topicSelector = null; + } + + KafkaRecordSerializationSchemaBuilder(Function<IN, String> topicSupplier) { + this.topicSelector = checkNotNull(topicSupplier); + this.topic = null; + } + + /** + * Sets a custom partitioner determining the target partition of the target topic. 
+ * + * @param partitioner + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withPartitioner( + FlinkKafkaPartitioner<IN> partitioner) { + this.partitioner = checkNotNull(partitioner); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the key + * of the {@link ProducerRecord}. + * + * @param keySerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializationSchema( + SerializationSchema<IN> keySerializationSchema) { + checkState(this.keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = checkNotNull(keySerializationSchema); + return this; + } + + /** + * Sets Kafka's {@link Serializer} to serialize incoming elements to the key of the {@link + * ProducerRecord}. + * + * @param keySerializer + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializer( + Class<? extends Serializer<IN>> keySerializer) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>(keySerializer, topic, topicSelector); Review comment: Good solution with immediately wrapping things into `KafkaSerializerWrapper`. However, now `topicSelector` is invoked up to 3 times per record. There are two solutions: - add another indirections `KafkaRecordSerializationSchemaPart` (yes, please find a better name) and let both `KafkaSerializerWrapper` and another class `DeserializationSchemaWrapper` that also take the topic. - use a stateful topic selector that caches the values for `KafkaSerializerWrapper` (maybe `TopicContext` or so). After some thinking: I like the second option better. 
########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. 
+ * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. + * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; + @Nullable private final Function<IN, String> topicSelector; + @Nullable private FlinkKafkaPartitioner<IN> partitioner; + @Nullable private SerializationSchema<IN> keySerializationSchema; + + private SerializationSchema<IN> valueSerializationSchema; + + KafkaRecordSerializationSchemaBuilder(String topic) { + this.topic = checkNotNull(topic); + this.topicSelector = null; + } + + KafkaRecordSerializationSchemaBuilder(Function<IN, String> topicSupplier) { + this.topicSelector = checkNotNull(topicSupplier); + this.topic = null; + } + + /** + * Sets a custom partitioner determining the target partition of the target topic. 
+ * + * @param partitioner + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withPartitioner( + FlinkKafkaPartitioner<IN> partitioner) { + this.partitioner = checkNotNull(partitioner); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the key + * of the {@link ProducerRecord}. + * + * @param keySerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializationSchema( + SerializationSchema<IN> keySerializationSchema) { + checkState(this.keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = checkNotNull(keySerializationSchema); + return this; + } + + /** + * Sets Kafka's {@link Serializer} to serialize incoming elements to the key of the {@link + * ProducerRecord}. + * + * @param keySerializer + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializer( + Class<? extends Serializer<IN>> keySerializer) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>(keySerializer, topic, topicSelector); + return this; + } + + /** + * Sets a configurable Kafka {@link Serializer} and pass a configuration to serialize incoming + * elements to the key of the {@link ProducerRecord}. 
+ * + * @param keySerializerWithConfiguration + * @param configuration + * @param <T> type of the used serializer class + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public <T extends Configurable & Serializer<IN>> + KafkaRecordSerializationSchemaBuilder<IN> withConfigurableKeySerializer( + Class<T> keySerializerWithConfiguration, Map<String, String> configuration) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>( + keySerializerWithConfiguration, configuration, topic, topicSelector); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the + * value of the {@link ProducerRecord}. + * + * @param valueSerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withValueSerializationSchema( + SerializationSchema<IN> valueSerializationSchema) { + checkState(this.valueSerializationSchema == null, exceedsValueSerializerConfigurations()); + this.valueSerializationSchema = checkNotNull(valueSerializationSchema); + return this; + } + + /** + * Sets Kafka's {@link Serializer} to serialize incoming elements to the value of the {@link + * ProducerRecord}. + * + * @param valueSerializer + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withValueSerializer( Review comment: `withKafkaValueSerializer`? ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. 
+ * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. + * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; + @Nullable private final Function<IN, String> topicSelector; + @Nullable private FlinkKafkaPartitioner<IN> partitioner; + @Nullable private SerializationSchema<IN> keySerializationSchema; + + private SerializationSchema<IN> valueSerializationSchema; + + KafkaRecordSerializationSchemaBuilder(String topic) { + this.topic = checkNotNull(topic); + this.topicSelector = null; + } + + KafkaRecordSerializationSchemaBuilder(Function<IN, String> topicSupplier) { + this.topicSelector = checkNotNull(topicSupplier); + this.topic = null; + } + + /** + * Sets a custom partitioner determining the target partition of the target topic. 
+ * + * @param partitioner + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withPartitioner( + FlinkKafkaPartitioner<IN> partitioner) { + this.partitioner = checkNotNull(partitioner); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the key + * of the {@link ProducerRecord}. + * + * @param keySerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializationSchema( + SerializationSchema<IN> keySerializationSchema) { + checkState(this.keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = checkNotNull(keySerializationSchema); + return this; + } + + /** + * Sets Kafka's {@link Serializer} to serialize incoming elements to the key of the {@link + * ProducerRecord}. + * + * @param keySerializer + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializer( + Class<? extends Serializer<IN>> keySerializer) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>(keySerializer, topic, topicSelector); + return this; + } + + /** + * Sets a configurable Kafka {@link Serializer} and pass a configuration to serialize incoming + * elements to the key of the {@link ProducerRecord}. 
+ * + * @param keySerializerWithConfiguration + * @param configuration + * @param <T> type of the used serializer class + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public <T extends Configurable & Serializer<IN>> + KafkaRecordSerializationSchemaBuilder<IN> withConfigurableKeySerializer( + Class<T> keySerializerWithConfiguration, Map<String, String> configuration) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>( + keySerializerWithConfiguration, configuration, topic, topicSelector); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the + * value of the {@link ProducerRecord}. + * + * @param valueSerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withValueSerializationSchema( + SerializationSchema<IN> valueSerializationSchema) { + checkState(this.valueSerializationSchema == null, exceedsValueSerializerConfigurations()); + this.valueSerializationSchema = checkNotNull(valueSerializationSchema); + return this; + } + + /** + * Sets Kafka's {@link Serializer} to serialize incoming elements to the value of the {@link + * ProducerRecord}. + * + * @param valueSerializer + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withValueSerializer( + Class<? extends Serializer<IN>> valueSerializer) { + checkState(valueSerializationSchema == null, exceedsValueSerializerConfigurations()); + this.valueSerializationSchema = + new KafkaSerializerWrapper<>(valueSerializer, topic, topicSelector); + return this; + } + + /** + * Sets a configurable Kafka {@link Serializer} and pass a configuration to serialize incoming + * elements to the value of the {@link ProducerRecord}. 
+ * + * @param valueSerializerWithConfiguration + * @param configuration + * @param <T> type of the used serializer class + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public <T extends Configurable & Serializer<IN>> + KafkaRecordSerializationSchemaBuilder<IN> withConfigurableValueSerializer( Review comment: `withKafkaValueSerializer`? ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. + * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. 
+ * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; + @Nullable private final Function<IN, String> topicSelector; + @Nullable private FlinkKafkaPartitioner<IN> partitioner; + @Nullable private SerializationSchema<IN> keySerializationSchema; + + private SerializationSchema<IN> valueSerializationSchema; + + KafkaRecordSerializationSchemaBuilder(String topic) { + this.topic = checkNotNull(topic); + this.topicSelector = null; + } + + KafkaRecordSerializationSchemaBuilder(Function<IN, String> topicSupplier) { + this.topicSelector = checkNotNull(topicSupplier); + this.topic = null; + } + + /** + * Sets a custom partitioner determining the target partition of the target topic. + * + * @param partitioner + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withPartitioner( + FlinkKafkaPartitioner<IN> partitioner) { + this.partitioner = checkNotNull(partitioner); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the key + * of the {@link ProducerRecord}. + * + * @param keySerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializationSchema( + SerializationSchema<IN> keySerializationSchema) { + checkState(this.keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = checkNotNull(keySerializationSchema); + return this; + } + + /** + * Sets Kafka's {@link Serializer} to serialize incoming elements to the key of the {@link + * ProducerRecord}. + * + * @param keySerializer + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializer( + Class<? 
extends Serializer<IN>> keySerializer) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>(keySerializer, topic, topicSelector); + return this; + } + + /** + * Sets a configurable Kafka {@link Serializer} and pass a configuration to serialize incoming + * elements to the key of the {@link ProducerRecord}. + * + * @param keySerializerWithConfiguration + * @param configuration + * @param <T> type of the used serializer class + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public <T extends Configurable & Serializer<IN>> + KafkaRecordSerializationSchemaBuilder<IN> withConfigurableKeySerializer( + Class<T> keySerializerWithConfiguration, Map<String, String> configuration) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>( + keySerializerWithConfiguration, configuration, topic, topicSelector); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the + * value of the {@link ProducerRecord}. + * + * @param valueSerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withValueSerializationSchema( + SerializationSchema<IN> valueSerializationSchema) { + checkState(this.valueSerializationSchema == null, exceedsValueSerializerConfigurations()); + this.valueSerializationSchema = checkNotNull(valueSerializationSchema); + return this; + } Review comment: There is a trick here to avoid binding the type of the KafkaRecordSerializationSchemaBuilder at `builder(...)`. 
```suggestion public <T extends IN> KafkaRecordSerializationSchemaBuilder<T> withValueSerializationSchema( SerializationSchema<T> valueSerializationSchema) { checkState(this.valueSerializationSchema == null, exceedsValueSerializerConfigurations()); @SuppressWarnings("unchecked") KafkaRecordSerializationSchemaBuilder<T> self = (KafkaRecordSerializationSchemaBuilder<T>) this; self.valueSerializationSchema = checkNotNull(valueSerializationSchema); return self; } ``` Now we capture T and use that as the main type. This also works if you still specify the type explicitly in `builder`. ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. + * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. 
+ * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; + @Nullable private final Function<IN, String> topicSelector; + @Nullable private FlinkKafkaPartitioner<IN> partitioner; + @Nullable private SerializationSchema<IN> keySerializationSchema; + + private SerializationSchema<IN> valueSerializationSchema; + + KafkaRecordSerializationSchemaBuilder(String topic) { + this.topic = checkNotNull(topic); + this.topicSelector = null; + } + + KafkaRecordSerializationSchemaBuilder(Function<IN, String> topicSupplier) { + this.topicSelector = checkNotNull(topicSupplier); + this.topic = null; + } + + /** + * Sets a custom partitioner determining the target partition of the target topic. + * + * @param partitioner + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withPartitioner( + FlinkKafkaPartitioner<IN> partitioner) { + this.partitioner = checkNotNull(partitioner); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the key + * of the {@link ProducerRecord}. + * + * @param keySerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializationSchema( + SerializationSchema<IN> keySerializationSchema) { + checkState(this.keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = checkNotNull(keySerializationSchema); + return this; + } + + /** + * Sets Kafka's {@link Serializer} to serialize incoming elements to the key of the {@link + * ProducerRecord}. + * + * @param keySerializer + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializer( + Class<? 
extends Serializer<IN>> keySerializer) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>(keySerializer, topic, topicSelector); + return this; + } + + /** + * Sets a configurable Kafka {@link Serializer} and pass a configuration to serialize incoming + * elements to the key of the {@link ProducerRecord}. + * + * @param keySerializerWithConfiguration + * @param configuration + * @param <T> type of the used serializer class + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public <T extends Configurable & Serializer<IN>> + KafkaRecordSerializationSchemaBuilder<IN> withConfigurableKeySerializer( + Class<T> keySerializerWithConfiguration, Map<String, String> configuration) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>( + keySerializerWithConfiguration, configuration, topic, topicSelector); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the + * value of the {@link ProducerRecord}. + * + * @param valueSerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withValueSerializationSchema( + SerializationSchema<IN> valueSerializationSchema) { + checkState(this.valueSerializationSchema == null, exceedsValueSerializerConfigurations()); + this.valueSerializationSchema = checkNotNull(valueSerializationSchema); + return this; + } + + /** + * Sets Kafka's {@link Serializer} to serialize incoming elements to the value of the {@link + * ProducerRecord}. + * + * @param valueSerializer + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withValueSerializer( + Class<? 
extends Serializer<IN>> valueSerializer) { + checkState(valueSerializationSchema == null, exceedsValueSerializerConfigurations()); + this.valueSerializationSchema = + new KafkaSerializerWrapper<>(valueSerializer, topic, topicSelector); + return this; + } + + /** + * Sets a configurable Kafka {@link Serializer} and pass a configuration to serialize incoming + * elements to the value of the {@link ProducerRecord}. + * + * @param valueSerializerWithConfiguration + * @param configuration + * @param <T> type of the used serializer class + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public <T extends Configurable & Serializer<IN>> + KafkaRecordSerializationSchemaBuilder<IN> withConfigurableValueSerializer( + Class<T> valueSerializerWithConfiguration, Map<String, String> configuration) { + checkState(valueSerializationSchema == null, exceedsValueSerializerConfigurations()); + this.valueSerializationSchema = + new KafkaSerializerWrapper<>( + valueSerializerWithConfiguration, configuration, topic, topicSelector); + return this; + } + + /** + * Constructs the {@link KafkaRecordSerializationSchemaBuilder} with the configured properties. + * + * @return {@link KafkaRecordSerializationSchema} + */ + public KafkaRecordSerializationSchema<IN> build() { + checkState(valueSerializationSchema != null, "No value serializer is configured."); + return new KafkaRecordSerializationSchema<>() { + + @Override + public void open( + SerializationSchema.InitializationContext context, KafkaSinkContext sinkContext) + throws Exception { + valueSerializationSchema.open(context); + if (keySerializationSchema != null) { + keySerializationSchema.open(context); + } + if (partitioner != null) { + partitioner.open( + sinkContext.getParallelInstanceId(), + sinkContext.getNumberOfParallelInstances()); + } + } + + @Override + public ProducerRecord<byte[], byte[]> serialize( + IN element, KafkaSinkContext context, Long timestamp) { + final String targetTopic = topic == null ? 
topicSelector.apply(element) : topic; + final byte[] value = valueSerializationSchema.serialize(element); + byte[] key = null; + if (keySerializationSchema != null) { + key = keySerializationSchema.serialize(element); + } + final OptionalInt partition = + partitioner != null + ? OptionalInt.of( + partitioner.partition( + element, + key, + value, + targetTopic, + context.getPartitionsForTopic(targetTopic))) + : OptionalInt.empty(); + if (partition.isPresent()) { + return new ProducerRecord<>(targetTopic, partition.getAsInt(), key, value); + } + if (key == null) { + return new ProducerRecord<>(targetTopic, value); + } + return new ProducerRecord<>(targetTopic, key, value); + } + }; + } + + private static String exceedsSerializerConfigurations(String serializerType) { + return String.format( + "It is only possible to configure exactly one of " + + "with%sSerializationSchema, with%<sSerializer or withConfigurable%<sSerializer.", + serializerType); + } + + private static String exceedsValueSerializerConfigurations() { + return exceedsSerializerConfigurations("Value"); + } + + private static String exceedsKeySerializerConfigurations() { + return exceedsSerializerConfigurations("Key"); + } Review comment: Inline and use `checkState(condition, format, params)`? At the very least, I'd inline the two lower methods. ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. + * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. 
+ * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. + * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; + @Nullable private final Function<IN, String> topicSelector; + @Nullable private FlinkKafkaPartitioner<IN> partitioner; + @Nullable private SerializationSchema<IN> keySerializationSchema; + + private SerializationSchema<IN> valueSerializationSchema; + + KafkaRecordSerializationSchemaBuilder(String topic) { + this.topic = checkNotNull(topic); + this.topicSelector = null; + } + + KafkaRecordSerializationSchemaBuilder(Function<IN, String> topicSupplier) { + this.topicSelector = checkNotNull(topicSupplier); + this.topic = null; + } + + /** + * Sets a custom partitioner determining the target partition of the target topic. 
+ * + * @param partitioner + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withPartitioner( Review comment: Is there also some Kafka partitioner? ########## File path: flink-connectors/flink-connector-kafka/src/main/java/org/apache/flink/streaming/connectors/kafka/sink/KafkaRecordSerializationSchemaBuilder.java ########## @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.connectors.kafka.sink; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.Configurable; +import org.apache.kafka.common.serialization.Serializer; + +import javax.annotation.Nullable; + +import java.util.Map; +import java.util.OptionalInt; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Builder to construct {@link KafkaRecordSerializationSchema}. 
+ * + * <p>This class should give a first entrypoint when trying to serialize elements to {@link + * ProducerRecord}. The following examples show some of the possibilities. + * + * <pre>Simple key-value serialization: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializationSchema(new SimpleStringSchema()) + * .withValueSerializationSchema(new SimpleStringSchema()) + * .build() + * }</pre> + * + * <pre>Using Kafka's serialization stack: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withKeySerializer(StringSerializer.class) + * .withValueSerializer(StringSerializer.class) + * .build() + * }</pre> + * + * <pre>With custom partitioner: + * {@code + * new KafkaRecordSerializationSchemaBuilder<String>("topic") + * .withPartitioner(MY_FLINK_PARTITIONER) + * .withKeySerializationSchema(StringSerializer.class) + * .build() + * }</pre> + * + * <p>The different serialization methods for key and value are mutually exclusive thus i.e. it is + * not possible to use {@link #withKeySerializationSchema(SerializationSchema)} and {@link + * #withKeySerializer(Class)} on the same builder instance. + * + * <p>It is necessary to configure exactly one serialization method for the value. 
+ * + * @param <IN> type of records to be serialized + * @see KafkaRecordSerializationSchema#builder(String) + */ +public class KafkaRecordSerializationSchemaBuilder<IN> { + + @Nullable private final String topic; + @Nullable private final Function<IN, String> topicSelector; + @Nullable private FlinkKafkaPartitioner<IN> partitioner; + @Nullable private SerializationSchema<IN> keySerializationSchema; + + private SerializationSchema<IN> valueSerializationSchema; + + KafkaRecordSerializationSchemaBuilder(String topic) { + this.topic = checkNotNull(topic); + this.topicSelector = null; + } + + KafkaRecordSerializationSchemaBuilder(Function<IN, String> topicSupplier) { + this.topicSelector = checkNotNull(topicSupplier); + this.topic = null; + } + + /** + * Sets a custom partitioner determining the target partition of the target topic. + * + * @param partitioner + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withPartitioner( + FlinkKafkaPartitioner<IN> partitioner) { + this.partitioner = checkNotNull(partitioner); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the key + * of the {@link ProducerRecord}. + * + * @param keySerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializationSchema( + SerializationSchema<IN> keySerializationSchema) { + checkState(this.keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = checkNotNull(keySerializationSchema); + return this; + } + + /** + * Sets Kafka's {@link Serializer} to serialize incoming elements to the key of the {@link + * ProducerRecord}. + * + * @param keySerializer + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withKeySerializer( + Class<? 
extends Serializer<IN>> keySerializer) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>(keySerializer, topic, topicSelector); + return this; + } + + /** + * Sets a configurable Kafka {@link Serializer} and pass a configuration to serialize incoming + * elements to the key of the {@link ProducerRecord}. + * + * @param keySerializerWithConfiguration + * @param configuration + * @param <T> type of the used serializer class + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public <T extends Configurable & Serializer<IN>> + KafkaRecordSerializationSchemaBuilder<IN> withConfigurableKeySerializer( + Class<T> keySerializerWithConfiguration, Map<String, String> configuration) { + checkState(keySerializationSchema == null, exceedsKeySerializerConfigurations()); + this.keySerializationSchema = + new KafkaSerializerWrapper<>( + keySerializerWithConfiguration, configuration, topic, topicSelector); + return this; + } + + /** + * Sets a {@link SerializationSchema} which is used to serialize the incoming element to the + * value of the {@link ProducerRecord}. + * + * @param valueSerializationSchema + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withValueSerializationSchema( + SerializationSchema<IN> valueSerializationSchema) { + checkState(this.valueSerializationSchema == null, exceedsValueSerializerConfigurations()); + this.valueSerializationSchema = checkNotNull(valueSerializationSchema); + return this; + } + + /** + * Sets Kafka's {@link Serializer} to serialize incoming elements to the value of the {@link + * ProducerRecord}. + * + * @param valueSerializer + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public KafkaRecordSerializationSchemaBuilder<IN> withValueSerializer( + Class<? 
extends Serializer<IN>> valueSerializer) { + checkState(valueSerializationSchema == null, exceedsValueSerializerConfigurations()); + this.valueSerializationSchema = + new KafkaSerializerWrapper<>(valueSerializer, topic, topicSelector); + return this; + } + + /** + * Sets a configurable Kafka {@link Serializer} and pass a configuration to serialize incoming + * elements to the value of the {@link ProducerRecord}. + * + * @param valueSerializerWithConfiguration + * @param configuration + * @param <T> type of the used serializer class + * @return {@link KafkaRecordSerializationSchemaBuilder} + */ + public <T extends Configurable & Serializer<IN>> + KafkaRecordSerializationSchemaBuilder<IN> withConfigurableValueSerializer( + Class<T> valueSerializerWithConfiguration, Map<String, String> configuration) { + checkState(valueSerializationSchema == null, exceedsValueSerializerConfigurations()); + this.valueSerializationSchema = + new KafkaSerializerWrapper<>( + valueSerializerWithConfiguration, configuration, topic, topicSelector); + return this; + } + + /** + * Constructs the {@link KafkaRecordSerializationSchemaBuilder} with the configured properties. + * + * @return {@link KafkaRecordSerializationSchema} + */ + public KafkaRecordSerializationSchema<IN> build() { + checkState(valueSerializationSchema != null, "No value serializer is configured."); + return new KafkaRecordSerializationSchema<>() { + + @Override + public void open( + SerializationSchema.InitializationContext context, KafkaSinkContext sinkContext) + throws Exception { + valueSerializationSchema.open(context); + if (keySerializationSchema != null) { + keySerializationSchema.open(context); + } + if (partitioner != null) { + partitioner.open( + sinkContext.getParallelInstanceId(), + sinkContext.getNumberOfParallelInstances()); + } + } + + @Override + public ProducerRecord<byte[], byte[]> serialize( + IN element, KafkaSinkContext context, Long timestamp) { + final String targetTopic = topic == null ? 
topicSelector.apply(element) : topic; + final byte[] value = valueSerializationSchema.serialize(element); + byte[] key = null; + if (keySerializationSchema != null) { + key = keySerializationSchema.serialize(element); + } + final OptionalInt partition = Review comment: `ProducerRecord` accepts `null` on all optional parameters, so I think we can simplify all cases to one. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@flink.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org