gustavodemorais commented on code in PR #27901:
URL: https://github.com/apache/flink/pull/27901#discussion_r3065054469
##########
flink-table/flink-table-common/src/main/java/org/apache/flink/table/functions/BuiltInFunctionDefinition.java:
##########
@@ -72,6 +74,9 @@ public final class BuiltInFunctionDefinition implements
SpecializedFunction {
private final SqlCallSyntax sqlCallSyntax;
+ private final @Nullable Function<ChangelogFunction.ChangelogContext,
ChangelogMode>
Review Comment:
```suggestion
private final @Nullable Function<ChangelogContext, ChangelogMode>
```
##########
flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/plan/optimize/program/FlinkChangelogModeInferenceProgram.scala:
##########
@@ -1707,21 +1710,10 @@ class FlinkChangelogModeInferenceProgram extends
FlinkOptimizeProgram[StreamOpti
val definition = ShortcutUtils.unwrapFunctionDefinition(call)
val inputChangelogModes = children.map(toChangelogMode(_, None, None))
val changelogModeOpt: Option[ChangelogMode] = definition match {
- // User-defined PTFs that implement ChangelogFunction
Review Comment:
did you remove this by accident?
##########
flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java:
##########
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.types.inference.strategies;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.DataTypes.Field;
+import org.apache.flink.table.api.ValidationException;
+import org.apache.flink.table.connector.ChangelogMode;
+import org.apache.flink.table.functions.ChangelogFunction;
+import org.apache.flink.table.functions.FunctionDefinition;
+import org.apache.flink.table.functions.TableSemantics;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.inference.ArgumentCount;
+import org.apache.flink.table.types.inference.CallContext;
+import org.apache.flink.table.types.inference.ConstantArgumentCount;
+import org.apache.flink.table.types.inference.InputTypeStrategy;
+import org.apache.flink.table.types.inference.Signature;
+import org.apache.flink.table.types.inference.Signature.Argument;
+import org.apache.flink.table.types.inference.TypeStrategy;
+import org.apache.flink.table.types.logical.LogicalTypeFamily;
+import org.apache.flink.types.ColumnList;
+import org.apache.flink.types.RowKind;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+/** Type strategies for the {@code FROM_CHANGELOG} process table function. */
+@Internal
+public final class FromChangelogTypeStrategy {
+
+ private static final String DEFAULT_OP_COLUMN_NAME = "op";
+
+ private static final Set<String> VALID_ROW_KIND_NAMES =
+ Set.of("INSERT", "UPDATE_BEFORE", "UPDATE_AFTER", "DELETE");
+
+ //
--------------------------------------------------------------------------------------------
+ // Input validation
+ //
--------------------------------------------------------------------------------------------
+
+ public static final InputTypeStrategy INPUT_TYPE_STRATEGY =
+ new InputTypeStrategy() {
+ @Override
+ public ArgumentCount getArgumentCount() {
+ return ConstantArgumentCount.between(1, 3);
+ }
+
+ @Override
+ public Optional<List<DataType>> inferInputTypes(
+ final CallContext callContext, final boolean
throwOnFailure) {
+ return validateInputs(callContext, throwOnFailure);
+ }
+
+ @Override
+ public List<Signature> getExpectedSignatures(final
FunctionDefinition definition) {
+ return List.of(
+ Signature.of(Argument.of("input", "TABLE")),
+ Signature.of(
+ Argument.of("input", "TABLE"),
Argument.of("op", "DESCRIPTOR")),
+ Signature.of(
+ Argument.of("input", "TABLE"),
+ Argument.of("op", "DESCRIPTOR"),
+ Argument.of("op_mapping", "MAP<STRING,
STRING>")));
+ }
+ };
+
+ //
--------------------------------------------------------------------------------------------
+ // Output type inference
+ //
--------------------------------------------------------------------------------------------
+
+ public static final TypeStrategy OUTPUT_TYPE_STRATEGY =
+ callContext -> {
+ final TableSemantics tableSemantics =
+ callContext
+ .getTableSemantics(0)
+ .orElseThrow(
+ () ->
+ new ValidationException(
+ "First argument must
be a table for FROM_CHANGELOG."));
+
+ final String opColumnName = resolveOpColumnName(callContext);
+
+ final List<Field> outputFields =
buildOutputFields(tableSemantics, opColumnName);
+
+ return Optional.of(DataTypes.ROW(outputFields).notNull());
+ };
+
+ //
--------------------------------------------------------------------------------------------
+ // Helpers
+ //
--------------------------------------------------------------------------------------------
+
+ @SuppressWarnings("rawtypes")
+ private static Optional<List<DataType>> validateInputs(
+ final CallContext callContext, final boolean throwOnFailure) {
+ final boolean isMissingTableArg =
callContext.getTableSemantics(0).isEmpty();
+ if (isMissingTableArg) {
+ return callContext.fail(
+ throwOnFailure, "First argument must be a table for
FROM_CHANGELOG.");
+ }
+
+ final Optional<ColumnList> opDescriptor =
callContext.getArgumentValue(1, ColumnList.class);
+ final boolean hasInvalidOpDescriptor =
+ opDescriptor.isPresent() &&
opDescriptor.get().getNames().size() != 1;
+ if (hasInvalidOpDescriptor) {
+ return callContext.fail(
+ throwOnFailure,
+ "The descriptor for argument 'op' must contain exactly one
column name.");
+ }
+
+ // Validate that the op column exists in the input schema and is of
STRING type
+ final TableSemantics tableSemantics =
callContext.getTableSemantics(0).get();
+ final String opColumnName = resolveOpColumnName(callContext);
+ final List<Field> inputFields =
DataType.getFields(tableSemantics.dataType());
+ final Optional<Field> opField =
+ inputFields.stream().filter(f ->
f.getName().equals(opColumnName)).findFirst();
+ if (opField.isEmpty()) {
+ return callContext.fail(
+ throwOnFailure,
+ String.format(
+ "The op column '%s' does not exist in the input
schema.",
+ opColumnName));
+ }
+ if
(!opField.get().getDataType().getLogicalType().is(LogicalTypeFamily.CHARACTER_STRING))
{
+ return callContext.fail(
+ throwOnFailure,
+ String.format(
+ "The op column '%s' must be of STRING type, but
was '%s'.",
+ opColumnName,
opField.get().getDataType().getLogicalType()));
+ }
+
+ final boolean hasMappingArgProvided = !callContext.isArgumentNull(2);
+ final boolean isMappingArgLiteral = callContext.isArgumentLiteral(2);
+ if (hasMappingArgProvided && !isMappingArgLiteral) {
+ return callContext.fail(
+ throwOnFailure, "The 'op_mapping' argument must be a
constant MAP literal.");
+ }
+
+ final Optional<Map> opMapping = callContext.getArgumentValue(2,
Map.class);
+ if (opMapping.isPresent()) {
+ final Optional<List<DataType>> validationError =
+ validateOpMappingValues(callContext, opMapping.get(),
throwOnFailure);
+ if (validationError.isPresent()) {
+ return validationError;
+ }
+ }
+
+ return Optional.of(callContext.getArgumentDataTypes());
+ }
+
+ /**
+ * Validates op_mapping values. Values must be valid RowKind names from
{INSERT, UPDATE_AFTER,
+ * DELETE}. Keys are arbitrary user strings (e.g., 'c', 'u', 'd') and may
be comma-separated to
+ * map multiple user codes to the same RowKind. Each RowKind name must
appear at most once
+ * across all entries.
+ */
+ private static Optional<List<DataType>> validateOpMappingValues(
+ final CallContext callContext,
+ final Map<?, ?> opMapping,
+ final boolean throwOnFailure) {
+ final Set<String> allRowKindsSeen = new HashSet<>();
+
+ for (final Entry<?, ?> entry : opMapping.entrySet()) {
+ if (!(entry.getKey() instanceof String)) {
+ return callContext.fail(
+ throwOnFailure, "Invalid target mapping for argument
'op_mapping'.");
+ }
+ final Object value = entry.getValue();
+ if (!(value instanceof String)) {
+ return callContext.fail(
+ throwOnFailure, "Invalid target mapping for argument
'op_mapping'.");
+ }
+ final String rowKindName = ((String) value).trim();
+ if (!VALID_ROW_KIND_NAMES.contains(rowKindName)) {
+ return callContext.fail(
+ throwOnFailure,
+ String.format(
+ "Invalid target mapping for argument
'op_mapping'. "
+ + "Unknown change operation: '%s'.
Valid values are: %s.",
+ rowKindName, VALID_ROW_KIND_NAMES));
+ }
+ final boolean isDuplicate = !allRowKindsSeen.add(rowKindName);
+ if (isDuplicate) {
+ return callContext.fail(
+ throwOnFailure,
+ String.format(
+ "Invalid target mapping for argument
'op_mapping'. "
+ + "Duplicate change operation: '%s'.",
+ rowKindName));
+ }
+ }
+ return Optional.empty();
+ }
+
+ private static String resolveOpColumnName(final CallContext callContext) {
+ return callContext
+ .getArgumentValue(1, ColumnList.class)
+ .filter(cl -> !cl.getNames().isEmpty())
+ .map(cl -> cl.getNames().get(0))
+ .orElse(DEFAULT_OP_COLUMN_NAME);
+ }
+
+ private static List<Field> buildOutputFields(
+ final TableSemantics tableSemantics, final String opColumnName) {
+ final Set<Integer> partitionKeys =
+ IntStream.of(tableSemantics.partitionByColumns())
+ .boxed()
+ .collect(Collectors.toSet());
+ final List<Field> inputFields =
DataType.getFields(tableSemantics.dataType());
+
+ // Exclude partition keys (prepended by framework) and the op column
(becomes RowKind)
+ return IntStream.range(0, inputFields.size())
+ .filter(
+ i ->
+ !partitionKeys.contains(i)
+ &&
!inputFields.get(i).getName().equals(opColumnName))
+ .mapToObj(inputFields::get)
+ .collect(Collectors.toList());
+ }
+
+ /**
+ * Resolves the output changelog mode based on the op_mapping argument. If
op_mapping is absent
+ * (default) or includes UPDATE_BEFORE, returns retract mode (all).
Otherwise, returns upsert
+ * mode (no UPDATE_BEFORE).
+ */
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ public static ChangelogMode resolveChangelogMode(
+ final ChangelogFunction.ChangelogContext changelogContext) {
Review Comment:
I think we almost always follow this pattern
```suggestion
final ChangelogContext changelogContext) {
```
You've done this in multiple locations. Can you take a look?
##########
flink-table/flink-table-planner/src/main/scala/org/apache/flink/table/planner/plan/optimize/program/FlinkChangelogModeInferenceProgram.scala:
##########
@@ -1705,17 +1708,21 @@ class FlinkChangelogModeInferenceProgram extends
FlinkOptimizeProgram[StreamOpti
defaultTraitSet: T): T = {
val call = process.getCall
val definition = ShortcutUtils.unwrapFunctionDefinition(call)
- definition match {
- case changelogFunction: ChangelogFunction =>
- val inputChangelogModes = children.map(toChangelogMode(_, None, None))
+ val inputChangelogModes = children.map(toChangelogMode(_, None, None))
Review Comment:
can we undo the changes here?
##########
flink-table/flink-table-common/src/main/java/org/apache/flink/table/types/inference/strategies/FromChangelogTypeStrategy.java:
##########
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.types.inference.strategies;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.DataTypes.Field;
+import org.apache.flink.table.api.ValidationException;
+import org.apache.flink.table.connector.ChangelogMode;
+import org.apache.flink.table.functions.ChangelogFunction;
+import org.apache.flink.table.functions.FunctionDefinition;
+import org.apache.flink.table.functions.TableSemantics;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.inference.ArgumentCount;
+import org.apache.flink.table.types.inference.CallContext;
+import org.apache.flink.table.types.inference.ConstantArgumentCount;
+import org.apache.flink.table.types.inference.InputTypeStrategy;
+import org.apache.flink.table.types.inference.Signature;
+import org.apache.flink.table.types.inference.Signature.Argument;
+import org.apache.flink.table.types.inference.TypeStrategy;
+import org.apache.flink.table.types.logical.LogicalTypeFamily;
+import org.apache.flink.types.ColumnList;
+import org.apache.flink.types.RowKind;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+/** Type strategies for the {@code FROM_CHANGELOG} process table function. */
+@Internal
+public final class FromChangelogTypeStrategy {
+
+ private static final String DEFAULT_OP_COLUMN_NAME = "op";
+
+ private static final Set<String> VALID_ROW_KIND_NAMES =
+ Set.of("INSERT", "UPDATE_BEFORE", "UPDATE_AFTER", "DELETE");
+
+ //
--------------------------------------------------------------------------------------------
+ // Input validation
+ //
--------------------------------------------------------------------------------------------
+
+ public static final InputTypeStrategy INPUT_TYPE_STRATEGY =
+ new InputTypeStrategy() {
+ @Override
+ public ArgumentCount getArgumentCount() {
+ return ConstantArgumentCount.between(1, 3);
+ }
+
+ @Override
+ public Optional<List<DataType>> inferInputTypes(
+ final CallContext callContext, final boolean
throwOnFailure) {
+ return validateInputs(callContext, throwOnFailure);
+ }
+
+ @Override
+ public List<Signature> getExpectedSignatures(final
FunctionDefinition definition) {
+ return List.of(
+ Signature.of(Argument.of("input", "TABLE")),
+ Signature.of(
+ Argument.of("input", "TABLE"),
Argument.of("op", "DESCRIPTOR")),
+ Signature.of(
+ Argument.of("input", "TABLE"),
+ Argument.of("op", "DESCRIPTOR"),
+ Argument.of("op_mapping", "MAP<STRING,
STRING>")));
+ }
+ };
+
+ //
--------------------------------------------------------------------------------------------
+ // Output type inference
+ //
--------------------------------------------------------------------------------------------
+
+ public static final TypeStrategy OUTPUT_TYPE_STRATEGY =
+ callContext -> {
+ final TableSemantics tableSemantics =
+ callContext
+ .getTableSemantics(0)
+ .orElseThrow(
+ () ->
+ new ValidationException(
+ "First argument must
be a table for FROM_CHANGELOG."));
+
+ final String opColumnName = resolveOpColumnName(callContext);
+
+ final List<Field> outputFields =
buildOutputFields(tableSemantics, opColumnName);
+
+ return Optional.of(DataTypes.ROW(outputFields).notNull());
+ };
+
+ //
--------------------------------------------------------------------------------------------
+ // Helpers
+ //
--------------------------------------------------------------------------------------------
+
+ @SuppressWarnings("rawtypes")
+ private static Optional<List<DataType>> validateInputs(
+ final CallContext callContext, final boolean throwOnFailure) {
+ final boolean isMissingTableArg =
callContext.getTableSemantics(0).isEmpty();
+ if (isMissingTableArg) {
+ return callContext.fail(
+ throwOnFailure, "First argument must be a table for
FROM_CHANGELOG.");
+ }
+
+ final Optional<ColumnList> opDescriptor =
callContext.getArgumentValue(1, ColumnList.class);
+ final boolean hasInvalidOpDescriptor =
+ opDescriptor.isPresent() &&
opDescriptor.get().getNames().size() != 1;
+ if (hasInvalidOpDescriptor) {
+ return callContext.fail(
+ throwOnFailure,
+ "The descriptor for argument 'op' must contain exactly one
column name.");
+ }
+
+ // Validate that the op column exists in the input schema and is of
STRING type
+ final TableSemantics tableSemantics =
callContext.getTableSemantics(0).get();
+ final String opColumnName = resolveOpColumnName(callContext);
+ final List<Field> inputFields =
DataType.getFields(tableSemantics.dataType());
+ final Optional<Field> opField =
+ inputFields.stream().filter(f ->
f.getName().equals(opColumnName)).findFirst();
+ if (opField.isEmpty()) {
+ return callContext.fail(
+ throwOnFailure,
+ String.format(
+ "The op column '%s' does not exist in the input
schema.",
+ opColumnName));
+ }
+ if
(!opField.get().getDataType().getLogicalType().is(LogicalTypeFamily.CHARACTER_STRING))
{
+ return callContext.fail(
+ throwOnFailure,
+ String.format(
+ "The op column '%s' must be of STRING type, but
was '%s'.",
+ opColumnName,
opField.get().getDataType().getLogicalType()));
+ }
+
+ final boolean hasMappingArgProvided = !callContext.isArgumentNull(2);
+ final boolean isMappingArgLiteral = callContext.isArgumentLiteral(2);
+ if (hasMappingArgProvided && !isMappingArgLiteral) {
+ return callContext.fail(
+ throwOnFailure, "The 'op_mapping' argument must be a
constant MAP literal.");
+ }
+
+ final Optional<Map> opMapping = callContext.getArgumentValue(2,
Map.class);
+ if (opMapping.isPresent()) {
+ final Optional<List<DataType>> validationError =
+ validateOpMappingValues(callContext, opMapping.get(),
throwOnFailure);
+ if (validationError.isPresent()) {
+ return validationError;
+ }
+ }
+
+ return Optional.of(callContext.getArgumentDataTypes());
+ }
+
+ /**
+ * Validates op_mapping values. Values must be valid RowKind names from
{INSERT, UPDATE_AFTER,
+ * DELETE}. Keys are arbitrary user strings (e.g., 'c', 'u', 'd') and may
be comma-separated to
+ * map multiple user codes to the same RowKind. Each RowKind name must
appear at most once
+ * across all entries.
+ */
+ private static Optional<List<DataType>> validateOpMappingValues(
+ final CallContext callContext,
+ final Map<?, ?> opMapping,
+ final boolean throwOnFailure) {
+ final Set<String> allRowKindsSeen = new HashSet<>();
+
+ for (final Entry<?, ?> entry : opMapping.entrySet()) {
+ if (!(entry.getKey() instanceof String)) {
+ return callContext.fail(
+ throwOnFailure, "Invalid target mapping for argument
'op_mapping'.");
+ }
+ final Object value = entry.getValue();
+ if (!(value instanceof String)) {
+ return callContext.fail(
+ throwOnFailure, "Invalid target mapping for argument
'op_mapping'.");
+ }
+ final String rowKindName = ((String) value).trim();
+ if (!VALID_ROW_KIND_NAMES.contains(rowKindName)) {
+ return callContext.fail(
+ throwOnFailure,
+ String.format(
+ "Invalid target mapping for argument
'op_mapping'. "
+ + "Unknown change operation: '%s'.
Valid values are: %s.",
+ rowKindName, VALID_ROW_KIND_NAMES));
+ }
+ final boolean isDuplicate = !allRowKindsSeen.add(rowKindName);
+ if (isDuplicate) {
+ return callContext.fail(
+ throwOnFailure,
+ String.format(
+ "Invalid target mapping for argument
'op_mapping'. "
+ + "Duplicate change operation: '%s'.",
Review Comment:
Some users may think they can (or need to) add two entries mapping different
user codes to the same RowKind (e.g., 'c' -> INSERT, 'r' -> INSERT).
I think we can have a better message here: "If you need multiple change
codes to map to the same type, use a comma-separated list, e.g. 'c, r' ->
'INSERT'." What do you think?
##########
flink-table/flink-table-planner/src/test/java/org/apache/flink/table/planner/plan/nodes/exec/stream/FromChangelogSemanticTests.java:
##########
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.planner.plan.nodes.exec.stream;
+
+import org.apache.flink.table.api.TableConfig;
+import org.apache.flink.table.api.config.OptimizerConfigOptions;
+import
org.apache.flink.table.planner.plan.nodes.exec.testutils.SemanticTestBase;
+import org.apache.flink.table.test.program.TableTestProgram;
+
+import java.util.List;
+
+/** Semantic tests for the built-in FROM_CHANGELOG process table function. */
+public class FromChangelogSemanticTests extends SemanticTestBase {
+
+ @Override
+ protected void applyDefaultEnvironmentOptions(TableConfig config) {
+ super.applyDefaultEnvironmentOptions(config);
+ config.set(
+
OptimizerConfigOptions.TABLE_OPTIMIZER_NONDETERMINISTIC_UPDATE_STRATEGY,
+ OptimizerConfigOptions.NonDeterministicUpdateStrategy.IGNORE);
+ }
+
+ @Override
+ public List<TableTestProgram> programs() {
+ return List.of(
+ FromChangelogTestPrograms.DEFAULT_OP_MAPPING,
+ FromChangelogTestPrograms.DEBEZIUM_MAPPING,
+ FromChangelogTestPrograms.UNMAPPED_CODES_DROPPED,
+ FromChangelogTestPrograms.TABLE_API_DEFAULT,
+ FromChangelogTestPrograms.MISSING_PARTITION_BY);
Review Comment:
I think there are two good tests we could add here:
- a round-trip test, FROM_CHANGELOG(TO_CHANGELOG(table)) (important; might work
better once we merge https://github.com/apache/flink/pull/27911, which is
approved)
- a custom op column name: there's no test for op => DESCRIPTOR(operation)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]