chamikaramj commented on code in PR #35230: URL: https://github.com/apache/beam/pull/35230#discussion_r2139399292
########## sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/FilterUtils.java: ########## @@ -72,6 +74,56 @@ class FilterUtils { .put(SqlKind.OR, Operation.OR) .build(); + /** + * Parses a SQL filter expression string and returns a set of all field names referenced within + * it. + */ + static Set<String> getReferencedFieldNames(@Nullable String filter) { + if (filter == null || filter.trim().isEmpty()) { + return new HashSet<>(); + } + + SqlParser parser = SqlParser.create(filter); + try { + SqlNode expression = parser.parseExpression(); + Set<String> fieldNames = new HashSet<>(); + extractFieldNames(expression, fieldNames); + System.out.println("xxx fields in filter: " + fieldNames); + return fieldNames; + } catch (Exception exception) { + throw new RuntimeException( + String.format("Encountered an error when parsing filter: '%s'", filter), exception); + } + } + + private static void extractFieldNames(SqlNode node, Set<String> fieldNames) { + if (node instanceof SqlIdentifier) { + fieldNames.add(((SqlIdentifier) node).getSimple()); + } else if (node instanceof SqlBasicCall) { + // recursively check operands + SqlBasicCall call = (SqlBasicCall) node; + for (SqlNode operand : call.getOperandList()) { + extractFieldNames(operand, fieldNames); + } + } else if (node instanceof SqlNodeList) { + // For IN clauses, the right-hand side is a SqlNodeList, so iterate through its elements + SqlNodeList nodeList = (SqlNodeList) node; + for (SqlNode element : nodeList.getList()) { + if (element != null) { + extractFieldNames(element, fieldNames); + } + } + } + // SqlLiteral nodes do not contain field names, so we can ignore them. Review Comment: Is the pass-through for other types intentional?
########## sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergUtils.java: ########## @@ -426,10 +454,49 @@ public static Row icebergRecordToBeamRow(Schema schema, Record record) { case DOUBLE: // Iceberg and Beam both use double case STRING: // Iceberg and Beam both use String case BOOLEAN: // Iceberg and Beam both use boolean + rowBuilder.addValue(icebergValue); + break; case ARRAY: case ITERABLE: + checkState( + icebergValue instanceof List, Review Comment: Does the ITERABLE type always imply a List here? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@beam.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org