ahmedabu98 commented on code in PR #35230: URL: https://github.com/apache/beam/pull/35230#discussion_r2140485172
########## sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/FilterUtils.java: ########## @@ -72,6 +74,56 @@ class FilterUtils { .put(SqlKind.OR, Operation.OR) .build(); + /** + * Parses a SQL filter expression string and returns a set of all field names referenced within + * it. + */ + static Set<String> getReferencedFieldNames(@Nullable String filter) { + if (filter == null || filter.trim().isEmpty()) { + return new HashSet<>(); + } + + SqlParser parser = SqlParser.create(filter); + try { + SqlNode expression = parser.parseExpression(); + Set<String> fieldNames = new HashSet<>(); + extractFieldNames(expression, fieldNames); + System.out.println("xxx fields in filter: " + fieldNames); + return fieldNames; + } catch (Exception exception) { + throw new RuntimeException( + String.format("Encountered an error when parsing filter: '%s'", filter), exception); + } + } + + private static void extractFieldNames(SqlNode node, Set<String> fieldNames) { + if (node instanceof SqlIdentifier) { + fieldNames.add(((SqlIdentifier) node).getSimple()); + } else if (node instanceof SqlBasicCall) { + // recursively check operands + SqlBasicCall call = (SqlBasicCall) node; + for (SqlNode operand : call.getOperandList()) { + extractFieldNames(operand, fieldNames); + } + } else if (node instanceof SqlNodeList) { + // For IN clauses, the right-hand side is a SqlNodeList, so iterate through its elements + SqlNodeList nodeList = (SqlNodeList) node; + for (SqlNode element : nodeList.getList()) { + if (element != null) { + extractFieldNames(element, fieldNames); + } + } + } + // SqlLiteral nodes do not contain field names, so we can ignore them. Review Comment: Yep, not all SqlNodes will contain a field name -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscribe@beam.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org