liferoad commented on code in PR #35580: URL: https://github.com/apache/beam/pull/35580#discussion_r2205016418
########## sdks/python/apache_beam/options/pipeline_options.py: ########## @@ -220,6 +220,70 @@ def __call__(self, parser, namespace, values, option_string=None): % _GcsCustomAuditEntriesAction.MAX_ENTRIES) +class _CommaSeparatedListAction(argparse.Action): + """ + Argparse Action that splits comma-separated values and appends them to + a list. This allows options like --experiments=abc,def to be treated + as separate experiments 'abc' and 'def', similar to how Java SDK handles + them. + + For key=value experiments, only splits at commas that are not part of the + value. For example: 'abc,def,master_key=k1=v1,k2=v2' becomes + ['abc', 'def', 'master_key=k1=v1,k2=v2'] + """ + def __call__(self, parser, namespace, values, option_string=None): + if not hasattr(namespace, self.dest) or getattr(namespace, + self.dest) is None: + setattr(namespace, self.dest, []) + + # Split comma-separated values and extend the list + if isinstance(values, str): + # Smart splitting: only split at commas that are not part of + # key=value pairs + split_values = self._smart_split(values) + getattr(namespace, self.dest).extend(split_values) + else: + # If values is not a string, just append it + getattr(namespace, self.dest).append(values) + + def _smart_split(self, values): Review Comment: This will break the current behaviour tested by https://github.com/apache/beam/blob/2b289f2804d76e75e857928a058d2d7c33f85de3/sdks/python/apache_beam/options/pipeline_options_test.py#L636. This is why `master_key=k1=v1,k2=v2` should be treated as one key, not simply split by comma. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@beam.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org